|
|
|
@ -9,47 +9,51 @@ pd.set_option('display.width',None)
|
|
|
|
|
def normal(x):
    """Flag the values of *x* that lie inside the 1.5 * IQR fences.

    Args:
        x: Numeric pandas Series.

    Returns:
        Boolean Series with the same index as ``x``. An entry is True when
        the value lies within ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]`` (both
        bounds inclusive) and False otherwise; NaN entries compare False.
    """
    # describe() recomputes every summary statistic, so evaluate it once
    # instead of once per quartile lookup.
    summary = x.describe()
    q1 = summary['25%']
    q3 = summary['75%']
    iqr = q3 - q1
    high = q3 + 1.5 * iqr
    low = q1 - 1.5 * iqr
    # Return the mask rather than the filtered values: callers that pass the
    # result to Series.where() need a boolean condition aligned with x.
    return (x <= high) & (x >= low)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Load the per-county CSV and tidy it before any forecasting is done.
df = pd.read_csv(r'C:\Users\鸽子\Desktop\浙江各区县数据(2).csv')
|
|
|
|
|
# Strip surrounding whitespace from every column name.
df.columns = df.columns.map(lambda x:x.strip())
|
|
|
|
|
# Remove the listed columns in place; they are not used below.
df.drop(columns=['500kv(含330kv)及以上','220kv','110kv(含66kv)','20kv','power_sal'],inplace=True)
|
|
|
|
|
# Discard rows that have no city_name or no county_name.
df.dropna(subset=['city_name','county_name'],inplace=True)
|
|
|
|
|
# NOTE(review): DataFrame.info() prints its report itself and returns None,
# so this call also prints the literal "None".
print(df.info())
|
|
|
|
|
print(df.columns)
|
|
|
|
|
|
|
|
|
|
# For each column, print the fraction of rows whose value equals 0.
print(dict(zip(df.columns,[(df[x]==0).sum()/len(df) for x in df.columns])))
|
|
|
|
|
# NOTE(review): this region reads like a rendered diff: indentation is missing
# and two generations of the loops are interleaved (columns '市' / 'org_name' /
# '日期' next to 'city_name' / 'county_name' / 'pt_date'). Presumably the former
# are the pre-change lines -- confirm which set is live before running.
#
# Accumulators filled further down:
#   yc_org_list - orgs skipped for too little training data, or whose
#                 evaluation raised
#   list_fl     - one 'goal' ratio per evaluated org
#   list_org    - every org that reached the evaluation step
yc_org_list = []
|
|
|
|
|
list_fl = []
|
|
|
|
|
list_org = []
|
|
|
|
|
for city in df['市'].drop_duplicates():
|
|
|
|
|
df_ct = df[df['市']==city]
|
|
|
|
|
# wb = Workbook()
|
|
|
|
|
# wb.save(fr'C:\Users\鸽子\Desktop\9月0.4kv区县预测\{city}.xlsx')
|
|
|
|
|
for org in df_ct['org_name'].drop_duplicates():
|
|
|
|
|
# Collects orgs whose forecast frame turns out shorter than 334 rows.
list1 = []
|
|
|
|
|
# Create one empty workbook per distinct city; the ExcelWriter further down
# opens these files in append mode, so they must already exist.
for city in df['city_name'].drop_duplicates():
|
|
|
|
|
wb = Workbook()
|
|
|
|
|
wb.save(fr'C:\Users\鸽子\Desktop\11月区县分压预测\{city}.xlsx')
|
|
|
|
|
|
|
|
|
|
# Process each distinct county_name in turn.
for org in df['county_name'].drop_duplicates():
|
|
|
|
|
# Skip names that do not end with '供电公司'.
if org.strip()[-4:] != '供电公司':
|
|
|
|
|
continue
|
|
|
|
|
df_org = df_ct[df_ct['org_name']==org]
|
|
|
|
|
# Scale the three voltage columns down by a factor of 10,000.
# NOTE(review): df_org is a filtered selection of df_ct, so these in-place
# updates may raise SettingWithCopyWarning -- confirm.
df_org['1-10kv'] /= 10000
|
|
|
|
|
df_org['35kv'] /= 10000
|
|
|
|
|
df_org['0.4kv及以下'] /= 10000
|
|
|
|
|
s1 = df_org[['日期','0.4kv及以下']]
|
|
|
|
|
# Rows belonging to this county.
df_org = df[df['county_name']==org]
|
|
|
|
|
# City of the first row for this county; used below to pick the workbook.
city = df_org['city_name'].iloc[0]
|
|
|
|
|
# Collects one forecast column per level for this org.
df_result = pd.DataFrame({})
|
|
|
|
|
# Iterate over every column from the fourth onward (presumably the
# voltage-level columns -- verify against the CSV layout).
for level in df_org.columns[3:]:
|
|
|
|
|
s1 = df_org[['pt_date',level]]
|
|
|
|
|
# Treat zeros as missing and drop the rows that contain them.
# NOTE(review): np.NaN was removed in NumPy 2.0; np.nan is the supported name.
s1.replace(0,np.NaN,inplace=True)
|
|
|
|
|
s1.dropna(how='any',inplace=True)
|
|
|
|
|
|
|
|
|
|
# plt.plot(range(len(s1)),s1['1-10kv'])
|
|
|
|
|
# plt.show()
|
|
|
|
|
|
|
|
|
|
# Rename the columns to the names Prophet requires: ds and y
# NOTE(review): two rename lines follow (fixed column names, then the
# per-level names); they look like the old and new sides of a diff.
dd = s1.rename(columns={'日期':'ds','0.4kv及以下':'y'})
|
|
|
|
|
dd = s1.rename(columns={'pt_date':'ds',level:'y'})
|
|
|
|
|
# Strip whitespace from the date strings, parse them, and drop duplicate rows.
dd['ds'] = dd['ds'].map(lambda x:x.strip())
|
|
|
|
|
dd['ds'] = pd.to_datetime(dd['ds'])
|
|
|
|
|
dd.drop_duplicates(inplace=True)
|
|
|
|
|
|
|
|
|
|
# Split the data into training and validation sets; the forecast horizon is the next 4 days
df_train = dd[(dd['ds']>='2023-01-01')&(dd['ds']<='2023-11-30')]
|
|
|
|
|
|
|
|
|
|
# Split the data into training and validation sets; the forecast horizon is the next 3 days
# Everything in the date window except its last three rows.
df_train = dd[(dd['ds']>='2022-01-01')&(dd['ds']<='2023-07-31')][:-3]
|
|
|
|
|
# Keep only the rows whose index appears in the result of normal().
# NOTE(review): this filters only if normal() returns the filtered values; if
# it returns a boolean mask, .index is the full index and nothing is removed.
df_train = df_train.loc[normal(df_train['y']).index]
|
|
|
|
|
if df_train.shape[0] <= 180:
|
|
|
|
|
# df_train = df_train.loc[normal(df_train['y']).index]
|
|
|
|
|
# Blank out the values for which normal() is False, then back-fill each gap
# from the next valid value (requires normal() to return a boolean mask).
df_train['y'] = df_train['y'].where(normal(df_train['y']),other=np.nan).bfill()
|
|
|
|
|
|
|
|
|
|
# With 90 or fewer training rows, record the org and skip it.
if df_train.shape[0] <= 90:
|
|
|
|
|
yc_org_list.append(org)
|
|
|
|
|
continue
|
|
|
|
|
# The last three rows of the date window.
df_test = dd[(dd['ds']>='2022-01-01')&(dd['ds']<='2023-07-31')][-3:]
|
|
|
|
|
# df_test = dd[(dd['ds']>='2022-01-01')&(dd['ds']<='2023-07-31')][-3:]
|
|
|
|
|
# The data is affected by season, week, and day and shows some regularity, so all three seasonality parameters are set to True
model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True)
|
|
|
|
|
# Use the Chinese holiday calendar; all other parameters keep their defaults
|
|
|
|
@ -57,7 +61,7 @@ for city in df['市'].drop_duplicates():
|
|
|
|
|
|
|
|
|
|
# Fit the Prophet model on the training rows.
model.fit(df_train)
|
|
|
|
|
# make_future_dataframe: tells the model how far ahead to forecast and at what frequency; it generates the timestamps
# NOTE(review): two calls follow (periods=3, then periods=4); they look like
# the old and new sides of a diff. Run in sequence, the second simply wins.
future = model.make_future_dataframe(periods=3, freq='D')
|
|
|
|
|
future = model.make_future_dataframe(periods=4, freq='D')
|
|
|
|
|
|
|
|
|
|
# Run the prediction and keep the result in forecast
forecast = model.predict(future)
|
|
|
|
@ -65,37 +69,43 @@ for city in df['市'].drop_duplicates():
|
|
|
|
|
# We have: forecast['yhat'] = forecast['trend'] + forecast['additive_terms'].
|
|
|
|
|
# Hence: forecast['yhat'] = forecast['trend'] + forecast['weekly'] + forecast['yearly'].
|
|
|
|
|
# If there are holiday effects, then forecast['yhat'] = forecast['trend'] + forecast['weekly'] + forecast['yearly'] + forecast['holidays'].
|
|
|
|
|
# print(forecast)
|
|
|
|
|
|
|
|
|
|
# Test set: make the ds column (i.e. the data_series column) the index
df_test = df_test.set_index('ds')
|
|
|
|
|
# df_test = df_test.set_index('ds')
|
|
|
|
|
|
|
|
|
|
# Take the ds column and the predicted-value column yhat from the forecast, and likewise make ds the index.
# NOTE(review): two assignments follow; the second additionally sorts by date
# and keeps only the rows of November 2023. They look like the old and new
# sides of a diff -- run in sequence, the second would fail because 'ds' is
# already the index.
forecast = forecast[['ds','yhat']].set_index('ds')
|
|
|
|
|
forecast = forecast[['ds','yhat']].set_index('ds').sort_index(ascending=True).loc['2023-11']
|
|
|
|
|
|
|
|
|
|
# Replace the first 25 days of the forecast column with the actual values
# NOTE(review): this assigns through chained indexing (.loc[...][:25] = ...),
# which may write to a temporary copy instead of forecast; the right-hand
# frame's column is 'y' while forecast's is 'yhat'. Confirm the values land.
forecast.loc['2023-11'][:25] = dd.set_index('ds').loc['2023-11'][:25]
|
|
|
|
|
|
|
|
|
|
# Record orgs whose forecast frame has fewer than 334 rows.
if len(forecast) < 334:
|
|
|
|
|
list1.append(org)
|
|
|
|
|
# join: joins on the index,
|
|
|
|
|
# dropna: finds the null (missing) values in a DataFrame, removes the rows/columns that contain them, and returns the result as a new DataFrame.
df_all = forecast.join(dd.set_index('ds')).dropna()
|
|
|
|
|
df_all['org_name'] = org
|
|
|
|
|
# Relative deviation of the prediction from the actual value.
df_all['偏差率'] = (df_all['y'] - df_all['yhat'])/df_all['y']
|
|
|
|
|
df_all.rename(columns={'y':'真实值','yhat':'预测值'},inplace=True)
|
|
|
|
|
df_all = df_all[['org_name','真实值','预测值','偏差率']]
|
|
|
|
|
# Name the single forecast column after the current level ...
forecast.columns = [level]
|
|
|
|
|
|
|
|
|
|
# ... and append it as a new column of this org's result frame.
df_result = pd.concat([df_result,forecast],axis=1)
|
|
|
|
|
|
|
|
|
|
# df_all = forecast.join(dd.set_index('ds')).dropna()
|
|
|
|
|
# df_all['org_name'] = org
|
|
|
|
|
# df_all['偏差率'] = (df_all['y'] - df_all['yhat'])/df_all['y']
|
|
|
|
|
# df_all.rename(columns={'y':'真实值','yhat':'预测值'},inplace=True)
|
|
|
|
|
# df_all = df_all[['org_name','真实值','预测值','偏差率']]
|
|
|
|
|
|
|
|
|
|
# Evaluate the forecast for this org and export the per-level results.
# NOTE(review): live and commented-out copies of the evaluation code are
# interleaved below; they look like the old and new sides of a diff.
list_org.append(org)
|
|
|
|
|
try:
|
|
|
|
|
# Rows of July 2023 (partial-string selection on the date index).
result = df_all.loc['2023-7']
|
|
|
|
|
# goal = sum of (actual - predicted) over the last three rows, divided by the
# sum of the actual values over the whole selection.
result['goal'] = (result['真实值'] - result['预测值'])[-3:].sum()/result['真实值'].sum()
|
|
|
|
|
list_fl.append((result['真实值'] - result['预测值'])[-3:].sum()/result['真实值'].sum())
|
|
|
|
|
|
|
|
|
|
# with pd.ExcelWriter(fr'C:\Users\鸽子\Desktop\9月0.4kv区县预测\{city}.xlsx',mode='a',engine='openpyxl',if_sheet_exists='replace') as writer:
|
|
|
|
|
# result.to_excel(writer,sheet_name=f'{org}')
|
|
|
|
|
# NOTE(review): a bare except also swallows KeyboardInterrupt and SystemExit,
# and list_org already holds this org, so list_org and list_fl can end up with
# different lengths. Narrowing the exception type would be safer -- confirm
# which failures are expected here.
except:
|
|
|
|
|
yc_org_list.append(org)
|
|
|
|
|
|
|
|
|
|
# result = df_all.loc['2023-7']
|
|
|
|
|
# result['goal'] = (result['真实值'] - result['预测值'])[-3:].sum()/result['真实值'].sum()
|
|
|
|
|
# list_fl.append((result['真实值'] - result['预测值'])[-3:].sum()/result['真实值'].sum())
|
|
|
|
|
|
|
|
|
|
# Append to the existing per-city workbook, writing this org's results to a
# sheet named after the org and replacing that sheet if it already exists.
with pd.ExcelWriter(fr'C:\Users\鸽子\Desktop\11月区县分压预测\{city}.xlsx',mode='a',engine='openpyxl',if_sheet_exists='replace') as writer:
|
|
|
|
|
df_result.to_excel(writer,sheet_name=f'{org}')
|
|
|
|
|
|
|
|
|
|
# Report the orgs that were skipped or failed evaluation.
print(yc_org_list)
|
|
|
|
|
# Rebind df (until now the source data) to a per-org summary of the goal ratio.
# NOTE(review): the constructor raises if list_org and list_fl differ in length.
df = pd.DataFrame({'org':list_org,'goal':list_fl})
|
|
|
|
|
print(df)
|
|
|
|
|
# Count how many goal values fall into each listed bin, kept in bin order.
print(df['goal'].value_counts(bins=[-0.05,-0.01,-0.005,0, 0.005, 0.01, 0.02,0.05],sort=False))
|
|
|
|
|
# print(yc_org_list)
|
|
|
|
|
|
|
|
|
|
# # Create an ExcelWriter object
|
|
|
|
|
# with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\output.xlsx',mode='a',if_sheet_exists='replace') as writer:
|
|
|
|
|
# # Write the different sub-files to separate sheets of the same Excel file
|
|
|
|
@ -107,4 +117,3 @@ print(df['goal'].value_counts(bins=[-0.05,-0.01,-0.005,0, 0.005, 0.01, 0.02,0.05
|
|
|
|
|
# plt.show()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|