输出预测结果

main
鸽子 10 months ago
parent a810f88dad
commit 709ee960a5

Binary file not shown.

@ -87,8 +87,8 @@ import numpy as np
X_eval = np.array([ X_eval = np.array([
[21,10,10,0,0], [21,10,10,0,0],
[21, 11, 10, 0, 0], [21, 11, 10, 0, 0],
[18, 7, 10, 0, 0], [20, 8, 10, 0, 0],
[17, 9, 10, 0, 0], [20, 8, 10, 0, 0],
[17, 10, 10, 0, 0] [17, 10, 10, 0, 0]
]) ])
print(model.predict(X_eval)) print(model.predict(X_eval))

@ -85,7 +85,7 @@ model.save_model('taizhou.bin')
import numpy as np import numpy as np
X_eval = np.array([ X_eval = np.array([
[19,11,10,0,0], [19,11,10,1,0],
[21, 7, 10, 0, 0], [21, 7, 10, 0, 0],
[19, 5, 10, 0, 0], [19, 5, 10, 0, 0],
[17, 8, 10, 0, 0], [17, 8, 10, 0, 0],

@ -45,7 +45,7 @@ data = data.loc[normal(data['售电量']).index]
# print(list0,list1,list2) # print(list0,list1,list2)
data['season'] = data.index.map(season) data['season'] = data.index.map(season)
df_eval = data.loc['2023-10'] df_eval = data.loc['2023-11']
# data = data.loc[:'2023-8'] # data = data.loc[:'2023-8']
df_train = data[450:-1] df_train = data[450:-1]
# df_train = data[450:][:-3] # df_train = data[450:][:-3]
@ -85,16 +85,16 @@ print(goal2)
# x = goal2 # x = goal2
# print(best_i,best_goal,x) # print(best_i,best_goal,x)
print(result_eval) print(result_eval)
# 保存模型 # # 保存模型
model.save_model('shaoxing.bin') # model.save_model('shaoxing.bin')
loaded_model = xgb.XGBRegressor() # loaded_model = xgb.XGBRegressor()
loaded_model.load_model('shaoxing.bin') # loaded_model.load_model('shaoxing.bin')
import numpy as np import numpy as np
X_eval = np.array([ X_eval = np.array([
[17, 9, 10, 0, 0], [16.2, 8.2, 10, 1, 0],
[20, 6, 10, 0, 0], [20, 6, 10, 0, 0],
[17, 5, 10, 0, 0], [19, 7, 10, 0, 0],
[16, 8, 10, 0, 0], [16, 8, 10, 0, 0],
[12, 7, 10, 0, 0] [12, 7, 10, 0, 0]
]) ])

@ -60,10 +60,31 @@
# df = pd.concat(list1,ignore_index=True) # df = pd.concat(list1,ignore_index=True)
# df.to_csv('各市行业电量预测结果.csv',encoding='gbk') # df.to_csv('各市行业电量预测结果.csv',encoding='gbk')
# print(df) # print(df)
import os
from openpyxl import Workbook
import pandas as pd import pandas as pd
df = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江省11月分行业售电量预测v2.xlsx',sheet_name=1) # df = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江省11月分行业售电量预测v2.xlsx',sheet_name=1)
print(df.head()) # print(df.head())
print(df[df.columns[2:]].groupby(df['city_name']).sum().T) # print(df[df.columns[2:]].groupby(df['city_name']).sum().T)
df2 = df[df.columns[2:]].groupby(df['city_name']).sum().T # df2 = df[df.columns[2:]].groupby(df['city_name']).sum().T
df2.to_excel(r'C:\Users\鸽子\Desktop\1.xlsx') # df2.to_excel(r'C:\Users\鸽子\Desktop\1.xlsx')
file_dir = r'C:\Users\鸽子\Desktop\11月区县分压预测'
for file in os.listdir(file_dir):
city = file[:-5]
wb = Workbook()
wb.save(fr'C:\Users\鸽子\Desktop\11月区县分压汇总\{city}.xlsx')
for file in os.listdir(file_dir):
city = file[:-5]
excel_file = pd.ExcelFile(os.path.join(file_dir,file))
sheet_names = excel_file.sheet_names[1:]
for sheet in sheet_names:
df = excel_file.parse(sheet)
df_result = df[df.columns[1:]].sum()
df_result = pd.DataFrame(df_result)
df_result.columns = ['售电量']
with pd.ExcelWriter(fr'C:\Users\鸽子\Desktop\11月区县分压汇总\{city}.xlsx', mode='a', engine='openpyxl',
if_sheet_exists='replace') as writer:
df_result.to_excel(writer, sheet_name=f'{sheet}')

@ -9,47 +9,51 @@ pd.set_option('display.width',None)
def normal(x): def normal(x):
high = x.describe()['75%'] + 1.5*(x.describe()['75%']-x.describe()['25%']) high = x.describe()['75%'] + 1.5*(x.describe()['75%']-x.describe()['25%'])
low = x.describe()['25%'] - 1.5*(x.describe()['75%']-x.describe()['25%']) low = x.describe()['25%'] - 1.5*(x.describe()['75%']-x.describe()['25%'])
return x[(x<=high)&(x>=low)] return (x<=high)&(x>=low)
df = pd.read_csv(r'C:\Users\鸽子\Desktop\浙江各区县数据(2).csv') df = pd.read_csv(r'C:\Users\鸽子\Desktop\浙江各区县数据(2).csv')
df.columns = df.columns.map(lambda x:x.strip()) df.columns = df.columns.map(lambda x:x.strip())
df.drop(columns=['500kv(含330kv)及以上','220kv','110kv(含66kv)','20kv','power_sal'],inplace=True) df.dropna(subset=['city_name','county_name'],inplace=True)
print(df.info())
print(df.columns) print(df.columns)
print(dict(zip(df.columns,[(df[x]==0).sum()/len(df) for x in df.columns]))) print(dict(zip(df.columns,[(df[x]==0).sum()/len(df) for x in df.columns])))
yc_org_list = [] yc_org_list = []
list_fl = [] list_fl = []
list_org = [] list_org = []
for city in df[''].drop_duplicates(): list1 = []
df_ct = df[df['']==city] for city in df['city_name'].drop_duplicates():
# wb = Workbook() wb = Workbook()
# wb.save(fr'C:\Users\鸽子\Desktop\9月0.4kv区县预测\{city}.xlsx') wb.save(fr'C:\Users\鸽子\Desktop\11月区县分压预测\{city}.xlsx')
for org in df_ct['org_name'].drop_duplicates():
if org.strip()[-4:] != '供电公司': for org in df['county_name'].drop_duplicates():
continue if org.strip()[-4:] != '供电公司':
df_org = df_ct[df_ct['org_name']==org] continue
df_org['1-10kv'] /= 10000 df_org = df[df['county_name']==org]
df_org['35kv'] /= 10000 city = df_org['city_name'].iloc[0]
df_org['0.4kv及以下'] /= 10000 df_result = pd.DataFrame({})
s1 = df_org[['日期','0.4kv及以下']] for level in df_org.columns[3:]:
s1 = df_org[['pt_date',level]]
s1.replace(0,np.NaN,inplace=True) s1.replace(0,np.NaN,inplace=True)
s1.dropna(how='any',inplace=True) s1.dropna(how='any',inplace=True)
# plt.plot(range(len(s1)),s1['1-10kv'])
# plt.show()
# 更改列名更改为Prophet指定的列名ds和y # 更改列名更改为Prophet指定的列名ds和y
dd = s1.rename(columns={'日期':'ds','0.4kv及以下':'y'}) dd = s1.rename(columns={'pt_date':'ds',level:'y'})
dd['ds'] = dd['ds'].map(lambda x:x.strip())
dd['ds'] = pd.to_datetime(dd['ds']) dd['ds'] = pd.to_datetime(dd['ds'])
dd.drop_duplicates(inplace=True)
# 划分数据划分为训练集和验证集预测的数据设置为未来4天
df_train = dd[(dd['ds']>='2023-01-01')&(dd['ds']<='2023-11-30')]
# 划分数据划分为训练集和验证集预测的数据设置为未来3天 # df_train = df_train.loc[normal(df_train['y']).index]
df_train = dd[(dd['ds']>='2022-01-01')&(dd['ds']<='2023-07-31')][:-3] df_train['y'] = df_train['y'].where(normal(df_train['y']),other=np.nan).bfill()
df_train = df_train.loc[normal(df_train['y']).index]
if df_train.shape[0] <= 180: if df_train.shape[0] <= 90:
yc_org_list.append(org) yc_org_list.append(org)
continue continue
df_test = dd[(dd['ds']>='2022-01-01')&(dd['ds']<='2023-07-31')][-3:] # df_test = dd[(dd['ds']>='2022-01-01')&(dd['ds']<='2023-07-31')][-3:]
# 数据的变动会受到季节、周、天的影响存在一定的规律性因此我们将这三个参数设置为True # 数据的变动会受到季节、周、天的影响存在一定的规律性因此我们将这三个参数设置为True
model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True) model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True)
# 采用中国的假期模式,其余参数均保持默认 # 采用中国的假期模式,其余参数均保持默认
@ -57,7 +61,7 @@ for city in df['市'].drop_duplicates():
model.fit(df_train) model.fit(df_train)
# make_future_dataframe: 作用是告诉模型我们要预测多长时间,以及时间的周期是什么。生成一个时间戳 # make_future_dataframe: 作用是告诉模型我们要预测多长时间,以及时间的周期是什么。生成一个时间戳
future = model.make_future_dataframe(periods=3, freq='D') future = model.make_future_dataframe(periods=4, freq='D')
# 进行预测返回预测的结果forecast # 进行预测返回预测的结果forecast
forecast = model.predict(future) forecast = model.predict(future)
@ -65,37 +69,43 @@ for city in df['市'].drop_duplicates():
# 有forecast['yhat'] = forecast['trend'] + forecast['additive_terms'] 。 # 有forecast['yhat'] = forecast['trend'] + forecast['additive_terms'] 。
# 因此forecast['yhat'] = forecast['trend'] +forecast['weekly'] + forecast['yearly']。 # 因此forecast['yhat'] = forecast['trend'] +forecast['weekly'] + forecast['yearly']。
# 如果有节假日因素那么就会有forecast['yhat'] = forecast['trend'] +forecast['weekly'] + forecast['yearly'] + forecast['holidays']。 # 如果有节假日因素那么就会有forecast['yhat'] = forecast['trend'] +forecast['weekly'] + forecast['yearly'] + forecast['holidays']。
# print(forecast)
# 测试把ds列即data_series列设置为索引列 # 测试把ds列即data_series列设置为索引列
df_test = df_test.set_index('ds') # df_test = df_test.set_index('ds')
# 把预测到的数据取出ds列预测值列yhat同样把ds列设置为索引列。 # 把预测到的数据取出ds列预测值列yhat同样把ds列设置为索引列。
forecast = forecast[['ds','yhat']].set_index('ds') forecast = forecast[['ds','yhat']].set_index('ds').sort_index(ascending=True).loc['2023-11']
# 将预测列前25天替换为真实值
forecast.loc['2023-11'][:25] = dd.set_index('ds').loc['2023-11'][:25]
if len(forecast) < 334:
list1.append(org)
# join:按照索引进行连接, # join:按照索引进行连接,
# dropna能够找到DataFrame类型数据的空值缺失值将空值所在的行/列删除后将新的DataFrame作为返回值返回。 forecast.columns = [level]
df_all = forecast.join(dd.set_index('ds')).dropna()
df_all['org_name'] = org df_result = pd.concat([df_result,forecast],axis=1)
df_all['偏差率'] = (df_all['y'] - df_all['yhat'])/df_all['y']
df_all.rename(columns={'y':'真实值','yhat':'预测值'},inplace=True) # df_all = forecast.join(dd.set_index('ds')).dropna()
df_all = df_all[['org_name','真实值','预测值','偏差率']] # df_all['org_name'] = org
# df_all['偏差率'] = (df_all['y'] - df_all['yhat'])/df_all['y']
# df_all.rename(columns={'y':'真实值','yhat':'预测值'},inplace=True)
# df_all = df_all[['org_name','真实值','预测值','偏差率']]
list_org.append(org) list_org.append(org)
try:
result = df_all.loc['2023-7']
result['goal'] = (result['真实值'] - result['预测值'])[-3:].sum()/result['真实值'].sum()
list_fl.append((result['真实值'] - result['预测值'])[-3:].sum()/result['真实值'].sum())
# with pd.ExcelWriter(fr'C:\Users\鸽子\Desktop\9月0.4kv区县预测\{city}.xlsx',mode='a',engine='openpyxl',if_sheet_exists='replace') as writer:
# result.to_excel(writer,sheet_name=f'{org}')
except:
yc_org_list.append(org)
# result = df_all.loc['2023-7']
# result['goal'] = (result['真实值'] - result['预测值'])[-3:].sum()/result['真实值'].sum()
# list_fl.append((result['真实值'] - result['预测值'])[-3:].sum()/result['真实值'].sum())
with pd.ExcelWriter(fr'C:\Users\鸽子\Desktop\11月区县分压预测\{city}.xlsx',mode='a',engine='openpyxl',if_sheet_exists='replace') as writer:
df_result.to_excel(writer,sheet_name=f'{org}')
print(yc_org_list)
df = pd.DataFrame({'org':list_org,'goal':list_fl}) df = pd.DataFrame({'org':list_org,'goal':list_fl})
print(df) print(df)
print(df['goal'].value_counts(bins=[-0.05,-0.01,-0.005,0, 0.005, 0.01, 0.02,0.05],sort=False)) print(df['goal'].value_counts(bins=[-0.05,-0.01,-0.005,0, 0.005, 0.01, 0.02,0.05],sort=False))
# print(yc_org_list)
# # 创建一个ExcelWriter对象 # # 创建一个ExcelWriter对象
# with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\output.xlsx',mode='a',if_sheet_exists='replace') as writer: # with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\output.xlsx',mode='a',if_sheet_exists='replace') as writer:
# # 将不同的子文件写入同一个Excel文件的不同工作表 # # 将不同的子文件写入同一个Excel文件的不同工作表
@ -107,4 +117,3 @@ print(df['goal'].value_counts(bins=[-0.05,-0.01,-0.005,0, 0.005, 0.01, 0.02,0.05
# plt.show() # plt.show()

Loading…
Cancel
Save