Merge remote-tracking branch 'origin/main'

main
get 1 year ago
commit f6878b8505

@ -1,4 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="pytorch_gpu" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="C:\anaconda\envs\pytorch" project-jdk-type="Python SDK" />
</project>

@ -2,7 +2,7 @@
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="pytorch_gpu" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="C:\anaconda\envs\pytorch" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

@ -1,120 +0,0 @@
import xgboost as xgb
import pandas as pd
import os
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rcParams['font.sans-serif']=['kaiti']
pd.set_option('display.width',None)
def hf_season(x, year=2021):
    """Return the months of ``year`` whose mean reaches the overall 75th percentile.

    Args:
        x: pandas Series with a DatetimeIndex, so that partial-string
            lookups such as ``x.loc['2021-3']`` select one month of data.
        year: calendar year whose twelve months are inspected. Defaults to
            2021, the value that was previously hard-coded.

    Returns:
        list[int]: month numbers (1-12), in ascending order, whose mean is
        greater than or equal to the 75th percentile of the whole series.
    """
    # The threshold is taken over the whole series, so it is loop-invariant:
    # compute it once instead of calling describe() for every month.
    threshold = x.describe()['75%']
    return [
        i for i in range(1, 13)
        if x.loc[f'{year}-{i}'].mean() >= threshold
    ]
def season(x):
    """Flag dates that fall in June-August or December-February.

    Args:
        x: a date-like value whose ``str()`` form starts with ``YYYY-MM``
            (for example a pandas Timestamp or an ISO date string). The
            month is read from characters 5-6 of that string.

    Returns:
        int: 1 when the two-digit month is 06, 07, 08, 12, 01 or 02,
        otherwise 0.
    """
    return 1 if str(x)[5:7] in ('06', '07', '08', '12', '01', '02') else 0
def month(x):
    """Flag dates whose month is Aug, Sep, Oct, Dec, Jan or Feb.

    Args:
        x: a date-like value whose ``str()`` form starts with ``YYYY-MM``
            (for example a pandas Timestamp or an ISO date string). The
            month is read from characters 5-6 of that string.

    Returns:
        int: 1 when the two-digit month is 08, 09, 10, 12, 01 or 02,
        otherwise 0.
    """
    # NOTE(review): November is not in the set while October and December
    # are - confirm this month selection is intentional.
    return 1 if str(x)[5:7] in ('08', '09', '10', '12', '01', '02') else 0
def normal(nd):
    """Drop outliers from a numeric Series using 1.5 * IQR fences.

    Args:
        nd: numeric pandas Series.

    Returns:
        pandas.Series: the values of ``nd`` strictly between
        ``Q1 - 1.5 * IQR`` and ``Q3 + 1.5 * IQR``, keeping their original
        index labels. Values exactly on a fence are excluded, so a series
        with zero IQR yields an empty result.
    """
    # Compute the summary once; both fences use the same two quartiles.
    stats = nd.describe()
    q1 = stats['25%']
    q3 = stats['75%']
    high = q3 + 1.5 * (q3 - q1)
    low = q1 - 1.5 * (q3 - q1)
    return nd[(nd < high) & (nd > low)]
# Load the Hangzhou daily data set, indexed by the 'dtdate' column.
data = pd.read_excel(r'C:\python-project\pytorch3\入模数据\杭州数据.xlsx',index_col='dtdate')
data.index = pd.to_datetime(data.index,format='%Y-%m-%d')
# Keep only the rows whose '售电量' value passes the IQR outlier filter.
data = data.loc[normal(data['售电量']).index]
# for i in range(1,13):
# plt.plot(range(len(data['售电量'][f'2022-{i}'])),data['售电量'][f'2022-{i}'])
# plt.show()
print(data['售电量']['2022-9'])
# Plot July 2022 followed by July 2023 on one shared x axis for comparison.
july_2022 = data['售电量']['2022-7']
july_2023 = data['售电量']['2023-7']
plt.plot(range(len(july_2022)),july_2022)
plt.plot(range(len(july_2022),len(july_2022)+len(july_2023)),july_2023)
# plt.plot(range(len(data['售电量'][['2022-9','2023-9']])),data['售电量'][['2022-9','2023-9']])
plt.show()
# print(hf_season(data.loc['2021']['售电量']))
# Use the real calendar month. The previous strftime(...).str[6] kept only
# the last digit of the month, so October-December were stored as 0, 1, 2.
data['month'] = data.index.month.astype('int')
data['season'] = data.index.map(season)
print(data.head(50))
# Hold out July 2023 for evaluation; train on January 2021 through June 2023.
df_eval = data.loc['2023-7']
df_train = data.loc['2021-1':'2023-6']
# df_train = df[500:850]
print(len(df_eval),len(df_train),len(data))
# Pearson correlation of every remaining column with the target '售电量'.
print(data.drop(columns='city_name').corr(method='pearson')['售电量'])
df_train = df_train[['tem_max','tem_min','24ST','rh','rh_max','prs','prs_max','prs_min','售电量','month','holiday','season']]
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
# high = df['售电量'].describe()['75%'] + 1.5*IQR
# low = df['售电量'].describe()['25%'] - 1.5*IQR
# print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] <= low)]))
#
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
# Only five of the columns selected above are used as model features.
X = df_train[['tem_max','tem_min','24ST','holiday','season']]
X_eval = df_eval[['tem_max','tem_min','24ST','holiday','season']]
y = df_train['售电量']
print(y.describe())
# best_goal = 1
# best_i = {}
# for i in range(400):
# Random 85/15 split of the training window, with a fixed seed.
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.15,random_state=42)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train,y_train)
y_pred = model.predict(x_test)
result_test = pd.DataFrame({'test':y_test,'pred':y_pred},index=y_test.index)
# Print metrics: mean absolute error on the test split divided by the mean actual value.
print(abs(y_test - y_pred).mean() / y_test.mean())
eval_pred = model.predict(X_eval)
result_eval = pd.DataFrame({'eval':df_eval['售电量'],'pred':eval_pred},index=df_eval['售电量'].index)
# Signed deviation of the predicted total from the actual total over the evaluation rows.
print((result_eval['eval'].sum()-result_eval['pred'].sum())/result_eval['eval'].sum())
# goal: deviation over the last 3 evaluation rows, divided by the actual total of ALL evaluation rows.
goal = (result_eval['eval'][-3:].sum()-result_eval['pred'][-3:].sum())/result_eval['eval'].sum()
print('goal:',goal)
# goal2: same ratio over the last 23 evaluation rows.
# NOTE(review): [-3:] and [-23:] count rows, not calendar days; they only match
# fixed day ranges if no day of the month was removed by the outlier filter - confirm.
goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum()
print('goal2:',goal2)
print(result_eval)
print('r2:',r2_score(y_test,y_pred))
# if abs(goal) < best_goal:
# best_goal = abs(goal)
# best_i['best_i'] = i
# x = goal2
# print(best_i,best_goal,x)
# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\杭州.csv')
# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
# f.write(f'杭州月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# # Save the model
# model.save_model('hangzhou.bin')
# loaded_model = xgb.XGBRegressor()
# loaded_model.load_model('hangzhou.bin')
# model.predict(X_eval)

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

@ -1,32 +0,0 @@
dtdate,eval,pred
2023-08-01,4781.72,4638.4565
2023-08-02,5264.18,4224.5635
2023-08-03,5308.39,5036.8037
2023-08-04,5531.27,5441.4995
2023-08-05,5989.36,6265.5225
2023-08-06,6373.8,5753.8174
2023-08-07,5688.18,4972.277
2023-08-08,5287.83,4424.5815
2023-08-09,5560.11,4310.7837
2023-08-10,5706.55,4657.085
2023-08-11,5923.97,5702.3916
2023-08-12,6238.88,5897.044
2023-08-13,5961.14,4939.6694
2023-08-14,5316.45,3566.0615
2023-08-15,4802.99,3005.286
2023-08-16,4908.05,3805.3303
2023-08-17,4792.48,3044.6094
2023-08-18,4380.25,3086.1318
2023-08-19,4490.53,4237.0283
2023-08-20,4577.54,3911.61
2023-08-21,4784.33,4044.5312
2023-08-22,4517.86,3943.1465
2023-08-23,4327.74,4588.3257
2023-08-24,4736.04,4383.0825
2023-08-25,4981.34,4765.4146
2023-08-26,4967.04,4744.9272
2023-08-27,5044.84,4771.1987
2023-08-28,4919.99,4644.142
2023-08-29,3611.24,3359.0356
2023-08-30,3184.04,3217.3503
2023-08-31,3026.0,3217.8718
1 dtdate eval pred
2 2023-08-01 4781.72 4638.4565
3 2023-08-02 5264.18 4224.5635
4 2023-08-03 5308.39 5036.8037
5 2023-08-04 5531.27 5441.4995
6 2023-08-05 5989.36 6265.5225
7 2023-08-06 6373.8 5753.8174
8 2023-08-07 5688.18 4972.277
9 2023-08-08 5287.83 4424.5815
10 2023-08-09 5560.11 4310.7837
11 2023-08-10 5706.55 4657.085
12 2023-08-11 5923.97 5702.3916
13 2023-08-12 6238.88 5897.044
14 2023-08-13 5961.14 4939.6694
15 2023-08-14 5316.45 3566.0615
16 2023-08-15 4802.99 3005.286
17 2023-08-16 4908.05 3805.3303
18 2023-08-17 4792.48 3044.6094
19 2023-08-18 4380.25 3086.1318
20 2023-08-19 4490.53 4237.0283
21 2023-08-20 4577.54 3911.61
22 2023-08-21 4784.33 4044.5312
23 2023-08-22 4517.86 3943.1465
24 2023-08-23 4327.74 4588.3257
25 2023-08-24 4736.04 4383.0825
26 2023-08-25 4981.34 4765.4146
27 2023-08-26 4967.04 4744.9272
28 2023-08-27 5044.84 4771.1987
29 2023-08-28 4919.99 4644.142
30 2023-08-29 3611.24 3359.0356
31 2023-08-30 3184.04 3217.3503
32 2023-08-31 3026.0 3217.8718

Binary file not shown.

Binary file not shown.

@ -12,10 +12,6 @@ mpl.rcParams['font.sans-serif']=['kaiti']
pd.set_option('display.width',None)
def season(x):
if str(x)[5:7] in ['07', '08']:
return 2

@ -5,38 +5,55 @@ from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rcParams['font.sans-serif']=['kaiti']
pd.set_option('display.width',None)
def season(x):
if str(x)[5:7] in ('06','07','08','12','01','02'):
if str(x)[5:7] in ('01', '10', '11'):
return 0
elif str(x)[5:7] in ('02', '03', '04', '05', '06', '09', '12'):
return 1
else:
return 0
return 2
def normal(nd):
high = nd.describe()['75%'] + 1.5*(nd.describe()['75%']-nd.describe()['25%'])
low = nd.describe()['25%'] - 1.5*(nd.describe()['75%']-nd.describe()['25%'])
return nd[(nd<high)&(nd>low)]
mpl.rcParams['font.sans-serif']=['kaiti']
pd.set_option('display.width',None)
data = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\丽水数据.xlsx',index_col='dtdate')
parent_dir = os.path.abspath(os.path.join(os.getcwd(),os.pardir))
data = pd.read_excel(os.path.join(parent_dir,'入模数据/丽水.xlsx'),index_col='dtdate')
data.index = pd.to_datetime(data.index,format='%Y-%m-%d')
data = data.loc[normal(data['售电量']).index]
# list2 = []
# list0 = []
# list1 = []
# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'):
# month_index = data.index.strftime('%Y-%m-%d').str[5:7] == f'{i}'
# print(data.loc[month_index]['售电量'].max(),data['售电量'].describe()['75%'])
# if data.loc[month_index]['售电量'].mean() >= data['售电量'].describe()['75%']:
# list2.append(i)
# elif data.loc[month_index]['售电量'].mean() <= data['售电量'].describe()['25%']:
# list0.append(i)
# else:
# list1.append(i)
# print(list0,list1,list2)
data['season'] = data.index.map(season)
# plt.plot(range(len(data)),data['售电量'])
# plt.show()
print(data.head())
df_eval = data.loc['2023-9']
df_train = data.loc['2021-1':'2023-8']
# df_train = df[500:850]
# df_train = data.loc['2021-1':'2023-8']
df_train = data[500:850]
print(len(df_eval),len(df_train),len(data))
df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']]
df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']]
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
# high = df['售电量'].describe()['75%'] + 1.5*IQR
# low = df['售电量'].describe()['25%'] - 1.5*IQR
# print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] <= low)]))
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
X = df_train[['tem_max','tem_min','holiday','24ST','season']]
@ -46,7 +63,8 @@ y = df_train['售电量']
# best_i = {}
# for i in range(200):
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.15,random_state=42)
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train,y_train)
@ -58,12 +76,12 @@ print(abs(y_test - y_pred).mean() / y_test.mean())
eval_pred = model.predict(X_eval)
result_eval = pd.DataFrame({'eval':df_eval['售电量'],'pred':eval_pred},index=df_eval['售电量'].index)
# print(result_eval)
# print((result_eval['eval'].sum()-result_eval['pred'].sum())/result_eval['eval'].sum())
print((result_eval['eval'][-3:].sum()-result_eval['pred'][-3:].sum())/result_eval['eval'].sum())
print(result_eval)
goal = (result_eval['eval'][-3:].sum()-result_eval['pred'][-3:].sum())/result_eval['eval'].sum()
print((result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum())
print(goal)
goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum()
print(goal2)
# if abs(goal) < best_goal:
# best_goal = abs(goal)
# best_i['best_i'] = i
@ -73,7 +91,7 @@ goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result
# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
# f.write(f'丽水月末3天偏差率{goal},9号-月底偏差率:{goal2}')
# # 保存模型
# model.save_model('lishui.bin')
model.save_model('lishui.bin')
import numpy as np
loaded_model = xgb.XGBRegressor()
loaded_model.load_model('lishui.bin')

@ -5,39 +5,47 @@ from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
def season(x):
if str(x)[5:7] in ('06','07','08','12','01','02'):
return 1
else:
return 0
mpl.rcParams['font.sans-serif']=['kaiti']
pd.set_option('display.width',None)
data = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\台州数据(1).xlsx',index_col='dtdate')
def season(x):
if str(x)[5:7] in ('01', '02', '10', '11'):
return 0
elif str(x)[5:7] in ('03', '04', '05', '06', '09', '12'):
return 1
else:
return 2
def normal(nd):
high = nd.describe()['75%'] + 1.5*(nd.describe()['75%']-nd.describe()['25%'])
low = nd.describe()['25%'] - 1.5*(nd.describe()['75%']-nd.describe()['25%'])
return nd[(nd<high)&(nd>low)]
parent_dir = os.path.abspath(os.path.join(os.getcwd(),os.pardir))
data = pd.read_excel(os.path.join(parent_dir,'入模数据/台州.xlsx'),index_col='dtdate')
data.index = pd.to_datetime(data.index,format='%Y-%m-%d')
data = data.loc[normal(data['售电量']).index]
list2 = []
list0 = []
list1 = []
for i in ('01','02','03','04','05','06','07','08','09','10','11','12'):
month_index = data.index.strftime('%Y-%m-%d').str[5:7] == f'{i}'
if data.loc[month_index]['售电量'].mean() >= data['售电量'].describe()['75%']:
list2.append(i)
elif data.loc[month_index]['售电量'].mean() <= data['售电量'].describe()['25%']:
list0.append(i)
else:
list1.append(i)
print(list0,list1,list2)
data['season'] = data.index.map(season)
# plt.plot(range(len(data)),data['售电量'])
# plt.show()
print(data.head())
df_eval = data.loc['2023-8']
# df_train = data.loc['2021-1':'2023-7']
df_train = data[500:850]
print(len(df_eval),len(df_train),len(data))
df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']]
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
# high = df['售电量'].describe()['75%'] + 1.5*IQR
# low = df['售电量'].describe()['25%'] - 1.5*IQR
# print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] <= low)]))
#
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
df_train = data[500:850]
df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']]
X = df_train[['tem_max','tem_min','holiday','24ST','season']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
@ -60,22 +68,17 @@ eval_pred = model.predict(X_eval)
result_eval = pd.DataFrame({'eval':df_eval['售电量'],'pred':eval_pred},index=df_eval['售电量'].index)
# print(result_eval)
# print((result_eval['eval'].sum()-result_eval['pred'].sum())/result_eval['eval'].sum())
print((result_eval['eval'][-3:].sum()-result_eval['pred'][-3:].sum())/result_eval['eval'].sum())
goal = (result_eval['eval'][-3:].sum()-result_eval['pred'][-3:].sum())/result_eval['eval'].sum()
print((result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum())
print(goal)
goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum()
print(goal2)
# if abs(goal) < best_goal:
# best_goal = abs(goal)
# best_i['best_i'] = i
# print(best_i,best_goal)
#
# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\台州.csv')
# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
# f.write(f'台州月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# # 保存模型
# model.save_model('taizhou.bin')
# 保存模型
model.save_model('taizhou.bin')
import numpy as np
loaded_model = xgb.XGBRegressor()
loaded_model.load_model('taizhou.bin')

@ -5,41 +5,50 @@ from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rcParams['font.sans-serif']=['kaiti']
pd.set_option('display.width',None)
def season(x):
if str(x)[5:7] in ('06','07','08','12','01','02'):
if str(x)[5:7] in ('04', '10'):
return 0
elif str(x)[5:7] in ('01', '02', '03', '05', '06', '09', '11', '12'):
return 1
else:
return 0
mpl.rcParams['font.sans-serif']=['kaiti']
return 2
def normal(nd):
high = nd.describe()['75%'] + 1.5*(nd.describe()['75%']-nd.describe()['25%'])
low = nd.describe()['25%'] - 1.5*(nd.describe()['75%']-nd.describe()['25%'])
return nd[(nd<high)&(nd>low)]
pd.set_option('display.width',None)
parent_dir = os.path.abspath(os.path.join(os.getcwd(),os.pardir))
data = pd.read_excel(os.path.join(parent_dir,'入模数据/嘉兴.xlsx'),index_col='dtdate')
data.index = pd.to_datetime(data.index,format='%Y-%m-%d')
data = data.loc[normal(data['售电量']).index]
df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\嘉兴数据.xlsx')
df['dtdate'] = pd.to_datetime(df['dtdate'],format='%Y-%m-%d').astype('string')
df.set_index('dtdate',inplace=True)
plt.plot(range(len(df)),df['售电量'])
plt.show()
print(df.head())
df['season'] = df.index.map(season)
# list2 = []
# list0 = []
# list1 = []
# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'):
# month_index = data.index.strftime('%Y-%m-%d').str[5:7] == f'{i}'
# if data.loc[month_index]['售电量'].mean() >= data['售电量'].describe()['75%']:
# list2.append(i)
# elif data.loc[month_index]['售电量'].mean() <= data['售电量'].describe()['25%']:
# list0.append(i)
# else:
# list1.append(i)
# print(list0,list1,list2)
data['season'] = data.index.map(season)
df_eval = df[df.index.str[:7]=='2023-08']
# df_train = df[(df.index.str[:7]!='2023-08')&(df.index.str[:7]!='2023-09')]
df_train = df[500:850]
print(len(df_eval),len(df_train),len(df))
df_eval = data.loc['2023-08']
df_train = data.iloc[500:850]
# df_train = data.loc['2021-01':'2023-07']
print(len(df_eval),len(df_train),len(data))
df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']]
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
# high = df['售电量'].describe()['75%'] + 1.5*IQR
# low = df['售电量'].describe()['25%'] - 1.5*IQR
# print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] <= low)]))
#
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']]
X = df_train[['tem_max','tem_min','holiday','24ST','season']]
@ -50,7 +59,7 @@ y = df_train['售电量']
# best_i = {}
# for i in range(400):
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=272)
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=42)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train,y_train)
@ -61,14 +70,11 @@ result_test = pd.DataFrame({'test':y_test,'pred':y_pred},index=y_test.index)
# 指标打印
print(abs(y_test - y_pred).mean() / y_test.mean())
eval_pred = model.predict(X_eval)
result_eval = pd.DataFrame({'eval':df_eval['售电量'],'pred':eval_pred},index=df_eval['售电量'].index)
goal = (result_eval['eval'][-3:].sum()-result_eval['pred'][-3:].sum())/result_eval['eval'].sum()
# print(goal)
goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum()
print(goal,goal2)
print(result_eval)
# print(goal2)
# if abs(goal) < best_goal :
# best_goal = abs(goal)
@ -77,13 +83,8 @@ print(goal,goal2)
#
# print(best_i,best_goal,x)
# result_eval.to_csv(r'C:\Users\user\Desktop\8月各地市日电量预测结果\嘉兴.csv')
# with open(r'C:\Users\user\Desktop\8月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
# f.write(f'嘉兴月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# 保存模型
# model.save_model('jiaxing.bin')
model.save_model('jiaxing.bin')
loaded_model = xgb.XGBRegressor()
loaded_model.load_model('jiaxing.bin')
import numpy as np

@ -5,42 +5,47 @@ from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
def season(x):
if str(x)[5:7] in ('06','07','08','12','01','02'):
return 1
else:
return 0
mpl.rcParams['font.sans-serif']=['kaiti']
pd.set_option('display.width',None)
df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\宁波数据.xlsx',index_col='dtdate')
df.index = pd.to_datetime(df.index,format='%Y-%m-%d')
df['season'] = df.index.map(season)
plt.plot(range(len(df)),df['售电量'])
plt.show()
print(df.head())
df_eval = df.loc['2023-09']
df_train = df.loc['2021-01':'2023-08']
# df_train = df[400:850]
print(len(df_eval),len(df_train),len(df))
df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']]
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
# high = df['售电量'].describe()['75%'] + 1.5*IQR
# low = df['售电量'].describe()['25%'] - 1.5*IQR
# print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] <= low)]))
#
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
def season(x):
if str(x)[5:7] in ('01', '04', '10'):
return 0
elif str(x)[5:7] in ('02', '03', '05', '06', '09', '11', '12'):
return 1
else:
return 2
def normal(nd):
high = nd.describe()['75%'] + 1.5*(nd.describe()['75%']-nd.describe()['25%'])
low = nd.describe()['25%'] - 1.5*(nd.describe()['75%']-nd.describe()['25%'])
return nd[(nd<high)&(nd>low)]
parent_dir = os.path.abspath(os.path.join(os.getcwd(),os.pardir))
data = pd.read_excel(os.path.join(parent_dir,'入模数据/宁波.xlsx'),index_col='dtdate')
data.index = pd.to_datetime(data.index,format='%Y-%m-%d')
data = data.loc[normal(data['售电量']).index]
# list2 = []
# list0 = []
# list1 = []
# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'):
# month_index = data.index.strftime('%Y-%m-%d').str[5:7] == f'{i}'
# if data.loc[month_index]['售电量'].mean() >= data['售电量'].describe()['75%']:
# list2.append(i)
# elif data.loc[month_index]['售电量'].mean() <= data['售电量'].describe()['25%']:
# list0.append(i)
# else:
# list1.append(i)
# print(list0,list1,list2)
data['season'] = data.index.map(season)
df_eval = data.loc['2023-09']
df_train = data.loc['2021-01':'2023-08']
# df_train = data[400:850]
print(len(df_eval),len(df_train))
df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']]
X = df_train[['tem_max','tem_min','holiday','24ST','season']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
@ -53,8 +58,6 @@ y = df_train['售电量']
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=18)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train,y_train)
y_pred = model.predict(x_test)
result_test = pd.DataFrame({'test':y_test,'pred':y_pred},index=y_test.index)
@ -76,12 +79,8 @@ print(goal2)
#
# print(best_i,best_goal,x)
# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\宁波.csv')
# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
# f.write(f'宁波月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# 保存模型
# model.save_model('ningbo.bin')
model.save_model('ningbo.bin')
import numpy as np
loaded_model = xgb.XGBRegressor()
loaded_model.load_model('ningbo.bin')

@ -7,23 +7,15 @@ from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rcParams['font.sans-serif']=['kaiti']
pd.set_option('display.width',None)
def hf_season(x):
list1= []
for i in range(1,13):
if x.loc[f'2021-{i}'].mean() >= x.describe()['75%']:
list1.append(i)
return list1
def season(x):
if str(x)[5:7] in ('06','07','08','12','01','02'):
if str(x)[5:7] in ('04', '10'):
return 0
elif str(x)[5:7] in ('01', '02', '03', '05', '06', '09', '11', '12'):
return 1
else:
return 0
return 2
def month(x):
if str(x)[5:7] in ('08','09','10','12','01','02'):
return 1
@ -34,47 +26,45 @@ def normal(nd):
low = nd.describe()['25%'] - 1.5*(nd.describe()['75%']-nd.describe()['25%'])
return nd[(nd<high)&(nd>low)]
data = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\杭州数据.xlsx',index_col='dtdate')
parent_dir = os.path.abspath(os.path.join(os.getcwd(),os.pardir))
data = pd.read_excel(os.path.join(parent_dir,'入模数据/杭州.xlsx'),index_col='dtdate')
data.index = pd.to_datetime(data.index,format='%Y-%m-%d')
data = data.loc[normal(data['售电量']).index]
# plt.plot(range(len(data['售电量']['2021':'2022'])),data['售电量']['2021':'2022'])
# plt.show()
# print(hf_season(data.loc['2021']['售电量']))
data['month'] = data.index.strftime('%Y-%m-%d').str[6]
data['month'] = data['month'].astype('int')
# list2 = []
# list0 = []
# list1 = []
# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'):
# month_index = data.index.strftime('%Y-%m-%d').str[5:7] == f'{i}'
# if data.loc[month_index]['售电量'].mean() >= data['售电量'].describe()['75%']:
# list2.append(i)
# elif data.loc[month_index]['售电量'].mean() <= data['售电量'].describe()['25%']:
# list0.append(i)
# else:
# list1.append(i)
# print(list0,list1,list2)
# data['month'] = data.index.strftime('%Y-%m-%d').str[6]
# data['month'] = data['month'].astype('int')
data['season'] = data.index.map(season)
print(data.tail(50))
df_eval = data.loc['2022-9':'2023-9']
df_train = data.loc['2021-1':'2022-8']
# df_train = df[500:850]
print(len(df_eval),len(df_train),len(data))
print(data.drop(columns='city_name').corr(method='pearson')['售电量'])
df_train = df_train[['tem_max','tem_min','24ST','rh','rh_max','prs','prs_max','prs_min','售电量','month','holiday','season']]
# df_train = df[500:850]
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
# high = df['售电量'].describe()['75%'] + 1.5*IQR
# low = df['售电量'].describe()['25%'] - 1.5*IQR
# print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] <= low)]))
#
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
df_train = data.loc['2021-01':'2023-08']
df_eval = data.loc['2023-9']
X = df_train[['tem_max','tem_min','24ST','holiday','season']]
X_eval = df_eval[['tem_max','tem_min','24ST','holiday','season']]
y = df_train['售电量']
print(y.describe())
# best_goal = 1
# best_i = {}
# for i in range(400):
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=142)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train,y_train)
@ -83,20 +73,17 @@ result_test = pd.DataFrame({'test':y_test,'pred':y_pred},index=y_test.index)
# 指标打印
print(abs(y_test - y_pred).mean() / y_test.mean())
# eval_pred = model.predict(X_eval)
#
# result_eval = pd.DataFrame({'eval':df_eval['售电量'],'pred':eval_pred},index=df_eval['售电量'].index)
#
# print((result_eval['eval'].sum()-result_eval['pred'].sum())/result_eval['eval'].sum())
#
# goal = (result_eval['eval'][-3:].sum()-result_eval['pred'][-3:].sum())/result_eval['eval'].sum()
# print('goal:',goal)
#
# goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum()
#
# print('goal2:',goal2)
# print(result_eval)
# print('r2:',r2_score(y_test,y_pred))
eval_pred = model.predict(X_eval)
result_eval = pd.DataFrame({'eval':df_eval['售电量'],'pred':eval_pred},index=df_eval['售电量'].index)
goal = (result_eval['eval'][-3:].sum()-result_eval['pred'][-3:].sum())/result_eval['eval'].sum()
print('goal:',goal)
goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum()
print('goal2:',goal2)
print(result_eval)
print('r2:',r2_score(y_test,y_pred))
#
# # result_eval.to_csv('asda.csv',encoding='gbk')
# # if abs(goal) < best_goal:
@ -111,17 +98,17 @@ print(abs(y_test - y_pred).mean() / y_test.mean())
# # with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
# # f.write(f'杭州月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# 保存模型
# model.save_model('hangzhou.bin')
model.save_model('hangzhou.bin')
# X_eval = df_eval[['tem_max','tem_min','24ST','holiday','season']]
df_eval = pd.read_excel(r'C:\Users\user\Desktop\浙江气象1027.xlsx')
df_eval.columns = df_eval.columns.map(lambda x:x.strip())
df_eval = df_eval[['city_name','dtdate','tem_max','tem_min']]
df_eval['city_name'] = df_eval['city_name'].map(lambda x:x.strip())
df_hangzhou = df_eval[df_eval['city_name']=='金华市'].sort_values(by='dtdate')
# df_eval = pd.read_excel(r'C:\Users\user\Desktop\浙江气象1027.xlsx')
# df_eval.columns = df_eval.columns.map(lambda x:x.strip())
# df_eval = df_eval[['city_name','dtdate','tem_max','tem_min']]
# df_eval['city_name'] = df_eval['city_name'].map(lambda x:x.strip())
# df_hangzhou = df_eval[df_eval['city_name']=='金华市'].sort_values(by='dtdate')
print(df_hangzhou)
loaded_model = xgb.XGBRegressor()
loaded_model.load_model('hangzhou.bin')

@ -5,41 +5,50 @@ from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
def season(x):
if str(x)[5:7] in ('06','07','08','12','01','02'):
return 1
else:
return 0
mpl.rcParams['font.sans-serif']=['kaiti']
pd.set_option('display.width',None)
df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\温州数据(1).xlsx',index_col='dtdate')
df.index = pd.to_datetime(df.index,format='%Y-%m-%d')
df['season'] = df.index.map(season)
plt.plot(range(len(df)),df['售电量'])
plt.show()
print(df.head())
def season(x):
if str(x)[5:7] in ('01', '02', '10'):
return 0
elif str(x)[5:7] in ('03', '04', '05', '06', '11', '12'):
return 1
else:
return 2
def normal(nd):
high = nd.describe()['75%'] + 1.5*(nd.describe()['75%']-nd.describe()['25%'])
low = nd.describe()['25%'] - 1.5*(nd.describe()['75%']-nd.describe()['25%'])
return nd[(nd<high)&(nd>low)]
parent_dir = os.path.abspath(os.path.join(os.getcwd(),os.pardir))
data = pd.read_excel(os.path.join(parent_dir,'入模数据/温州.xlsx'),index_col='dtdate')
data.index = pd.to_datetime(data.index,format='%Y-%m-%d')
data = data.loc[normal(data['售电量']).index]
# list2 = []
# list0 = []
# list1 = []
# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'):
# month_index = data.index.strftime('%Y-%m-%d').str[5:7] == f'{i}'
# if data.loc[month_index]['售电量'].mean() >= data['售电量'].describe()['75%']:
# list2.append(i)
# elif data.loc[month_index]['售电量'].mean() <= data['售电量'].describe()['25%']:
# list0.append(i)
# else:
# list1.append(i)
#
# print(list0,list1,list2)
data['season'] = data.index.map(season)
df_eval = df.loc['2023-9']
df_eval = data.loc['2023-9']
# df_train = df[(df.index.str[:7]!='2023-08')&(df.index.str[:7]!='2023-09')]
df_train = df[400:850]
print(len(df_eval),len(df_train),len(df))
df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']]
df_train = data[400:850]
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
# high = df['售电量'].describe()['75%'] + 1.5*IQR
# low = df['售电量'].describe()['25%'] - 1.5*IQR
# print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] <= low)]))
#
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']]
X = df_train[['tem_max','tem_min','holiday','24ST','season']]
@ -54,7 +63,6 @@ x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train,y_train)
y_pred = model.predict(x_test)
result_test = pd.DataFrame({'test':y_test,'pred':y_pred},index=y_test.index)
@ -69,18 +77,14 @@ print(goal)
goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum()
print(goal2)
# if abs(goal) < best_goal :
# best_goal = abs(goal)
# best_i['best_i'] = i
# x = goal2
# print(best_i,best_goal,x)
# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\温州.csv')
# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
# f.write(f'温州月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# 保存模型
# model.save_model('wenzhou.bin')
model.save_model('wenzhou.bin')
loaded_model = xgb.XGBRegressor()

@ -5,42 +5,55 @@ from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
mpl.rcParams['font.sans-serif']=['kaiti']
pd.set_option('display.width',None)
import random
import matplotlib.pyplot as plt
def season(x):
if str(x)[5:7] in ('06','07','08','12','01','02'):
if str(x)[5:7] in ('10'):
return 0
elif str(x)[5:7] in ('01', '02', '03', '04', '05', '06', '09', '11', '12'):
return 1
else:
return 0
pd.set_option('display.width',None)
df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\湖州数据.xlsx',index_col='dtdate')
df.index = pd.to_datetime(df.index,format='%Y-%m-%d')
df['season'] = df.index.map(season)
print(df.head())
df_eval = df.loc['2023-9']
df_train = df.loc['2021-1':'2023-8']
print(len(df_eval),len(df_train),len(df))
df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']]
return 2
def normal(nd):
    """Return the entries of *nd* that lie strictly inside the 1.5*IQR fences.

    The fences are ``Q1 - 1.5*IQR`` and ``Q3 + 1.5*IQR``, where Q1/Q3 are the
    '25%' and '75%' rows reported by ``nd.describe()``.  Both comparisons are
    strict, so a value equal to a fence is dropped (a constant series
    therefore yields an empty result).

    :param nd: object providing ``describe()`` and element-wise comparison;
        the callers in this script pass a pandas Series.
    :return: the subset of *nd* within the fences, original index preserved.
    """
    # Compute the summary once instead of four separate describe() calls.
    summary = nd.describe()
    q1, q3 = summary['25%'], summary['75%']
    margin = 1.5 * (q3 - q1)
    high = q3 + margin
    low = q1 - margin
    return nd[(nd < high) & (nd > low)]
parent_dir = os.path.abspath(os.path.join(os.getcwd(),os.pardir))
data = pd.read_excel(os.path.join(parent_dir,'入模数据/湖州.xlsx'),index_col='dtdate')
data.index = pd.to_datetime(data.index,format='%Y-%m-%d')
data = data.loc[normal(data['售电量']).index]
# list2 = []
# list0 = []
# list1 = []
# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'):
# month_index = data.index.strftime('%Y-%m-%d').str[5:7] == f'{i}'
# if data.loc[month_index]['售电量'].mean() >= data['售电量'].describe()['75%']:
# list2.append(i)
# elif data.loc[month_index]['售电量'].mean() <= data['售电量'].describe()['25%']:
# list0.append(i)
# else:
# list1.append(i)
#
# print(list0,list1,list2)
data['season'] = data.index.map(season)
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
# high = df['售电量'].describe()['75%'] + 1.5*IQR
# low = df['售电量'].describe()['25%'] - 1.5*IQR
# print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] <= low)]))
#
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
df_eval = data.loc['2023-9']
df_train = data.loc['2021-1':'2023-8']
df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']]
X = df_train[['tem_max','tem_min','holiday','24ST','season']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
y = df_train['售电量']
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=100)
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=42)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train,y_train)
@ -48,7 +61,6 @@ model.fit(x_train,y_train)
y_pred = model.predict(x_test)
result_test = pd.DataFrame({'test':y_test,'pred':y_pred},index=y_test.index)
# 指标打印
print(abs(y_test - y_pred).mean() / y_test.mean())
eval_pred = model.predict(X_eval)
@ -57,19 +69,11 @@ result_eval = pd.DataFrame({'eval':df_eval['售电量'],'pred':eval_pred},index=
goal = (result_eval['eval'][-3:].sum()-result_eval['pred'][-3:].sum())/result_eval['eval'].sum()
print(goal)
goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum()
print(goal2)
# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\湖州.csv')
# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
# f.write(f'湖州月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# 保存模型
# model.save_model('huzhou.bin')
model.save_model('huzhou.bin')
loaded_model = xgb.XGBRegressor()
loaded_model.load_model('huzhou.bin')
import numpy as np

@ -5,48 +5,57 @@ from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
def season(x):
    """Return 1 when the month field of *x* is a listed peak month, else 0.

    The month is taken as characters 5-6 of ``str(x)``.
    NOTE(review): this assumes ``str(x)`` starts with 'YYYY-MM' (e.g. a
    date or timestamp) -- confirm against the values in the index.

    :param x: any value whose string form carries the month at [5:7].
    :return: 1 for months 06, 07, 08, 12, 01, 02; 0 otherwise.
    """
    month = str(x)[5:7]
    if month in ('06', '07', '08', '12', '01', '02'):
        return 1
    return 0
mpl.rcParams['font.sans-serif']=['kaiti']
pd.set_option('display.width',None)
def season(x):
    """Map the month field of *x* to one of three integer buckets.

    The month is taken as characters 5-6 of ``str(x)``.
    NOTE(review): this assumes ``str(x)`` starts with 'YYYY-MM' (e.g. a
    date or timestamp) -- confirm against the values in the index.
    NOTE(review): the bucket lists presumably come from the commented-out
    per-month quantile grouping in this script -- verify before editing.

    :param x: any value whose string form carries the month at [5:7].
    :return: 0 for months 01, 02, 10, 11; 1 for 03, 04, 05, 06, 09, 12;
        2 for anything else (07 and 08 for well-formed dates).
    """
    # Slice once; the original evaluated str(x)[5:7] in each branch.
    month = str(x)[5:7]
    if month in ('01', '02', '10', '11'):
        return 0
    if month in ('03', '04', '05', '06', '09', '12'):
        return 1
    return 2
def normal(nd):
    """Return the entries of *nd* that lie strictly inside the 1.5*IQR fences.

    The fences are ``Q1 - 1.5*IQR`` and ``Q3 + 1.5*IQR``, where Q1/Q3 are the
    '25%' and '75%' rows reported by ``nd.describe()``.  Both comparisons are
    strict, so a value equal to a fence is dropped (a constant series
    therefore yields an empty result).

    :param nd: object providing ``describe()`` and element-wise comparison;
        the callers in this script pass a pandas Series.
    :return: the subset of *nd* within the fences, original index preserved.
    """
    # Compute the summary once instead of four separate describe() calls.
    summary = nd.describe()
    q1, q3 = summary['25%'], summary['75%']
    margin = 1.5 * (q3 - q1)
    high = q3 + margin
    low = q1 - margin
    return nd[(nd < high) & (nd > low)]
parent_dir = os.path.abspath(os.path.join(os.getcwd(),os.pardir))
data = pd.read_excel(os.path.join(parent_dir,'入模数据/绍兴.xlsx'),index_col='dtdate')
data.index = pd.to_datetime(data.index,format='%Y-%m-%d')
data = data.loc[normal(data['售电量']).index]
# list2 = []
# list0 = []
# list1 = []
# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'):
# month_index = data.index.strftime('%Y-%m-%d').str[5:7] == f'{i}'
# if data.loc[month_index]['售电量'].mean() >= data['售电量'].describe()['75%']:
# list2.append(i)
# elif data.loc[month_index]['售电量'].mean() <= data['售电量'].describe()['25%']:
# list0.append(i)
# else:
# list1.append(i)
#
# print(list0,list1,list2)
data['season'] = data.index.map(season)
df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\绍兴数据(1).xlsx',index_col='dtdate')
df.index = pd.to_datetime(df.index ,format='%Y-%m-%d')
df['season'] = df.index.map(season)
plt.plot(range(len(df)),df['售电量'])
plt.show()
print(df.head())
df_eval = df.loc['2023-9']
df_train = df.loc['2021-1':'2023-8']
df_eval = data.loc['2023-9']
df_train = data.loc['2021-1':'2023-8']
# df_train = df[400:850]
print(len(df_eval),len(df_train),len(df))
print(len(df_eval),len(df_train),len(data))
df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']]
df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']]
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
# high = df['售电量'].describe()['75%'] + 1.5*IQR
# low = df['售电量'].describe()['25%'] - 1.5*IQR
# print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] <= low)]))
#
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
X = df_train[['tem_max','tem_min','holiday','24ST','season']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
y = df_train['售电量']
# best_goal = 1
# best_i = {}
# for i in range(400):
@ -74,16 +83,8 @@ print(goal2)
# best_goal = abs(goal)
# best_i['best_i'] = i
# x = goal2
# print(best_i,best_goal,x)
# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\绍兴.csv')
# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
# f.write(f'绍兴月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# 保存模型
model.save_model('shaoxing.bin')
loaded_model = xgb.XGBRegressor()

@ -5,49 +5,59 @@ from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
mpl.rcParams['font.sans-serif']=['kaiti']
pd.set_option('display.width',None)
import random
import matplotlib.pyplot as plt
def season(x):
if str(x)[5:7] in ('06','07','08','12','01','02'):
if str(x)[5:7] in ('01', '02', '03', '04', '05', '06', '09', '10', '11', '12'):
return 1
else:
return 0
pd.set_option('display.width',None)
df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\舟山数据(1).xlsx',index_col='dtdate')
df.index = pd.to_datetime(df.index,format='%Y-%m-%d')
df['season'] = df.index.map(season)
print(df.head())
df_eval = df.loc['2023-9']
df_train = df.loc['2021-1':'2023-8']
print(len(df_eval),len(df_train),len(df))
df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']]
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
# high = df['售电量'].describe()['75%'] + 1.5*IQR
# low = df['售电量'].describe()['25%'] - 1.5*IQR
# print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] <= low)]))
#
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
return 2
def normal(nd):
    """Return the entries of *nd* that lie strictly inside the 1.5*IQR fences.

    The fences are ``Q1 - 1.5*IQR`` and ``Q3 + 1.5*IQR``, where Q1/Q3 are the
    '25%' and '75%' rows reported by ``nd.describe()``.  Both comparisons are
    strict, so a value equal to a fence is dropped (a constant series
    therefore yields an empty result).

    :param nd: object providing ``describe()`` and element-wise comparison;
        the callers in this script pass a pandas Series.
    :return: the subset of *nd* within the fences, original index preserved.
    """
    # Compute the summary once instead of four separate describe() calls.
    summary = nd.describe()
    q1, q3 = summary['25%'], summary['75%']
    margin = 1.5 * (q3 - q1)
    high = q3 + margin
    low = q1 - margin
    return nd[(nd < high) & (nd > low)]
parent_dir = os.path.abspath(os.path.join(os.getcwd(),os.pardir))
data = pd.read_excel(os.path.join(parent_dir,'入模数据/舟山.xlsx'),index_col='dtdate')
data.index = pd.to_datetime(data.index,format='%Y-%m-%d')
data = data.loc[normal(data['售电量']).index]
# list2 = []
# list0 = []
# list1 = []
# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'):
# month_index = data.index.strftime('%Y-%m-%d').str[5:7] == f'{i}'
# if data.loc[month_index]['售电量'].mean() >= data['售电量'].describe()['75%']:
# list2.append(i)
# elif data.loc[month_index]['售电量'].mean() <= data['售电量'].describe()['25%']:
# list0.append(i)
# else:
# list1.append(i)
#
# print(list0,list1,list2)
data['season'] = data.index.map(season)
df_eval = data.loc['2023-9']
# df_train = data.loc['2021-1':'2023-8']
df_train = data.iloc[500:850]
df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']]
X = df_train[['tem_max','tem_min','holiday','24ST','season']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
y = df_train['售电量']
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=158)
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=42)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train,y_train)
y_pred = model.predict(x_test)
result_test = pd.DataFrame({'test':y_test,'pred':y_pred},index=y_test.index)
# result_test.to_csv(r'C:\Users\鸽子\Desktop\test.csv',encoding='utf-8')
# 指标打印
print(abs(y_test - y_pred).mean() / y_test.mean())
@ -61,9 +71,7 @@ print(goal)
goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum()
print(goal2)
# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\舟山.csv')
# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
# f.write(f'舟山月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
model.save_model('zhoushan.bin')
loaded_model = xgb.XGBRegressor()
loaded_model.load_model('zhoushan.bin')

@ -5,46 +5,47 @@ from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rcParams['font.sans-serif']=['kaiti']
pd.set_option('display.width',None)
def season(x):
if str(x)[5:7] in ('06','07','08','12','01','02'):
if str(x)[5:7] in ('01', '02'):
return 0
elif str(x)[5:7] in ('03', '04', '05', '06', '09', '10', '11', '12'):
return 1
else:
return 0
mpl.rcParams['font.sans-serif']=['kaiti']
pd.set_option('display.width',None)
return 2
def normal(nd):
    """Return the entries of *nd* that lie strictly inside the 1.5*IQR fences.

    The fences are ``Q1 - 1.5*IQR`` and ``Q3 + 1.5*IQR``, where Q1/Q3 are the
    '25%' and '75%' rows reported by ``nd.describe()``.  Both comparisons are
    strict, so a value equal to a fence is dropped (a constant series
    therefore yields an empty result).

    :param nd: object providing ``describe()`` and element-wise comparison;
        the callers in this script pass a pandas Series.
    :return: the subset of *nd* within the fences, original index preserved.
    """
    # Compute the summary once instead of four separate describe() calls.
    summary = nd.describe()
    q1, q3 = summary['25%'], summary['75%']
    margin = 1.5 * (q3 - q1)
    high = q3 + margin
    low = q1 - margin
    return nd[(nd < high) & (nd > low)]
df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\衢州数据.xlsx')
df['dtdate'] = pd.to_datetime(df['dtdate'],format='%Y-%m-%d').astype('string')
df.set_index('dtdate',inplace=True)
df['season'] = df.index.map(season)
plt.plot(range(len(df)),df['售电量'])
plt.show()
print(df.head())
parent_dir = os.path.abspath(os.path.join(os.getcwd(),os.pardir))
data = pd.read_excel(os.path.join(parent_dir,'入模数据/衢州.xlsx'),index_col='dtdate')
data.index = pd.to_datetime(data.index,format='%Y-%m-%d')
data = data.loc[normal(data['售电量']).index]
# df_eval = df[(df.index.str[:10]=='2023-08-29')|(df.index.str[:10]=='2023-08-30')|(df.index.str[:10]=='2023-08-31')]
# list2 = []
# list0 = []
# list1 = []
# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'):
# month_index = data.index.strftime('%Y-%m-%d').str[5:7] == f'{i}'
# if data.loc[month_index]['售电量'].mean() >= data['售电量'].describe()['75%']:
# list2.append(i)
# elif data.loc[month_index]['售电量'].mean() <= data['售电量'].describe()['25%']:
# list0.append(i)
# else:
# list1.append(i)
#
# df_train = df[(df.index.str[:7]!='2023-09')&(df.index.str!='2023-08-29')&(df.index.str!='2023-08-30')&(df.index.str!='2023-08-31')]
# print(list0,list1,list2)
data['season'] = data.index.map(season)
df_eval = df[df.index.str[:7]=='2023-07']
df_train = df[(df.index.str[:7]!='2023-08')&(df.index.str[:7]!='2023-09')]
# df_train = df[450:900]
# max_8,min_8 = df_eval['售电量'].max(),df_eval['售电量'].min()
print(len(df_eval),len(df_train),len(df))
df_eval = data.loc['2023-08']
df_train = data.iloc[450:900]
df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']]
IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
high = df['售电量'].describe()['75%'] + 1.5*IQR
low = df['售电量'].describe()['25%'] - 1.5*IQR
print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] <= low)]))
df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']]
X = df_train[['tem_max','tem_min','holiday','24ST','season']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
@ -66,17 +67,8 @@ print(goal)
goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum()
print(goal2)
# print((result_eval['eval'].sum()-result_eval['pred'].sum())/result_eval['eval'].sum())
# print((result_eval['eval'].sum()-(result_eval['eval'][:-3].sum()+result_eval['pred'][-3:].sum()))/result_eval['eval'].sum())
# #
# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\衢州.csv')
# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
# f.write(f'衢州月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
#
# 保存模型
# model.save_model('quzhou.bin')
model.save_model('quzhou.bin')
loaded_model = xgb.XGBRegressor()
loaded_model.load_model('quzhou.bin')
import numpy as np
@ -89,22 +81,6 @@ X_eval = np.array([[24.0,15.6,23,0,0],
print(model.predict(X_eval))
# from sklearn.ensemble import RandomForestRegressor
# from sklearn.metrics import mean_squared_error
# rf = RandomForestRegressor(n_estimators=150,max_depth=6)
#
# # 在训练集上训练模型
# rf.fit(x_train, y_train)
#
# # 在测试集上进行预测
# y_pred = rf.predict(x_test)
# eval_pred = rf.predict(X_eval)
# result_eval = pd.DataFrame({'eval':df_eval['售电量'],'pred':eval_pred},index=df_eval['售电量'].index)
# print(result_eval)
# print((result_eval['eval'].sum()-result_eval['pred'].sum())/result_eval['eval'].sum())
# print((result_eval['eval'].sum()-(result_eval['eval'][:-3].sum()+result_eval['pred'][-3:].sum()))/result_eval['eval'].sum())
# import torch
# from torch import nn

@ -5,43 +5,48 @@ from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
def season(x):
    """Return 1 when the month field of *x* is a listed peak month, else 0.

    The month is taken as characters 5-6 of ``str(x)``.
    NOTE(review): this assumes ``str(x)`` starts with 'YYYY-MM' (e.g. a
    date or timestamp) -- confirm against the values in the index.

    :param x: any value whose string form carries the month at [5:7].
    :return: 1 for months 06, 07, 08, 12, 01, 02; 0 otherwise.
    """
    month = str(x)[5:7]
    if month in ('06', '07', '08', '12', '01', '02'):
        return 1
    return 0
mpl.rcParams['font.sans-serif']=['kaiti']
pd.set_option('display.width',None)
df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\金华数据.xlsx')
df['dtdate'] = pd.to_datetime(df['dtdate'],format='%Y-%m-%d').astype('string')
df.set_index('dtdate',inplace=True)
df['season'] = df.index.map(season)
plt.plot(range(len(df)),df['售电量'])
plt.show()
print(df.head())
df_eval = df[df.index.str[:7]=='2023-09']
df_train = df[(df.index.str[:7]!='2023-08')&(df.index.str[:7]!='2023-09')]
# df_train = df[500:850]
print(len(df_eval),len(df_train),len(df))
df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']]
def season(x):
    """Map the month field of *x* to one of three integer buckets.

    The month is taken as characters 5-6 of ``str(x)``.
    NOTE(review): this assumes ``str(x)`` starts with 'YYYY-MM' (e.g. a
    date or timestamp) -- confirm against the values in the index.
    NOTE(review): the bucket lists presumably come from the commented-out
    per-month quantile grouping in this script -- verify before editing.

    :param x: any value whose string form carries the month at [5:7].
    :return: 0 for months 01, 02, 04, 10; 1 for 03, 05, 06, 09, 11, 12;
        2 for anything else (07 and 08 for well-formed dates).
    """
    # Slice once; the original evaluated str(x)[5:7] in each branch.
    month = str(x)[5:7]
    if month in ('01', '02', '04', '10'):
        return 0
    if month in ('03', '05', '06', '09', '11', '12'):
        return 1
    return 2
def normal(nd):
    """Return the entries of *nd* that lie strictly inside the 1.5*IQR fences.

    The fences are ``Q1 - 1.5*IQR`` and ``Q3 + 1.5*IQR``, where Q1/Q3 are the
    '25%' and '75%' rows reported by ``nd.describe()``.  Both comparisons are
    strict, so a value equal to a fence is dropped (a constant series
    therefore yields an empty result).

    :param nd: object providing ``describe()`` and element-wise comparison;
        the callers in this script pass a pandas Series.
    :return: the subset of *nd* within the fences, original index preserved.
    """
    # Compute the summary once instead of four separate describe() calls.
    summary = nd.describe()
    q1, q3 = summary['25%'], summary['75%']
    margin = 1.5 * (q3 - q1)
    high = q3 + margin
    low = q1 - margin
    return nd[(nd < high) & (nd > low)]
parent_dir = os.path.abspath(os.path.join(os.getcwd(),os.pardir))
data = pd.read_excel(os.path.join(parent_dir,'入模数据/金华.xlsx'),index_col='dtdate')
data.index = pd.to_datetime(data.index,format='%Y-%m-%d')
data = data.loc[normal(data['售电量']).index]
# list2 = []
# list0 = []
# list1 = []
# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'):
# month_index = data.index.strftime('%Y-%m-%d').str[5:7] == f'{i}'
# if data.loc[month_index]['售电量'].mean() >= data['售电量'].describe()['75%']:
# list2.append(i)
# elif data.loc[month_index]['售电量'].mean() <= data['售电量'].describe()['25%']:
# list0.append(i)
# else:
# list1.append(i)
#
# print(list0,list1,list2)
data['season'] = data.index.map(season)
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
# high = df['售电量'].describe()['75%'] + 1.5*IQR
# low = df['售电量'].describe()['25%'] - 1.5*IQR
# print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] <= low)]))
#
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
df_eval = data.loc['2023-09']
# df_train = data.loc['2021-01':'2023-08']
df_train = data.iloc[450:900]
df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']]
X = df_train[['tem_max','tem_min','holiday','24ST','season']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
@ -51,11 +56,10 @@ y = df_train['售电量']
# best_i = {}
# for i in range(400):
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=142)
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=42)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train,y_train)
y_pred = model.predict(x_test)
result_test = pd.DataFrame({'test':y_test,'pred':y_pred},index=y_test.index)
@ -75,13 +79,8 @@ print(goal2)
# best_i['best_i'] = i
# x = goal2
# print(best_i,best_goal,x)
# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\金华.csv')
# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
# f.write(f'金华月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# # 保存模型
# model.save_model('jinhua.bin')
model.save_model('jinhua.bin')
loaded_model = xgb.XGBRegressor()
loaded_model.load_model('jinhua.bin')
import numpy as np

@ -1,7 +1,18 @@
import numpy as np
import pandas as pd
n1 = np.array([[1,1,1]])
n2 = np.array([1,1,1]).reshape(1,-1)
print(n2)
n2 = np.array([]).reshape(3,-1)
print(np.max([[1,2,3],[4,5,6]]))
print(np.max([[1,2,3],[4,5,6]]))
file_dir = r'C:\Users\鸽子\Desktop\浙江各地市分电压日电量数据'
df = pd.read_csv(r'C:\Users\鸽子\Desktop\浙江省各地市日电量数据21-23年 .csv',encoding='gbk')
df.columns = df.columns.map(lambda x:x.strip())
for city in df['地市'].drop_duplicates():
df_city = df[df['地市']== city]
df_city['stat_date'] = pd.to_datetime(df_city['stat_date'],format='%Y/%m/%d')
df_city = df_city[df_city.columns[:-1]]
df_city['stat_date'] = df_city['stat_date'].astype('str')
df_city.to_excel(fr'C:\Users\鸽子\Desktop\浙江各地市分电压日电量数据\{city}.xlsx',index=False)

@ -17,8 +17,8 @@ class LSTM_Regression(nn.Module):
self.fc = nn.Linear(hidden_size, output_size)
def forward(self, _x):
x, _ = self.lstm(_x) # _x is input, size (seq_len, batch, input_size) 一批多少条样本 多少批样本 每一个样本的输入特征大小10
s, b, h = x.shape # x is output, size (seq_len, batch, hidden_size) 经过lstm计算后输出为隐藏层大小
x, _ = self.lstm(_x) # _x is input, size (seq_len, batch, input_size)
s, b, h = x.shape # x is output, size (seq_len, batch, hidden_size)
x = x.view(s * b, h)
x = self.fc(x)
x = x.view(s, b, -1) # 把形状改回来
@ -44,141 +44,117 @@ def data_preprocessing(data):
data.sort_index(inplace=True)
data = data.loc['2021-01':'2023-08']
data.drop(columns=[i for i in data.columns if (data[i] == 0).sum() / len(data) >= 0.5], inplace=True) # 去除0值列
data = data[data.values != 0]
data = data.astype(float)
for col in data.columns:
data[col] = normal(data[col])
return data
# 拼接数据集
file_dir = r'C:\Users\鸽子\Desktop\浙江各地市分电压日电量数据'
excel = os.listdir(file_dir)[0]
data = pd.read_excel(os.path.join(file_dir, excel), sheet_name=0, index_col='stat_date')
data.drop(columns='地市',inplace=True)
data = data_preprocessing(data)
if __name__ == '__main__':
# 拼接数据集
file_dir = r'C:\Users\user\Desktop\浙江各地市分电压日电量数据'
excel = os.listdir(file_dir)[0]
df = data[data.columns[0]]
df.dropna(inplace = True)
dataset_x, dataset_y = create_dataset(df, DAYS_FOR_TRAIN)
data = pd.read_excel(os.path.join(file_dir, excel), sheet_name=0, index_col=' stat_date ')
for level in data.columns[1:]:
df = data[level]
df.dropna(inplace=True)
x, y = create_dataset(df, DAYS_FOR_TRAIN)
dataset_x = np.concatenate((dataset_x, x))
dataset_y = np.concatenate((dataset_y, y))
data = data_preprocessing(data)
df = data[data.columns[0]]
df.dropna(inplace = True)
dataset_x, dataset_y = create_dataset(df, DAYS_FOR_TRAIN)
for excel in os.listdir(file_dir)[1:]:
for level in data.columns[1:]:
data = pd.read_excel(os.path.join(file_dir,excel), sheet_name=0,index_col='stat_date')
data.drop(columns='地市', inplace=True)
data = data_preprocessing(data)
for level in data.columns:
df = data[level]
df.dropna(inplace=True)
x, y = create_dataset(df, DAYS_FOR_TRAIN)
dataset_x = np.concatenate((dataset_x, x))
dataset_y = np.concatenate((dataset_y, y))
for excel in os.listdir(file_dir)[1:]:
data = pd.read_excel(os.path.join(file_dir,excel), sheet_name=0,index_col=' stat_date ')
data = data_preprocessing(data)
for level in data.columns:
df = data[level]
df.dropna(inplace=True)
x,y = create_dataset(df,DAYS_FOR_TRAIN)
dataset_x = np.concatenate((dataset_x,x))
dataset_y = np.concatenate((dataset_y,y))
print(dataset_x,dataset_y,dataset_x.shape,dataset_y.shape)
# Training setup: pick the GPU when available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Min-max scale to 0~1 using the min/max of dataset_x; dataset_y reuses the same bounds
max_value = np.max(dataset_x)
min_value = np.min(dataset_x)
dataset_x = (dataset_x - min_value) / (max_value - min_value)
dataset_y = (dataset_y - min_value) / (max_value - min_value)
# Split into training and test sets: the first 70% of the samples are used for training
train_size = int(len(dataset_x)*0.7)
train_x = dataset_x[:train_size]
train_y = dataset_y[:train_size]
# Reshape the data: the RNN reads input with dimensions (seq_size, batch_size, feature_size)
train_x = train_x.reshape(-1, 1, DAYS_FOR_TRAIN)
train_y = train_y.reshape(-1, 1, 5)
# Convert to PyTorch tensor objects
train_x = torch.from_numpy(train_x).to(device).type(torch.float32)
train_y = torch.from_numpy(train_y).to(device).type(torch.float32)
model = LSTM_Regression(DAYS_FOR_TRAIN, 32, output_size=5, num_layers=2).to(device) # build the model and set its input/output layer and hidden layer parameters
train_loss = []
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.005, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
# NOTE(review): the training loop below is disabled; weights are loaded from 'dy5.pth' instead
# for i in range(1500):
# out = model(train_x)
# loss = loss_function(out, train_y)
# loss.backward()
# optimizer.step()
# optimizer.zero_grad()
# train_loss.append(loss.item())
# # print(loss)
# # Save the model
# torch.save(model.state_dict(),'dy5.pth')
model.load_state_dict(torch.load('dy5.pth'))
# for test
model = model.eval() # switch to evaluation mode
# model.load_state_dict(torch.load(os.path.join(model_save_dir,model_file))) # load the parameters
dataset_x = dataset_x.reshape(-1, 1, DAYS_FOR_TRAIN) # (seq_size, batch_size, feature_size)
dataset_x = torch.from_numpy(dataset_x).to(device).type(torch.float32)
pred_test = model(dataset_x) # run on the full dataset (training portion included)
# Model output is (seq_size, batch_size, output_size)
pred_test = pred_test.view(-1)
# Prepend DAYS_FOR_TRAIN zeros to the flattened predictions
pred_test = np.concatenate((np.zeros(DAYS_FOR_TRAIN), pred_test.cpu().detach().numpy()))
# plt.plot(pred_test.reshape(-1), 'r', label='prediction')
# plt.plot(dataset_y.reshape(-1), 'b', label='real')
# plt.plot((train_size*5, train_size*5), (0, 1), 'g--') # divider: training data on the left, test-data output on the right
# plt.legend(loc='best')
# plt.show()
x,y = create_dataset(df,DAYS_FOR_TRAIN)
dataset_x = np.concatenate((dataset_x,x))
dataset_y = np.concatenate((dataset_y,y))
print(dataset_x,dataset_y,dataset_x.shape,dataset_y.shape)
# 训练
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# 标准化到0~1
max_value = np.max(dataset_x)
min_value = np.min(dataset_x)
dataset_x = (dataset_x - min_value) / (max_value - min_value)
dataset_y = (dataset_y - min_value) / (max_value - min_value)
# 划分训练集和测试集
train_size = int(len(dataset_x)*0.7)
train_x = dataset_x[:train_size]
train_y = dataset_y[:train_size]
# 将数据改变形状RNN 读入的数据维度是 (seq_size, batch_size, feature_size)
train_x = train_x.reshape(-1, 1, DAYS_FOR_TRAIN)
train_y = train_y.reshape(-1, 1, 5)
# 转为pytorch的tensor对象
train_x = torch.from_numpy(train_x).to(device).type(torch.float32)
train_y = torch.from_numpy(train_y).to(device).type(torch.float32)
model = LSTM_Regression(DAYS_FOR_TRAIN, 32, output_size=5, num_layers=2).to(device) # 导入模型并设置模型的参数输入输出层、隐藏层等
train_loss = []
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.005, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
for i in range(1500):
out = model(train_x)
loss = loss_function(out, train_y)
loss.backward()
optimizer.step()
optimizer.zero_grad()
train_loss.append(loss.item())
print(loss)
# 保存模型
torch.save(model.state_dict(),'dy5.pth')
# for test
model = model.eval() # 转换成测试模式
# model.load_state_dict(torch.load(os.path.join(model_save_dir,model_file))) # 读取参数
dataset_x = dataset_x.reshape(-1, 1, DAYS_FOR_TRAIN) # (seq_size, batch_size, feature_size)
dataset_x = torch.from_numpy(dataset_x).to(device)
pred_test = model(dataset_x) # 全量训练集
# 模型输出 (seq_size, batch_size, output_size)
pred_test = pred_test.view(-1)
pred_test = np.concatenate((np.zeros(DAYS_FOR_TRAIN), pred_test.cpu().detach().numpy()))
plt.plot(pred_test, 'r', label='prediction')
plt.plot(df, 'b', label='real')
plt.plot((train_size, train_size), (0, 1), 'g--') # 分割线 左边是训练数据 右边是测试数据的输出
plt.legend(loc='best')
plt.show()
# 创建测试集
# result_list = []
# 以x为基础实际数据滚动预测未来3天
df_eval = pd.read_excel(r'C:\Users\user\Desktop\浙江各地市分电压日电量数据\杭州.xlsx',index_col=' stat_date ')
df_eval.columns = df_eval.columns.map(lambda x:x.strip())
df_eval.index = pd.to_datetime(df_eval.index)
x,y = create_dataset(df_eval.loc['2023-7']['10kv以下'],10)
x = (x - min_value) / (max_value - min_value)
x = x.reshape(-1,1,10)
x = torch.from_numpy(x).type(torch.float32).to(device)
pred = model(x)
# x = torch.from_numpy(df[-14:-4]).to(device)
# pred = model(x.reshape(-1,1,DAYS_FOR_TRAIN)).view(-1).detach().numpy()
# 反归一化
pred = pred * (max_value - min_value) + min_value
# pred = pred * (max_value - min_value) + min_value
# df = df * (max_value - min_value) + min_value
print(pred,y)
df = pd.DataFrame({'real':y.reshape(-1),'pred':pred.view(-1).cpu().detach().numpy()})
df.to_csv('7月预测.csv',encoding='gbk')
# 打印指标
# print(pred)
# # 打印指标
# print(abs(pred - df[-3:]).mean() / df[-3:].mean())
# result_eight = pd.DataFrame({'pred': np.round(pred,1),'real': df[-3:]})
# target = (result_eight['pred'].sum() - result_eight['real'].sum()) / df[-31:].sum()

Loading…
Cancel
Save