diff --git a/各地级市日电量模型/hangzhou.bin b/各地级市日电量模型/hangzhou.bin index 3df6ddc..03fd8df 100644 Binary files a/各地级市日电量模型/hangzhou.bin and b/各地级市日电量模型/hangzhou.bin differ diff --git a/各地级市日电量模型/huzhou.bin b/各地级市日电量模型/huzhou.bin index a60096c..a4b9fc9 100644 Binary files a/各地级市日电量模型/huzhou.bin and b/各地级市日电量模型/huzhou.bin differ diff --git a/各地级市日电量模型/jiaxing.bin b/各地级市日电量模型/jiaxing.bin index 3375d37..f64d1b2 100644 Binary files a/各地级市日电量模型/jiaxing.bin and b/各地级市日电量模型/jiaxing.bin differ diff --git a/各地级市日电量模型/jinhua.bin b/各地级市日电量模型/jinhua.bin index e35d168..73c5047 100644 Binary files a/各地级市日电量模型/jinhua.bin and b/各地级市日电量模型/jinhua.bin differ diff --git a/各地级市日电量模型/lishui.bin b/各地级市日电量模型/lishui.bin index 6c4be98..14f5aa8 100644 Binary files a/各地级市日电量模型/lishui.bin and b/各地级市日电量模型/lishui.bin differ diff --git a/各地级市日电量模型/ningbo.bin b/各地级市日电量模型/ningbo.bin index fdf04d4..b5e94ed 100644 Binary files a/各地级市日电量模型/ningbo.bin and b/各地级市日电量模型/ningbo.bin differ diff --git a/各地级市日电量模型/quzhou.bin b/各地级市日电量模型/quzhou.bin index 5a4d40d..3cde125 100644 Binary files a/各地级市日电量模型/quzhou.bin and b/各地级市日电量模型/quzhou.bin differ diff --git a/各地级市日电量模型/shaoxing.bin b/各地级市日电量模型/shaoxing.bin index 9220a7b..8b0d020 100644 Binary files a/各地级市日电量模型/shaoxing.bin and b/各地级市日电量模型/shaoxing.bin differ diff --git a/各地级市日电量模型/taizhou.bin b/各地级市日电量模型/taizhou.bin index 887f183..e6e74ad 100644 Binary files a/各地级市日电量模型/taizhou.bin and b/各地级市日电量模型/taizhou.bin differ diff --git a/各地级市日电量模型/wenzhou.bin b/各地级市日电量模型/wenzhou.bin index a112774..3f75ee1 100644 Binary files a/各地级市日电量模型/wenzhou.bin and b/各地级市日电量模型/wenzhou.bin differ diff --git a/各地级市日电量模型/zhoushan.bin b/各地级市日电量模型/zhoushan.bin index 549ebf4..2f61b61 100644 Binary files a/各地级市日电量模型/zhoushan.bin and b/各地级市日电量模型/zhoushan.bin differ diff --git a/各地级市日电量模型/丽水.py b/各地级市日电量模型/丽水.py index e42e244..fcbc0a2 100644 --- a/各地级市日电量模型/丽水.py +++ b/各地级市日电量模型/丽水.py @@ -6,48 +6,56 @@ from sklearn.model_selection import train_test_split import matplotlib as mpl import matplotlib.pyplot as plt mpl.rcParams['font.sans-serif']=['kaiti'] +pd.set_option('display.width',None) def season(x): - if str(x)[5:7] in ('06','07','08','12','01','02'): + if str(x)[5:7] in ('01', '10', '11'): + return 0 + elif str(x)[5:7] in ('02', '03', '04', '05', '06', '09', '12'): return 1 else: - return 0 + return 2 +def normal(nd): + high = nd.describe()['75%'] + 1.5*(nd.describe()['75%']-nd.describe()['25%']) + low = nd.describe()['25%'] - 1.5*(nd.describe()['75%']-nd.describe()['25%']) + return nd[(ndlow)] + -pd.set_option('display.width',None) parent_dir = os.path.abspath(os.path.join(os.getcwd(),os.pardir)) data = pd.read_excel(os.path.join(parent_dir,'入模数据/丽水.xlsx'),index_col='dtdate') data.index = pd.to_datetime(data.index,format='%Y-%m-%d') -data['season'] = data.index.map(season) - -print(data.head()) - -df_eval = data.loc['2023-9'] -df_train = data.loc['2021-1':'2023-8'] -# df_train = df[500:850] -print(len(df_eval),len(df_train),len(data)) -df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']] - -# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%'] -# high = df['售电量'].describe()['75%'] + 1.5*IQR -# low = df['售电量'].describe()['25%'] - 1.5*IQR -# print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] <= low)])) -# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)] +data = data.loc[normal(data['售电量']).index] # list2 = [] # list0 = [] # list1 = [] # for i in ('01','02','03','04','05','06','07','08','09','10','11','12'): -# month_index = df.index.strftime('%Y-%m-%d').str[5:7] == f'{i}' -# if df.loc[month_index]['售电量'].mean() >= df['售电量'].describe()['75%']: +# month_index = data.index.strftime('%Y-%m-%d').str[5:7] == f'{i}' +# print(data.loc[month_index]['售电量'].max(),data['售电量'].describe()['75%']) +# if data.loc[month_index]['售电量'].mean() >= data['售电量'].describe()['75%']: # list2.append(i) -# elif df.loc[month_index]['售电量'].mean() <= df['售电量'].describe()['25%']: +# elif data.loc[month_index]['售电量'].mean() <= data['售电量'].describe()['25%']: # list0.append(i) # else: # list1.append(i) # print(list0,list1,list2) + +data['season'] = data.index.map(season) + +print(data.head()) + +df_eval = data.loc['2023-9'] +# df_train = data.loc['2021-1':'2023-8'] +df_train = data[500:850] +print(len(df_eval),len(df_train),len(data)) +df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']] + + + + X = df_train[['tem_max','tem_min','holiday','24ST','season']] X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']] y = df_train['售电量'] @@ -56,7 +64,7 @@ y = df_train['售电量'] # for i in range(200): -x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.15,random_state=42) +x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42) model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150) model.fit(x_train,y_train) @@ -68,12 +76,12 @@ print(abs(y_test - y_pred).mean() / y_test.mean()) eval_pred = model.predict(X_eval) result_eval = pd.DataFrame({'eval':df_eval['售电量'],'pred':eval_pred},index=df_eval['售电量'].index) -# print(result_eval) -# print((result_eval['eval'].sum()-result_eval['pred'].sum())/result_eval['eval'].sum()) -print((result_eval['eval'][-3:].sum()-result_eval['pred'][-3:].sum())/result_eval['eval'].sum()) +print(result_eval) + goal = (result_eval['eval'][-3:].sum()-result_eval['pred'][-3:].sum())/result_eval['eval'].sum() -print((result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum()) +print(goal) goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum() +print(goal2) # if abs(goal) < best_goal: # best_goal = abs(goal) # best_i['best_i'] = i @@ -83,7 +91,7 @@ goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result # with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: # f.write(f'丽水月末3天偏差率:{goal},9号-月底偏差率:{goal2}') # # 保存模型 -# model.save_model('lishui.bin') +model.save_model('lishui.bin') import numpy as np loaded_model = xgb.XGBRegressor() loaded_model.load_model('lishui.bin') diff --git a/各地级市日电量模型/台州.py b/各地级市日电量模型/台州.py index 16a208a..86d9e2d 100644 --- a/各地级市日电量模型/台州.py +++ b/各地级市日电量模型/台州.py @@ -5,39 +5,47 @@ from sklearn.metrics import r2_score from sklearn.model_selection import train_test_split import matplotlib as mpl import matplotlib.pyplot as plt - -def season(x): - if str(x)[5:7] in ('06','07','08','12','01','02'): - return 1 - else: - return 0 mpl.rcParams['font.sans-serif']=['kaiti'] pd.set_option('display.width',None) -data = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\台州数据(1).xlsx',index_col='dtdate') +def season(x): + if str(x)[5:7] in ('01', '02', '10', '11'): + return 0 + elif str(x)[5:7] in ('03', '04', '05', '06', '09', '12'): + return 1 + else: + return 2 +def normal(nd): + high = nd.describe()['75%'] + 1.5*(nd.describe()['75%']-nd.describe()['25%']) + low = nd.describe()['25%'] - 1.5*(nd.describe()['75%']-nd.describe()['25%']) + return nd[(ndlow)] + +parent_dir = os.path.abspath(os.path.join(os.getcwd(),os.pardir)) +data = pd.read_excel(os.path.join(parent_dir,'入模数据/台州.xlsx'),index_col='dtdate') data.index = pd.to_datetime(data.index,format='%Y-%m-%d') +data = data.loc[normal(data['售电量']).index] + +list2 = [] +list0 = [] +list1 = [] +for i in ('01','02','03','04','05','06','07','08','09','10','11','12'): + month_index = data.index.strftime('%Y-%m-%d').str[5:7] == f'{i}' + if data.loc[month_index]['售电量'].mean() >= data['售电量'].describe()['75%']: + list2.append(i) + elif data.loc[month_index]['售电量'].mean() <= data['售电量'].describe()['25%']: + list0.append(i) + else: + list1.append(i) +print(list0,list1,list2) + data['season'] = data.index.map(season) -# plt.plot(range(len(data)),data['售电量']) -# plt.show() -print(data.head()) df_eval = data.loc['2023-8'] -# df_train = data.loc['2021-1':'2023-7'] -df_train = data[500:850] -print(len(df_eval),len(df_train),len(data)) - -df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']] - - -# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%'] -# high = df['售电量'].describe()['75%'] + 1.5*IQR -# low = df['售电量'].describe()['25%'] - 1.5*IQR -# print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] <= low)])) -# -# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)] +df_train = data[500:850] +df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']] X = df_train[['tem_max','tem_min','holiday','24ST','season']] X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']] @@ -60,22 +68,17 @@ eval_pred = model.predict(X_eval) result_eval = pd.DataFrame({'eval':df_eval['售电量'],'pred':eval_pred},index=df_eval['售电量'].index) # print(result_eval) # print((result_eval['eval'].sum()-result_eval['pred'].sum())/result_eval['eval'].sum()) -print((result_eval['eval'][-3:].sum()-result_eval['pred'][-3:].sum())/result_eval['eval'].sum()) goal = (result_eval['eval'][-3:].sum()-result_eval['pred'][-3:].sum())/result_eval['eval'].sum() -print((result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum()) +print(goal) goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum() +print(goal2) # if abs(goal) < best_goal: # best_goal = abs(goal) # best_i['best_i'] = i # print(best_i,best_goal) - -# -# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\台州.csv') -# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: -# f.write(f'台州月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n') -# # 保存模型 -# model.save_model('taizhou.bin') +# 保存模型 +model.save_model('taizhou.bin') import numpy as np loaded_model = xgb.XGBRegressor() loaded_model.load_model('taizhou.bin') diff --git a/各地级市日电量模型/嘉兴.py b/各地级市日电量模型/嘉兴.py index 1be7f4c..2bf21bf 100644 --- a/各地级市日电量模型/嘉兴.py +++ b/各地级市日电量模型/嘉兴.py @@ -5,41 +5,50 @@ from sklearn.metrics import r2_score from sklearn.model_selection import train_test_split import matplotlib as mpl import matplotlib.pyplot as plt +mpl.rcParams['font.sans-serif']=['kaiti'] +pd.set_option('display.width',None) + def season(x): - if str(x)[5:7] in ('06','07','08','12','01','02'): + if str(x)[5:7] in ('04', '10'): + return 0 + elif str(x)[5:7] in ('01', '02', '03', '05', '06', '09', '11', '12'): return 1 else: - return 0 -mpl.rcParams['font.sans-serif']=['kaiti'] + return 2 +def normal(nd): + high = nd.describe()['75%'] + 1.5*(nd.describe()['75%']-nd.describe()['25%']) + low = nd.describe()['25%'] - 1.5*(nd.describe()['75%']-nd.describe()['25%']) + return nd[(ndlow)] -pd.set_option('display.width',None) +parent_dir = os.path.abspath(os.path.join(os.getcwd(),os.pardir)) +data = pd.read_excel(os.path.join(parent_dir,'入模数据/嘉兴.xlsx'),index_col='dtdate') +data.index = pd.to_datetime(data.index,format='%Y-%m-%d') +data = data.loc[normal(data['售电量']).index] -df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\嘉兴数据.xlsx') -df['dtdate'] = pd.to_datetime(df['dtdate'],format='%Y-%m-%d').astype('string') -df.set_index('dtdate',inplace=True) -plt.plot(range(len(df)),df['售电量']) -plt.show() -print(df.head()) -df['season'] = df.index.map(season) +# list2 = [] +# list0 = [] +# list1 = [] +# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'): +# month_index = data.index.strftime('%Y-%m-%d').str[5:7] == f'{i}' +# if data.loc[month_index]['售电量'].mean() >= data['售电量'].describe()['75%']: +# list2.append(i) +# elif data.loc[month_index]['售电量'].mean() <= data['售电量'].describe()['25%']: +# list0.append(i) +# else: +# list1.append(i) +# print(list0,list1,list2) +data['season'] = data.index.map(season) -df_eval = df[df.index.str[:7]=='2023-08'] -# df_train = df[(df.index.str[:7]!='2023-08')&(df.index.str[:7]!='2023-09')] -df_train = df[500:850] -print(len(df_eval),len(df_train),len(df)) +df_eval = data.loc['2023-08'] +df_train = data.iloc[500:850] +# df_train = data.loc['2021-01':'2023-07'] +print(len(df_eval),len(df_train),len(data)) -df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']] - - -# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%'] -# high = df['售电量'].describe()['75%'] + 1.5*IQR -# low = df['售电量'].describe()['25%'] - 1.5*IQR -# print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] <= low)])) -# -# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)] +df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']] X = df_train[['tem_max','tem_min','holiday','24ST','season']] @@ -50,7 +59,7 @@ y = df_train['售电量'] # best_i = {} # for i in range(400): -x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=272) +x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=42) model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150) model.fit(x_train,y_train) @@ -61,14 +70,11 @@ result_test = pd.DataFrame({'test':y_test,'pred':y_pred},index=y_test.index) # 指标打印 print(abs(y_test - y_pred).mean() / y_test.mean()) eval_pred = model.predict(X_eval) - result_eval = pd.DataFrame({'eval':df_eval['售电量'],'pred':eval_pred},index=df_eval['售电量'].index) - goal = (result_eval['eval'][-3:].sum()-result_eval['pred'][-3:].sum())/result_eval['eval'].sum() -# print(goal) - goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum() print(goal,goal2) +print(result_eval) # print(goal2) # if abs(goal) < best_goal : # best_goal = abs(goal) @@ -77,13 +83,8 @@ print(goal,goal2) # # print(best_i,best_goal,x) - - -# result_eval.to_csv(r'C:\Users\user\Desktop\8月各地市日电量预测结果\嘉兴.csv') -# with open(r'C:\Users\user\Desktop\8月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: -# f.write(f'嘉兴月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n') # 保存模型 -# model.save_model('jiaxing.bin') +model.save_model('jiaxing.bin') loaded_model = xgb.XGBRegressor() loaded_model.load_model('jiaxing.bin') import numpy as np diff --git a/各地级市日电量模型/宁波.py b/各地级市日电量模型/宁波.py index 86d1569..b26424c 100644 --- a/各地级市日电量模型/宁波.py +++ b/各地级市日电量模型/宁波.py @@ -5,42 +5,47 @@ from sklearn.metrics import r2_score from sklearn.model_selection import train_test_split import matplotlib as mpl import matplotlib.pyplot as plt -def season(x): - if str(x)[5:7] in ('06','07','08','12','01','02'): - return 1 - else: - return 0 mpl.rcParams['font.sans-serif']=['kaiti'] - - pd.set_option('display.width',None) - -df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\宁波数据.xlsx',index_col='dtdate') -df.index = pd.to_datetime(df.index,format='%Y-%m-%d') -df['season'] = df.index.map(season) -plt.plot(range(len(df)),df['售电量']) -plt.show() -print(df.head()) - - -df_eval = df.loc['2023-09'] -df_train = df.loc['2021-01':'2023-08'] -# df_train = df[400:850] -print(len(df_eval),len(df_train),len(df)) - - - -df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']] - - -# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%'] -# high = df['售电量'].describe()['75%'] + 1.5*IQR -# low = df['售电量'].describe()['25%'] - 1.5*IQR -# print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] <= low)])) -# -# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)] - +def season(x): + if str(x)[5:7] in ('01', '04', '10'): + return 0 + elif str(x)[5:7] in ('02', '03', '05', '06', '09', '11', '12'): + return 1 + else: + return 2 +def normal(nd): + high = nd.describe()['75%'] + 1.5*(nd.describe()['75%']-nd.describe()['25%']) + low = nd.describe()['25%'] - 1.5*(nd.describe()['75%']-nd.describe()['25%']) + return nd[(ndlow)] + +parent_dir = os.path.abspath(os.path.join(os.getcwd(),os.pardir)) +data = pd.read_excel(os.path.join(parent_dir,'入模数据/宁波.xlsx'),index_col='dtdate') +data.index = pd.to_datetime(data.index,format='%Y-%m-%d') +data = data.loc[normal(data['售电量']).index] + +# list2 = [] +# list0 = [] +# list1 = [] +# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'): +# month_index = data.index.strftime('%Y-%m-%d').str[5:7] == f'{i}' +# if data.loc[month_index]['售电量'].mean() >= data['售电量'].describe()['75%']: +# list2.append(i) +# elif data.loc[month_index]['售电量'].mean() <= data['售电量'].describe()['25%']: +# list0.append(i) +# else: +# list1.append(i) +# print(list0,list1,list2) + + +data['season'] = data.index.map(season) +df_eval = data.loc['2023-09'] +df_train = data.loc['2021-01':'2023-08'] +# df_train = data[400:850] +print(len(df_eval),len(df_train)) + +df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']] X = df_train[['tem_max','tem_min','holiday','24ST','season']] X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']] @@ -53,8 +58,6 @@ y = df_train['售电量'] x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=18) model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150) model.fit(x_train,y_train) - - y_pred = model.predict(x_test) result_test = pd.DataFrame({'test':y_test,'pred':y_pred},index=y_test.index) @@ -76,12 +79,8 @@ print(goal2) # # print(best_i,best_goal,x) - -# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\宁波.csv') -# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: -# f.write(f'宁波月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n') # 保存模型 -# model.save_model('ningbo.bin') +model.save_model('ningbo.bin') import numpy as np loaded_model = xgb.XGBRegressor() loaded_model.load_model('ningbo.bin') diff --git a/各地级市日电量模型/杭州.py b/各地级市日电量模型/杭州.py index edb1a40..638d785 100644 --- a/各地级市日电量模型/杭州.py +++ b/各地级市日电量模型/杭州.py @@ -7,23 +7,15 @@ from sklearn.model_selection import train_test_split import matplotlib as mpl import matplotlib.pyplot as plt mpl.rcParams['font.sans-serif']=['kaiti'] - pd.set_option('display.width',None) -def hf_season(x): - list1= [] - for i in range(1,13): - if x.loc[f'2021-{i}'].mean() >= x.describe()['75%']: - list1.append(i) - return list1 - - - def season(x): - if str(x)[5:7] in ('06','07','08','12','01','02'): + if str(x)[5:7] in ('04', '10'): + return 0 + elif str(x)[5:7] in ('01', '02', '03', '05', '06', '09', '11', '12'): return 1 else: - return 0 + return 2 def month(x): if str(x)[5:7] in ('08','09','10','12','01','02'): return 1 @@ -34,47 +26,45 @@ def normal(nd): low = nd.describe()['25%'] - 1.5*(nd.describe()['75%']-nd.describe()['25%']) return nd[(ndlow)] - -data = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\杭州数据.xlsx',index_col='dtdate') +parent_dir = os.path.abspath(os.path.join(os.getcwd(),os.pardir)) +data = pd.read_excel(os.path.join(parent_dir,'入模数据/杭州.xlsx'),index_col='dtdate') data.index = pd.to_datetime(data.index,format='%Y-%m-%d') data = data.loc[normal(data['售电量']).index] -# plt.plot(range(len(data['售电量']['2021':'2022'])),data['售电量']['2021':'2022']) -# plt.show() - -# print(hf_season(data.loc['2021']['售电量'])) -data['month'] = data.index.strftime('%Y-%m-%d').str[6] -data['month'] = data['month'].astype('int') -data['season'] = data.index.map(season) -print(data.tail(50)) +# list2 = [] +# list0 = [] +# list1 = [] +# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'): +# month_index = data.index.strftime('%Y-%m-%d').str[5:7] == f'{i}' +# if data.loc[month_index]['售电量'].mean() >= data['售电量'].describe()['75%']: +# list2.append(i) +# elif data.loc[month_index]['售电量'].mean() <= data['售电量'].describe()['25%']: +# list0.append(i) +# else: +# list1.append(i) +# print(list0,list1,list2) -df_eval = data.loc['2022-9':'2023-9'] -df_train = data.loc['2021-1':'2022-8'] -# df_train = df[500:850] -print(len(df_eval),len(df_train),len(data)) -print(data.drop(columns='city_name').corr(method='pearson')['售电量']) +# data['month'] = data.index.strftime('%Y-%m-%d').str[6] +# data['month'] = data['month'].astype('int') +data['season'] = data.index.map(season) -df_train = df_train[['tem_max','tem_min','24ST','rh','rh_max','prs','prs_max','prs_min','售电量','month','holiday','season']] +# df_train = df[500:850] -# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%'] -# high = df['售电量'].describe()['75%'] + 1.5*IQR -# low = df['售电量'].describe()['25%'] - 1.5*IQR -# print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] <= low)])) -# -# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)] +df_train = data.loc['2021-01':'2023-08'] +df_eval = data.loc['2023-9'] X = df_train[['tem_max','tem_min','24ST','holiday','season']] X_eval = df_eval[['tem_max','tem_min','24ST','holiday','season']] y = df_train['售电量'] -print(y.describe()) + # best_goal = 1 # best_i = {} # for i in range(400): -x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42) +x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=142) model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150) model.fit(x_train,y_train) @@ -83,20 +73,17 @@ result_test = pd.DataFrame({'test':y_test,'pred':y_pred},index=y_test.index) # 指标打印 print(abs(y_test - y_pred).mean() / y_test.mean()) -# eval_pred = model.predict(X_eval) -# -# result_eval = pd.DataFrame({'eval':df_eval['售电量'],'pred':eval_pred},index=df_eval['售电量'].index) -# -# print((result_eval['eval'].sum()-result_eval['pred'].sum())/result_eval['eval'].sum()) -# -# goal = (result_eval['eval'][-3:].sum()-result_eval['pred'][-3:].sum())/result_eval['eval'].sum() -# print('goal:',goal) -# -# goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum() -# -# print('goal2:',goal2) -# print(result_eval) -# print('r2:',r2_score(y_test,y_pred)) +eval_pred = model.predict(X_eval) +result_eval = pd.DataFrame({'eval':df_eval['售电量'],'pred':eval_pred},index=df_eval['售电量'].index) + +goal = (result_eval['eval'][-3:].sum()-result_eval['pred'][-3:].sum())/result_eval['eval'].sum() +print('goal:',goal) + +goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum() + +print('goal2:',goal2) +print(result_eval) +print('r2:',r2_score(y_test,y_pred)) # # # result_eval.to_csv('asda.csv',encoding='gbk') # # if abs(goal) < best_goal: @@ -111,30 +98,17 @@ print(abs(y_test - y_pred).mean() / y_test.mean()) # # with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: # # f.write(f'杭州月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n') # 保存模型 -# model.save_model('hangzhou.bin') +model.save_model('hangzhou.bin') # X_eval = df_eval[['tem_max','tem_min','24ST','holiday','season']] -df_eval = pd.read_excel(r'C:\Users\user\Desktop\浙江气象1027.xlsx') -df_eval.columns = df_eval.columns.map(lambda x:x.strip()) -df_eval = df_eval[['city_name','dtdate','tem_max','tem_min']] -df_eval['city_name'] = df_eval['city_name'].map(lambda x:x.strip()) -df_hangzhou = df_eval[df_eval['city_name']=='金华市'].sort_values(by='dtdate') +# df_eval = pd.read_excel(r'C:\Users\user\Desktop\浙江气象1027.xlsx') +# df_eval.columns = df_eval.columns.map(lambda x:x.strip()) +# df_eval = df_eval[['city_name','dtdate','tem_max','tem_min']] +# df_eval['city_name'] = df_eval['city_name'].map(lambda x:x.strip()) +# df_hangzhou = df_eval[df_eval['city_name']=='金华市'].sort_values(by='dtdate') -# list2 = [] -# list0 = [] -# list1 = [] -# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'): -# month_index = df.index.strftime('%Y-%m-%d').str[5:7] == f'{i}' -# if df.loc[month_index]['售电量'].mean() >= df['售电量'].describe()['75%']: -# list2.append(i) -# elif df.loc[month_index]['售电量'].mean() <= df['售电量'].describe()['25%']: -# list0.append(i) -# else: -# list1.append(i) -# print(list0,list1,list2) -print(df_hangzhou) loaded_model = xgb.XGBRegressor() loaded_model.load_model('hangzhou.bin') diff --git a/各地级市日电量模型/温州.py b/各地级市日电量模型/温州.py index 8c72a08..1b518ab 100644 --- a/各地级市日电量模型/温州.py +++ b/各地级市日电量模型/温州.py @@ -5,41 +5,50 @@ from sklearn.metrics import r2_score from sklearn.model_selection import train_test_split import matplotlib as mpl import matplotlib.pyplot as plt -def season(x): - if str(x)[5:7] in ('06','07','08','12','01','02'): - return 1 - else: - return 0 mpl.rcParams['font.sans-serif']=['kaiti'] - - pd.set_option('display.width',None) - -df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\温州数据(1).xlsx',index_col='dtdate') -df.index = pd.to_datetime(df.index,format='%Y-%m-%d') -df['season'] = df.index.map(season) -plt.plot(range(len(df)),df['售电量']) -plt.show() -print(df.head()) +def season(x): + if str(x)[5:7] in ('01', '02', '10'): + return 0 + elif str(x)[5:7] in ('03', '04', '05', '06', '11', '12'): + return 1 + else: + return 2 + +def normal(nd): + high = nd.describe()['75%'] + 1.5*(nd.describe()['75%']-nd.describe()['25%']) + low = nd.describe()['25%'] - 1.5*(nd.describe()['75%']-nd.describe()['25%']) + return nd[(ndlow)] + + +parent_dir = os.path.abspath(os.path.join(os.getcwd(),os.pardir)) +data = pd.read_excel(os.path.join(parent_dir,'入模数据/温州.xlsx'),index_col='dtdate') +data.index = pd.to_datetime(data.index,format='%Y-%m-%d') +data = data.loc[normal(data['售电量']).index] + +# list2 = [] +# list0 = [] +# list1 = [] +# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'): +# month_index = data.index.strftime('%Y-%m-%d').str[5:7] == f'{i}' +# if data.loc[month_index]['售电量'].mean() >= data['售电量'].describe()['75%']: +# list2.append(i) +# elif data.loc[month_index]['售电量'].mean() <= data['售电量'].describe()['25%']: +# list0.append(i) +# else: +# list1.append(i) +# +# print(list0,list1,list2) +data['season'] = data.index.map(season) -df_eval = df.loc['2023-9'] +df_eval = data.loc['2023-9'] # df_train = df[(df.index.str[:7]!='2023-08')&(df.index.str[:7]!='2023-09')] -df_train = df[400:850] -print(len(df_eval),len(df_train),len(df)) - - - -df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']] +df_train = data[400:850] -# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%'] -# high = df['售电量'].describe()['75%'] + 1.5*IQR -# low = df['售电量'].describe()['25%'] - 1.5*IQR -# print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] <= low)])) -# -# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)] +df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']] X = df_train[['tem_max','tem_min','holiday','24ST','season']] @@ -54,7 +63,6 @@ x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state= model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150) model.fit(x_train,y_train) - y_pred = model.predict(x_test) result_test = pd.DataFrame({'test':y_test,'pred':y_pred},index=y_test.index) @@ -69,18 +77,14 @@ print(goal) goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum() print(goal2) + # if abs(goal) < best_goal : # best_goal = abs(goal) # best_i['best_i'] = i # x = goal2 - # print(best_i,best_goal,x) - -# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\温州.csv') -# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: -# f.write(f'温州月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n') # 保存模型 -# model.save_model('wenzhou.bin') +model.save_model('wenzhou.bin') loaded_model = xgb.XGBRegressor() diff --git a/各地级市日电量模型/湖州.py b/各地级市日电量模型/湖州.py index 07d7d53..88674d4 100644 --- a/各地级市日电量模型/湖州.py +++ b/各地级市日电量模型/湖州.py @@ -5,42 +5,55 @@ from sklearn.metrics import r2_score from sklearn.model_selection import train_test_split import matplotlib as mpl mpl.rcParams['font.sans-serif']=['kaiti'] +pd.set_option('display.width',None) import random import matplotlib.pyplot as plt + def season(x): - if str(x)[5:7] in ('06','07','08','12','01','02'): + if str(x)[5:7] in ('10'): + return 0 + elif str(x)[5:7] in ('01', '02', '03', '04', '05', '06', '09', '11', '12'): return 1 else: - return 0 -pd.set_option('display.width',None) - - -df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\湖州数据.xlsx',index_col='dtdate') -df.index = pd.to_datetime(df.index,format='%Y-%m-%d') -df['season'] = df.index.map(season) -print(df.head()) - -df_eval = df.loc['2023-9'] -df_train = df.loc['2021-1':'2023-8'] -print(len(df_eval),len(df_train),len(df)) - - -df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']] + return 2 + +def normal(nd): + high = nd.describe()['75%'] + 1.5*(nd.describe()['75%']-nd.describe()['25%']) + low = nd.describe()['25%'] - 1.5*(nd.describe()['75%']-nd.describe()['25%']) + return nd[(ndlow)] + + +parent_dir = os.path.abspath(os.path.join(os.getcwd(),os.pardir)) +data = pd.read_excel(os.path.join(parent_dir,'入模数据/湖州.xlsx'),index_col='dtdate') +data.index = pd.to_datetime(data.index,format='%Y-%m-%d') +data = data.loc[normal(data['售电量']).index] + +# list2 = [] +# list0 = [] +# list1 = [] +# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'): +# month_index = data.index.strftime('%Y-%m-%d').str[5:7] == f'{i}' +# if data.loc[month_index]['售电量'].mean() >= data['售电量'].describe()['75%']: +# list2.append(i) +# elif data.loc[month_index]['售电量'].mean() <= data['售电量'].describe()['25%']: +# list0.append(i) +# else: +# list1.append(i) +# +# print(list0,list1,list2) +data['season'] = data.index.map(season) -# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%'] -# high = df['售电量'].describe()['75%'] + 1.5*IQR -# low = df['售电量'].describe()['25%'] - 1.5*IQR -# print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] <= low)])) -# -# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)] +df_eval = data.loc['2023-9'] +df_train = data.loc['2021-1':'2023-8'] +df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']] X = df_train[['tem_max','tem_min','holiday','24ST','season']] X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']] y = df_train['售电量'] -x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=100) +x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=42) model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150) model.fit(x_train,y_train) @@ -48,7 +61,6 @@ model.fit(x_train,y_train) y_pred = model.predict(x_test) result_test = pd.DataFrame({'test':y_test,'pred':y_pred},index=y_test.index) - # 指标打印 print(abs(y_test - y_pred).mean() / y_test.mean()) eval_pred = model.predict(X_eval) @@ -57,19 +69,11 @@ result_eval = pd.DataFrame({'eval':df_eval['售电量'],'pred':eval_pred},index= goal = (result_eval['eval'][-3:].sum()-result_eval['pred'][-3:].sum())/result_eval['eval'].sum() print(goal) - goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum() print(goal2) - - -# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\湖州.csv') -# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: -# f.write(f'湖州月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n') - # 保存模型 - -# model.save_model('huzhou.bin') +model.save_model('huzhou.bin') loaded_model = xgb.XGBRegressor() loaded_model.load_model('huzhou.bin') import numpy as np diff --git a/各地级市日电量模型/绍兴.py b/各地级市日电量模型/绍兴.py index 0dbdc6e..8be13f0 100644 --- a/各地级市日电量模型/绍兴.py +++ b/各地级市日电量模型/绍兴.py @@ -5,48 +5,57 @@ from sklearn.metrics import r2_score from sklearn.model_selection import train_test_split import matplotlib as mpl import matplotlib.pyplot as plt -def season(x): - if str(x)[5:7] in ('06','07','08','12','01','02'): - return 1 - else: - return 0 mpl.rcParams['font.sans-serif']=['kaiti'] - - pd.set_option('display.width',None) +def season(x): + if str(x)[5:7] in ('01', '02', '10', '11'): + return 0 + elif str(x)[5:7] in ('03', '04', '05', '06', '09', '12'): + return 1 + else: + return 2 + +def normal(nd): + high = nd.describe()['75%'] + 1.5*(nd.describe()['75%']-nd.describe()['25%']) + low = nd.describe()['25%'] - 1.5*(nd.describe()['75%']-nd.describe()['25%']) + return nd[(ndlow)] + + +parent_dir = os.path.abspath(os.path.join(os.getcwd(),os.pardir)) +data = pd.read_excel(os.path.join(parent_dir,'入模数据/绍兴.xlsx'),index_col='dtdate') +data.index = pd.to_datetime(data.index,format='%Y-%m-%d') +data = data.loc[normal(data['售电量']).index] + +# list2 = [] +# list0 = [] +# list1 = [] +# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'): +# month_index = data.index.strftime('%Y-%m-%d').str[5:7] == f'{i}' +# if data.loc[month_index]['售电量'].mean() >= data['售电量'].describe()['75%']: +# list2.append(i) +# elif data.loc[month_index]['售电量'].mean() <= data['售电量'].describe()['25%']: +# list0.append(i) +# else: +# list1.append(i) +# +# print(list0,list1,list2) +data['season'] = data.index.map(season) -df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\绍兴数据(1).xlsx',index_col='dtdate') -df.index = pd.to_datetime(df.index ,format='%Y-%m-%d') -df['season'] = df.index.map(season) -plt.plot(range(len(df)),df['售电量']) -plt.show() -print(df.head()) - - -df_eval = df.loc['2023-9'] -df_train = df.loc['2021-1':'2023-8'] +df_eval = data.loc['2023-9'] +df_train = data.loc['2021-1':'2023-8'] # df_train = df[400:850] -print(len(df_eval),len(df_train),len(df)) - +print(len(df_eval),len(df_train),len(data)) -df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']] +df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']] -# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%'] -# high = df['售电量'].describe()['75%'] + 1.5*IQR -# low = df['售电量'].describe()['25%'] - 1.5*IQR -# print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] <= low)])) -# -# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)] - X = df_train[['tem_max','tem_min','holiday','24ST','season']] X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']] y = df_train['售电量'] - # best_goal = 1 # best_i = {} # for i in range(400): @@ -74,16 +83,8 @@ print(goal2) # best_goal = abs(goal) # best_i['best_i'] = i # x = goal2 - # print(best_i,best_goal,x) - - -# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\绍兴.csv') -# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: -# f.write(f'绍兴月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n') - - # 保存模型 model.save_model('shaoxing.bin') loaded_model = xgb.XGBRegressor() diff --git a/各地级市日电量模型/舟山.py b/各地级市日电量模型/舟山.py index 8535146..d9b8d08 100644 --- a/各地级市日电量模型/舟山.py +++ b/各地级市日电量模型/舟山.py @@ -5,49 +5,59 @@ from sklearn.metrics import r2_score from sklearn.model_selection import train_test_split import matplotlib as mpl mpl.rcParams['font.sans-serif']=['kaiti'] +pd.set_option('display.width',None) import random import matplotlib.pyplot as plt + def season(x): - if str(x)[5:7] in ('06','07','08','12','01','02'): + if str(x)[5:7] in ('01', '02', '03', '04', '05', '06', '09', '10', '11', '12'): return 1 else: - return 0 -pd.set_option('display.width',None) - - -df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\舟山数据(1).xlsx',index_col='dtdate') -df.index = pd.to_datetime(df.index,format='%Y-%m-%d') -df['season'] = df.index.map(season) -print(df.head()) - -df_eval = df.loc['2023-9'] -df_train = df.loc['2021-1':'2023-8'] -print(len(df_eval),len(df_train),len(df)) - - -df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']] - - -# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%'] -# high = df['售电量'].describe()['75%'] + 1.5*IQR -# low = df['售电量'].describe()['25%'] - 1.5*IQR -# print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] <= low)])) -# -# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)] - - + return 2 +def normal(nd): + high = nd.describe()['75%'] + 1.5*(nd.describe()['75%']-nd.describe()['25%']) + low = nd.describe()['25%'] - 1.5*(nd.describe()['75%']-nd.describe()['25%']) + return nd[(ndlow)] + + +parent_dir = os.path.abspath(os.path.join(os.getcwd(),os.pardir)) +data = pd.read_excel(os.path.join(parent_dir,'入模数据/舟山.xlsx'),index_col='dtdate') +data.index = pd.to_datetime(data.index,format='%Y-%m-%d') +data = data.loc[normal(data['售电量']).index] + + +# list2 = [] +# list0 = [] +# list1 = [] +# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'): +# month_index = data.index.strftime('%Y-%m-%d').str[5:7] == f'{i}' +# if data.loc[month_index]['售电量'].mean() >= data['售电量'].describe()['75%']: +# list2.append(i) +# elif data.loc[month_index]['售电量'].mean() <= data['售电量'].describe()['25%']: +# list0.append(i) +# else: +# list1.append(i) +# +# print(list0,list1,list2) + +data['season'] = data.index.map(season) +df_eval = data.loc['2023-9'] +# df_train = data.loc['2021-1':'2023-8'] +df_train = data.iloc[500:850] + +df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']] X = df_train[['tem_max','tem_min','holiday','24ST','season']] X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']] y = df_train['售电量'] -x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=158) +x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=42) model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150) model.fit(x_train,y_train) y_pred = model.predict(x_test) result_test = pd.DataFrame({'test':y_test,'pred':y_pred},index=y_test.index) -# result_test.to_csv(r'C:\Users\鸽子\Desktop\test.csv',encoding='utf-8') + # 指标打印 print(abs(y_test - y_pred).mean() / y_test.mean()) @@ -61,9 +71,7 @@ print(goal) goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum() print(goal2) -# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\舟山.csv') -# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: -# f.write(f'舟山月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n') + model.save_model('zhoushan.bin') loaded_model = xgb.XGBRegressor() loaded_model.load_model('zhoushan.bin') diff --git a/各地级市日电量模型/衢州.py b/各地级市日电量模型/衢州.py index 948779a..e62c84e 100644 --- a/各地级市日电量模型/衢州.py +++ b/各地级市日电量模型/衢州.py @@ -5,46 +5,47 @@ from sklearn.metrics import r2_score from sklearn.model_selection import train_test_split import matplotlib as mpl import matplotlib.pyplot as plt +mpl.rcParams['font.sans-serif']=['kaiti'] +pd.set_option('display.width',None) def season(x): - if str(x)[5:7] in ('06','07','08','12','01','02'): + if str(x)[5:7] in ('01', '02'): + return 0 + elif str(x)[5:7] in ('03', '04', '05', '06', '09', '10', '11', '12'): return 1 else: - return 0 - -mpl.rcParams['font.sans-serif']=['kaiti'] - - -pd.set_option('display.width',None) + return 2 +def normal(nd): + high = nd.describe()['75%'] + 1.5*(nd.describe()['75%']-nd.describe()['25%']) + low = nd.describe()['25%'] - 1.5*(nd.describe()['75%']-nd.describe()['25%']) + return nd[(ndlow)] -df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\衢州数据.xlsx') -df['dtdate'] = pd.to_datetime(df['dtdate'],format='%Y-%m-%d').astype('string') -df.set_index('dtdate',inplace=True) -df['season'] = df.index.map(season) -plt.plot(range(len(df)),df['售电量']) -plt.show() -print(df.head()) +parent_dir = os.path.abspath(os.path.join(os.getcwd(),os.pardir)) +data = pd.read_excel(os.path.join(parent_dir,'入模数据/衢州.xlsx'),index_col='dtdate') +data.index = pd.to_datetime(data.index,format='%Y-%m-%d') +data = data.loc[normal(data['售电量']).index] -# df_eval = df[(df.index.str[:10]=='2023-08-29')|(df.index.str[:10]=='2023-08-30')|(df.index.str[:10]=='2023-08-31')] +# list2 = [] +# list0 = [] +# list1 = [] +# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'): +# month_index = data.index.strftime('%Y-%m-%d').str[5:7] == f'{i}' +# if data.loc[month_index]['售电量'].mean() >= data['售电量'].describe()['75%']: +# list2.append(i) +# elif data.loc[month_index]['售电量'].mean() <= data['售电量'].describe()['25%']: +# list0.append(i) +# else: +# list1.append(i) # -# df_train = df[(df.index.str[:7]!='2023-09')&(df.index.str!='2023-08-29')&(df.index.str!='2023-08-30')&(df.index.str!='2023-08-31')] +# print(list0,list1,list2) +data['season'] = data.index.map(season) -df_eval = df[df.index.str[:7]=='2023-07'] -df_train = df[(df.index.str[:7]!='2023-08')&(df.index.str[:7]!='2023-09')] -# df_train = df[450:900] -# max_8,min_8 = df_eval['售电量'].max(),df_eval['售电量'].min() -print(len(df_eval),len(df_train),len(df)) +df_eval = data.loc['2023-08'] +df_train = data.iloc[450:900] -df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']] - -IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%'] -high = df['售电量'].describe()['75%'] + 1.5*IQR -low = df['售电量'].describe()['25%'] - 1.5*IQR -print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] <= low)])) - -df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)] +df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']] X = df_train[['tem_max','tem_min','holiday','24ST','season']] X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']] @@ -66,17 +67,8 @@ print(goal) goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum() print(goal2) - -# print((result_eval['eval'].sum()-result_eval['pred'].sum())/result_eval['eval'].sum()) -# print((result_eval['eval'].sum()-(result_eval['eval'][:-3].sum()+result_eval['pred'][-3:].sum()))/result_eval['eval'].sum()) -# # -# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\衢州.csv') -# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: -# f.write(f'衢州月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n') - -# # 保存模型 -# model.save_model('quzhou.bin') +model.save_model('quzhou.bin') loaded_model = xgb.XGBRegressor() loaded_model.load_model('quzhou.bin') import numpy as np @@ -89,22 +81,6 @@ X_eval = np.array([[24.0,15.6,23,0,0], print(model.predict(X_eval)) -# from sklearn.ensemble import RandomForestRegressor -# from sklearn.metrics import mean_squared_error -# rf = RandomForestRegressor(n_estimators=150,max_depth=6) -# -# # 在训练集上训练模型 -# rf.fit(x_train, y_train) -# -# # 在测试集上进行预测 -# y_pred = rf.predict(x_test) -# eval_pred = rf.predict(X_eval) -# result_eval = pd.DataFrame({'eval':df_eval['售电量'],'pred':eval_pred},index=df_eval['售电量'].index) -# print(result_eval) -# print((result_eval['eval'].sum()-result_eval['pred'].sum())/result_eval['eval'].sum()) -# print((result_eval['eval'].sum()-(result_eval['eval'][:-3].sum()+result_eval['pred'][-3:].sum()))/result_eval['eval'].sum()) - - # import torch # from torch import nn diff --git a/各地级市日电量模型/金华.py b/各地级市日电量模型/金华.py index b99fe5e..9daae93 100644 --- a/各地级市日电量模型/金华.py +++ b/各地级市日电量模型/金华.py @@ -5,43 +5,48 @@ from sklearn.metrics import r2_score from sklearn.model_selection import train_test_split import matplotlib as mpl import matplotlib.pyplot as plt -def season(x): - if str(x)[5:7] in ('06','07','08','12','01','02'): - return 1 - else: - return 0 mpl.rcParams['font.sans-serif']=['kaiti'] - - pd.set_option('display.width',None) - -df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\金华数据.xlsx') -df['dtdate'] = pd.to_datetime(df['dtdate'],format='%Y-%m-%d').astype('string') -df.set_index('dtdate',inplace=True) -df['season'] = df.index.map(season) -plt.plot(range(len(df)),df['售电量']) -plt.show() -print(df.head()) - - -df_eval = df[df.index.str[:7]=='2023-09'] -df_train = df[(df.index.str[:7]!='2023-08')&(df.index.str[:7]!='2023-09')] -# df_train = df[500:850] -print(len(df_eval),len(df_train),len(df)) - - - -df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']] +def season(x): + if str(x)[5:7] in ('01', '02', '04', '10'): + return 0 + elif str(x)[5:7] in ('03', '05', '06', '09', '11', '12'): + return 1 + else: + return 2 + +def normal(nd): + high = nd.describe()['75%'] + 1.5*(nd.describe()['75%']-nd.describe()['25%']) + low = nd.describe()['25%'] - 1.5*(nd.describe()['75%']-nd.describe()['25%']) + return nd[(ndlow)] + +parent_dir = os.path.abspath(os.path.join(os.getcwd(),os.pardir)) +data = pd.read_excel(os.path.join(parent_dir,'入模数据/金华.xlsx'),index_col='dtdate') +data.index = pd.to_datetime(data.index,format='%Y-%m-%d') +data = data.loc[normal(data['售电量']).index] + +# list2 = [] +# list0 = [] +# list1 = [] +# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'): +# month_index = data.index.strftime('%Y-%m-%d').str[5:7] == f'{i}' +# if data.loc[month_index]['售电量'].mean() >= data['售电量'].describe()['75%']: +# list2.append(i) +# elif data.loc[month_index]['售电量'].mean() <= data['售电量'].describe()['25%']: +# list0.append(i) +# else: +# list1.append(i) +# +# print(list0,list1,list2) +data['season'] = data.index.map(season) -# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%'] -# high = df['售电量'].describe()['75%'] + 1.5*IQR -# low = df['售电量'].describe()['25%'] - 1.5*IQR -# print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] <= low)])) -# -# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)] +df_eval = data.loc['2023-09'] +# df_train = data.loc['2021-01':'2023-08'] +df_train = data.iloc[450:900] +df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']] X = df_train[['tem_max','tem_min','holiday','24ST','season']] X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']] @@ -51,11 +56,10 @@ y = df_train['售电量'] # best_i = {} # for i in range(400): -x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=142) +x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=42) model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150) model.fit(x_train,y_train) - y_pred = model.predict(x_test) result_test = pd.DataFrame({'test':y_test,'pred':y_pred},index=y_test.index) @@ -75,13 +79,8 @@ print(goal2) # best_i['best_i'] = i # x = goal2 -# print(best_i,best_goal,x) -# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\金华.csv') -# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: -# f.write(f'金华月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n') - # # 保存模型 -# model.save_model('jinhua.bin') +model.save_model('jinhua.bin') loaded_model = xgb.XGBRegressor() loaded_model.load_model('jinhua.bin') import numpy as np