diff --git a/各地级市日电量模型/杭州.py b/各地级市日电量模型/杭州.py index f44c991..3e7661f 100644 --- a/各地级市日电量模型/杭州.py +++ b/各地级市日电量模型/杭州.py @@ -9,20 +9,52 @@ mpl.rcParams['font.sans-serif']=['kaiti'] pd.set_option('display.width',None) -data = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\杭州数据.xlsx',index_col='dtdate') +def hf_season(x): + list1= [] + for i in range(1,13): + if x.loc[f'2021-{i}'].mean() >= x.describe()['75%']: + list1.append(i) + return list1 + + + +def season(x): + if str(x)[5:7] in ('06','07','08','12','01','02'): + return 1 + else: + return 0 +def month(x): + if str(x)[5:7] in ('08','09','10','12','01','02'): + return 1 + else: + return 0 +def normal(nd): + high = nd.describe()['75%'] + 1.5*(nd.describe()['75%']-nd.describe()['25%']) + low = nd.describe()['25%'] - 1.5*(nd.describe()['75%']-nd.describe()['25%']) + return nd[(nd < high) & (nd > low)] + + +data = pd.read_excel(r'C:\python-project\pytorch3\入模数据\杭州数据.xlsx',index_col='dtdate') data.index = pd.to_datetime(data.index,format='%Y-%m-%d') - +data = data.loc[normal(data['售电量']).index] plt.plot(range(len(data)),data['售电量']) plt.show() -print(data.head()) + +# print(hf_season(data.loc['2021']['售电量'])) + +data['month'] = data.index.strftime('%Y-%m-%d').str[6] +data['month'] = data['month'].astype('int') +data['season'] = data.index.map(season) +print(data.head(50)) df_eval = data.loc['2023-9'] df_train = data.loc['2021-1':'2023-8'] # df_train = df[500:850] print(len(df_eval),len(df_train),len(data)) +print(data.drop(columns='city_name').corr(method='pearson')['season']) -df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量']] +df_train = df_train[['tem_max','tem_min','24ST','rh','rh_max','prs','prs_max','prs_min','售电量','month','holiday','season']] # IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%'] @@ -33,14 +65,15 @@ df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量' # df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)] -X = 
df_train[['tem_max','tem_min','holiday','24ST','rh','prs']] -X_eval = df_eval[['tem_max','tem_min','holiday','24ST','rh','prs']] +X = df_train[['tem_max','tem_min','24ST','holiday','season']] +X_eval = df_eval[['tem_max','tem_min','24ST','holiday','season']] y = df_train['售电量'] +print(y.describe()) # best_goal = 1 # best_i = {} # for i in range(400): -x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.15,random_state=216) +x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.15,random_state=209) model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150) model.fit(x_train,y_train) @@ -53,24 +86,25 @@ eval_pred = model.predict(X_eval) result_eval = pd.DataFrame({'eval':df_eval['售电量'],'pred':eval_pred},index=df_eval['售电量'].index) # print(result_eval) -# print((result_eval['eval'].sum()-result_eval['pred'].sum())/result_eval['eval'].sum()) +print((result_eval['eval'].sum()-result_eval['pred'].sum())/result_eval['eval'].sum()) goal = (result_eval['eval'][-3:].sum()-result_eval['pred'][-3:].sum())/result_eval['eval'].sum() -print(goal) +print('goal:',goal) goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum() -print(goal2) -# if abs(goal) < best_goal: -# best_goal = abs(goal) -# best_i['best_i'] = i -# x = goal2 +print('goal2:',goal2) +print('r2:',r2_score(y_test,y_pred)) +# if abs(goal) < best_goal: +# best_goal = abs(goal) +# best_i['best_i'] = i +# x = goal2 # print(best_i,best_goal,x) -result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\杭州.csv') -with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: - f.write(f'杭州月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n') +# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\杭州.csv') +# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: +# f.write(f'杭州月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n') # # 保存模型 # model.save_model('hangzhou.bin') # loaded_model = 
xgb.XGBRegressor() diff --git a/文档处理.py b/文档处理.py index 2ee569c..bfe4db5 100644 --- a/文档处理.py +++ b/文档处理.py @@ -1,62 +1,62 @@ -import pandas as pd -import os -import re -file_dir1 = r'C:\Users\鸽子\Desktop\一版结果\电压等级电量预测结果\偏差率' -file_dir2 = r'C:\Users\鸽子\Desktop\一版结果\电压等级电量预测结果\月底3天预测结果' -file_dir3 = r'C:\Users\鸽子\Desktop\一版结果\行业电量预测结果\偏差' -import numpy as np -np.set_printoptions(threshold=np.inf) - -# print(os.listdir(file_dir3)) -# str1 = '丽水电压等级10kv以下月底偏差率:0.00229' -# -# print(re.split('电压等级|月底偏差率:',str1)) -# with open(os.path.join(file_dir3,'9月底偏差率.txt'),'r',encoding='utf-8') as f: -# lines = f.readlines() -# list_city = [] -# list_industry = [] -# list_loss = [] -# for i in lines: -# i = re.split(':|:|其中', i) -# print(i) -# list_city.append(i[0][:2]) -# list_industry.append(i[-2].replace(i[0][:2],'')) -# list_loss.append(i[-1][:-2]) -# df_level = pd.DataFrame({'城市':list_city,'行业':list_industry,'偏差':list_loss}) -# # df_level.to_csv(os.path.join(file_dir3,'9月底偏差率.csv'),encoding='gbk') -# print(df_level) -file_dir = r'C:\python-project\pytorch3\浙江行业电量\浙江所有地市133行业数据' -# print(os.listdir(file_dir)) -dict1 = {} - -for file in os.listdir(file_dir): - - df = pd.read_excel(os.path.join(file_dir,file),index_col=' stat_date ') - - col_list = df.drop(columns=[i for i in df.columns if (df[i] == 0).sum() / len(df) >= 0.5]).columns - dict1[file[:2]] = col_list - print(dict1) - - # print(len(df.drop(columns=[i for i in df.columns if (df[i] == 0).sum() / len(df) >= 0.5]).columns)) - -read_path = r'C:\Users\鸽子\Desktop\一版结果\行业电量预测结果\月底预测结果' -list1 = [] -for i in os.listdir(read_path): - print(i) - data = pd.read_csv(os.path.join(read_path, i), sep='\t',header=None) - data = data[data.columns[1:]] - - - for j,step in enumerate(range(0, len(data), 4)): - df = data.iloc[step+1:step + 4, :] - df.columns = ['预测值', '实际值', '偏差率'] - try: - df['行业'] = dict1[i[2:4]][j] - except: - pass - df['城市'] = i[2:4] - list1.append(df) - print(df) -df = pd.concat(list1,ignore_index=True) 
-df.to_csv('各市行业电量预测结果.csv',encoding='gbk') -print(df) \ No newline at end of file +# import pandas as pd +# import os +# import re +# file_dir1 = r'C:\Users\鸽子\Desktop\一版结果\电压等级电量预测结果\偏差率' +# file_dir2 = r'C:\Users\鸽子\Desktop\一版结果\电压等级电量预测结果\月底3天预测结果' +# file_dir3 = r'C:\Users\鸽子\Desktop\一版结果\行业电量预测结果\偏差' +# import numpy as np +# np.set_printoptions(threshold=np.inf) +# +# # print(os.listdir(file_dir3)) +# # str1 = '丽水电压等级10kv以下月底偏差率:0.00229' +# # +# # print(re.split('电压等级|月底偏差率:',str1)) +# # with open(os.path.join(file_dir3,'9月底偏差率.txt'),'r',encoding='utf-8') as f: +# # lines = f.readlines() +# # list_city = [] +# # list_industry = [] +# # list_loss = [] +# # for i in lines: +# # i = re.split(':|:|其中', i) +# # print(i) +# # list_city.append(i[0][:2]) +# # list_industry.append(i[-2].replace(i[0][:2],'')) +# # list_loss.append(i[-1][:-2]) +# # df_level = pd.DataFrame({'城市':list_city,'行业':list_industry,'偏差':list_loss}) +# # # df_level.to_csv(os.path.join(file_dir3,'9月底偏差率.csv'),encoding='gbk') +# # print(df_level) +# file_dir = r'C:\python-project\pytorch3\浙江行业电量\浙江所有地市133行业数据' +# # print(os.listdir(file_dir)) +# dict1 = {} +# +# for file in os.listdir(file_dir): +# +# df = pd.read_excel(os.path.join(file_dir,file),index_col=' stat_date ') +# +# col_list = df.drop(columns=[i for i in df.columns if (df[i] == 0).sum() / len(df) >= 0.5]).columns +# dict1[file[:2]] = col_list +# print(dict1) +# +# # print(len(df.drop(columns=[i for i in df.columns if (df[i] == 0).sum() / len(df) >= 0.5]).columns)) +# +# read_path = r'C:\Users\鸽子\Desktop\一版结果\行业电量预测结果\月底预测结果' +# list1 = [] +# for i in os.listdir(read_path): +# print(i) +# data = pd.read_csv(os.path.join(read_path, i), sep='\t',header=None) +# data = data[data.columns[1:]] +# +# +# for j,step in enumerate(range(0, len(data), 4)): +# df = data.iloc[step+1:step + 4, :] +# df.columns = ['预测值', '实际值', '偏差率'] +# try: +# df['行业'] = dict1[i[2:4]][j] +# except: +# pass +# df['城市'] = i[2:4] +# list1.append(df) +# print(df) +# df = 
pd.concat(list1,ignore_index=True) +# df.to_csv('各市行业电量预测结果.csv',encoding='gbk') +# print(df) diff --git a/浙江行业电量/浙江所有地市133行业数据/丽水133行业数据(全).xlsx b/浙江行业电量/浙江所有地市133行业数据/丽水133行业数据(全).xlsx index 802bea9..565865b 100644 Binary files a/浙江行业电量/浙江所有地市133行业数据/丽水133行业数据(全).xlsx and b/浙江行业电量/浙江所有地市133行业数据/丽水133行业数据(全).xlsx differ diff --git a/浙江行业电量/浙江所有地市133行业数据/台州133行业数据(全).xlsx b/浙江行业电量/浙江所有地市133行业数据/台州133行业数据(全).xlsx index abf88f4..eb90c3e 100644 Binary files a/浙江行业电量/浙江所有地市133行业数据/台州133行业数据(全).xlsx and b/浙江行业电量/浙江所有地市133行业数据/台州133行业数据(全).xlsx differ diff --git a/浙江行业电量/浙江所有地市133行业数据/嘉兴133行业数据(全).xlsx b/浙江行业电量/浙江所有地市133行业数据/嘉兴133行业数据(全).xlsx index d050fe1..848316b 100644 Binary files a/浙江行业电量/浙江所有地市133行业数据/嘉兴133行业数据(全).xlsx and b/浙江行业电量/浙江所有地市133行业数据/嘉兴133行业数据(全).xlsx differ diff --git a/浙江行业电量/浙江所有地市133行业数据/宁波133行业数据(全).xlsx b/浙江行业电量/浙江所有地市133行业数据/宁波133行业数据(全).xlsx index e57dd5c..6136c09 100644 Binary files a/浙江行业电量/浙江所有地市133行业数据/宁波133行业数据(全).xlsx and b/浙江行业电量/浙江所有地市133行业数据/宁波133行业数据(全).xlsx differ diff --git a/浙江行业电量/浙江所有地市133行业数据/杭州133行业数据(全).xlsx b/浙江行业电量/浙江所有地市133行业数据/杭州133行业数据(全).xlsx index 9d29e5e..f036ec4 100644 Binary files a/浙江行业电量/浙江所有地市133行业数据/杭州133行业数据(全).xlsx and b/浙江行业电量/浙江所有地市133行业数据/杭州133行业数据(全).xlsx differ diff --git a/浙江行业电量/浙江所有地市133行业数据/温州133行业数据(全).xlsx b/浙江行业电量/浙江所有地市133行业数据/温州133行业数据(全).xlsx index ad8cab4..28463a2 100644 Binary files a/浙江行业电量/浙江所有地市133行业数据/温州133行业数据(全).xlsx and b/浙江行业电量/浙江所有地市133行业数据/温州133行业数据(全).xlsx differ diff --git a/浙江行业电量/浙江所有地市133行业数据/湖州133行业数据(全).xlsx b/浙江行业电量/浙江所有地市133行业数据/湖州133行业数据(全).xlsx index 7972a34..e16cb31 100644 Binary files a/浙江行业电量/浙江所有地市133行业数据/湖州133行业数据(全).xlsx and b/浙江行业电量/浙江所有地市133行业数据/湖州133行业数据(全).xlsx differ diff --git a/浙江行业电量/浙江所有地市133行业数据/绍兴133行业数据(全).xlsx b/浙江行业电量/浙江所有地市133行业数据/绍兴133行业数据(全).xlsx index 81696d1..61295f0 100644 Binary files a/浙江行业电量/浙江所有地市133行业数据/绍兴133行业数据(全).xlsx and b/浙江行业电量/浙江所有地市133行业数据/绍兴133行业数据(全).xlsx differ diff --git a/浙江行业电量/浙江所有地市133行业数据/舟山133行业数据(全).xlsx 
b/浙江行业电量/浙江所有地市133行业数据/舟山133行业数据(全).xlsx index 2601aa5..5a1b19d 100644 Binary files a/浙江行业电量/浙江所有地市133行业数据/舟山133行业数据(全).xlsx and b/浙江行业电量/浙江所有地市133行业数据/舟山133行业数据(全).xlsx differ diff --git a/浙江行业电量/浙江所有地市133行业数据/衢州133行业数据(全).xlsx b/浙江行业电量/浙江所有地市133行业数据/衢州133行业数据(全).xlsx index 61b975f..361006c 100644 Binary files a/浙江行业电量/浙江所有地市133行业数据/衢州133行业数据(全).xlsx and b/浙江行业电量/浙江所有地市133行业数据/衢州133行业数据(全).xlsx differ diff --git a/浙江行业电量/浙江所有地市133行业数据/金华133行业数据(全).xlsx b/浙江行业电量/浙江所有地市133行业数据/金华133行业数据(全).xlsx index f59214c..01949d0 100644 Binary files a/浙江行业电量/浙江所有地市133行业数据/金华133行业数据(全).xlsx and b/浙江行业电量/浙江所有地市133行业数据/金华133行业数据(全).xlsx differ diff --git a/浙江行业电量/输出为3.py b/浙江行业电量/输出为3.py index 2a7d4aa..51bdfa9 100644 --- a/浙江行业电量/输出为3.py +++ b/浙江行业电量/输出为3.py @@ -147,6 +147,7 @@ def run(file_dir,excel): print(target) print(result_eight) final_df = pd.concat(list_app,ignore_index=True) + final_df.to_csv('市行业电量.csv',encoding='gbk') print(final_df) # result_eight.to_csv(f'./月底预测结果/9月{excel[:2]}.txt', sep='\t', mode='a')