From 376b797e45d7fd665190d269777818bf1cd1739d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=B8=BD=E5=AD=90?= <2316994765@qq.com> Date: Thu, 9 Nov 2023 18:08:35 +0800 Subject: [PATCH] =?UTF-8?q?=E8=BE=93=E5=87=BA=E9=A2=84=E6=B5=8B=E7=BB=93?= =?UTF-8?q?=E6=9E=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 浙江电压等级电量/400v_11市_xgb.py | 43 +++++++++++ 浙江电压等级电量/400v_杭州.py | 8 +- 浙江电压等级电量/400v_衢州.py | 75 +++++++++++++++++++ .../400v数据预处理.py | 14 ++-- 浙江电压等级电量/test1.py | 28 ++++--- 5 files changed, 146 insertions(+), 22 deletions(-) create mode 100644 浙江电压等级电量/400v_11市_xgb.py create mode 100644 浙江电压等级电量/400v_衢州.py diff --git a/浙江电压等级电量/400v_11市_xgb.py b/浙江电压等级电量/400v_11市_xgb.py new file mode 100644 index 0000000..40fd4b9 --- /dev/null +++ b/浙江电压等级电量/400v_11市_xgb.py @@ -0,0 +1,43 @@ +import pandas as pd +import xgboost as xgb +from sklearn.model_selection import train_test_split +from sklearn.metrics import r2_score +import numpy as np + +df = pd.read_excel(r'./400v入模数据.xlsx') +df['stat_date'] = pd.to_datetime(df['stat_date']) + +print(df.corr()['0.4kv及以下']) + +X = df[(df['stat_date']>='2021-01-01')&(df['stat_date']<='2023-09-28')].drop(columns=['0.4kv及以下']).set_index('stat_date') + +y = df[(df['stat_date']>='2021-01-01')&(df['stat_date']<='2023-09-28')]['0.4kv及以下'] +x_eval = df[(df['stat_date']<='2023-09-30')&(df['stat_date']>='2023-09-01')].drop(columns=['0.4kv及以下']).set_index('stat_date') +print(x_eval) +y_eval = df[(df['stat_date']<='2023-09-30')&(df['stat_date']>='2023-09-01')][['0.4kv及以下','city']] + + +x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42) +model = xgb.XGBRegressor(max_depth=6,learning_rate=0.05,n_estimators=250) +model.fit(x_train,y_train) +y_pred = model.predict(x_test) +print(r2_score(y_test,y_pred)) + +predict = model.predict(x_eval) +result = pd.DataFrame({'real':y_eval.drop(columns='city').values.reshape(-1),'pred':predict},index=x_eval.index) +print(result.loc['2023-09-28':'2023-09-30']) + + +dict2 = {'杭州':0,'湖州':1,'嘉兴':2,'金华':3,'丽水':4,'宁波':5,'衢州':6,'绍兴':7,'台州':8,'温州':9,'舟山':10} +dict1 = {} +for city in x_eval['city'].drop_duplicates(): + eval_x = x_eval[x_eval['city']==city] + eval_y = y_eval[y_eval['city']==city]['0.4kv及以下'] + pred = model.predict(eval_x) + loss_rate = (np.sum(pred[-3:])-np.sum(eval_y[-3:]))/np.sum(eval_y) + dict1[city] = loss_rate + + +for key in dict2.keys(): + dict2[key] = dict1[dict2[key]] +print(dict2) diff --git a/浙江电压等级电量/400v_杭州.py b/浙江电压等级电量/400v_杭州.py index e0006db..5a5949c 100644 --- a/浙江电压等级电量/400v_杭州.py +++ b/浙江电压等级电量/400v_杭州.py @@ -38,10 +38,10 @@ df = df.loc[normal(df['0.4kv及以下']).index] print(df.head()) -x_train = df.loc['2022-7':'2023-7'].drop(columns='0.4kv及以下') -y_train = df.loc['2022-7':'2023-7']['0.4kv及以下'] -x_eval = df.loc['2023-8'].drop(columns='0.4kv及以下') -y_eval = df.loc['2023-8']['0.4kv及以下'] +x_train = df.loc['2022-7':'2023-8'].drop(columns='0.4kv及以下') +y_train = df.loc['2022-7':'2023-8']['0.4kv及以下'] +x_eval = df.loc['2023-9'].drop(columns='0.4kv及以下') +y_eval = df.loc['2023-9']['0.4kv及以下'] x_train,x_test,y_train,y_test = train_test_split(x_train,y_train,test_size=0.2,random_state=42) model = xgb.XGBRegressor(max_depth=6,learning_rate=0.05,n_estimators=150) diff --git a/浙江电压等级电量/400v_衢州.py b/浙江电压等级电量/400v_衢州.py new file mode 100644 index 0000000..ec07116 --- /dev/null +++ b/浙江电压等级电量/400v_衢州.py @@ -0,0 +1,75 @@ +import pandas as pd +import matplotlib.pyplot as plt +import xgboost as xgb +from sklearn.model_selection import train_test_split +from sklearn.metrics import r2_score +def normal(x): + high = x.describe()['75%'] + 1.5*(x.describe()['75%']-x.describe()['25%']) + low = x.describe()['25%'] - 1.5*(x.describe()['75%']-x.describe()['25%']) + return x[(x<=high)&(x>=low)] + +def season(x): + if str(x)[5:7] in ('04', '05', '06', '11'): + return 0 + elif str(x)[5:7] in ('01', '02', '03', '09', '10', '12'): + return 1 + else: + return 2 + +df = pd.read_excel('./浙江各地市分电压日电量数据/衢州 .xlsx') +df = df[['stat_date','0.4kv及以下']] +df['0.4kv及以下'] = df['0.4kv及以下']/10000 +df['stat_date'] = df['stat_date'].map(lambda x:x.strip()) +df['stat_date'] = pd.to_datetime(df['stat_date']) + + +df_qw = pd.read_excel(r'C:\python-project\p1031\入模数据\衢州.xlsx') +df_qw.columns = df_qw.columns.map(lambda x:x.strip()) + +df_qw = df_qw[['dtdate','tem_max','tem_min','holiday','24ST']] +df_qw['dtdate'] = pd.to_datetime(df_qw['dtdate']) + + +df = pd.merge(df,df_qw,left_on='stat_date',right_on='dtdate',how='left') +df.drop(columns='dtdate',inplace=True) +df.set_index('stat_date',inplace=True) + + +# list2 = [] +# list0 = [] +# list1 = [] +# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'): +# month_index = df.index.strftime('%Y-%m-%d').str[5:7] == f'{i}' +# # print(df.loc[month_index]['0.4kv及以下'].max(),df['0.4kv及以下'].describe()['75%']) +# if df.loc[month_index]['0.4kv及以下'].mean() >= df['0.4kv及以下'].describe()['75%']: +# list2.append(i) +# elif df.loc[month_index]['0.4kv及以下'].mean() <= df['0.4kv及以下'].describe()['25%']: +# list0.append(i) +# else: +# list1.append(i) +# print(list0,list1,list2) + + +df['season'] = df.index.map(season) +df = df.loc[normal(df['0.4kv及以下']).index] + +x_train = df.loc['2021-7':'2023-9'][:-3].drop(columns='0.4kv及以下') + +y_train = df.loc['2021-7':'2023-9'][:-3]['0.4kv及以下'] +x_eval = df.loc['2023-9'].drop(columns='0.4kv及以下') +y_eval = df.loc['2023-9']['0.4kv及以下'] + + +x_train,x_test,y_train,y_test = train_test_split(x_train,y_train,test_size=0.2,random_state=42) +model = xgb.XGBRegressor(max_depth=6,learning_rate=0.05,n_estimators=150) +model.fit(x_train,y_train) +y_pred = model.predict(x_test) +print(r2_score(y_test,y_pred)) + +predict = model.predict(x_eval) +result = pd.DataFrame({'eval':y_eval,'pred':predict},index=y_eval.index) +print(result) +print((result['eval'][-3:].sum()-result['pred'][-3:].sum())/result['eval'].sum()) + + + diff --git a/浙江电压等级电量/400v数据预处理.py b/浙江电压等级电量/400v数据预处理.py index f84c702..facb82a 100644 --- a/浙江电压等级电量/400v数据预处理.py +++ b/浙江电压等级电量/400v数据预处理.py @@ -16,6 +16,7 @@ for excel,qw_excel in zip(os.listdir(fir_dir),os.listdir(qw_dir)): df_city = df_city[['stat_date','0.4kv及以下']] df_city['0.4kv及以下'] = df_city['0.4kv及以下']/10000 + df_city = df_city.loc[normal(df_city['0.4kv及以下']).index] df_city['stat_date'] = df_city['stat_date'].map(lambda x:x.strip()) df_city['stat_date'] = pd.to_datetime(df_city['stat_date']) @@ -53,13 +54,14 @@ for excel,qw_excel in zip(os.listdir(fir_dir),os.listdir(qw_dir)): else: return 2 - + print(f'{excel[:2]}',list0) df['season'] = df.index.map(season) - dict1 = {'杭州':0,'湖州':1,'嘉兴':2,'金华':3,'丽水':4,'宁波':5,'衢州':6,'绍兴':7,'台州':8,'温州':9,'舟山':10} - df['city'] = dict1[excel[:2]] - df.reset_index(inplace=True) - result = pd.concat(result,df) + df.to_excel(f'./400v入模数据/{excel[:2]}.xlsx') + # dict1 = {'杭州':0,'湖州':1,'嘉兴':2,'金华':3,'丽水':4,'宁波':5,'衢州':6,'绍兴':7,'台州':8,'温州':9,'舟山':10} + # df['city'] = dict1[excel[:2]] + # df.reset_index(inplace=True) + # result = pd.concat([result,df]) + -print(df) diff --git a/浙江电压等级电量/test1.py b/浙江电压等级电量/test1.py index afd73f9..879f99a 100644 --- a/浙江电压等级电量/test1.py +++ b/浙江电压等级电量/test1.py @@ -1,5 +1,4 @@ import os - import numpy as np import pandas as pd n1 = np.array([[1,1,1]]) @@ -9,17 +8,17 @@ n2 = np.array([]).reshape(3,-1) print(np.max([[1,2,3],[4,5,6]])) -file_dir = r'C:\Users\user\Desktop\浙江各地市分电压日电量数据' -df = pd.read_excel(r'C:\Users\user\Desktop\浙江省各地市日电量及分压数据21-23年.xlsx',sheet_name=1) -df.columns = df.columns.map(lambda x:x.strip()) -for city in df['地市'].drop_duplicates(): - df_city = df[df['地市']== city] - df_city['stat_date'] = df_city['stat_date'].map(lambda x:x.strip()) - df_city['stat_date'] = pd.to_datetime(df_city['stat_date'],format='%Y-%m-%d') - df_city = df_city[df_city.columns[:-1]] - df_city.sort_values(by='stat_date',ascending=True,inplace=True) - df_city['stat_date'] = df_city['stat_date'].astype('str') - df_city.to_excel(fr'C:\Users\user\Desktop\浙江各地市分电压日电量数据\{city}.xlsx',index=False) +# file_dir = r'C:\Users\user\Desktop\浙江各地市分电压日电量数据' +# df = pd.read_excel(r'C:\Users\user\Desktop\浙江省各地市日电量及分压数据21-23年.xlsx',sheet_name=1) +# df.columns = df.columns.map(lambda x:x.strip()) +# for city in df['地市'].drop_duplicates(): +# df_city = df[df['地市']== city] +# df_city['stat_date'] = df_city['stat_date'].map(lambda x:x.strip()) +# df_city['stat_date'] = pd.to_datetime(df_city['stat_date'],format='%Y-%m-%d') +# df_city = df_city[df_city.columns[:-1]] +# df_city.sort_values(by='stat_date',ascending=True,inplace=True) +# df_city['stat_date'] = df_city['stat_date'].astype('str') +# df_city.to_excel(fr'C:\Users\user\Desktop\浙江各地市分电压日电量数据\{city}.xlsx',index=False) # file_Dir = r'C:\Users\鸽子\Desktop\浙江各地市行业电量数据' # for excel in os.listdir(file_Dir): # df1 = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江各地市日电量数据-27-28).xlsx',sheet_name=1) @@ -34,3 +33,8 @@ for city in df['地市'].drop_duplicates(): # df2 = pd.concat((df2,df1),ignore_index=True) # df2.to_excel(fr'C:\Users\鸽子\Desktop\浙江各地市行业电量数据\{city}.xlsx') +df = pd.read_csv(r'C:\Users\鸽子\Desktop\浙江各区县数据(2).csv') +df.columns = df.columns.map(lambda x:x.strip()) +print(df.columns) + +print(dict(zip(df.columns,[(df[x]==0).sum()/len(df) for x in df.columns])))