diff --git a/入模数据/绍兴.xlsx b/入模数据/绍兴.xlsx index 4367359..c905a03 100644 Binary files a/入模数据/绍兴.xlsx and b/入模数据/绍兴.xlsx differ diff --git a/各地级市日电量模型/丽水.py b/各地级市日电量模型/丽水.py index b454534..2741b74 100644 --- a/各地级市日电量模型/丽水.py +++ b/各地级市日电量模型/丽水.py @@ -87,8 +87,8 @@ import numpy as np X_eval = np.array([ [21,10,10,0,0], [21, 11, 10, 0, 0], - [18, 7, 10, 0, 0], - [17, 9, 10, 0, 0], + [20, 8, 10, 0, 0], + [20, 8, 10, 0, 0], [17, 10, 10, 0, 0] ]) print(model.predict(X_eval)) diff --git a/各地级市日电量模型/台州.py b/各地级市日电量模型/台州.py index 2680d68..b0bf3e1 100644 --- a/各地级市日电量模型/台州.py +++ b/各地级市日电量模型/台州.py @@ -85,7 +85,7 @@ model.save_model('taizhou.bin') import numpy as np X_eval = np.array([ - [19,11,10,0,0], + [19,11,10,1,0], [21, 7, 10, 0, 0], [19, 5, 10, 0, 0], [17, 8, 10, 0, 0], diff --git a/各地级市日电量模型/绍兴.py b/各地级市日电量模型/绍兴.py index 65a1702..f218c85 100644 --- a/各地级市日电量模型/绍兴.py +++ b/各地级市日电量模型/绍兴.py @@ -45,7 +45,7 @@ data = data.loc[normal(data['售电量']).index] # print(list0,list1,list2) data['season'] = data.index.map(season) -df_eval = data.loc['2023-10'] +df_eval = data.loc['2023-11'] # data = data.loc[:'2023-8'] df_train = data[450:-1] # df_train = data[450:][:-3] @@ -85,16 +85,16 @@ print(goal2) # x = goal2 # print(best_i,best_goal,x) print(result_eval) -# 保存模型 -model.save_model('shaoxing.bin') -loaded_model = xgb.XGBRegressor() -loaded_model.load_model('shaoxing.bin') +# # 保存模型 +# model.save_model('shaoxing.bin') +# loaded_model = xgb.XGBRegressor() +# loaded_model.load_model('shaoxing.bin') import numpy as np X_eval = np.array([ - [17, 9, 10, 0, 0], + [16.2, 8.2, 10, 1, 0], [20, 6, 10, 0, 0], - [17, 5, 10, 0, 0], + [19, 7, 10, 0, 0], [16, 8, 10, 0, 0], [12, 7, 10, 0, 0] ]) diff --git a/文档处理.py b/文档处理.py index 5de17ae..462b18f 100644 --- a/文档处理.py +++ b/文档处理.py @@ -60,10 +60,31 @@ # df = pd.concat(list1,ignore_index=True) # df.to_csv('各市行业电量预测结果.csv',encoding='gbk') # print(df) +import os +from openpyxl import Workbook import pandas as pd -df = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江省11月分行业售电量预测v2.xlsx',sheet_name=1) -print(df.head()) -print(df[df.columns[2:]].groupby(df['city_name']).sum().T) -df2 = df[df.columns[2:]].groupby(df['city_name']).sum().T -df2.to_excel(r'C:\Users\鸽子\Desktop\1.xlsx') +# df = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江省11月分行业售电量预测v2.xlsx',sheet_name=1) +# print(df.head()) +# print(df[df.columns[2:]].groupby(df['city_name']).sum().T) +# df2 = df[df.columns[2:]].groupby(df['city_name']).sum().T +# df2.to_excel(r'C:\Users\鸽子\Desktop\1.xlsx') +file_dir = r'C:\Users\鸽子\Desktop\11月区县分压预测' +for file in os.listdir(file_dir): + city = file[:-5] + wb = Workbook() + wb.save(fr'C:\Users\鸽子\Desktop\11月区县分压汇总\{city}.xlsx') + +for file in os.listdir(file_dir): + city = file[:-5] + excel_file = pd.ExcelFile(os.path.join(file_dir,file)) + sheet_names = excel_file.sheet_names[1:] + for sheet in sheet_names: + df = excel_file.parse(sheet) + df_result = df[df.columns[1:]].sum() + df_result = pd.DataFrame(df_result) + df_result.columns = ['售电量'] + + with pd.ExcelWriter(fr'C:\Users\鸽子\Desktop\11月区县分压汇总\{city}.xlsx', mode='a', engine='openpyxl', + if_sheet_exists='replace') as writer: + df_result.to_excel(writer, sheet_name=f'{sheet}') diff --git a/浙江电压等级电量/区县分压/区县分压.py b/浙江电压等级电量/区县分压/区县分压.py index bd4f4b9..639ac93 100644 --- a/浙江电压等级电量/区县分压/区县分压.py +++ b/浙江电压等级电量/区县分压/区县分压.py @@ -9,47 +9,51 @@ pd.set_option('display.width',None) def normal(x): high = x.describe()['75%'] + 1.5*(x.describe()['75%']-x.describe()['25%']) low = x.describe()['25%'] - 1.5*(x.describe()['75%']-x.describe()['25%']) - return x[(x<=high)&(x>=low)] + return (x<=high)&(x>=low) df = pd.read_csv(r'C:\Users\鸽子\Desktop\浙江各区县数据(2).csv') df.columns = df.columns.map(lambda x:x.strip()) -df.drop(columns=['500kv(含330kv)及以上','220kv','110kv(含66kv)','20kv','power_sal'],inplace=True) +df.dropna(subset=['city_name','county_name'],inplace=True) +print(df.info()) print(df.columns) print(dict(zip(df.columns,[(df[x]==0).sum()/len(df) for x in df.columns]))) yc_org_list = [] list_fl = [] list_org = [] -for city in df['市'].drop_duplicates(): - df_ct = df[df['市']==city] - # wb = Workbook() - # wb.save(fr'C:\Users\鸽子\Desktop\9月0.4kv区县预测\{city}.xlsx') - for org in df_ct['org_name'].drop_duplicates(): - if org.strip()[-4:] != '供电公司': - continue - df_org = df_ct[df_ct['org_name']==org] - df_org['1-10kv'] /= 10000 - df_org['35kv'] /= 10000 - df_org['0.4kv及以下'] /= 10000 - s1 = df_org[['日期','0.4kv及以下']] +list1 = [] +for city in df['city_name'].drop_duplicates(): + wb = Workbook() + wb.save(fr'C:\Users\鸽子\Desktop\11月区县分压预测\{city}.xlsx') + +for org in df['county_name'].drop_duplicates(): + if org.strip()[-4:] != '供电公司': + continue + df_org = df[df['county_name']==org] + city = df_org['city_name'].iloc[0] + df_result = pd.DataFrame({}) + for level in df_org.columns[3:]: + s1 = df_org[['pt_date',level]] s1.replace(0,np.NaN,inplace=True) s1.dropna(how='any',inplace=True) - # plt.plot(range(len(s1)),s1['1-10kv']) - # plt.show() - # 更改列名,更改为Prophet指定的列名ds和y - dd = s1.rename(columns={'日期':'ds','0.4kv及以下':'y'}) + dd = s1.rename(columns={'pt_date':'ds',level:'y'}) + dd['ds'] = dd['ds'].map(lambda x:x.strip()) dd['ds'] = pd.to_datetime(dd['ds']) + dd.drop_duplicates(inplace=True) + + # 划分数据,划分为训练集和验证集,预测的数据设置为未来4天 + df_train = dd[(dd['ds']>='2023-01-01')&(dd['ds']<='2023-11-30')] - # 划分数据,划分为训练集和验证集,预测的数据设置为未来3天 - df_train = dd[(dd['ds']>='2022-01-01')&(dd['ds']<='2023-07-31')][:-3] - df_train = df_train.loc[normal(df_train['y']).index] - if df_train.shape[0] <= 180: + # df_train = df_train.loc[normal(df_train['y']).index] + df_train['y'] = df_train['y'].where(normal(df_train['y']),other=np.nan).bfill() + + if df_train.shape[0] <= 90: yc_org_list.append(org) continue - df_test = dd[(dd['ds']>='2022-01-01')&(dd['ds']<='2023-07-31')][-3:] + # df_test = dd[(dd['ds']>='2022-01-01')&(dd['ds']<='2023-07-31')][-3:] # 数据的变动会受到季节、周、天的影响,存在一定的规律性,因此我们将这三个参数设置为True model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True) # 采用中国的假期模式,其余参数均保持默认 @@ -57,7 +61,7 @@ for city in df['市'].drop_duplicates(): model.fit(df_train) # make_future_dataframe: 作用是告诉模型我们要预测多长时间,以及时间的周期是什么。生成一个时间戳 - future = model.make_future_dataframe(periods=3, freq='D') + future = model.make_future_dataframe(periods=4, freq='D') # 进行预测,返回预测的结果forecast forecast = model.predict(future) @@ -65,37 +69,43 @@ for city in df['市'].drop_duplicates(): # 有:forecast['yhat'] = forecast['trend'] + forecast['additive_terms'] 。 # 因此:forecast['yhat'] = forecast['trend'] +forecast['weekly'] + forecast['yearly']。 # 如果有节假日因素,那么就会有forecast['yhat'] = forecast['trend'] +forecast['weekly'] + forecast['yearly'] + forecast['holidays']。 - # print(forecast) # 测试,把ds列,即data_series列设置为索引列 - df_test = df_test.set_index('ds') + # df_test = df_test.set_index('ds') # 把预测到的数据取出ds列,预测值列yhat,同样把ds列设置为索引列。 - forecast = forecast[['ds','yhat']].set_index('ds') + forecast = forecast[['ds','yhat']].set_index('ds').sort_index(ascending=True).loc['2023-11'] + + # 将预测列前25天替换为真实值 + forecast.loc['2023-11'][:25] = dd.set_index('ds').loc['2023-11'][:25] + if len(forecast) < 334: + list1.append(org) # join:按照索引进行连接, - # dropna:能够找到DataFrame类型数据的空值(缺失值),将空值所在的行/列删除后,将新的DataFrame作为返回值返回。 - df_all = forecast.join(dd.set_index('ds')).dropna() - df_all['org_name'] = org - df_all['偏差率'] = (df_all['y'] - df_all['yhat'])/df_all['y'] - df_all.rename(columns={'y':'真实值','yhat':'预测值'},inplace=True) - df_all = df_all[['org_name','真实值','预测值','偏差率']] + forecast.columns = [level] + + df_result = pd.concat([df_result,forecast],axis=1) + + # df_all = forecast.join(dd.set_index('ds')).dropna() + # df_all['org_name'] = org + # df_all['偏差率'] = (df_all['y'] - df_all['yhat'])/df_all['y'] + # df_all.rename(columns={'y':'真实值','yhat':'预测值'},inplace=True) + # df_all = df_all[['org_name','真实值','预测值','偏差率']] list_org.append(org) - try: - result = df_all.loc['2023-7'] - result['goal'] = (result['真实值'] - result['预测值'])[-3:].sum()/result['真实值'].sum() - list_fl.append((result['真实值'] - result['预测值'])[-3:].sum()/result['真实值'].sum()) - - # with pd.ExcelWriter(fr'C:\Users\鸽子\Desktop\9月0.4kv区县预测\{city}.xlsx',mode='a',engine='openpyxl',if_sheet_exists='replace') as writer: - # result.to_excel(writer,sheet_name=f'{org}') - except: - yc_org_list.append(org) + # result = df_all.loc['2023-7'] + # result['goal'] = (result['真实值'] - result['预测值'])[-3:].sum()/result['真实值'].sum() + # list_fl.append((result['真实值'] - result['预测值'])[-3:].sum()/result['真实值'].sum()) + + with pd.ExcelWriter(fr'C:\Users\鸽子\Desktop\11月区县分压预测\{city}.xlsx',mode='a',engine='openpyxl',if_sheet_exists='replace') as writer: + df_result.to_excel(writer,sheet_name=f'{org}') + +print(yc_org_list) df = pd.DataFrame({'org':list_org,'goal':list_fl}) print(df) print(df['goal'].value_counts(bins=[-0.05,-0.01,-0.005,0, 0.005, 0.01, 0.02,0.05],sort=False)) -# print(yc_org_list) + # # 创建一个ExcelWriter对象 # with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\output.xlsx',mode='a',if_sheet_exists='replace') as writer: # # 将不同的子文件写入同一个Excel文件的不同工作表 @@ -107,4 +117,3 @@ print(df['goal'].value_counts(bins=[-0.05,-0.01,-0.005,0, 0.005, 0.01, 0.02,0.05 # plt.show() -