From 198bd291741854427c1ef5b49bba44fd03ef6bd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=B8=BD=E5=AD=90?= <2316994765@qq.com> Date: Wed, 24 Jan 2024 17:31:38 +0800 Subject: [PATCH] =?UTF-8?q?=E8=BE=93=E5=87=BA=E9=A2=84=E6=B5=8B=E7=BB=93?= =?UTF-8?q?=E6=9E=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 北京安徽/北京_分压_移动平均.py | 59 ++++++++ 北京安徽/北京_行业_移动平均.py | 59 ++++++++ 北京安徽/北京区域电量.py | 93 ++++++++++++ .../北京行业电量_时间序列.py | 74 ++++++++++ 北京安徽/安徽_分压_移动平均.py | 62 ++++++++ 北京安徽/安徽_行业_移动平均.py | 59 ++++++++ 北京安徽/安徽区域电量.py | 90 ++++++++++++ .../安徽行业电量_时间序列.py | 74 ++++++++++ 区域电量19年至今数据.py | 6 +- 各地级市日电量模型/test01.py | 0 .../追加10月数据.py | 4 +- 文档处理.py | 137 ++++++++++++++---- ...电量_10kv.py => prophet_分压电量.py} | 0 浙江行业电量/prophet_行业电量.py | 42 +++--- 浙江行业电量/test1.py | 1 + .../行业电量_输出为3_步长为10.py | 5 +- 16 files changed, 710 insertions(+), 55 deletions(-) create mode 100644 北京安徽/北京_分压_移动平均.py create mode 100644 北京安徽/北京_行业_移动平均.py create mode 100644 北京安徽/北京区域电量.py create mode 100644 北京安徽/北京行业电量_时间序列.py create mode 100644 北京安徽/安徽_分压_移动平均.py create mode 100644 北京安徽/安徽_行业_移动平均.py create mode 100644 北京安徽/安徽区域电量.py create mode 100644 北京安徽/安徽行业电量_时间序列.py delete mode 100644 各地级市日电量模型/test01.py rename 浙江电压等级电量/{prophet_分压电量_10kv.py => prophet_分压电量.py} (100%) diff --git a/北京安徽/北京_分压_移动平均.py b/北京安徽/北京_分压_移动平均.py new file mode 100644 index 0000000..c25d9ae --- /dev/null +++ b/北京安徽/北京_分压_移动平均.py @@ -0,0 +1,59 @@ +import pandas as pd +pd.set_option('display.width',None) +df = pd.read_excel(r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽分压区域.xlsx', sheet_name=0) +df['pt_date'] = pd.to_datetime(df['pt_date']) + +# 移动平均 +for city in df['city_name'].drop_duplicates(): + + df_city = df[(df['city_name'] == city)&(df['county_name'].isnull())].set_index('pt_date').loc['2023-12'].sort_index() + + dict_big = {} + dict_ok = {} + resut_df = pd.DataFrame({}) + index_industry = [] + tq_list = [] + pred_list = [] + loss_list = [] + rate_list = [] + + for industry in df_city.columns[2:]: + + df_moving_avg = pd.DataFrame(df_city.iloc[:-3][industry], index=df_city.iloc[:-3].index) + + + future = pd.date_range(start='2023-12-29', periods=3, freq='D') + + for date in future: + df_moving_avg.loc[date, industry] = df_moving_avg[df_moving_avg.values!=0][-3:].mean().values + + resut_df = pd.concat([resut_df, df_moving_avg], axis=1) + "result_df为明细数据" + print(city[-6:]) + + final_df = resut_df.sum() + final_df = pd.DataFrame(final_df,columns=['预测值']) + final_df['真实值'] = df_city[df_city.columns[2:]].sum() + final_df['偏差'] = final_df['真实值'] - final_df['预测值'] + final_df['偏差率'] = final_df['偏差'] / final_df['真实值'] + final_df['偏差率'] = final_df['偏差率'].apply(lambda x:"{:.5%}".format(x)) + print(final_df) + # loss = (df_city1[industry].tail(-3).sum() - df_moving_avg.tail(-3).sum()) / df_city1[industry].sum() + # tq_list.append(df_city1[industry].sum()) + # pred_list.append(df_moving_avg[industry].sum()) + # loss_list.append(df_city1[industry].sum()-df_moving_avg[industry].sum()) + # rate_list.append((df_city1[industry].sum()-df_moving_avg[industry].sum())/df_city1[industry].sum()) + with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\移动平均_北京分压_12月.xlsx', mode='a', if_sheet_exists='replace', + engine='openpyxl') as writer: + final_df.to_excel(writer, sheet_name=f'{city[-6:]}') + +# resut_df = pd.DataFrame({'同期电量':tq_list,'预测电量':pred_list,'偏差':loss_list,'偏差率':rate_list},index=index_industry) +# print(resut_df) +# resut_df.to_excel(r'C:\Users\鸽子\Desktop\移动平均_丽水_行业.xlsx') + +# if loss.values >= 0.005: +# dict_big[industry] = loss.values[0] +# else: +# dict_ok[industry] = loss.values[0] +# print(len(dict_ok)) +# print(len(dict_big)) diff --git a/北京安徽/北京_行业_移动平均.py b/北京安徽/北京_行业_移动平均.py new file mode 100644 index 0000000..ae33433 --- /dev/null +++ b/北京安徽/北京_行业_移动平均.py @@ -0,0 +1,59 @@ +import pandas as pd +pd.set_option('display.width',None) +df = pd.read_excel(r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽行业.xlsx', sheet_name=0) +df['stat_date'] = pd.to_datetime(df['stat_date']) + +# 移动平均 +for city in df['city_name'].drop_duplicates(): + + df_city = df[(df['city_name'] == city)&(df['county_name'].isnull())].set_index('stat_date').loc['2023-12'].sort_index() + + dict_big = {} + dict_ok = {} + resut_df = pd.DataFrame({}) + index_industry = [] + tq_list = [] + pred_list = [] + loss_list = [] + rate_list = [] + + for industry in df_city.columns[2:]: + + df_moving_avg = pd.DataFrame(df_city.iloc[:-3][industry], index=df_city.iloc[:-3].index) + + + future = pd.date_range(start='2023-12-29', periods=3, freq='D') + + for date in future: + df_moving_avg.loc[date, industry] = df_moving_avg[df_moving_avg.values!=0][-3:].mean().values + + resut_df = pd.concat([resut_df, df_moving_avg], axis=1) + "result_df为明细数据" + print(city[-6:]) + + final_df = resut_df.sum() + final_df = pd.DataFrame(final_df,columns=['预测值']) + final_df['真实值'] = df_city[df_city.columns[2:]].sum() + final_df['偏差'] = final_df['真实值'] - final_df['预测值'] + final_df['偏差率'] = final_df['偏差'] / final_df['真实值'] + final_df['偏差率'] = final_df['偏差率'].apply(lambda x:"{:.5%}".format(x)) + print(final_df) + # loss = (df_city1[industry].tail(-3).sum() - df_moving_avg.tail(-3).sum()) / df_city1[industry].sum() + # tq_list.append(df_city1[industry].sum()) + # pred_list.append(df_moving_avg[industry].sum()) + # loss_list.append(df_city1[industry].sum()-df_moving_avg[industry].sum()) + # rate_list.append((df_city1[industry].sum()-df_moving_avg[industry].sum())/df_city1[industry].sum()) + with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\移动平均_北京行业_12月.xlsx', mode='a', if_sheet_exists='replace', + engine='openpyxl') as writer: + final_df.to_excel(writer, sheet_name=f'{city[-6:]}') + +# resut_df = pd.DataFrame({'同期电量':tq_list,'预测电量':pred_list,'偏差':loss_list,'偏差率':rate_list},index=index_industry) +# print(resut_df) +# resut_df.to_excel(r'C:\Users\鸽子\Desktop\移动平均_丽水_行业.xlsx') + +# if loss.values >= 0.005: +# dict_big[industry] = loss.values[0] +# else: +# dict_ok[industry] = loss.values[0] +# print(len(dict_ok)) +# print(len(dict_big)) diff --git a/北京安徽/北京区域电量.py b/北京安徽/北京区域电量.py new file mode 100644 index 0000000..beb5e28 --- /dev/null +++ b/北京安徽/北京区域电量.py @@ -0,0 +1,93 @@ +import pandas as pd +import datetime +import math +import chinese_calendar as cc +import xgboost as xgb +from sklearn.model_selection import train_test_split +from sklearn.metrics import r2_score + + +def holiday_work(x): + if cc.is_workday(x): + return 0 + if cc.is_holiday(x): + return 1 + +def normal(nd): + high = nd.describe()['75%'] + 1.5 * (nd.describe()['75%'] - nd.describe()['25%']) + low = nd.describe()['25%'] - 1.5 * (nd.describe()['75%'] - nd.describe()['25%']) + return nd[(nd < high) & (nd > low)] + +def jq(y, x): + a = 365.242 * (y - 1900) + 6.2 + 15.22 * x - 1.9 * math.sin(0.262 * x) + return datetime.date(1899, 12, 31) + datetime.timedelta(days=int(a)) + +jq_list = ['小寒', '大寒', '立春', '雨水', '惊蛰', '春分', '清明', '谷雨', '立夏', '小满', '芒种', '夏至', '小暑', + '大暑', '立秋', '处暑', '白露', '秋分', '寒露', '霜降', '立冬', '小雪', '大雪', '冬至'] +jq_dict = {} +for j in range(2023, 2025): + for i in range(24): + jq_dict[jq(j, i).strftime('%Y-%m-%d')] = jq_list[i] + +ys_df = pd.read_excel(r'C:\python-project\p1031\入模数据\杭州.xlsx') +ys_df['dtdate'] = pd.to_datetime(ys_df['dtdate']) +ys_dict = dict(zip([x for x in ys_df['dtdate']], [y for y in ys_df['holiday']])) + +pd.set_option('display.width', None) +df_qy_ah = pd.read_excel(r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽分压区域.xlsx', sheet_name=3) +df_qy_ah = df_qy_ah[df_qy_ah['county_name'].isnull()] +df_qy_ah['pt_date'] = pd.to_datetime(df_qy_ah['pt_date']) +df_qy_ah = df_qy_ah[df_qy_ah['pt_date'] <= '2023-12-31'] + +for city in df_qy_ah['city_name'].drop_duplicates(): + + df_ah_city = df_qy_ah[df_qy_ah['city_name'] == city] + + df_ah_city.drop_duplicates(inplace=True) + + df_ah_city['24ST'] = df_qy_ah['pt_date'].astype('string').map(jq_dict) + + df_ah_city.fillna(method='ffill', inplace=True) + df_ah_city['24ST'].fillna('冬至', inplace=True) + label_dict = {'冬至': 0, '处暑': 1, '夏至': 2, '大寒': 3, '大暑': 4, '大雪': 5, '寒露': 6, '小寒': 7, '小暑': 8, + '小满': 9, '小雪': 10, '惊蛰': 11, '春分': 12, '清明': 13, '白露': 14, '秋分': 15, '立冬': 16, + '立夏': 17, + '立春': 18, '立秋': 19, '芒种': 20, '谷雨': 21, '雨水': 22, '霜降': 23} + df_ah_city['24ST'] = df_ah_city['24ST'].map(label_dict) + + df_ah_city['holiday'] = df_qy_ah['pt_date'].map(ys_dict) + + holiday_null_s = df_ah_city[df_ah_city['holiday'].isnull()]['pt_date'] + holiday_null_s = holiday_null_s.map(holiday_work) + holiday_null_s.iloc[-2:] = 3 + + df_ah_city['holiday'].fillna(holiday_null_s.to_dict(), inplace=True) + df_ah_city.set_index('pt_date', inplace=True) + df_ = df_ah_city.loc['2023-12']['power_sal'] + + df_ah_city = df_ah_city.loc[normal(df_ah_city['power_sal']).index] + print(city) + print(df_ah_city) + + X, y = df_ah_city.drop(columns=['city_name', 'county_name', 'power_sal']).iloc[-180:-3], \ + df_ah_city['power_sal'].iloc[-180:-3] + x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + eval_x, eval_y = df_ah_city.drop(columns=['city_name', 'county_name', 'power_sal']).iloc[-3:], \ + df_ah_city['power_sal'].iloc[-3:] + model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150) + model.fit(x_train, y_train) + y_pred = model.predict(x_test) + result_test = pd.DataFrame({'test': y_test, 'pred': y_pred}, index=y_test.index) + + print((abs(result_test['pred'] - result_test['test']) / result_test['test']).mean()) + print(r2_score(y_test, y_pred)) + + final_df = pd.DataFrame({'真实值': list(df_.values), '预测值': list(df_.values)[:-3] + list(model.predict(eval_x))}, + index=df_.index) + final_df['偏差率'] = (final_df['真实值'] - final_df['预测值']).sum() / final_df['真实值'].sum() + final_df['偏差率'] = final_df['偏差率'].apply(lambda x: "{:.5%}".format(x)) + print(final_df) + + with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\北京区域电量_12月.xlsx', if_sheet_exists='replace', mode='a', + engine='openpyxl') as writer: + final_df.to_excel(writer, sheet_name=f'{city}') diff --git a/北京安徽/北京行业电量_时间序列.py b/北京安徽/北京行业电量_时间序列.py new file mode 100644 index 0000000..45a603c --- /dev/null +++ b/北京安徽/北京行业电量_时间序列.py @@ -0,0 +1,74 @@ +from prophet import Prophet +import pandas as pd +import os +import numpy as np + + +def normal(data): + high = data.describe()['75%'] + 1.5 * (data.describe()['75%'] - data.describe()['25%']) + low = data.describe()['25%'] - 1.5 * (data.describe()['75%'] - data.describe()['25%']) + return (data <= high) & (data >= low) + + +excel_file = r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽行业.xlsx' +df = pd.read_excel(excel_file, sheet_name=0) + +for city in df['city_name'].drop_duplicates().dropna(): + df_city = df[df['city_name'] == city] + df_city['stat_date'] = pd.to_datetime(df_city['stat_date']) + list_real = [] + list_pred = [] + list_industry = [] + result_dict = {} + + for industry in df_city.columns[3:]: + s1 = df_city[['stat_date', industry]] + + ds_train = s1[(s1['stat_date'] >= '2023-01-01') & (s1['stat_date'] <= '2023-12-31')].sort_values(by='stat_date') + + ds_train.rename(columns={'stat_date': 'ds', industry: 'y'}, inplace=True) + + df_train = ds_train.copy().iloc[:-3] + df_train['y'] = df_train['y'].where(normal(df_train['y']), other=np.nan).fillna(method='ffill') + + model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True) + model.add_country_holidays(country_name="CN") + model.fit(df_train) + future = model.make_future_dataframe(periods=3, freq='D') + + predict = model.predict(future) + print(city[-6:], industry) + + predict = predict[['ds', 'yhat']].set_index('ds').loc['2023-12'].rename(columns={'yhat': '售电量'}) + ds_train.rename(columns={'y': '售电量'}, inplace=True) + + result = pd.concat((ds_train.set_index('ds').loc['2023-12'][:-3], predict[-3:])) + result_dict[industry] = list(result['售电量']) + result['真实值'] = ds_train.set_index('ds').loc['2023-12'] + result = result[['真实值','售电量']] + result.columns = ['真实值','预测值'] + + list_industry.append(industry) + list_real.append(result['真实值'].sum()) + list_pred.append(result['预测值'].sum()) + + final_df = pd.DataFrame({'真实值':list_real,'预测值':list_pred},index=list_industry) + final_df['偏差'] = final_df['真实值']-final_df['预测值'] + final_df['偏差率'] = final_df['偏差']/final_df['真实值'] + final_df['偏差率'] = final_df['偏差率'].apply(lambda x: "{:.5%}".format(x)) + + with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\时间序列算法_北京行业_12月.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as writer: + final_df.to_excel(writer,sheet_name=f'{city[-6:]}') + + # df = predict.join(s1.set_index('ds')).loc['2023-8'] + # df['偏差率'] = (df['y'] - df['yhat']) / df['y'] + # df['goal'] = (df['y'] - df['yhat'])[-3:].sum() / df['y'].sum() + # list_goal.append((df['y'] - df['yhat'])[-3:].sum() / df['y'].sum()) + # list_industry.append(industry) + + # df = pd.DataFrame({'industry': list_industry, 'goal': list_goal}) + # df.to_csv(fr'C:\Users\鸽子\Desktop\行业8月偏差\{city[:2]}_goal.csv', index=False, encoding='gbk') + + # with open(r'C:\Users\鸽子\Desktop\goal_8.txt','a') as f: + # f.write(f'{city[:2]}\n') + # df['goal'].value_counts(bins=[-np.inf,-0.05, -0.01, -0.005, 0, 0.005, 0.01, 0.02, 0.05,np.inf], sort=False).to_csv(f,header=False,sep='\t') diff --git a/北京安徽/安徽_分压_移动平均.py b/北京安徽/安徽_分压_移动平均.py new file mode 100644 index 0000000..bf47872 --- /dev/null +++ b/北京安徽/安徽_分压_移动平均.py @@ -0,0 +1,62 @@ +import pandas as pd +pd.set_option('display.width',None) +df = pd.read_excel(r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽分压区域.xlsx', sheet_name=1) +df['pt_date'] = pd.to_datetime(df['pt_date']) + +# 移动平均 +for city in df['city_name'].drop_duplicates(): + + df_city = df[(df['city_name'] == city)&(df['county_name'].isnull())].set_index('pt_date').loc['2023-12'].sort_index() + + dict_big = {} + dict_ok = {} + resut_df = pd.DataFrame({}) + index_industry = [] + tq_list = [] + pred_list = [] + loss_list = [] + rate_list = [] + + + for industry in df_city.columns[2:]: + + df_moving_avg = pd.DataFrame(df_city.iloc[:-3][industry], index=df_city.iloc[:-3].index) + + future = pd.date_range(start='2023-12-29', periods=3, freq='D') + + for date in future: + df_moving_avg.loc[date, industry] = df_moving_avg[df_moving_avg.values!=0][-3:].mean().values[0] + + + resut_df = pd.concat([resut_df, df_moving_avg], axis=1) + "result_df为明细数据" + + print(city[-6:]) + + final_df = resut_df.sum() + final_df = pd.DataFrame(final_df,columns=['预测值']) + final_df['真实值'] = df_city[df_city.columns[2:]].sum() + final_df['偏差'] = final_df['真实值'] - final_df['预测值'] + final_df['偏差率'] = final_df['偏差'] / final_df['真实值'] + final_df['偏差率'] = final_df['偏差率'].apply(lambda x:"{:.5%}".format(x)) + print(final_df) + # loss = (df_city1[industry].tail(-3).sum() - df_moving_avg.tail(-3).sum()) / df_city1[industry].sum() + # tq_list.append(df_city1[industry].sum()) + # pred_list.append(df_moving_avg[industry].sum()) + # loss_list.append(df_city1[industry].sum()-df_moving_avg[industry].sum()) + # rate_list.append((df_city1[industry].sum()-df_moving_avg[industry].sum())/df_city1[industry].sum()) + + with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\移动平均_安徽分压_12月.xlsx', mode='a', if_sheet_exists='replace', + engine='openpyxl') as writer: + final_df.to_excel(writer, sheet_name=f'{city[-6:]}') + +# resut_df = pd.DataFrame({'同期电量':tq_list,'预测电量':pred_list,'偏差':loss_list,'偏差率':rate_list},index=index_industry) +# print(resut_df) +# resut_df.to_excel(r'C:\Users\鸽子\Desktop\移动平均_丽水_行业.xlsx') + +# if loss.values >= 0.005: +# dict_big[industry] = loss.values[0] +# else: +# dict_ok[industry] = loss.values[0] +# print(len(dict_ok)) +# print(len(dict_big)) diff --git a/北京安徽/安徽_行业_移动平均.py b/北京安徽/安徽_行业_移动平均.py new file mode 100644 index 0000000..25c4c3b --- /dev/null +++ b/北京安徽/安徽_行业_移动平均.py @@ -0,0 +1,59 @@ +import pandas as pd +pd.set_option('display.width',None) +df = pd.read_excel(r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽行业.xlsx', sheet_name=1) +df['stat_date'] = pd.to_datetime(df['stat_date']) + +# 移动平均 +for city in df['city_name'].drop_duplicates().dropna(): + + df_city = df[(df['city_name'] == city)&(df['county_name'].isnull())].set_index('stat_date').loc['2023-12'].sort_index() + + dict_big = {} + dict_ok = {} + resut_df = pd.DataFrame({}) + index_industry = [] + tq_list = [] + pred_list = [] + loss_list = [] + rate_list = [] + + for industry in df_city.columns[2:]: + + df_moving_avg = pd.DataFrame(df_city.iloc[:-3][industry], index=df_city.iloc[:-3].index) + + + future = pd.date_range(start='2023-12-29', periods=3, freq='D') + + for date in future: + df_moving_avg.loc[date, industry] = df_moving_avg[df_moving_avg.values!=0][-3:].mean().values + + resut_df = pd.concat([resut_df, df_moving_avg], axis=1) + "result_df为明细数据" + print(city[-6:]) + + final_df = resut_df.sum() + final_df = pd.DataFrame(final_df,columns=['预测值']) + final_df['真实值'] = df_city[df_city.columns[2:]].sum() + final_df['偏差'] = final_df['真实值'] - final_df['预测值'] + final_df['偏差率'] = final_df['偏差'] / final_df['真实值'] + final_df['偏差率'] = final_df['偏差率'].apply(lambda x:"{:.5%}".format(x)) + print(final_df) + # loss = (df_city1[industry].tail(-3).sum() - df_moving_avg.tail(-3).sum()) / df_city1[industry].sum() + # tq_list.append(df_city1[industry].sum()) + # pred_list.append(df_moving_avg[industry].sum()) + # loss_list.append(df_city1[industry].sum()-df_moving_avg[industry].sum()) + # rate_list.append((df_city1[industry].sum()-df_moving_avg[industry].sum())/df_city1[industry].sum()) + with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\移动平均_安徽行业_12月.xlsx', mode='a', if_sheet_exists='replace', + engine='openpyxl') as writer: + final_df.to_excel(writer, sheet_name=f'{city[-6:]}') + +# resut_df = pd.DataFrame({'同期电量':tq_list,'预测电量':pred_list,'偏差':loss_list,'偏差率':rate_list},index=index_industry) +# print(resut_df) +# resut_df.to_excel(r'C:\Users\鸽子\Desktop\移动平均_丽水_行业.xlsx') + +# if loss.values >= 0.005: +# dict_big[industry] = loss.values[0] +# else: +# dict_ok[industry] = loss.values[0] +# print(len(dict_ok)) +# print(len(dict_big)) diff --git a/北京安徽/安徽区域电量.py b/北京安徽/安徽区域电量.py new file mode 100644 index 0000000..cf969e1 --- /dev/null +++ b/北京安徽/安徽区域电量.py @@ -0,0 +1,90 @@ +import pandas as pd +import datetime +import math +import chinese_calendar as cc +import xgboost as xgb +from sklearn.model_selection import train_test_split +from sklearn.metrics import r2_score + + +def holiday_work(x): + if cc.is_workday(x): + return 0 + if cc.is_holiday(x): + return 1 +def jq(y, x): + a = 365.242 * (y - 1900) + 6.2 + 15.22 * x - 1.9 * math.sin(0.262 * x) + return datetime.date(1899, 12, 31) + datetime.timedelta(days=int(a)) + +def normal(nd): + high = nd.describe()['75%'] + 1.5 * (nd.describe()['75%'] - nd.describe()['25%']) + low = nd.describe()['25%'] - 1.5 * (nd.describe()['75%'] - nd.describe()['25%']) + return nd[(nd < high) & (nd > low)] + +jq_list = ['小寒', '大寒', '立春', '雨水', '惊蛰', '春分', '清明', '谷雨', '立夏', '小满', '芒种', '夏至', '小暑', + '大暑', '立秋', '处暑', '白露', '秋分', '寒露', '霜降', '立冬', '小雪', '大雪', '冬至'] +label_dict = {'冬至': 0, '处暑': 1, '夏至': 2, '大寒': 3, '大暑': 4, '大雪': 5, '寒露': 6, '小寒': 7, '小暑': 8, + '小满': 9, '小雪': 10, '惊蛰': 11, '春分': 12, '清明': 13, '白露': 14, '秋分': 15, '立冬': 16, '立夏': 17, + '立春': 18, '立秋': 19, '芒种': 20, '谷雨': 21, '雨水': 22, '霜降': 23} +jq_dict = {} +for j in range(2023, 2025): + for i in range(24): + jq_dict[jq(j, i).strftime('%Y-%m-%d')] = jq_list[i] + +pd.set_option('display.width', None) +df_qy_ah = pd.read_excel(r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽分压区域.xlsx', sheet_name=2) +df_qy_ah = df_qy_ah[df_qy_ah['county_name'].isnull()] +df_qy_ah['pt_date'] = pd.to_datetime(df_qy_ah['pt_date']) +df_qy_ah = df_qy_ah[df_qy_ah['pt_date'] <= '2023-12-31'] +for city in df_qy_ah['city_name'].drop_duplicates(): + + df_ah_city = df_qy_ah[df_qy_ah['city_name'] == city] + + df_ah_city.drop_duplicates(inplace=True) + + df_ah_city['24ST'] = df_qy_ah['pt_date'].astype('string').map(jq_dict) + df_ah_city.fillna(method='ffill', inplace=True) + df_ah_city['24ST'].fillna('冬至', inplace=True) + df_ah_city['24ST'] = df_ah_city['24ST'].map(label_dict) + + ys_df = pd.read_excel(r'C:\python-project\p1031\入模数据\杭州.xlsx') + ys_df['dtdate'] = pd.to_datetime(ys_df['dtdate']) + ys_dict = dict(zip([x for x in ys_df['dtdate']], [y for y in ys_df['holiday']])) + + df_ah_city['holiday'] = df_qy_ah['pt_date'].map(ys_dict) + + holiday_null_s = df_ah_city[df_ah_city['holiday'].isnull()]['pt_date'] + holiday_null_s = holiday_null_s.map(holiday_work) + holiday_null_s.iloc[-2:] = 3 + + df_ah_city['holiday'].fillna(holiday_null_s.to_dict(), inplace=True) + df_ah_city.set_index('pt_date', inplace=True) + # df_12月真实电量数据 + df_ = df_ah_city.loc['2023-12']['power_sal'] + + # 去除异常值 + df_ah_city = df_ah_city.loc[normal(df_ah_city['power_sal']).index] + print(city) + print(df_ah_city) + + X, y = df_ah_city.drop(columns=['city_name', 'county_name', 'power_sal']).iloc[-180:-3], \ + df_ah_city['power_sal'].iloc[-180:-3] + x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + eval_x, eval_y = df_ah_city.drop(columns=['city_name', 'county_name', 'power_sal']).iloc[-3:], \ + df_ah_city['power_sal'].iloc[-3:] + model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150) + model.fit(x_train, y_train) + y_pred = model.predict(x_test) + result_test = pd.DataFrame({'test': y_test, 'pred': y_pred}, index=y_test.index) + + print((abs(result_test['pred'] - result_test['test']) / result_test['test']).mean()) + print(r2_score(y_test, y_pred)) + + final_df = pd.DataFrame({'真实值':list(df_.values),'预测值':list(df_.values)[:-3]+list(model.predict(eval_x))},index=df_.index) + final_df['偏差率'] = (final_df['真实值']-final_df['预测值']).sum()/final_df['真实值'].sum() + final_df['偏差率'] = final_df['偏差率'].apply(lambda x: "{:.5%}".format(x)) + print(final_df) + + with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\安徽区域电量_12月.xlsx', if_sheet_exists='replace', mode='a', + engine='openpyxl') as writer: + final_df.to_excel(writer,sheet_name=f'{city}') diff --git a/北京安徽/安徽行业电量_时间序列.py b/北京安徽/安徽行业电量_时间序列.py new file mode 100644 index 0000000..cf47659 --- /dev/null +++ b/北京安徽/安徽行业电量_时间序列.py @@ -0,0 +1,74 @@ +from prophet import Prophet +import pandas as pd +import os +import numpy as np + + +def normal(data): + high = data.describe()['75%'] + 1.5 * (data.describe()['75%'] - data.describe()['25%']) + low = data.describe()['25%'] - 1.5 * (data.describe()['75%'] - data.describe()['25%']) + return (data <= high) & (data >= low) + + +excel_file = r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽行业.xlsx' +df = pd.read_excel(excel_file, sheet_name=1) + +for city in df['city_name'].drop_duplicates().dropna(): + df_city = df[df['city_name'] == city] + df_city['stat_date'] = pd.to_datetime(df_city['stat_date']) + list_real = [] + list_pred = [] + list_industry = [] + result_dict = {} + + for industry in df_city.columns[3:]: + s1 = df_city[['stat_date', industry]] + + ds_train = s1[(s1['stat_date'] >= '2023-01-01') & (s1['stat_date'] <= '2023-12-31')].sort_values(by='stat_date') + + ds_train.rename(columns={'stat_date': 'ds', industry: 'y'}, inplace=True) + + df_train = ds_train.copy().iloc[:-3] + df_train['y'] = df_train['y'].where(normal(df_train['y']), other=np.nan).fillna(method='ffill') + + model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True) + model.add_country_holidays(country_name="CN") + model.fit(df_train) + future = model.make_future_dataframe(periods=3, freq='D') + + predict = model.predict(future) + print(city[-6:], industry) + + predict = predict[['ds', 'yhat']].set_index('ds').loc['2023-12'].rename(columns={'yhat': '售电量'}) + ds_train.rename(columns={'y': '售电量'}, inplace=True) + + result = pd.concat((ds_train.set_index('ds').loc['2023-12'][:-3], predict[-3:])) + result_dict[industry] = list(result['售电量']) + result['真实值'] = ds_train.set_index('ds').loc['2023-12'] + result = result[['真实值','售电量']] + result.columns = ['真实值','预测值'] + + list_industry.append(industry) + list_real.append(result['真实值'].sum()) + list_pred.append(result['预测值'].sum()) + + final_df = pd.DataFrame({'真实值':list_real,'预测值':list_pred},index=list_industry) + final_df['偏差'] = final_df['真实值']-final_df['预测值'] + final_df['偏差率'] = final_df['偏差']/final_df['真实值'] + final_df['偏差率'] = final_df['偏差率'].apply(lambda x: "{:.5%}".format(x)) + + with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\时间序列算法_安徽行业_12月.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as writer: + final_df.to_excel(writer,sheet_name=f'{city[-6:]}') + + # df = predict.join(s1.set_index('ds')).loc['2023-8'] + # df['偏差率'] = (df['y'] - df['yhat']) / df['y'] + # df['goal'] = (df['y'] - df['yhat'])[-3:].sum() / df['y'].sum() + # list_goal.append((df['y'] - df['yhat'])[-3:].sum() / df['y'].sum()) + # list_industry.append(industry) + + # df = pd.DataFrame({'industry': list_industry, 'goal': list_goal}) + # df.to_csv(fr'C:\Users\鸽子\Desktop\行业8月偏差\{city[:2]}_goal.csv', index=False, encoding='gbk') + + # with open(r'C:\Users\鸽子\Desktop\goal_8.txt','a') as f: + # f.write(f'{city[:2]}\n') + # df['goal'].value_counts(bins=[-np.inf,-0.05, -0.01, -0.005, 0, 0.005, 0.01, 0.02, 0.05,np.inf], sort=False).to_csv(f,header=False,sep='\t') diff --git a/区域电量19年至今数据.py b/区域电量19年至今数据.py index bed20b7..7c5b4b3 100644 --- a/区域电量19年至今数据.py +++ b/区域电量19年至今数据.py @@ -32,12 +32,12 @@ def normal(nd): # df = df[['dtdate','tem_max','tem_min']] # # print(df.head()) # # print(df_elec.head()) -# + # merge_df = pd.merge(df_elec,df,left_on='pt_date',right_on='dtdate')[['pt_date','tem_max','tem_min','售电量']] # merge_df.set_index('pt_date',inplace=True) # merge_df.index = pd.to_datetime(merge_df.index,format='%Y%m%d') -# -# + + # merge_df['month'] = merge_df.index.strftime('%Y-%m-%d').str[5:7] # merge_df['month'] = merge_df['month'].astype('int') # merge_df.to_csv('杭州入模数据.csv',encoding='gbk') diff --git a/各地级市日电量模型/test01.py b/各地级市日电量模型/test01.py deleted file mode 100644 index e69de29..0000000 diff --git a/各地级市日电量模型/追加10月数据.py b/各地级市日电量模型/追加10月数据.py index 9b5ee2a..10ff61d 100644 --- a/各地级市日电量模型/追加10月数据.py +++ b/各地级市日电量模型/追加10月数据.py @@ -9,15 +9,17 @@ df['dtdate'] = pd.to_datetime(df['dtdate'],format='%Y%m%d').astype('str') df['city_name'] = df['city_name'].map(lambda x:x.strip()) df['city_name'] = df['city_name'].str[:-1] df['dtdate'] = df['dtdate'].map(lambda x:x.strip()) +# 判断工作日 def holiday_work(x): if cc.is_workday(x): return 0 if cc.is_holiday(x): return 1 +# 判断节气 def jq(y,x): a=365.242 * (y - 1900) + 6.2 + 15.22 * x - 1.9 * math.sin(0.262 * x) return datetime.date(1899,12,31)+datetime.timedelta(days=int(a)) -# print(jq(2023,1)) +print(jq(2023,1)) jq_list=['小寒', '大寒', '立春', '雨水', '惊蛰', '春分', '清明', '谷雨', '立夏', '小满', '芒种', '夏至', '小暑', '大暑', '立秋', '处暑', '白露', '秋分', '寒露', '霜降', '立冬', '小雪', '大雪','冬至'] jq_dict={} for j in range(2023,2024): diff --git a/文档处理.py b/文档处理.py index 6395243..befe42e 100644 --- a/文档处理.py +++ b/文档处理.py @@ -63,6 +63,7 @@ import os from openpyxl import Workbook import pandas as pd + # df = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江省11月分行业售电量预测v2.xlsx',sheet_name=1) # print(df.head()) # print(df[df.columns[2:]].groupby(df['city_name']).sum().T) @@ -94,6 +95,7 @@ file_dir = r'C:\Users\鸽子\Desktop\11月区县分压预测' import matplotlib.pyplot as plt import matplotlib as mpl import matplotlib.dates as mdates + # date_rng = pd.date_range(start=df['4.有色金属矿采选业'].index[0], end=df['4.有色金属矿采选业'].index[-1], freq='D') # mpl.rcParams['font.sans-serif']=['kaiti'] # print(df['4.有色金属矿采选业'][:-1]) @@ -110,18 +112,18 @@ import matplotlib.dates as mdates # plt.show() -excel_file = pd.ExcelFile(r'C:\Users\鸽子\Desktop\浙江电量20231202.xlsx') -df_city_real = pd.read_excel(excel_file,sheet_name=0) -df_city_real = df_city_real[df_city_real['county_name'].isnull()] -df_city_real['city_name'] = df_city_real['city_name'].str[4:6] -# print(df_city_real) - -file_dir = r'C:\Users\鸽子\Desktop\发行&预测\区域行业分压预测v1129' -print(os.listdir(file_dir)) +# excel_file = pd.ExcelFile(r'C:\Users\鸽子\Desktop\浙江电量20231202.xlsx') +# df_city_real = pd.read_excel(excel_file,sheet_name=0) +# df_city_real = df_city_real[df_city_real['county_name'].isnull()] +# df_city_real['city_name'] = df_city_real['city_name'].str[4:6] +# # print(df_city_real) +# +# file_dir = r'C:\Users\鸽子\Desktop\发行&预测\区域行业分压预测v1129' +# print(os.listdir(file_dir)) # 区域明细及偏差率统计 -city_area_file = pd.ExcelFile(os.path.join(file_dir,os.listdir(file_dir)[2])) +# city_area_file = pd.ExcelFile(os.path.join(file_dir, os.listdir(file_dir)[2])) # for city in df_city_real['city_name'].drop_duplicates(): # df_city_pred = pd.read_excel(city_area_file,sheet_name=city).dropna().set_index('日期') # df_city_pred.index = pd.to_datetime(df_city_pred.index) @@ -139,12 +141,11 @@ city_area_file = pd.ExcelFile(os.path.join(file_dir,os.listdir(file_dir)[2])) # result.to_excel(writer,sheet_name=f'{city}') +# pd.read_excel(city_area_file,sheet_name='舟山').dropna().set_index('日期') +# df_city_real[df_city_real['city_name']=='舟山'].set_index('pt_date')['power_sal'] - # pd.read_excel(city_area_file,sheet_name='舟山').dropna().set_index('日期') - # df_city_real[df_city_real['city_name']=='舟山'].set_index('pt_date')['power_sal'] - -city_volt_file = os.path.join(file_dir,os.listdir(file_dir)[2]) -excel_file1 = pd.ExcelFile(city_volt_file) +# city_volt_file = os.path.join(file_dir,os.listdir(file_dir)[2]) +# excel_file1 = pd.ExcelFile(city_volt_file) # for sheet_name in excel_file1.sheet_names[1:]: # print(sheet_name) @@ -164,7 +165,7 @@ excel_file1 = pd.ExcelFile(city_volt_file) # with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\市分压电量同期预测对比.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as wirter: # result.to_excel(wirter,sheet_name=f'{sheet_name}') -industry_file = pd.ExcelFile(os.path.join(file_dir,os.listdir(file_dir)[4])) +# industry_file = pd.ExcelFile(os.path.join(file_dir,os.listdir(file_dir)[4])) # for sheet_name in industry_file.sheet_names[1:]: # # pred_industry_df = pd.concat([pd.read_excel(industry_file,sheet_name=sheet_name).iloc[:27],pd.read_excel(industry_file,sheet_name=sheet_name).iloc[-3:]],ignore_index=True) @@ -186,16 +187,96 @@ industry_file = pd.ExcelFile(os.path.join(file_dir,os.listdir(file_dir)[4])) # with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\行业电量同期预测对比.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as wirter: # result.to_excel(wirter,sheet_name=f'{sheet_name[:2]}') -e1 = r'C:\Users\鸽子\Desktop\行业电量同期预测对比.xlsx' -df1 = pd.read_excel(e1,sheet_name=1) -df1.set_index(df1.columns[0],inplace=True) -for sheet_name in industry_file.sheet_names[2:]: - df2 = pd.read_excel(e1,sheet_name=sheet_name) - df2 = df2.set_index(df2.columns[0]) - df1 += df2 -df1['偏差'] = df1['真实值']-df1['预测值'] -df1['偏差率'] = df1['偏差']/df1['真实值'] -df1.to_excel(r'C:\Users\鸽子\Desktop\1.xlsx') -# writer = pd.ExcelWriter(e1,engine='openpyxl') -# df1.to_excel(writer,sheet_name=0) -print(df1) \ No newline at end of file +# e1 = r'C:\Users\鸽子\Desktop\行业电量同期预测对比.xlsx' +# df1 = pd.read_excel(e1,sheet_name=1) +# df1.set_index(df1.columns[0],inplace=True) +# for sheet_name in industry_file.sheet_names[2:]: +# df2 = pd.read_excel(e1,sheet_name=sheet_name) +# df2 = df2.set_index(df2.columns[0]) +# df1 += df2 +# df1['偏差'] = df1['真实值']-df1['预测值'] +# df1['偏差率'] = df1['偏差']/df1['真实值'] +# df1.to_excel(r'C:\Users\鸽子\Desktop\1.xlsx') +# # writer = pd.ExcelWriter(e1,engine='openpyxl') +# # df1.to_excel(writer,sheet_name=0) +# print(df1) +import numpy as np + +pd.set_option('display.width', None) + +# 同期发行电量差别统计 +df_fx = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江发行202311-202312v2.xlsx') +df_tq = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江分区202311-202312.xlsx') + +# 市级别 +df_tq_city = df_tq[df_tq['county_name'].isnull()] + +df_tq_city['pt_date'] = pd.to_datetime(df_tq_city['pt_date']) +df_tq_city = df_tq_city[df_tq_city['pt_date'].astype('string').str[:7]=='2023-11'] + +# print(df_tq_city[df_tq_city['city_name']==df_tq_city['city_name'].iloc[0]].set_index('pt_date')['power_sal'].resample('M').sum()) + +# 同期按月汇总 +df_tq_city = pd.DataFrame(df_tq_city['power_sal'].groupby(df_tq_city['city_name']).sum() * 10000) + +df_fx_city = df_fx[(df_fx['date_pub'] == df_fx['date_pub'].iloc[0]) & (df_fx['coountry_name'].isnull()) + & (df_fx['city_name'].notnull())].drop(columns='coountry_name').set_index('city_name') + + +df_city = df_fx_city.join(df_tq_city) + +df_city = df_city.drop(columns='province_name') +df_city['bias'] = (df_city['power_pub'] - df_city['power_sal']) / df_city['power_pub'] + +df_city = df_city.iloc[np.argsort(abs(df_city['bias']))] +# df_city.to_excel('市区域发行同期偏差.xlsx') + +print('------------------------------------------------------------------------') + +# 区县偏差 +df_fx_county = df_fx[(df_fx['date_pub'] == df_fx['date_pub'].iloc[0]) & (df_fx['coountry_name'].notnull()) + & (df_fx['city_name'].notnull())].drop(columns=['province_name']).set_index('coountry_name') +# print(df_fx_county.reset_index().sort_values('coountry_name').drop_duplicates()) + +df_tq_county = df_tq[(df_tq['county_name'].notnull())&(df_tq['pt_date'].astype('string').str[:7]=='2023-11')] + +df_tq_county = pd.DataFrame(df_tq_county['power_sal'].groupby(df_tq_county['county_name']).sum()* 10000) + +print(df_tq_county.sort_index()) +df_county = df_fx_county.join(df_tq_county).sort_index() + +# print(df_county.reset_index().drop_duplicates()) + +df_county['bias'] = (df_county['power_pub'] - df_county['power_sal'])/df_county['power_pub'] +# df_county = df_county.iloc[np.argsort(abs(df_county['bias']))] + +# print(df_county.reset_index().drop_duplicates()) +df_county.reset_index(inplace=True) +df_county = df_county[['date_pub','city_name','coountry_name','power_pub','power_sal','bias']] + + + +zjbs_ = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江变损202311-202312.xlsx') +zjbs = zjbs_[(zjbs_['ds']=='2023-11')&(zjbs_['county_name'].notnull())][['county_name','region_power']] + + + +df_county = pd.merge(df_county,zjbs,left_on='coountry_name',right_on='county_name',how='left') +df_county.fillna(0,inplace=True) + + +df_county['power_sal'] += df_county['region_power'] +df_county['bias'] = (df_county['power_pub'] - df_county['power_sal'])/df_county['power_pub'] +df_county['_'] = abs(df_county['bias']) + +df_county.sort_values(['city_name','_']).drop(columns=['region_power','county_name','_']).to_excel('区县发行同期偏差.xlsx',index=False) + +zjbs_qx = zjbs_[(zjbs_['ds']=='2023-11')&(zjbs_['county_name'].isnull())][['city_name','region_power']].set_index('city_name') +print(zjbs_qx) +print(df_city) +df_city = df_city.join(zjbs_qx) +df_city['power_sal'] += df_city['region_power'] +df_city['bias'] = (df_city['power_pub'] - df_city['power_sal'])/df_city['power_pub'] + +print(df_city.drop(columns='region_power')) +df_city.drop(columns='region_power').to_excel('市区域发行同期偏差.xlsx') \ No newline at end of file diff --git a/浙江电压等级电量/prophet_分压电量_10kv.py b/浙江电压等级电量/prophet_分压电量.py similarity index 100% rename from 浙江电压等级电量/prophet_分压电量_10kv.py rename to 浙江电压等级电量/prophet_分压电量.py diff --git a/浙江行业电量/prophet_行业电量.py b/浙江行业电量/prophet_行业电量.py index 965a9d2..31fa80d 100644 --- a/浙江行业电量/prophet_行业电量.py +++ b/浙江行业电量/prophet_行业电量.py @@ -1,58 +1,60 @@ from prophet import Prophet import pandas as pd import os -import datetime import numpy as np def normal(data): high = data.describe()['75%'] + 1.5 * (data.describe()['75%'] - data.describe()['25%']) low = data.describe()['25%'] - 1.5 * (data.describe()['75%'] - data.describe()['25%']) - return (data<=high)&(data>=low) + return (data <= high) & (data >= low) -file_dir = './浙江各地市行业电量数据' -# df = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江电量20231202.xlsx') -for city in os.listdir(file_dir): - df_city = pd.read_excel(os.path.join(file_dir, city)) - df_city['stat_date'] = df_city['stat_date'].map(lambda x: str(x).strip()[:10]) +excel_file = r'C:\Users\鸽子\Desktop\北京安徽行业.xlsx' +df = pd.read_excel(excel_file, sheet_name=1) + +print(df.columns) +for city in df['city_name'].drop_duplicates().dropna(): + df_city = df[df['city_name'] == city] df_city['stat_date'] = pd.to_datetime(df_city['stat_date']) list_goal = [] list_industry = [] result_dict = {} + for industry in df_city.columns[3:]: s1 = df_city[['stat_date', industry]] - ds_train = s1[(s1['stat_date'] >= '2022-09-30') & (s1['stat_date'] <= '2023-11-27')].sort_values(by='stat_date') + ds_train = s1[(s1['stat_date'] >= '2023-01-01') & (s1['stat_date'] <= '2023-12-28')].sort_values(by='stat_date') + print(ds_train) ds_train.rename(columns={'stat_date': 'ds', industry: 'y'}, inplace=True) df_train = ds_train.copy() df_train['y'] = df_train['y'].where(normal(df_train['y']), other=np.nan).fillna(method='ffill') - model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True) model.add_country_holidays(country_name="CN") model.fit(df_train) future = model.make_future_dataframe(periods=3, freq='D') predict = model.predict(future) - print(city[:2],industry) + print(city[-6:], industry) - predict = predict[['ds', 'yhat']].set_index('ds').loc['2023-11'].rename(columns={'yhat':'售电量'}) - ds_train.rename(columns={'y':'售电量'},inplace=True) + predict = predict[['ds', 'yhat']].set_index('ds').loc['2023-12'].rename(columns={'yhat': '售电量'}) + ds_train.rename(columns={'y': '售电量'}, inplace=True) - result = pd.concat((ds_train.set_index('ds').loc['2023-11'][:27],predict[-3:])) + result = pd.concat((ds_train.set_index('ds').loc['2023-12'][:28], predict[-3:])) result_dict[industry] = list(result['售电量']) + print(result) - with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\行业电量v1130.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as writer: - pd.DataFrame(result_dict,index=pd.date_range(start=f'2023-11-01', end=f'2023-11-30', freq='D').strftime('%Y-%m-%d')).to_excel(writer,sheet_name=city[:2]) + # with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\行业电量v1130.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as writer: + # pd.DataFrame(result_dict,index=pd.date_range(start=f'2023-11-01', end=f'2023-11-30', freq='D').strftime('%Y-%m-%d')).to_excel(writer,sheet_name=city[:2]) - # df = predict.join(s1.set_index('ds')).loc['2023-8'] - # df['偏差率'] = (df['y'] - df['yhat']) / df['y'] - # df['goal'] = (df['y'] - df['yhat'])[-3:].sum() / df['y'].sum() - # list_goal.append((df['y'] - df['yhat'])[-3:].sum() / df['y'].sum()) - # list_industry.append(industry) + # df = predict.join(s1.set_index('ds')).loc['2023-8'] + # df['偏差率'] = (df['y'] - df['yhat']) / df['y'] + # df['goal'] = (df['y'] - df['yhat'])[-3:].sum() / df['y'].sum() + # list_goal.append((df['y'] - df['yhat'])[-3:].sum() / df['y'].sum()) + # list_industry.append(industry) # df = pd.DataFrame({'industry': list_industry, 'goal': list_goal}) # df.to_csv(fr'C:\Users\鸽子\Desktop\行业8月偏差\{city[:2]}_goal.csv', index=False, encoding='gbk') diff --git a/浙江行业电量/test1.py b/浙江行业电量/test1.py index 96023a3..04b0307 100644 --- a/浙江行业电量/test1.py +++ b/浙江行业电量/test1.py @@ -112,3 +112,4 @@ print(df) print("\n根据条件替换后的数据:") print(df_new) + diff --git a/浙江行业电量/行业电量_输出为3_步长为10.py b/浙江行业电量/行业电量_输出为3_步长为10.py index 53ec20b..eed256e 100644 --- a/浙江行业电量/行业电量_输出为3_步长为10.py +++ b/浙江行业电量/行业电量_输出为3_步长为10.py @@ -26,7 +26,6 @@ class LSTM_Regression(nn.Module): x = x.view(s, b, -1) # 把形状改回来 return x - def create_dataset(data, days_for_train=5) -> (np.array, np.array): dataset_x, dataset_y = [], [] for i in range(len(data) - days_for_train-3): @@ -190,8 +189,8 @@ for city in df['city_name'].drop_duplicates(): print(df1) - with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\行业电量预测v1129.xlsx',mode='a',engine='openpyxl',if_sheet_exists='replace') as writer: - df1.to_excel(writer,sheet_name=f'{city[4:6]}') + # with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\行业电量预测v1129.xlsx',mode='a',engine='openpyxl',if_sheet_exists='replace') as writer: + # df1.to_excel(writer,sheet_name=f'{city[4:6]}') print(time.time()-t1) print(result_dict)