输出预测结果

main
鸽子 11 months ago
parent 5e3d97c389
commit 198bd29174

@ -0,0 +1,59 @@
import pandas as pd
import os

pd.set_option('display.width', None)

source_xlsx = r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽分压区域.xlsx'
report_xlsx = r'C:\Users\鸽子\Desktop\移动平均_北京分压_12月.xlsx'
# Dates that get forecast. The last three December rows are removed from the
# history first (assumes one row per day -- TODO confirm against the data).
future = pd.date_range(start='2023-12-29', periods=3, freq='D')

df = pd.read_excel(source_xlsx, sheet_name=0)
df['pt_date'] = pd.to_datetime(df['pt_date'])

# Moving-average forecast, one report sheet per city.
# dropna(): a missing city name selects no rows and cannot be sliced for the
# sheet name below.
for city in df['city_name'].drop_duplicates().dropna():
    # City-level rows only (county_name empty), restricted to December 2023.
    city_rows = df[(df['city_name'] == city) & (df['county_name'].isnull())]
    df_city = city_rows.set_index('pt_date').loc['2023-12'].sort_index()
    # Presumably skips city_name / county_name -- confirm the sheet layout.
    value_columns = df_city.columns[2:]

    forecasts = {}
    for industry in value_columns:
        series = df_city[industry].iloc[:-3].copy()
        for date in future:
            # Mean of the three most recent non-zero values; a forecast made
            # for an earlier date feeds the later ones. Series.mean() yields
            # the scalar a single-cell assignment needs.
            series.loc[date] = series[series != 0].iloc[-3:].mean()
        forecasts[industry] = series
    # Day-level detail: history plus the three forecast days, one column each.
    result_df = pd.DataFrame(forecasts)

    print(city[-6:])
    final_df = result_df.sum().to_frame('预测值')
    final_df['真实值'] = df_city[value_columns].sum()
    final_df['偏差'] = final_df['真实值'] - final_df['预测值']
    final_df['偏差率'] = (final_df['偏差'] / final_df['真实值']).apply(lambda x: "{:.5%}".format(x))
    print(final_df)

    # mode='a' requires an existing workbook and if_sheet_exists is only
    # accepted in append mode, so the first write has to create the file.
    if os.path.exists(report_xlsx):
        writer_kwargs = {'mode': 'a', 'if_sheet_exists': 'replace'}
    else:
        writer_kwargs = {'mode': 'w'}
    with pd.ExcelWriter(report_xlsx, engine='openpyxl', **writer_kwargs) as writer:
        final_df.to_excel(writer, sheet_name=f'{city[-6:]}')

@ -0,0 +1,59 @@
import pandas as pd
import os

pd.set_option('display.width', None)

source_xlsx = r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽行业.xlsx'
report_xlsx = r'C:\Users\鸽子\Desktop\移动平均_北京行业_12月.xlsx'
# Dates that get forecast. The last three December rows are removed from the
# history first (assumes one row per day -- TODO confirm against the data).
future = pd.date_range(start='2023-12-29', periods=3, freq='D')

df = pd.read_excel(source_xlsx, sheet_name=0)
df['stat_date'] = pd.to_datetime(df['stat_date'])

# Moving-average forecast, one report sheet per city.
# dropna(): a missing city name selects no rows and cannot be sliced for the
# sheet name below.
for city in df['city_name'].drop_duplicates().dropna():
    # City-level rows only (county_name empty), restricted to December 2023.
    city_rows = df[(df['city_name'] == city) & (df['county_name'].isnull())]
    df_city = city_rows.set_index('stat_date').loc['2023-12'].sort_index()
    # Presumably skips city_name / county_name -- confirm the sheet layout.
    value_columns = df_city.columns[2:]

    forecasts = {}
    for industry in value_columns:
        series = df_city[industry].iloc[:-3].copy()
        for date in future:
            # Mean of the three most recent non-zero values; a forecast made
            # for an earlier date feeds the later ones. Series.mean() yields
            # the scalar a single-cell assignment needs.
            series.loc[date] = series[series != 0].iloc[-3:].mean()
        forecasts[industry] = series
    # Day-level detail: history plus the three forecast days, one column each.
    result_df = pd.DataFrame(forecasts)

    print(city[-6:])
    final_df = result_df.sum().to_frame('预测值')
    final_df['真实值'] = df_city[value_columns].sum()
    final_df['偏差'] = final_df['真实值'] - final_df['预测值']
    final_df['偏差率'] = (final_df['偏差'] / final_df['真实值']).apply(lambda x: "{:.5%}".format(x))
    print(final_df)

    # mode='a' requires an existing workbook and if_sheet_exists is only
    # accepted in append mode, so the first write has to create the file.
    if os.path.exists(report_xlsx):
        writer_kwargs = {'mode': 'a', 'if_sheet_exists': 'replace'}
    else:
        writer_kwargs = {'mode': 'w'}
    with pd.ExcelWriter(report_xlsx, engine='openpyxl', **writer_kwargs) as writer:
        final_df.to_excel(writer, sheet_name=f'{city[-6:]}')

@ -0,0 +1,93 @@
import pandas as pd
import datetime
import math
import chinese_calendar as cc
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
def holiday_work(x):
    """Return 0 when *x* is a working day according to chinese_calendar, else 1.

    ``cc.is_holiday`` is the negation of ``cc.is_workday``, so a single lookup
    decides the flag and the function can never fall through to ``None``.
    Errors raised by chinese_calendar (e.g. for unsupported years) propagate.
    """
    return 0 if cc.is_workday(x) else 1
def normal(nd):
    """Return the entries of *nd* lying strictly inside the 1.5 * IQR fences.

    The fences are ``Q1 - 1.5 * (Q3 - Q1)`` and ``Q3 + 1.5 * (Q3 - Q1)``, with
    the quartiles taken from ``nd.describe()``. Values equal to a fence are
    dropped (strict comparison). The result keeps the original index labels
    of the surviving entries.
    """
    # One describe() call instead of six: the summary does not change
    # between the two fence computations.
    summary = nd.describe()
    q1, q3 = summary['25%'], summary['75%']
    high = q3 + 1.5 * (q3 - q1)
    low = q1 - 1.5 * (q3 - q1)
    return nd[(nd < high) & (nd > low)]
def jq(y, x):
    """Approximate the start date of solar term number *x* in year *y*.

    *x* is the position used to index the script's 24-entry solar-term list.
    The empirical formula gives a fractional day count from 1899-12-31; the
    fraction is truncated before building the date.
    """
    day_offset = 365.242 * (y - 1900) + 6.2 + 15.22 * x - 1.9 * math.sin(0.262 * x)
    epoch = datetime.date(1899, 12, 31)
    whole_days = datetime.timedelta(days=int(day_offset))
    return epoch + whole_days
import os

jq_list = ['小寒', '大寒', '立春', '雨水', '惊蛰', '春分', '清明', '谷雨', '立夏', '小满', '芒种', '夏至', '小暑',
           '大暑', '立秋', '处暑', '白露', '秋分', '寒露', '霜降', '立冬', '小雪', '大雪', '冬至']
# Integer code per solar-term name; turns the '24ST' column into a numeric
# feature. Built once: it does not depend on the city being processed.
label_dict = {'冬至': 0, '处暑': 1, '夏至': 2, '大寒': 3, '大暑': 4, '大雪': 5, '寒露': 6, '小寒': 7, '小暑': 8,
              '小满': 9, '小雪': 10, '惊蛰': 11, '春分': 12, '清明': 13, '白露': 14, '秋分': 15, '立冬': 16,
              '立夏': 17,
              '立春': 18, '立秋': 19, '芒种': 20, '谷雨': 21, '雨水': 22, '霜降': 23}
# 'YYYY-MM-DD' -> solar-term name, for the computed start dates in 2023-2024.
jq_dict = {
    jq(year, idx).strftime('%Y-%m-%d'): term
    for year in range(2023, 2025)
    for idx, term in enumerate(jq_list)
}

# Holiday lookup table: date -> value of the workbook's 'holiday' column.
ys_df = pd.read_excel(r'C:\python-project\p1031\入模数据\杭州.xlsx')
ys_df['dtdate'] = pd.to_datetime(ys_df['dtdate'])
ys_dict = dict(zip(ys_df['dtdate'], ys_df['holiday']))

pd.set_option('display.width', None)
report_xlsx = r'C:\Users\鸽子\Desktop\北京区域电量_12月.xlsx'
df_qy_ah = pd.read_excel(r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽分压区域.xlsx', sheet_name=3)
# City-level rows only; copy so the column write below targets a real frame
# rather than a slice of the sheet.
df_qy_ah = df_qy_ah[df_qy_ah['county_name'].isnull()].copy()
df_qy_ah['pt_date'] = pd.to_datetime(df_qy_ah['pt_date'])
df_qy_ah = df_qy_ah[df_qy_ah['pt_date'] <= '2023-12-31']

# dropna(): a missing city name selects no rows to train on.
for city in df_qy_ah['city_name'].drop_duplicates().dropna():
    df_ah_city = df_qy_ah[df_qy_ah['city_name'] == city].drop_duplicates().copy()

    # Solar-term feature: only term start dates match jq_dict; the forward
    # fill carries each term to the following rows.
    # NOTE(review): relies on rows being in date order -- confirm.
    df_ah_city['24ST'] = df_ah_city['pt_date'].astype('string').map(jq_dict)
    df_ah_city = df_ah_city.ffill()
    # Rows before the first matched term start have nothing to inherit.
    df_ah_city['24ST'] = df_ah_city['24ST'].fillna('冬至').map(label_dict)

    # Holiday feature: table lookup first, chinese_calendar as fallback.
    df_ah_city['holiday'] = df_ah_city['pt_date'].map(ys_dict)
    holiday_null_s = df_ah_city.loc[df_ah_city['holiday'].isnull(), 'pt_date'].map(holiday_work)
    # The last two dates missing from the table get code 3.
    # NOTE(review): the meaning of 3 is not visible in this script -- confirm.
    holiday_null_s.iloc[-2:] = 3
    # Assign back instead of chained inplace fillna, which is not guaranteed
    # to write through to the frame.
    df_ah_city['holiday'] = df_ah_city['holiday'].fillna(holiday_null_s)

    df_ah_city = df_ah_city.set_index('pt_date')
    # Actual December values, captured before outlier removal.
    df_ = df_ah_city.loc['2023-12']['power_sal']
    # Drop rows whose power_sal falls outside the IQR fences.
    df_ah_city = df_ah_city.loc[normal(df_ah_city['power_sal']).index]
    print(city)
    print(df_ah_city)

    features = df_ah_city.drop(columns=['city_name', 'county_name', 'power_sal'])
    target = df_ah_city['power_sal']
    X, y = features.iloc[-180:-3], target.iloc[-180:-3]
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    # NOTE(review): if outlier removal dropped one of the final three days,
    # these rows are no longer the last three days of December -- confirm.
    eval_x = features.iloc[-3:]

    model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    result_test = pd.DataFrame({'test': y_test, 'pred': y_pred}, index=y_test.index)
    # Mean absolute percentage error and R^2 on the random hold-out split.
    print((abs(result_test['pred'] - result_test['test']) / result_test['test']).mean())
    print(r2_score(y_test, y_pred))

    # Month view: actual values, with the last three replaced by predictions.
    actual = list(df_.values)
    final_df = pd.DataFrame({'真实值': actual, '预测值': actual[:-3] + list(model.predict(eval_x))},
                            index=df_.index)
    # A single month-level deviation rate, repeated on every row.
    final_df['偏差率'] = (final_df['真实值'] - final_df['预测值']).sum() / final_df['真实值'].sum()
    final_df['偏差率'] = final_df['偏差率'].apply(lambda x: "{:.5%}".format(x))
    print(final_df)

    # mode='a' requires an existing workbook and if_sheet_exists is only
    # accepted in append mode, so the first write has to create the file.
    if os.path.exists(report_xlsx):
        writer_kwargs = {'mode': 'a', 'if_sheet_exists': 'replace'}
    else:
        writer_kwargs = {'mode': 'w'}
    with pd.ExcelWriter(report_xlsx, engine='openpyxl', **writer_kwargs) as writer:
        final_df.to_excel(writer, sheet_name=f'{city}')

@ -0,0 +1,74 @@
from prophet import Prophet
import pandas as pd
import os
import numpy as np
def normal(data):
    """Return a boolean mask marking entries of *data* inside the 1.5 * IQR fences.

    The fences are ``Q1 - 1.5 * (Q3 - Q1)`` and ``Q3 + 1.5 * (Q3 - Q1)``, with
    the quartiles taken from ``data.describe()``. The comparison is inclusive,
    so values equal to a fence are marked True. The mask shares *data*'s index.
    """
    # One describe() call instead of six: the summary does not change
    # between the two fence computations.
    summary = data.describe()
    q1, q3 = summary['25%'], summary['75%']
    high = q3 + 1.5 * (q3 - q1)
    low = q1 - 1.5 * (q3 - q1)
    return (data <= high) & (data >= low)
excel_file = r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽行业.xlsx'
report_file = r'C:\Users\鸽子\Desktop\时间序列算法_北京行业_12月.xlsx'
df = pd.read_excel(excel_file, sheet_name=0)
# Parse the dates once on the full frame; writing the column into a per-city
# slice is not guaranteed to stick and triggers SettingWithCopyWarning.
df['stat_date'] = pd.to_datetime(df['stat_date'])

for city in df['city_name'].drop_duplicates().dropna():
    # NOTE(review): county-level rows are not filtered out here -- confirm the
    # sheet holds a single row per city and day.
    df_city = df[df['city_name'] == city]
    in_2023 = (df_city['stat_date'] >= '2023-01-01') & (df_city['stat_date'] <= '2023-12-31')
    list_real = []
    list_pred = []
    list_industry = []
    for industry in df_city.columns[3:]:
        # 2023 history for this column in Prophet's ds / y layout.
        ds_train = (
            df_city.loc[in_2023, ['stat_date', industry]]
            .sort_values(by='stat_date')
            .rename(columns={'stat_date': 'ds', industry: 'y'})
        )
        # The final three days are held out; they are what gets forecast.
        df_train = ds_train.iloc[:-3].copy()
        # Blank out IQR outliers, then carry the previous kept value forward.
        df_train['y'] = df_train['y'].where(normal(df_train['y'])).ffill()

        model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True)
        model.add_country_holidays(country_name="CN")
        model.fit(df_train)
        future = model.make_future_dataframe(periods=3, freq='D')
        predict = model.predict(future)
        print(city[-6:], industry)

        actual_dec = ds_train.set_index('ds')['y'].loc['2023-12']
        predicted_dec = predict.set_index('ds')['yhat'].loc['2023-12']
        # Month view: actuals up to the hold-out, then the three forecasts.
        blended = pd.concat((actual_dec.iloc[:-3], predicted_dec.iloc[-3:]))
        list_industry.append(industry)
        # Actuals are aligned to the blended dates before being totalled.
        list_real.append(actual_dec.reindex(blended.index).sum())
        list_pred.append(blended.sum())

    final_df = pd.DataFrame({'真实值': list_real, '预测值': list_pred}, index=list_industry)
    final_df['偏差'] = final_df['真实值'] - final_df['预测值']
    final_df['偏差率'] = (final_df['偏差'] / final_df['真实值']).apply(lambda x: "{:.5%}".format(x))

    # mode='a' requires an existing workbook and if_sheet_exists is only
    # accepted in append mode, so the first write has to create the file.
    if os.path.exists(report_file):
        writer_kwargs = {'mode': 'a', 'if_sheet_exists': 'replace'}
    else:
        writer_kwargs = {'mode': 'w'}
    with pd.ExcelWriter(report_file, engine='openpyxl', **writer_kwargs) as writer:
        final_df.to_excel(writer, sheet_name=f'{city[-6:]}')

@ -0,0 +1,62 @@
import pandas as pd
import os

pd.set_option('display.width', None)

source_xlsx = r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽分压区域.xlsx'
report_xlsx = r'C:\Users\鸽子\Desktop\移动平均_安徽分压_12月.xlsx'
# Dates that get forecast. The last three December rows are removed from the
# history first (assumes one row per day -- TODO confirm against the data).
future = pd.date_range(start='2023-12-29', periods=3, freq='D')

df = pd.read_excel(source_xlsx, sheet_name=1)
df['pt_date'] = pd.to_datetime(df['pt_date'])

# Moving-average forecast, one report sheet per city.
# dropna(): a missing city name selects no rows and cannot be sliced for the
# sheet name below.
for city in df['city_name'].drop_duplicates().dropna():
    # City-level rows only (county_name empty), restricted to December 2023.
    city_rows = df[(df['city_name'] == city) & (df['county_name'].isnull())]
    df_city = city_rows.set_index('pt_date').loc['2023-12'].sort_index()
    # Presumably skips city_name / county_name -- confirm the sheet layout.
    value_columns = df_city.columns[2:]

    forecasts = {}
    for industry in value_columns:
        series = df_city[industry].iloc[:-3].copy()
        for date in future:
            # Mean of the three most recent non-zero values; a forecast made
            # for an earlier date feeds the later ones.
            series.loc[date] = series[series != 0].iloc[-3:].mean()
        forecasts[industry] = series
    # Day-level detail: history plus the three forecast days, one column each.
    # Built once here rather than re-concatenated on every column.
    result_df = pd.DataFrame(forecasts)

    print(city[-6:])
    final_df = result_df.sum().to_frame('预测值')
    final_df['真实值'] = df_city[value_columns].sum()
    final_df['偏差'] = final_df['真实值'] - final_df['预测值']
    final_df['偏差率'] = (final_df['偏差'] / final_df['真实值']).apply(lambda x: "{:.5%}".format(x))
    print(final_df)

    # mode='a' requires an existing workbook and if_sheet_exists is only
    # accepted in append mode, so the first write has to create the file.
    if os.path.exists(report_xlsx):
        writer_kwargs = {'mode': 'a', 'if_sheet_exists': 'replace'}
    else:
        writer_kwargs = {'mode': 'w'}
    with pd.ExcelWriter(report_xlsx, engine='openpyxl', **writer_kwargs) as writer:
        final_df.to_excel(writer, sheet_name=f'{city[-6:]}')

@ -0,0 +1,59 @@
import pandas as pd
import os

pd.set_option('display.width', None)

source_xlsx = r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽行业.xlsx'
report_xlsx = r'C:\Users\鸽子\Desktop\移动平均_安徽行业_12月.xlsx'
# Dates that get forecast. The last three December rows are removed from the
# history first (assumes one row per day -- TODO confirm against the data).
future = pd.date_range(start='2023-12-29', periods=3, freq='D')

df = pd.read_excel(source_xlsx, sheet_name=1)
df['stat_date'] = pd.to_datetime(df['stat_date'])

# Moving-average forecast, one report sheet per city.
for city in df['city_name'].drop_duplicates().dropna():
    # City-level rows only (county_name empty), restricted to December 2023.
    city_rows = df[(df['city_name'] == city) & (df['county_name'].isnull())]
    df_city = city_rows.set_index('stat_date').loc['2023-12'].sort_index()
    # Presumably skips city_name / county_name -- confirm the sheet layout.
    value_columns = df_city.columns[2:]

    forecasts = {}
    for industry in value_columns:
        series = df_city[industry].iloc[:-3].copy()
        for date in future:
            # Mean of the three most recent non-zero values; a forecast made
            # for an earlier date feeds the later ones. Series.mean() yields
            # the scalar a single-cell assignment needs.
            series.loc[date] = series[series != 0].iloc[-3:].mean()
        forecasts[industry] = series
    # Day-level detail: history plus the three forecast days, one column each.
    result_df = pd.DataFrame(forecasts)

    print(city[-6:])
    final_df = result_df.sum().to_frame('预测值')
    final_df['真实值'] = df_city[value_columns].sum()
    final_df['偏差'] = final_df['真实值'] - final_df['预测值']
    final_df['偏差率'] = (final_df['偏差'] / final_df['真实值']).apply(lambda x: "{:.5%}".format(x))
    print(final_df)

    # mode='a' requires an existing workbook and if_sheet_exists is only
    # accepted in append mode, so the first write has to create the file.
    if os.path.exists(report_xlsx):
        writer_kwargs = {'mode': 'a', 'if_sheet_exists': 'replace'}
    else:
        writer_kwargs = {'mode': 'w'}
    with pd.ExcelWriter(report_xlsx, engine='openpyxl', **writer_kwargs) as writer:
        final_df.to_excel(writer, sheet_name=f'{city[-6:]}')

@ -0,0 +1,90 @@
import pandas as pd
import datetime
import math
import chinese_calendar as cc
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
def holiday_work(x):
    """Return 0 when *x* is a working day according to chinese_calendar, else 1.

    ``cc.is_holiday`` is the negation of ``cc.is_workday``, so a single lookup
    decides the flag and the function can never fall through to ``None``.
    Errors raised by chinese_calendar (e.g. for unsupported years) propagate.
    """
    return 0 if cc.is_workday(x) else 1
def jq(y, x):
    """Approximate the start date of solar term number *x* in year *y*.

    *x* is the position used to index the script's 24-entry solar-term list.
    The empirical formula gives a fractional day count from 1899-12-31; the
    fraction is truncated before building the date.
    """
    day_offset = 365.242 * (y - 1900) + 6.2 + 15.22 * x - 1.9 * math.sin(0.262 * x)
    epoch = datetime.date(1899, 12, 31)
    whole_days = datetime.timedelta(days=int(day_offset))
    return epoch + whole_days
def normal(nd):
    """Return the entries of *nd* lying strictly inside the 1.5 * IQR fences.

    The fences are ``Q1 - 1.5 * (Q3 - Q1)`` and ``Q3 + 1.5 * (Q3 - Q1)``, with
    the quartiles taken from ``nd.describe()``. Values equal to a fence are
    dropped (strict comparison). The result keeps the original index labels
    of the surviving entries.
    """
    # One describe() call instead of six: the summary does not change
    # between the two fence computations.
    summary = nd.describe()
    q1, q3 = summary['25%'], summary['75%']
    high = q3 + 1.5 * (q3 - q1)
    low = q1 - 1.5 * (q3 - q1)
    return nd[(nd < high) & (nd > low)]
import os

jq_list = ['小寒', '大寒', '立春', '雨水', '惊蛰', '春分', '清明', '谷雨', '立夏', '小满', '芒种', '夏至', '小暑',
           '大暑', '立秋', '处暑', '白露', '秋分', '寒露', '霜降', '立冬', '小雪', '大雪', '冬至']
# Integer code per solar-term name; turns the '24ST' column into a numeric
# feature.
label_dict = {'冬至': 0, '处暑': 1, '夏至': 2, '大寒': 3, '大暑': 4, '大雪': 5, '寒露': 6, '小寒': 7, '小暑': 8,
              '小满': 9, '小雪': 10, '惊蛰': 11, '春分': 12, '清明': 13, '白露': 14, '秋分': 15, '立冬': 16, '立夏': 17,
              '立春': 18, '立秋': 19, '芒种': 20, '谷雨': 21, '雨水': 22, '霜降': 23}
# 'YYYY-MM-DD' -> solar-term name, for the computed start dates in 2023-2024.
jq_dict = {
    jq(year, idx).strftime('%Y-%m-%d'): term
    for year in range(2023, 2025)
    for idx, term in enumerate(jq_list)
}

# Holiday lookup table: date -> value of the workbook's 'holiday' column.
# Read once up front; the table is the same for every city.
ys_df = pd.read_excel(r'C:\python-project\p1031\入模数据\杭州.xlsx')
ys_df['dtdate'] = pd.to_datetime(ys_df['dtdate'])
ys_dict = dict(zip(ys_df['dtdate'], ys_df['holiday']))

pd.set_option('display.width', None)
report_xlsx = r'C:\Users\鸽子\Desktop\安徽区域电量_12月.xlsx'
df_qy_ah = pd.read_excel(r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽分压区域.xlsx', sheet_name=2)
# City-level rows only; copy so the column write below targets a real frame
# rather than a slice of the sheet.
df_qy_ah = df_qy_ah[df_qy_ah['county_name'].isnull()].copy()
df_qy_ah['pt_date'] = pd.to_datetime(df_qy_ah['pt_date'])
df_qy_ah = df_qy_ah[df_qy_ah['pt_date'] <= '2023-12-31']

# dropna(): a missing city name selects no rows to train on.
for city in df_qy_ah['city_name'].drop_duplicates().dropna():
    df_ah_city = df_qy_ah[df_qy_ah['city_name'] == city].drop_duplicates().copy()

    # Solar-term feature: only term start dates match jq_dict; the forward
    # fill carries each term to the following rows.
    # NOTE(review): relies on rows being in date order -- confirm.
    df_ah_city['24ST'] = df_ah_city['pt_date'].astype('string').map(jq_dict)
    df_ah_city = df_ah_city.ffill()
    # Rows before the first matched term start have nothing to inherit.
    df_ah_city['24ST'] = df_ah_city['24ST'].fillna('冬至').map(label_dict)

    # Holiday feature: table lookup first, chinese_calendar as fallback.
    df_ah_city['holiday'] = df_ah_city['pt_date'].map(ys_dict)
    holiday_null_s = df_ah_city.loc[df_ah_city['holiday'].isnull(), 'pt_date'].map(holiday_work)
    # The last two dates missing from the table get code 3.
    # NOTE(review): the meaning of 3 is not visible in this script -- confirm.
    holiday_null_s.iloc[-2:] = 3
    # Assign back instead of chained inplace fillna, which is not guaranteed
    # to write through to the frame.
    df_ah_city['holiday'] = df_ah_city['holiday'].fillna(holiday_null_s)

    df_ah_city = df_ah_city.set_index('pt_date')
    # Actual December values, captured before outlier removal.
    df_ = df_ah_city.loc['2023-12']['power_sal']
    # Remove outliers: keep rows whose power_sal is inside the IQR fences.
    df_ah_city = df_ah_city.loc[normal(df_ah_city['power_sal']).index]
    print(city)
    print(df_ah_city)

    features = df_ah_city.drop(columns=['city_name', 'county_name', 'power_sal'])
    target = df_ah_city['power_sal']
    X, y = features.iloc[-180:-3], target.iloc[-180:-3]
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    # NOTE(review): if outlier removal dropped one of the final three days,
    # these rows are no longer the last three days of December -- confirm.
    eval_x = features.iloc[-3:]

    model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    result_test = pd.DataFrame({'test': y_test, 'pred': y_pred}, index=y_test.index)
    # Mean absolute percentage error and R^2 on the random hold-out split.
    print((abs(result_test['pred'] - result_test['test']) / result_test['test']).mean())
    print(r2_score(y_test, y_pred))

    # Month view: actual values, with the last three replaced by predictions.
    actual = list(df_.values)
    final_df = pd.DataFrame({'真实值': actual, '预测值': actual[:-3] + list(model.predict(eval_x))},
                            index=df_.index)
    # A single month-level deviation rate, repeated on every row.
    final_df['偏差率'] = (final_df['真实值'] - final_df['预测值']).sum() / final_df['真实值'].sum()
    final_df['偏差率'] = final_df['偏差率'].apply(lambda x: "{:.5%}".format(x))
    print(final_df)

    # mode='a' requires an existing workbook and if_sheet_exists is only
    # accepted in append mode, so the first write has to create the file.
    if os.path.exists(report_xlsx):
        writer_kwargs = {'mode': 'a', 'if_sheet_exists': 'replace'}
    else:
        writer_kwargs = {'mode': 'w'}
    with pd.ExcelWriter(report_xlsx, engine='openpyxl', **writer_kwargs) as writer:
        final_df.to_excel(writer, sheet_name=f'{city}')

@ -0,0 +1,74 @@
from prophet import Prophet
import pandas as pd
import os
import numpy as np
def normal(data):
    """Return a boolean mask marking entries of *data* inside the 1.5 * IQR fences.

    The fences are ``Q1 - 1.5 * (Q3 - Q1)`` and ``Q3 + 1.5 * (Q3 - Q1)``, with
    the quartiles taken from ``data.describe()``. The comparison is inclusive,
    so values equal to a fence are marked True. The mask shares *data*'s index.
    """
    # One describe() call instead of six: the summary does not change
    # between the two fence computations.
    summary = data.describe()
    q1, q3 = summary['25%'], summary['75%']
    high = q3 + 1.5 * (q3 - q1)
    low = q1 - 1.5 * (q3 - q1)
    return (data <= high) & (data >= low)
excel_file = r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽行业.xlsx'
report_file = r'C:\Users\鸽子\Desktop\时间序列算法_安徽行业_12月.xlsx'
df = pd.read_excel(excel_file, sheet_name=1)
# Parse the dates once on the full frame; writing the column into a per-city
# slice is not guaranteed to stick and triggers SettingWithCopyWarning.
df['stat_date'] = pd.to_datetime(df['stat_date'])

for city in df['city_name'].drop_duplicates().dropna():
    # NOTE(review): county-level rows are not filtered out here -- confirm the
    # sheet holds a single row per city and day.
    df_city = df[df['city_name'] == city]
    in_2023 = (df_city['stat_date'] >= '2023-01-01') & (df_city['stat_date'] <= '2023-12-31')
    list_real = []
    list_pred = []
    list_industry = []
    for industry in df_city.columns[3:]:
        # 2023 history for this column in Prophet's ds / y layout.
        ds_train = (
            df_city.loc[in_2023, ['stat_date', industry]]
            .sort_values(by='stat_date')
            .rename(columns={'stat_date': 'ds', industry: 'y'})
        )
        # The final three days are held out; they are what gets forecast.
        df_train = ds_train.iloc[:-3].copy()
        # Blank out IQR outliers, then carry the previous kept value forward.
        df_train['y'] = df_train['y'].where(normal(df_train['y'])).ffill()

        model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True)
        model.add_country_holidays(country_name="CN")
        model.fit(df_train)
        future = model.make_future_dataframe(periods=3, freq='D')
        predict = model.predict(future)
        print(city[-6:], industry)

        actual_dec = ds_train.set_index('ds')['y'].loc['2023-12']
        predicted_dec = predict.set_index('ds')['yhat'].loc['2023-12']
        # Month view: actuals up to the hold-out, then the three forecasts.
        blended = pd.concat((actual_dec.iloc[:-3], predicted_dec.iloc[-3:]))
        list_industry.append(industry)
        # Actuals are aligned to the blended dates before being totalled.
        list_real.append(actual_dec.reindex(blended.index).sum())
        list_pred.append(blended.sum())

    final_df = pd.DataFrame({'真实值': list_real, '预测值': list_pred}, index=list_industry)
    final_df['偏差'] = final_df['真实值'] - final_df['预测值']
    final_df['偏差率'] = (final_df['偏差'] / final_df['真实值']).apply(lambda x: "{:.5%}".format(x))

    # mode='a' requires an existing workbook and if_sheet_exists is only
    # accepted in append mode, so the first write has to create the file.
    if os.path.exists(report_file):
        writer_kwargs = {'mode': 'a', 'if_sheet_exists': 'replace'}
    else:
        writer_kwargs = {'mode': 'w'}
    with pd.ExcelWriter(report_file, engine='openpyxl', **writer_kwargs) as writer:
        final_df.to_excel(writer, sheet_name=f'{city[-6:]}')

@ -32,12 +32,12 @@ def normal(nd):
# df = df[['dtdate','tem_max','tem_min']]
# # print(df.head())
# # print(df_elec.head())
#
# merge_df = pd.merge(df_elec,df,left_on='pt_date',right_on='dtdate')[['pt_date','tem_max','tem_min','售电量']]
# merge_df.set_index('pt_date',inplace=True)
# merge_df.index = pd.to_datetime(merge_df.index,format='%Y%m%d')
#
#
# merge_df['month'] = merge_df.index.strftime('%Y-%m-%d').str[5:7]
# merge_df['month'] = merge_df['month'].astype('int')
# merge_df.to_csv('杭州入模数据.csv',encoding='gbk')

@ -9,15 +9,17 @@ df['dtdate'] = pd.to_datetime(df['dtdate'],format='%Y%m%d').astype('str')
df['city_name'] = df['city_name'].map(lambda x:x.strip())
df['city_name'] = df['city_name'].str[:-1]
df['dtdate'] = df['dtdate'].map(lambda x:x.strip())
# Classify a date as working day or not.
def holiday_work(x):
    """Return 0 when *x* is a working day according to chinese_calendar, else 1.

    ``cc.is_holiday`` is the negation of ``cc.is_workday``, so a single lookup
    decides the flag and the function can never fall through to ``None``.
    Errors raised by chinese_calendar (e.g. for unsupported years) propagate.
    """
    return 0 if cc.is_workday(x) else 1
# Compute solar-term dates.
def jq(y, x):
    """Approximate the start date of solar term number *x* in year *y*.

    *x* is the position used to index the script's 24-entry solar-term list.
    The empirical formula gives a fractional day count from 1899-12-31; the
    fraction is truncated before building the date.
    """
    day_offset = 365.242 * (y - 1900) + 6.2 + 15.22 * x - 1.9 * math.sin(0.262 * x)
    epoch = datetime.date(1899, 12, 31)
    whole_days = datetime.timedelta(days=int(day_offset))
    return epoch + whole_days
# print(jq(2023,1))
print(jq(2023,1))
jq_list=['小寒', '大寒', '立春', '雨水', '惊蛰', '春分', '清明', '谷雨', '立夏', '小满', '芒种', '夏至', '小暑', '大暑', '立秋', '处暑', '白露', '秋分', '寒露', '霜降', '立冬', '小雪', '大雪','冬至']
jq_dict={}
for j in range(2023,2024):

@ -63,6 +63,7 @@
import os
from openpyxl import Workbook
import pandas as pd
# df = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江省11月分行业售电量预测v2.xlsx',sheet_name=1)
# print(df.head())
# print(df[df.columns[2:]].groupby(df['city_name']).sum().T)
@ -94,6 +95,7 @@ file_dir = r'C:\Users\鸽子\Desktop\11月区县分压预测'
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.dates as mdates
# date_rng = pd.date_range(start=df['4.有色金属矿采选业'].index[0], end=df['4.有色金属矿采选业'].index[-1], freq='D')
# mpl.rcParams['font.sans-serif']=['kaiti']
# print(df['4.有色金属矿采选业'][:-1])
@ -110,18 +112,18 @@ import matplotlib.dates as mdates
# plt.show()
excel_file = pd.ExcelFile(r'C:\Users\鸽子\Desktop\浙江电量20231202.xlsx')
df_city_real = pd.read_excel(excel_file,sheet_name=0)
df_city_real = df_city_real[df_city_real['county_name'].isnull()]
df_city_real['city_name'] = df_city_real['city_name'].str[4:6]
# print(df_city_real)
file_dir = r'C:\Users\鸽子\Desktop\发行&预测\区域行业分压预测v1129'
print(os.listdir(file_dir))
# excel_file = pd.ExcelFile(r'C:\Users\鸽子\Desktop\浙江电量20231202.xlsx')
# df_city_real = pd.read_excel(excel_file,sheet_name=0)
# df_city_real = df_city_real[df_city_real['county_name'].isnull()]
# df_city_real['city_name'] = df_city_real['city_name'].str[4:6]
# # print(df_city_real)
#
# file_dir = r'C:\Users\鸽子\Desktop\发行&预测\区域行业分压预测v1129'
# print(os.listdir(file_dir))
# 区域明细及偏差率统计
city_area_file = pd.ExcelFile(os.path.join(file_dir,os.listdir(file_dir)[2]))
# city_area_file = pd.ExcelFile(os.path.join(file_dir, os.listdir(file_dir)[2]))
# for city in df_city_real['city_name'].drop_duplicates():
# df_city_pred = pd.read_excel(city_area_file,sheet_name=city).dropna().set_index('日期')
# df_city_pred.index = pd.to_datetime(df_city_pred.index)
@ -139,12 +141,11 @@ city_area_file = pd.ExcelFile(os.path.join(file_dir,os.listdir(file_dir)[2]))
# result.to_excel(writer,sheet_name=f'{city}')
# pd.read_excel(city_area_file,sheet_name='舟山').dropna().set_index('日期')
# df_city_real[df_city_real['city_name']=='舟山'].set_index('pt_date')['power_sal']
city_volt_file = os.path.join(file_dir,os.listdir(file_dir)[2])
excel_file1 = pd.ExcelFile(city_volt_file)
# city_volt_file = os.path.join(file_dir,os.listdir(file_dir)[2])
# excel_file1 = pd.ExcelFile(city_volt_file)
# for sheet_name in excel_file1.sheet_names[1:]:
# print(sheet_name)
@ -164,7 +165,7 @@ excel_file1 = pd.ExcelFile(city_volt_file)
# with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\市分压电量同期预测对比.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as wirter:
# result.to_excel(wirter,sheet_name=f'{sheet_name}')
industry_file = pd.ExcelFile(os.path.join(file_dir,os.listdir(file_dir)[4]))
# industry_file = pd.ExcelFile(os.path.join(file_dir,os.listdir(file_dir)[4]))
# for sheet_name in industry_file.sheet_names[1:]:
#
# pred_industry_df = pd.concat([pd.read_excel(industry_file,sheet_name=sheet_name).iloc[:27],pd.read_excel(industry_file,sheet_name=sheet_name).iloc[-3:]],ignore_index=True)
@ -186,16 +187,96 @@ industry_file = pd.ExcelFile(os.path.join(file_dir,os.listdir(file_dir)[4]))
# with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\行业电量同期预测对比.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as wirter:
# result.to_excel(wirter,sheet_name=f'{sheet_name[:2]}')
e1 = r'C:\Users\鸽子\Desktop\行业电量同期预测对比.xlsx'
df1 = pd.read_excel(e1,sheet_name=1)
df1.set_index(df1.columns[0],inplace=True)
for sheet_name in industry_file.sheet_names[2:]:
df2 = pd.read_excel(e1,sheet_name=sheet_name)
df2 = df2.set_index(df2.columns[0])
df1 += df2
df1['偏差'] = df1['真实值']-df1['预测值']
df1['偏差率'] = df1['偏差']/df1['真实值']
df1.to_excel(r'C:\Users\鸽子\Desktop\1.xlsx')
# writer = pd.ExcelWriter(e1,engine='openpyxl')
# df1.to_excel(writer,sheet_name=0)
print(df1)
# e1 = r'C:\Users\鸽子\Desktop\行业电量同期预测对比.xlsx'
# df1 = pd.read_excel(e1,sheet_name=1)
# df1.set_index(df1.columns[0],inplace=True)
# for sheet_name in industry_file.sheet_names[2:]:
# df2 = pd.read_excel(e1,sheet_name=sheet_name)
# df2 = df2.set_index(df2.columns[0])
# df1 += df2
# df1['偏差'] = df1['真实值']-df1['预测值']
# df1['偏差率'] = df1['偏差']/df1['真实值']
# df1.to_excel(r'C:\Users\鸽子\Desktop\1.xlsx')
# # writer = pd.ExcelWriter(e1,engine='openpyxl')
# # df1.to_excel(writer,sheet_name=0)
# print(df1)
import numpy as np
# Do not wrap wide frames when printing.
pd.set_option('display.width', None)
# Statistics on the gap between published (发行) volume and same-period (同期) volume.
# df_fx: published figures; df_tq: same-period daily figures.
df_fx = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江发行202311-202312v2.xlsx')
df_tq = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江分区202311-202312.xlsx')
# City level: rows whose county_name is empty.
df_tq_city = df_tq[df_tq['county_name'].isnull()]
# NOTE(review): this writes a column into a filtered slice of df_tq; pandas
# may emit SettingWithCopyWarning here.
df_tq_city['pt_date'] = pd.to_datetime(df_tq_city['pt_date'])
# Keep only rows whose date string starts with '2023-11'.
df_tq_city = df_tq_city[df_tq_city['pt_date'].astype('string').str[:7]=='2023-11']
# print(df_tq_city[df_tq_city['city_name']==df_tq_city['city_name'].iloc[0]].set_index('pt_date')['power_sal'].resample('M').sum())
# Monthly total of same-period power_sal per city, multiplied by 10000
# (presumably a unit conversion to match power_pub -- TODO confirm).
df_tq_city = pd.DataFrame(df_tq_city['power_sal'].groupby(df_tq_city['city_name']).sum() * 10000)
# Published rows for the first date_pub value in the file, restricted to rows
# with a city name and no county name, indexed by city_name.
df_fx_city = df_fx[(df_fx['date_pub'] == df_fx['date_pub'].iloc[0]) & (df_fx['coountry_name'].isnull())
& (df_fx['city_name'].notnull())].drop(columns='coountry_name').set_index('city_name')
# Attach the same-period totals to the published rows (join on the city_name index).
df_city = df_fx_city.join(df_tq_city)
df_city = df_city.drop(columns='province_name')
# Relative deviation of the same-period total from the published value.
df_city['bias'] = (df_city['power_pub'] - df_city['power_sal']) / df_city['power_pub']
# Order the rows by ascending absolute bias.
df_city = df_city.iloc[np.argsort(abs(df_city['bias']))]
# df_city.to_excel('市区域发行同期偏差.xlsx')
print('------------------------------------------------------------------------')
# County-level deviation.
# Published rows for the first date_pub value that carry both a county and a
# city name, indexed by the county column.
df_fx_county = df_fx[(df_fx['date_pub'] == df_fx['date_pub'].iloc[0]) & (df_fx['coountry_name'].notnull())
& (df_fx['city_name'].notnull())].drop(columns=['province_name']).set_index('coountry_name')
# print(df_fx_county.reset_index().sort_values('coountry_name').drop_duplicates())
# Same-period rows with a county name whose date string starts with '2023-11'.
df_tq_county = df_tq[(df_tq['county_name'].notnull())&(df_tq['pt_date'].astype('string').str[:7]=='2023-11')]
# Monthly total per county, scaled by 10000 like the city totals above.
df_tq_county = pd.DataFrame(df_tq_county['power_sal'].groupby(df_tq_county['county_name']).sum()* 10000)
print(df_tq_county.sort_index())
df_county = df_fx_county.join(df_tq_county).sort_index()
# print(df_county.reset_index().drop_duplicates())
df_county['bias'] = (df_county['power_pub'] - df_county['power_sal'])/df_county['power_pub']
# df_county = df_county.iloc[np.argsort(abs(df_county['bias']))]
# print(df_county.reset_index().drop_duplicates())
df_county.reset_index(inplace=True)
# Fix the column order for the report.
df_county = df_county[['date_pub','city_name','coountry_name','power_pub','power_sal','bias']]
# Third workbook: region_power per county / city for ds == '2023-11'
# (filename suggests transformer loss data -- TODO confirm).
zjbs_ = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江变损202311-202312.xlsx')
zjbs = zjbs_[(zjbs_['ds']=='2023-11')&(zjbs_['county_name'].notnull())][['county_name','region_power']]
# Left merge keeps every county row; counties missing from zjbs get NaN region_power.
df_county = pd.merge(df_county,zjbs,left_on='coountry_name',right_on='county_name',how='left')
# Replaces every NaN in the frame with 0, including unmatched region_power.
df_county.fillna(0,inplace=True)
# Add region_power to the same-period total, then recompute the bias.
df_county['power_sal'] += df_county['region_power']
df_county['bias'] = (df_county['power_pub'] - df_county['power_sal'])/df_county['power_pub']
# Helper column so rows can be ordered by absolute bias within each city.
df_county['_'] = abs(df_county['bias'])
df_county.sort_values(['city_name','_']).drop(columns=['region_power','county_name','_']).to_excel('区县发行同期偏差.xlsx',index=False)
# City-level region_power rows (county_name empty) for the same month.
zjbs_qx = zjbs_[(zjbs_['ds']=='2023-11')&(zjbs_['county_name'].isnull())][['city_name','region_power']].set_index('city_name')
print(zjbs_qx)
print(df_city)
df_city = df_city.join(zjbs_qx)
# No fillna on this path: a city missing from zjbs_qx ends up with NaN
# power_sal and NaN bias.
df_city['power_sal'] += df_city['region_power']
df_city['bias'] = (df_city['power_pub'] - df_city['power_sal'])/df_city['power_pub']
print(df_city.drop(columns='region_power'))
df_city.drop(columns='region_power').to_excel('市区域发行同期偏差.xlsx')

@ -1,7 +1,6 @@
from prophet import Prophet
import pandas as pd
import os
import datetime
import numpy as np
@ -11,42 +10,45 @@ def normal(data):
return (data <= high) & (data >= low)
file_dir = './浙江各地市行业电量数据'
# df = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江电量20231202.xlsx')
for city in os.listdir(file_dir):
df_city = pd.read_excel(os.path.join(file_dir, city))
df_city['stat_date'] = df_city['stat_date'].map(lambda x: str(x).strip()[:10])
excel_file = r'C:\Users\鸽子\Desktop\北京安徽行业.xlsx'
df = pd.read_excel(excel_file, sheet_name=1)
print(df.columns)
for city in df['city_name'].drop_duplicates().dropna():
df_city = df[df['city_name'] == city]
df_city['stat_date'] = pd.to_datetime(df_city['stat_date'])
list_goal = []
list_industry = []
result_dict = {}
for industry in df_city.columns[3:]:
s1 = df_city[['stat_date', industry]]
ds_train = s1[(s1['stat_date'] >= '2022-09-30') & (s1['stat_date'] <= '2023-11-27')].sort_values(by='stat_date')
ds_train = s1[(s1['stat_date'] >= '2023-01-01') & (s1['stat_date'] <= '2023-12-28')].sort_values(by='stat_date')
print(ds_train)
ds_train.rename(columns={'stat_date': 'ds', industry: 'y'}, inplace=True)
df_train = ds_train.copy()
df_train['y'] = df_train['y'].where(normal(df_train['y']), other=np.nan).fillna(method='ffill')
model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True)
model.add_country_holidays(country_name="CN")
model.fit(df_train)
future = model.make_future_dataframe(periods=3, freq='D')
predict = model.predict(future)
print(city[:2],industry)
print(city[-6:], industry)
predict = predict[['ds', 'yhat']].set_index('ds').loc['2023-11'].rename(columns={'yhat':'售电量'})
predict = predict[['ds', 'yhat']].set_index('ds').loc['2023-12'].rename(columns={'yhat': '售电量'})
ds_train.rename(columns={'y': '售电量'}, inplace=True)
result = pd.concat((ds_train.set_index('ds').loc['2023-11'][:27],predict[-3:]))
result = pd.concat((ds_train.set_index('ds').loc['2023-12'][:28], predict[-3:]))
result_dict[industry] = list(result['售电量'])
print(result)
with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\行业电量v1130.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as writer:
pd.DataFrame(result_dict,index=pd.date_range(start=f'2023-11-01', end=f'2023-11-30', freq='D').strftime('%Y-%m-%d')).to_excel(writer,sheet_name=city[:2])
# with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\行业电量v1130.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as writer:
# pd.DataFrame(result_dict,index=pd.date_range(start=f'2023-11-01', end=f'2023-11-30', freq='D').strftime('%Y-%m-%d')).to_excel(writer,sheet_name=city[:2])
# df = predict.join(s1.set_index('ds')).loc['2023-8']
# df['偏差率'] = (df['y'] - df['yhat']) / df['y']

@ -112,3 +112,4 @@ print(df)
print("\n根据条件替换后的数据:")
print(df_new)

@ -26,7 +26,6 @@ class LSTM_Regression(nn.Module):
x = x.view(s, b, -1) # 把形状改回来
return x
def create_dataset(data, days_for_train=5) -> (np.array, np.array):
dataset_x, dataset_y = [], []
for i in range(len(data) - days_for_train-3):
@ -190,8 +189,8 @@ for city in df['city_name'].drop_duplicates():
print(df1)
with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\行业电量预测v1129.xlsx',mode='a',engine='openpyxl',if_sheet_exists='replace') as writer:
df1.to_excel(writer,sheet_name=f'{city[4:6]}')
# with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\行业电量预测v1129.xlsx',mode='a',engine='openpyxl',if_sheet_exists='replace') as writer:
# df1.to_excel(writer,sheet_name=f'{city[4:6]}')
print(time.time()-t1)
print(result_dict)

Loading…
Cancel
Save