输出预测结果
parent
5e3d97c389
commit
198bd29174
@ -0,0 +1,59 @@
|
||||
import pandas as pd
|
||||
import os

pd.set_option('display.width', None)

# Input workbook (first sheet) and the workbook that receives one sheet per city.
source_xlsx = r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽分压区域.xlsx'
output_xlsx = r'C:\Users\鸽子\Desktop\移动平均_北京分压_12月.xlsx'

df = pd.read_excel(source_xlsx, sheet_name=0)
df['pt_date'] = pd.to_datetime(df['pt_date'])

# The three dates that are forecast instead of being read from the data.
future = pd.date_range(start='2023-12-29', periods=3, freq='D')

# Moving average
# dropna(): a missing city name cannot be sliced with city[-6:] below.
for city in df['city_name'].drop_duplicates().dropna():
    # Rows of this city with an empty county_name, limited to December 2023.
    df_city = (
        df[(df['city_name'] == city) & (df['county_name'].isnull())]
        .set_index('pt_date')
        .loc['2023-12']
        .sort_index()
    )

    # Per-day detail: one column per value column (history + forecast rows).
    result_df = pd.DataFrame({})

    # Skips the first two columns (presumably city_name and county_name --
    # confirm against the sheet layout).
    for industry in df_city.columns[2:]:
        # Everything except the last three rows is treated as known history.
        history = df_city.iloc[:-3]
        df_moving_avg = pd.DataFrame(history[industry], index=history.index)

        for date in future:
            # Mean of the three most recent rows whose value is not 0; a
            # forecast written here feeds the following dates.
            recent = df_moving_avg[df_moving_avg.values != 0][-3:]
            # .values[0] stores a scalar rather than a length-1 array.
            df_moving_avg.loc[date, industry] = recent.mean().values[0]

        result_df = pd.concat([result_df, df_moving_avg], axis=1)

    print(city[-6:])

    # Monthly totals per column: forecast vs. actual, deviation and its rate.
    final_df = pd.DataFrame(result_df.sum(), columns=['预测值'])
    final_df['真实值'] = df_city[df_city.columns[2:]].sum()
    final_df['偏差'] = final_df['真实值'] - final_df['预测值']
    final_df['偏差率'] = final_df['偏差'] / final_df['真实值']
    final_df['偏差率'] = final_df['偏差率'].apply(lambda x: "{:.5%}".format(x))
    print(final_df)

    # Append mode needs an existing workbook, so the first write creates it
    # and later writes append/replace the city's sheet.
    if os.path.exists(output_xlsx):
        writer_kwargs = {'mode': 'a', 'if_sheet_exists': 'replace'}
    else:
        writer_kwargs = {'mode': 'w'}
    with pd.ExcelWriter(output_xlsx, engine='openpyxl', **writer_kwargs) as writer:
        final_df.to_excel(writer, sheet_name=f'{city[-6:]}')
|
@ -0,0 +1,59 @@
|
||||
import pandas as pd
|
||||
import os

pd.set_option('display.width', None)

# Input workbook (first sheet) and the workbook that receives one sheet per city.
source_xlsx = r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽行业.xlsx'
output_xlsx = r'C:\Users\鸽子\Desktop\移动平均_北京行业_12月.xlsx'

df = pd.read_excel(source_xlsx, sheet_name=0)
df['stat_date'] = pd.to_datetime(df['stat_date'])

# The three dates that are forecast instead of being read from the data.
future = pd.date_range(start='2023-12-29', periods=3, freq='D')

# Moving average
# dropna(): a missing city name cannot be sliced with city[-6:] below.
for city in df['city_name'].drop_duplicates().dropna():
    # Rows of this city with an empty county_name, limited to December 2023.
    df_city = (
        df[(df['city_name'] == city) & (df['county_name'].isnull())]
        .set_index('stat_date')
        .loc['2023-12']
        .sort_index()
    )

    # Per-day detail: one column per industry column (history + forecast rows).
    result_df = pd.DataFrame({})

    # Skips the first two columns (presumably city_name and county_name --
    # confirm against the sheet layout).
    for industry in df_city.columns[2:]:
        # Everything except the last three rows is treated as known history.
        history = df_city.iloc[:-3]
        df_moving_avg = pd.DataFrame(history[industry], index=history.index)

        for date in future:
            # Mean of the three most recent rows whose value is not 0; a
            # forecast written here feeds the following dates.
            recent = df_moving_avg[df_moving_avg.values != 0][-3:]
            # .values[0] stores a scalar rather than a length-1 array.
            df_moving_avg.loc[date, industry] = recent.mean().values[0]

        result_df = pd.concat([result_df, df_moving_avg], axis=1)

    print(city[-6:])

    # Monthly totals per column: forecast vs. actual, deviation and its rate.
    final_df = pd.DataFrame(result_df.sum(), columns=['预测值'])
    final_df['真实值'] = df_city[df_city.columns[2:]].sum()
    final_df['偏差'] = final_df['真实值'] - final_df['预测值']
    final_df['偏差率'] = final_df['偏差'] / final_df['真实值']
    final_df['偏差率'] = final_df['偏差率'].apply(lambda x: "{:.5%}".format(x))
    print(final_df)

    # Append mode needs an existing workbook, so the first write creates it
    # and later writes append/replace the city's sheet.
    if os.path.exists(output_xlsx):
        writer_kwargs = {'mode': 'a', 'if_sheet_exists': 'replace'}
    else:
        writer_kwargs = {'mode': 'w'}
    with pd.ExcelWriter(output_xlsx, engine='openpyxl', **writer_kwargs) as writer:
        final_df.to_excel(writer, sheet_name=f'{city[-6:]}')
|
@ -0,0 +1,93 @@
|
||||
import pandas as pd
|
||||
import datetime
|
||||
import math
|
||||
import chinese_calendar as cc
|
||||
import xgboost as xgb
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.metrics import r2_score
|
||||
|
||||
|
||||
def holiday_work(x):
    """Encode a date as a workday/holiday flag using ``chinese_calendar``.

    Args:
        x: The date to classify; it is passed unchanged to
            ``cc.is_workday`` / ``cc.is_holiday``.

    Returns:
        0 when ``cc.is_workday(x)`` is true, 1 when ``cc.is_holiday(x)`` is
        true, and None when neither check is true.
    """
    if cc.is_workday(x):
        return 0
    if cc.is_holiday(x):
        return 1
    # Neither check matched; keep the original implicit result, but explicit.
    return None
|
||||
|
||||
def normal(nd):
    """Return the values of *nd* that lie strictly inside the IQR fences.

    The fences are Q1 - 1.5 * IQR and Q3 + 1.5 * IQR, where Q1/Q3 are the
    '25%' and '75%' entries of ``nd.describe()``.

    Args:
        nd: A numeric pandas Series.

    Returns:
        The sub-Series of *nd* (original index labels kept) whose values are
        greater than the lower fence and less than the upper fence. Values
        equal to a fence are dropped.
    """
    # describe() is comparatively expensive, so compute it once.
    stats = nd.describe()
    q1, q3 = stats['25%'], stats['75%']
    high = q3 + 1.5 * (q3 - q1)
    low = q1 - 1.5 * (q3 - q1)
    return nd[(nd < high) & (nd > low)]
|
||||
|
||||
def jq(y, x):
    """Approximate the calendar date of solar term number *x* in year *y*.

    Args:
        y: Gregorian year.
        x: Zero-based term number within the year (this file uses 0..23).

    Returns:
        A ``datetime.date`` obtained by adding a whole number of days to
        1899-12-31.
    """
    # Day count from the epoch: a per-year term plus a constant offset, an
    # even spacing per solar term, and a sinusoidal correction.
    day_offset = 365.242 * (y - 1900) + 6.2 + 15.22 * x - 1.9 * math.sin(0.262 * x)
    epoch = datetime.date(1899, 12, 31)
    # int() truncates the fractional part of the day count.
    return epoch + datetime.timedelta(days=int(day_offset))
|
||||
|
||||
import os

# Names of the 24 solar terms; position i corresponds to jq(year, i).
jq_list = ['小寒', '大寒', '立春', '雨水', '惊蛰', '春分', '清明', '谷雨', '立夏', '小满', '芒种', '夏至', '小暑',
           '大暑', '立秋', '处暑', '白露', '秋分', '寒露', '霜降', '立冬', '小雪', '大雪', '冬至']

# Integer code used as the model feature for each solar term (loop-invariant).
label_dict = {'冬至': 0, '处暑': 1, '夏至': 2, '大寒': 3, '大暑': 4, '大雪': 5, '寒露': 6, '小寒': 7, '小暑': 8,
              '小满': 9, '小雪': 10, '惊蛰': 11, '春分': 12, '清明': 13, '白露': 14, '秋分': 15, '立冬': 16,
              '立夏': 17,
              '立春': 18, '立秋': 19, '芒种': 20, '谷雨': 21, '雨水': 22, '霜降': 23}

# 'YYYY-MM-DD' -> solar-term name for every term date of 2023 and 2024.
jq_dict = {}
for j in range(2023, 2025):
    for i, term in enumerate(jq_list):
        jq_dict[jq(j, i).strftime('%Y-%m-%d')] = term

# Holiday flag per date, read from a separate workbook.
ys_df = pd.read_excel(r'C:\python-project\p1031\入模数据\杭州.xlsx')
ys_df['dtdate'] = pd.to_datetime(ys_df['dtdate'])
ys_dict = dict(zip(ys_df['dtdate'], ys_df['holiday']))

output_xlsx = r'C:\Users\鸽子\Desktop\北京区域电量_12月.xlsx'

pd.set_option('display.width', None)
df_qy_ah = pd.read_excel(r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽分压区域.xlsx', sheet_name=3)
# Keep rows with an empty county_name; copy so the column assignment below
# does not write into a filtered view.
df_qy_ah = df_qy_ah[df_qy_ah['county_name'].isnull()].copy()
df_qy_ah['pt_date'] = pd.to_datetime(df_qy_ah['pt_date'])
df_qy_ah = df_qy_ah[df_qy_ah['pt_date'] <= '2023-12-31']

# Columns that are identifiers or the target, not model features.
non_feature_columns = ['city_name', 'county_name', 'power_sal']

# dropna(): rows without a city name cannot form a per-city model.
for city in df_qy_ah['city_name'].drop_duplicates().dropna():
    df_ah_city = df_qy_ah[df_qy_ah['city_name'] == city].drop_duplicates().copy()

    # Solar-term feature: the term name on its start date, carried forward by
    # the forward fill (which is applied to every column, as before).
    df_ah_city['24ST'] = df_ah_city['pt_date'].dt.strftime('%Y-%m-%d').map(jq_dict)
    df_ah_city = df_ah_city.ffill()
    # Rows before the first mapped term date get the winter-solstice label.
    df_ah_city['24ST'] = df_ah_city['24ST'].fillna('冬至')
    df_ah_city['24ST'] = df_ah_city['24ST'].map(label_dict)

    df_ah_city['holiday'] = df_ah_city['pt_date'].map(ys_dict)

    # Dates missing from the holiday table fall back to holiday_work().
    holiday_null_s = df_ah_city.loc[df_ah_city['holiday'].isnull(), 'pt_date'].map(holiday_work)
    # Hard-coded override kept from the original: the last two of those dates
    # get code 3.
    holiday_null_s.iloc[-2:] = 3
    df_ah_city['holiday'] = df_ah_city['holiday'].fillna(holiday_null_s)

    df_ah_city = df_ah_city.set_index('pt_date')
    # Actual December 2023 values, taken before outlier removal.
    df_ = df_ah_city.loc['2023-12']['power_sal']

    # Keep only rows whose power_sal passes the IQR filter.
    df_ah_city = df_ah_city.loc[normal(df_ah_city['power_sal']).index]
    print(city)
    print(df_ah_city)

    features = df_ah_city.drop(columns=non_feature_columns)
    target = df_ah_city['power_sal']

    # Fit/validate on up to 177 rows preceding the final three rows.
    X, y = features.iloc[-180:-3], target.iloc[-180:-3]
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    # NOTE(review): if the outlier filter removed one of the last three
    # December days, these rows no longer line up with df_[-3:] -- confirm.
    eval_x = features.iloc[-3:]

    model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    result_test = pd.DataFrame({'test': y_test, 'pred': y_pred}, index=y_test.index)

    # Mean absolute percentage error and R^2 on the hold-out split.
    print((abs(result_test['pred'] - result_test['test']) / result_test['test']).mean())
    print(r2_score(y_test, y_pred))

    # December series: actual values, with the last three replaced by the
    # model output in the forecast column.
    actual = list(df_.values)
    final_df = pd.DataFrame({'真实值': actual, '预测值': actual[:-3] + list(model.predict(eval_x))},
                            index=df_.index)
    # A single month-level deviation rate, repeated on every row.
    final_df['偏差率'] = (final_df['真实值'] - final_df['预测值']).sum() / final_df['真实值'].sum()
    final_df['偏差率'] = final_df['偏差率'].apply(lambda x: "{:.5%}".format(x))
    print(final_df)

    # Append mode needs an existing workbook, so the first write creates it
    # and later writes append/replace the city's sheet.
    if os.path.exists(output_xlsx):
        writer_kwargs = {'mode': 'a', 'if_sheet_exists': 'replace'}
    else:
        writer_kwargs = {'mode': 'w'}
    with pd.ExcelWriter(output_xlsx, engine='openpyxl', **writer_kwargs) as writer:
        final_df.to_excel(writer, sheet_name=f'{city}')
|
@ -0,0 +1,74 @@
|
||||
from prophet import Prophet
|
||||
import pandas as pd
|
||||
import os
|
||||
import numpy as np
|
||||
|
||||
|
||||
def normal(data):
    """Return a boolean mask marking values inside the IQR fences.

    The fences are Q1 - 1.5 * IQR and Q3 + 1.5 * IQR, where Q1/Q3 are the
    '25%' and '75%' entries of ``data.describe()``.

    Args:
        data: A numeric pandas Series.

    Returns:
        A boolean Series aligned with *data*; True where the value lies
        between the fences, fence values included.
    """
    # describe() is comparatively expensive, so compute it once.
    stats = data.describe()
    q1, q3 = stats['25%'], stats['75%']
    high = q3 + 1.5 * (q3 - q1)
    low = q1 - 1.5 * (q3 - q1)
    return (data <= high) & (data >= low)
|
||||
|
||||
|
||||
excel_file = r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽行业.xlsx'
output_xlsx = r'C:\Users\鸽子\Desktop\时间序列算法_北京行业_12月.xlsx'
df = pd.read_excel(excel_file, sheet_name=0)

for city in df['city_name'].drop_duplicates().dropna():
    # Copy so the date conversion does not write into a filtered view of df.
    # NOTE(review): rows are not filtered on county_name here -- confirm the
    # sheet holds a single row per date for each city.
    df_city = df[df['city_name'] == city].copy()
    df_city['stat_date'] = pd.to_datetime(df_city['stat_date'])
    list_real = []
    list_pred = []
    list_industry = []

    # Every column from the fourth onwards is modelled as its own series.
    for industry in df_city.columns[3:]:
        s1 = df_city[['stat_date', industry]]

        # 2023 history in Prophet's ds/y layout, oldest first.
        in_2023 = (s1['stat_date'] >= '2023-01-01') & (s1['stat_date'] <= '2023-12-31')
        ds_train = (
            s1[in_2023]
            .sort_values(by='stat_date')
            .rename(columns={'stat_date': 'ds', industry: 'y'})
        )

        # Fit on everything except the last three rows, which are forecast.
        df_train = ds_train.iloc[:-3].copy()
        # Values outside the IQR fences are replaced by the previous kept value.
        df_train['y'] = df_train['y'].where(normal(df_train['y'])).ffill()

        model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True)
        model.add_country_holidays(country_name="CN")
        model.fit(df_train)
        future = model.make_future_dataframe(periods=3, freq='D')

        predict = model.predict(future)
        print(city[-6:], industry)

        predict = predict[['ds', 'yhat']].set_index('ds').loc['2023-12'].rename(columns={'yhat': '售电量'})
        actual_dec = ds_train.rename(columns={'y': '售电量'}).set_index('ds').loc['2023-12']

        # Actual values for all but the last three December rows, then the
        # model output for the last three.
        result = pd.concat((actual_dec[:-3], predict[-3:]))
        result['真实值'] = actual_dec['售电量']
        result = result[['真实值', '售电量']]
        result.columns = ['真实值', '预测值']

        list_industry.append(industry)
        list_real.append(result['真实值'].sum())
        list_pred.append(result['预测值'].sum())

    # Monthly totals per column: actual vs. forecast, deviation and its rate.
    final_df = pd.DataFrame({'真实值': list_real, '预测值': list_pred}, index=list_industry)
    final_df['偏差'] = final_df['真实值'] - final_df['预测值']
    final_df['偏差率'] = final_df['偏差'] / final_df['真实值']
    final_df['偏差率'] = final_df['偏差率'].apply(lambda x: "{:.5%}".format(x))

    # Append mode needs an existing workbook, so the first write creates it
    # and later writes append/replace the city's sheet.
    if os.path.exists(output_xlsx):
        writer_kwargs = {'mode': 'a', 'if_sheet_exists': 'replace'}
    else:
        writer_kwargs = {'mode': 'w'}
    with pd.ExcelWriter(output_xlsx, engine='openpyxl', **writer_kwargs) as writer:
        final_df.to_excel(writer, sheet_name=f'{city[-6:]}')
|
@ -0,0 +1,62 @@
|
||||
import pandas as pd
|
||||
import os

pd.set_option('display.width', None)

# Input workbook (second sheet) and the workbook that receives one sheet per city.
source_xlsx = r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽分压区域.xlsx'
output_xlsx = r'C:\Users\鸽子\Desktop\移动平均_安徽分压_12月.xlsx'

df = pd.read_excel(source_xlsx, sheet_name=1)
df['pt_date'] = pd.to_datetime(df['pt_date'])

# The three dates that are forecast instead of being read from the data.
future = pd.date_range(start='2023-12-29', periods=3, freq='D')

# Moving average
# dropna(): a missing city name cannot be sliced with city[-6:] below.
for city in df['city_name'].drop_duplicates().dropna():
    # Rows of this city with an empty county_name, limited to December 2023.
    df_city = (
        df[(df['city_name'] == city) & (df['county_name'].isnull())]
        .set_index('pt_date')
        .loc['2023-12']
        .sort_index()
    )

    # Per-day detail: one column per value column (history + forecast rows).
    result_df = pd.DataFrame({})

    # Skips the first two columns (presumably city_name and county_name --
    # confirm against the sheet layout).
    for industry in df_city.columns[2:]:
        # Everything except the last three rows is treated as known history.
        history = df_city.iloc[:-3]
        df_moving_avg = pd.DataFrame(history[industry], index=history.index)

        for date in future:
            # Mean of the three most recent rows whose value is not 0; a
            # forecast written here feeds the following dates.
            recent = df_moving_avg[df_moving_avg.values != 0][-3:]
            df_moving_avg.loc[date, industry] = recent.mean().values[0]

        result_df = pd.concat([result_df, df_moving_avg], axis=1)

    print(city[-6:])

    # Monthly totals per column: forecast vs. actual, deviation and its rate.
    final_df = pd.DataFrame(result_df.sum(), columns=['预测值'])
    final_df['真实值'] = df_city[df_city.columns[2:]].sum()
    final_df['偏差'] = final_df['真实值'] - final_df['预测值']
    final_df['偏差率'] = final_df['偏差'] / final_df['真实值']
    final_df['偏差率'] = final_df['偏差率'].apply(lambda x: "{:.5%}".format(x))
    print(final_df)

    # Append mode needs an existing workbook, so the first write creates it
    # and later writes append/replace the city's sheet.
    if os.path.exists(output_xlsx):
        writer_kwargs = {'mode': 'a', 'if_sheet_exists': 'replace'}
    else:
        writer_kwargs = {'mode': 'w'}
    with pd.ExcelWriter(output_xlsx, engine='openpyxl', **writer_kwargs) as writer:
        final_df.to_excel(writer, sheet_name=f'{city[-6:]}')
|
@ -0,0 +1,59 @@
|
||||
import pandas as pd
|
||||
import os

pd.set_option('display.width', None)

# Input workbook (second sheet) and the workbook that receives one sheet per city.
source_xlsx = r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽行业.xlsx'
output_xlsx = r'C:\Users\鸽子\Desktop\移动平均_安徽行业_12月.xlsx'

df = pd.read_excel(source_xlsx, sheet_name=1)
df['stat_date'] = pd.to_datetime(df['stat_date'])

# The three dates that are forecast instead of being read from the data.
future = pd.date_range(start='2023-12-29', periods=3, freq='D')

# Moving average
for city in df['city_name'].drop_duplicates().dropna():
    # Rows of this city with an empty county_name, limited to December 2023.
    df_city = (
        df[(df['city_name'] == city) & (df['county_name'].isnull())]
        .set_index('stat_date')
        .loc['2023-12']
        .sort_index()
    )

    # Per-day detail: one column per industry column (history + forecast rows).
    result_df = pd.DataFrame({})

    # Skips the first two columns (presumably city_name and county_name --
    # confirm against the sheet layout).
    for industry in df_city.columns[2:]:
        # Everything except the last three rows is treated as known history.
        history = df_city.iloc[:-3]
        df_moving_avg = pd.DataFrame(history[industry], index=history.index)

        for date in future:
            # Mean of the three most recent rows whose value is not 0; a
            # forecast written here feeds the following dates.
            recent = df_moving_avg[df_moving_avg.values != 0][-3:]
            # .values[0] stores a scalar rather than a length-1 array.
            df_moving_avg.loc[date, industry] = recent.mean().values[0]

        result_df = pd.concat([result_df, df_moving_avg], axis=1)

    print(city[-6:])

    # Monthly totals per column: forecast vs. actual, deviation and its rate.
    final_df = pd.DataFrame(result_df.sum(), columns=['预测值'])
    final_df['真实值'] = df_city[df_city.columns[2:]].sum()
    final_df['偏差'] = final_df['真实值'] - final_df['预测值']
    final_df['偏差率'] = final_df['偏差'] / final_df['真实值']
    final_df['偏差率'] = final_df['偏差率'].apply(lambda x: "{:.5%}".format(x))
    print(final_df)

    # Append mode needs an existing workbook, so the first write creates it
    # and later writes append/replace the city's sheet.
    if os.path.exists(output_xlsx):
        writer_kwargs = {'mode': 'a', 'if_sheet_exists': 'replace'}
    else:
        writer_kwargs = {'mode': 'w'}
    with pd.ExcelWriter(output_xlsx, engine='openpyxl', **writer_kwargs) as writer:
        final_df.to_excel(writer, sheet_name=f'{city[-6:]}')
|
@ -0,0 +1,90 @@
|
||||
import pandas as pd
|
||||
import datetime
|
||||
import math
|
||||
import chinese_calendar as cc
|
||||
import xgboost as xgb
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.metrics import r2_score
|
||||
|
||||
|
||||
def holiday_work(x):
    """Encode a date as a workday/holiday flag using ``chinese_calendar``.

    Args:
        x: The date to classify; it is passed unchanged to
            ``cc.is_workday`` / ``cc.is_holiday``.

    Returns:
        0 when ``cc.is_workday(x)`` is true, 1 when ``cc.is_holiday(x)`` is
        true, and None when neither check is true.
    """
    if cc.is_workday(x):
        return 0
    if cc.is_holiday(x):
        return 1
    # Neither check matched; keep the original implicit result, but explicit.
    return None
|
||||
def jq(y, x):
    """Approximate the calendar date of solar term number *x* in year *y*.

    Args:
        y: Gregorian year.
        x: Zero-based term number within the year (this file uses 0..23).

    Returns:
        A ``datetime.date`` obtained by adding a whole number of days to
        1899-12-31.
    """
    # Day count from the epoch: a per-year term plus a constant offset, an
    # even spacing per solar term, and a sinusoidal correction.
    day_offset = 365.242 * (y - 1900) + 6.2 + 15.22 * x - 1.9 * math.sin(0.262 * x)
    epoch = datetime.date(1899, 12, 31)
    # int() truncates the fractional part of the day count.
    return epoch + datetime.timedelta(days=int(day_offset))
|
||||
|
||||
def normal(nd):
    """Return the values of *nd* that lie strictly inside the IQR fences.

    The fences are Q1 - 1.5 * IQR and Q3 + 1.5 * IQR, where Q1/Q3 are the
    '25%' and '75%' entries of ``nd.describe()``.

    Args:
        nd: A numeric pandas Series.

    Returns:
        The sub-Series of *nd* (original index labels kept) whose values are
        greater than the lower fence and less than the upper fence. Values
        equal to a fence are dropped.
    """
    # describe() is comparatively expensive, so compute it once.
    stats = nd.describe()
    q1, q3 = stats['25%'], stats['75%']
    high = q3 + 1.5 * (q3 - q1)
    low = q1 - 1.5 * (q3 - q1)
    return nd[(nd < high) & (nd > low)]
|
||||
|
||||
import os

# Names of the 24 solar terms; position i corresponds to jq(year, i).
jq_list = ['小寒', '大寒', '立春', '雨水', '惊蛰', '春分', '清明', '谷雨', '立夏', '小满', '芒种', '夏至', '小暑',
           '大暑', '立秋', '处暑', '白露', '秋分', '寒露', '霜降', '立冬', '小雪', '大雪', '冬至']

# Integer code used as the model feature for each solar term.
label_dict = {'冬至': 0, '处暑': 1, '夏至': 2, '大寒': 3, '大暑': 4, '大雪': 5, '寒露': 6, '小寒': 7, '小暑': 8,
              '小满': 9, '小雪': 10, '惊蛰': 11, '春分': 12, '清明': 13, '白露': 14, '秋分': 15, '立冬': 16, '立夏': 17,
              '立春': 18, '立秋': 19, '芒种': 20, '谷雨': 21, '雨水': 22, '霜降': 23}

# 'YYYY-MM-DD' -> solar-term name for every term date of 2023 and 2024.
jq_dict = {}
for j in range(2023, 2025):
    for i, term in enumerate(jq_list):
        jq_dict[jq(j, i).strftime('%Y-%m-%d')] = term

# Holiday flag per date, read from a separate workbook. Loaded once here
# rather than once per city, since it does not depend on the city.
ys_df = pd.read_excel(r'C:\python-project\p1031\入模数据\杭州.xlsx')
ys_df['dtdate'] = pd.to_datetime(ys_df['dtdate'])
ys_dict = dict(zip(ys_df['dtdate'], ys_df['holiday']))

output_xlsx = r'C:\Users\鸽子\Desktop\安徽区域电量_12月.xlsx'

pd.set_option('display.width', None)
df_qy_ah = pd.read_excel(r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽分压区域.xlsx', sheet_name=2)
# Keep rows with an empty county_name; copy so the column assignment below
# does not write into a filtered view.
df_qy_ah = df_qy_ah[df_qy_ah['county_name'].isnull()].copy()
df_qy_ah['pt_date'] = pd.to_datetime(df_qy_ah['pt_date'])
df_qy_ah = df_qy_ah[df_qy_ah['pt_date'] <= '2023-12-31']

# Columns that are identifiers or the target, not model features.
non_feature_columns = ['city_name', 'county_name', 'power_sal']

# dropna(): rows without a city name cannot form a per-city model.
for city in df_qy_ah['city_name'].drop_duplicates().dropna():
    df_ah_city = df_qy_ah[df_qy_ah['city_name'] == city].drop_duplicates().copy()

    # Solar-term feature: the term name on its start date, carried forward by
    # the forward fill (which is applied to every column, as before).
    df_ah_city['24ST'] = df_ah_city['pt_date'].dt.strftime('%Y-%m-%d').map(jq_dict)
    df_ah_city = df_ah_city.ffill()
    # Rows before the first mapped term date get the winter-solstice label.
    df_ah_city['24ST'] = df_ah_city['24ST'].fillna('冬至')
    df_ah_city['24ST'] = df_ah_city['24ST'].map(label_dict)

    df_ah_city['holiday'] = df_ah_city['pt_date'].map(ys_dict)

    # Dates missing from the holiday table fall back to holiday_work().
    holiday_null_s = df_ah_city.loc[df_ah_city['holiday'].isnull(), 'pt_date'].map(holiday_work)
    # Hard-coded override kept from the original: the last two of those dates
    # get code 3.
    holiday_null_s.iloc[-2:] = 3
    df_ah_city['holiday'] = df_ah_city['holiday'].fillna(holiday_null_s)

    df_ah_city = df_ah_city.set_index('pt_date')
    # Actual December 2023 sales, taken before outlier removal.
    df_ = df_ah_city.loc['2023-12']['power_sal']

    # Remove outliers: keep only rows whose power_sal passes the IQR filter.
    df_ah_city = df_ah_city.loc[normal(df_ah_city['power_sal']).index]
    print(city)
    print(df_ah_city)

    features = df_ah_city.drop(columns=non_feature_columns)
    target = df_ah_city['power_sal']

    # Fit/validate on up to 177 rows preceding the final three rows.
    X, y = features.iloc[-180:-3], target.iloc[-180:-3]
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    # NOTE(review): if the outlier filter removed one of the last three
    # December days, these rows no longer line up with df_[-3:] -- confirm.
    eval_x = features.iloc[-3:]

    model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    result_test = pd.DataFrame({'test': y_test, 'pred': y_pred}, index=y_test.index)

    # Mean absolute percentage error and R^2 on the hold-out split.
    print((abs(result_test['pred'] - result_test['test']) / result_test['test']).mean())
    print(r2_score(y_test, y_pred))

    # December series: actual values, with the last three replaced by the
    # model output in the forecast column.
    actual = list(df_.values)
    final_df = pd.DataFrame({'真实值': actual, '预测值': actual[:-3] + list(model.predict(eval_x))},
                            index=df_.index)
    # A single month-level deviation rate, repeated on every row.
    final_df['偏差率'] = (final_df['真实值'] - final_df['预测值']).sum() / final_df['真实值'].sum()
    final_df['偏差率'] = final_df['偏差率'].apply(lambda x: "{:.5%}".format(x))
    print(final_df)

    # Append mode needs an existing workbook, so the first write creates it
    # and later writes append/replace the city's sheet.
    if os.path.exists(output_xlsx):
        writer_kwargs = {'mode': 'a', 'if_sheet_exists': 'replace'}
    else:
        writer_kwargs = {'mode': 'w'}
    with pd.ExcelWriter(output_xlsx, engine='openpyxl', **writer_kwargs) as writer:
        final_df.to_excel(writer, sheet_name=f'{city}')
|
@ -0,0 +1,74 @@
|
||||
from prophet import Prophet
|
||||
import pandas as pd
|
||||
import os
|
||||
import numpy as np
|
||||
|
||||
|
||||
def normal(data):
    """Return a boolean mask marking values inside the IQR fences.

    The fences are Q1 - 1.5 * IQR and Q3 + 1.5 * IQR, where Q1/Q3 are the
    '25%' and '75%' entries of ``data.describe()``.

    Args:
        data: A numeric pandas Series.

    Returns:
        A boolean Series aligned with *data*; True where the value lies
        between the fences, fence values included.
    """
    # describe() is comparatively expensive, so compute it once.
    stats = data.describe()
    q1, q3 = stats['25%'], stats['75%']
    high = q3 + 1.5 * (q3 - q1)
    low = q1 - 1.5 * (q3 - q1)
    return (data <= high) & (data >= low)
|
||||
|
||||
|
||||
excel_file = r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽行业.xlsx'
output_xlsx = r'C:\Users\鸽子\Desktop\时间序列算法_安徽行业_12月.xlsx'
df = pd.read_excel(excel_file, sheet_name=1)

for city in df['city_name'].drop_duplicates().dropna():
    # Copy so the date conversion does not write into a filtered view of df.
    # NOTE(review): rows are not filtered on county_name here -- confirm the
    # sheet holds a single row per date for each city.
    df_city = df[df['city_name'] == city].copy()
    df_city['stat_date'] = pd.to_datetime(df_city['stat_date'])
    list_real = []
    list_pred = []
    list_industry = []

    # Every column from the fourth onwards is modelled as its own series.
    for industry in df_city.columns[3:]:
        s1 = df_city[['stat_date', industry]]

        # 2023 history in Prophet's ds/y layout, oldest first.
        in_2023 = (s1['stat_date'] >= '2023-01-01') & (s1['stat_date'] <= '2023-12-31')
        ds_train = (
            s1[in_2023]
            .sort_values(by='stat_date')
            .rename(columns={'stat_date': 'ds', industry: 'y'})
        )

        # Fit on everything except the last three rows, which are forecast.
        df_train = ds_train.iloc[:-3].copy()
        # Values outside the IQR fences are replaced by the previous kept value.
        df_train['y'] = df_train['y'].where(normal(df_train['y'])).ffill()

        model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True)
        model.add_country_holidays(country_name="CN")
        model.fit(df_train)
        future = model.make_future_dataframe(periods=3, freq='D')

        predict = model.predict(future)
        print(city[-6:], industry)

        predict = predict[['ds', 'yhat']].set_index('ds').loc['2023-12'].rename(columns={'yhat': '售电量'})
        actual_dec = ds_train.rename(columns={'y': '售电量'}).set_index('ds').loc['2023-12']

        # Actual values for all but the last three December rows, then the
        # model output for the last three.
        result = pd.concat((actual_dec[:-3], predict[-3:]))
        result['真实值'] = actual_dec['售电量']
        result = result[['真实值', '售电量']]
        result.columns = ['真实值', '预测值']

        list_industry.append(industry)
        list_real.append(result['真实值'].sum())
        list_pred.append(result['预测值'].sum())

    # Monthly totals per column: actual vs. forecast, deviation and its rate.
    final_df = pd.DataFrame({'真实值': list_real, '预测值': list_pred}, index=list_industry)
    final_df['偏差'] = final_df['真实值'] - final_df['预测值']
    final_df['偏差率'] = final_df['偏差'] / final_df['真实值']
    final_df['偏差率'] = final_df['偏差率'].apply(lambda x: "{:.5%}".format(x))

    # Append mode needs an existing workbook, so the first write creates it
    # and later writes append/replace the city's sheet.
    if os.path.exists(output_xlsx):
        writer_kwargs = {'mode': 'a', 'if_sheet_exists': 'replace'}
    else:
        writer_kwargs = {'mode': 'w'}
    with pd.ExcelWriter(output_xlsx, engine='openpyxl', **writer_kwargs) as writer:
        final_df.to_excel(writer, sheet_name=f'{city[-6:]}')
|
Loading…
Reference in New Issue