输出预测结果
parent
e5ecbf7bb8
commit
71bc236f76
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,61 @@
|
|||||||
|
from prophet import Prophet
|
||||||
|
import pandas as pd
|
||||||
|
import os
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
|
def normal(data):
    """Flag the values of ``data`` that lie inside the 1.5 * IQR fences.

    Args:
        data: Numeric pandas Series to screen for outliers.

    Returns:
        Boolean Series aligned with ``data``. An entry is True when the value
        lies within ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]`` (bounds inclusive)
        and False otherwise; NaN entries compare False.
    """
    # Compute each quartile once instead of calling describe() six times.
    q1 = data.quantile(0.25)
    q3 = data.quantile(0.75)
    high = q3 + 1.5 * (q3 - q1)
    low = q1 - 1.5 * (q3 - q1)
    return (data <= high) & (data >= low)
|
||||||
|
|
||||||
|
|
||||||
|
# Folder holding the per-city Excel workbooks read below. Written as a raw
# string: the original plain literal relied on invalid escape sequences such
# as "\p", which newer Python versions warn about. The value is unchanged.
file_dir = r'C:\python-project\p1031\浙江电压等级电量\浙江各地市分电压日电量数据'

# Workbook that receives one sheet per input file.
output_path = r'C:\Users\鸽子\Desktop\分压电量预测v1213.xlsx'

# Row labels shared by every output sheet: each day of November 2023 as text.
# Built once because it does not depend on the city being processed.
november_days = pd.date_range(start='2023-11-01', end='2023-11-30', freq='D').strftime('%Y-%m-%d')

for city in os.listdir(file_dir):
    df_city = pd.read_excel(os.path.join(file_dir, city))
    df_city['stat_date'] = pd.to_datetime(df_city['stat_date'])
    result_dict = {}

    # The first two columns are skipped; every remaining column is modelled
    # on its own.
    for level in df_city.columns[2:]:
        in_window = (df_city['stat_date'] >= '2022-09-30') & (df_city['stat_date'] <= '2023-11-27')
        ds_train = (
            df_city.loc[in_window, ['stat_date', level]]
            .sort_values(by='stat_date')
            .rename(columns={'stat_date': 'ds', level: 'y'})
        )

        # Train on a cleaned copy: values outside the IQR fences are blanked
        # and then forward-filled. ds_train keeps the raw values for output.
        df_train = ds_train.copy()
        df_train['y'] = df_train['y'].where(normal(df_train['y']), other=np.nan).ffill()

        model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True)
        model.add_country_holidays(country_name="CN")
        model.fit(df_train)
        # Extend the training dates by three daily steps.
        future = model.make_future_dataframe(periods=3, freq='D')

        predict = model.predict(future)
        print(city[1:3], level)

        predict = predict[['ds', 'yhat']].set_index('ds').loc['2023-11'].rename(columns={'yhat': '售电量'})
        actual = ds_train.rename(columns={'y': '售电量'}).set_index('ds').loc['2023-11'][:27]

        # Raw values for the first 27 November rows, then the last three
        # predicted rows.
        # NOTE(review): the DataFrame built below needs exactly 30 values per
        # column, so this presumably expects one row per day - confirm.
        result = pd.concat((actual, predict[-3:]))
        result_dict[level] = list(result['售电量'])

    # Append to the workbook when it already exists (replacing a same-named
    # sheet); otherwise create it. Append mode alone fails on a missing file,
    # and if_sheet_exists is only accepted together with mode='a'.
    if os.path.exists(output_path):
        writer_kwargs = {'mode': 'a', 'if_sheet_exists': 'replace'}
    else:
        writer_kwargs = {'mode': 'w'}
    with pd.ExcelWriter(output_path, engine='openpyxl', **writer_kwargs) as writer:
        pd.DataFrame(result_dict, index=november_days).to_excel(writer, sheet_name=city[1:3])
|
||||||
|
|
||||||
|
# df = predict.join(s1.set_index('ds')).loc['2023-8']
|
||||||
|
# df['偏差率'] = (df['y'] - df['yhat']) / df['y']
|
||||||
|
# df['goal'] = (df['y'] - df['yhat'])[-3:].sum() / df['y'].sum()
|
||||||
|
# list_goal.append((df['y'] - df['yhat'])[-3:].sum() / df['y'].sum())
|
||||||
|
# list_industry.append(industry)
|
||||||
|
|
||||||
|
# df = pd.DataFrame({'industry': list_industry, 'goal': list_goal})
|
||||||
|
# df.to_csv(fr'C:\Users\鸽子\Desktop\行业8月偏差\{city[:2]}_goal.csv', index=False, encoding='gbk')
|
||||||
|
#
|
||||||
|
# with open(r'C:\Users\鸽子\Desktop\goal_8.txt','a') as f:
|
||||||
|
# f.write(f'{city[:2]}\n')
|
||||||
|
# df['goal'].value_counts(bins=[-np.inf,-0.05, -0.01, -0.005, 0, 0.005, 0.01, 0.02, 0.05,np.inf], sort=False).to_csv(f,header=False,sep='\t')
|
@ -0,0 +1,61 @@
|
|||||||
|
from prophet import Prophet
|
||||||
|
import pandas as pd
|
||||||
|
import os
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
|
def normal(data):
    """Flag the values of ``data`` that lie inside the 1.5 * IQR fences.

    Args:
        data: Numeric pandas Series to screen for outliers.

    Returns:
        Boolean Series aligned with ``data``. An entry is True when the value
        lies within ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]`` (bounds inclusive)
        and False otherwise; NaN entries compare False.
    """
    # Compute each quartile once instead of calling describe() six times.
    q1 = data.quantile(0.25)
    q3 = data.quantile(0.75)
    high = q3 + 1.5 * (q3 - q1)
    low = q1 - 1.5 * (q3 - q1)
    return (data <= high) & (data >= low)
|
||||||
|
|
||||||
|
|
||||||
|
# Folder holding the per-city Excel workbooks read below. Written as a raw
# string: the original plain literal relied on invalid escape sequences such
# as "\p", which newer Python versions warn about. The value is unchanged.
file_dir = r'C:\python-project\p1031\浙江电压等级电量\浙江各地市分电压日电量数据'

# Workbook that receives one sheet per input file.
output_path = r'C:\Users\鸽子\Desktop\分压电量预测v1213.xlsx'

# Row labels shared by every output sheet: each day of November 2023 as text.
# Built once because it does not depend on the city being processed.
november_days = pd.date_range(start='2023-11-01', end='2023-11-30', freq='D').strftime('%Y-%m-%d')

for city in os.listdir(file_dir):
    df_city = pd.read_excel(os.path.join(file_dir, city))
    df_city['stat_date'] = pd.to_datetime(df_city['stat_date'])
    result_dict = {}

    # The first two columns are skipped; every remaining column is modelled
    # on its own.
    for level in df_city.columns[2:]:
        in_window = (df_city['stat_date'] >= '2022-09-30') & (df_city['stat_date'] <= '2023-11-27')
        ds_train = (
            df_city.loc[in_window, ['stat_date', level]]
            .sort_values(by='stat_date')
            .rename(columns={'stat_date': 'ds', level: 'y'})
        )

        # Train on a cleaned copy: values outside the IQR fences are blanked
        # and then forward-filled. ds_train keeps the raw values for output.
        df_train = ds_train.copy()
        df_train['y'] = df_train['y'].where(normal(df_train['y']), other=np.nan).ffill()

        model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True)
        model.add_country_holidays(country_name="CN")
        model.fit(df_train)
        # Extend the training dates by three daily steps.
        future = model.make_future_dataframe(periods=3, freq='D')

        predict = model.predict(future)
        print(city[1:3], level)

        predict = predict[['ds', 'yhat']].set_index('ds').loc['2023-11'].rename(columns={'yhat': '售电量'})
        actual = ds_train.rename(columns={'y': '售电量'}).set_index('ds').loc['2023-11'][:27]

        # Raw values for the first 27 November rows, then the last three
        # predicted rows.
        # NOTE(review): the DataFrame built below needs exactly 30 values per
        # column, so this presumably expects one row per day - confirm.
        result = pd.concat((actual, predict[-3:]))
        result_dict[level] = list(result['售电量'])

    # Append to the workbook when it already exists (replacing a same-named
    # sheet); otherwise create it. Append mode alone fails on a missing file,
    # and if_sheet_exists is only accepted together with mode='a'.
    if os.path.exists(output_path):
        writer_kwargs = {'mode': 'a', 'if_sheet_exists': 'replace'}
    else:
        writer_kwargs = {'mode': 'w'}
    with pd.ExcelWriter(output_path, engine='openpyxl', **writer_kwargs) as writer:
        pd.DataFrame(result_dict, index=november_days).to_excel(writer, sheet_name=city[1:3])
|
||||||
|
|
||||||
|
# df = predict.join(s1.set_index('ds')).loc['2023-8']
|
||||||
|
# df['偏差率'] = (df['y'] - df['yhat']) / df['y']
|
||||||
|
# df['goal'] = (df['y'] - df['yhat'])[-3:].sum() / df['y'].sum()
|
||||||
|
# list_goal.append((df['y'] - df['yhat'])[-3:].sum() / df['y'].sum())
|
||||||
|
# list_industry.append(industry)
|
||||||
|
|
||||||
|
# df = pd.DataFrame({'industry': list_industry, 'goal': list_goal})
|
||||||
|
# df.to_csv(fr'C:\Users\鸽子\Desktop\行业8月偏差\{city[:2]}_goal.csv', index=False, encoding='gbk')
|
||||||
|
#
|
||||||
|
# with open(r'C:\Users\鸽子\Desktop\goal_8.txt','a') as f:
|
||||||
|
# f.write(f'{city[:2]}\n')
|
||||||
|
# df['goal'].value_counts(bins=[-np.inf,-0.05, -0.01, -0.005, 0, 0.005, 0.01, 0.02, 0.05,np.inf], sort=False).to_csv(f,header=False,sep='\t')
|
@ -0,0 +1,47 @@
|
|||||||
|
import pandas as pd
|
||||||
|
# Read sheet index 1 of the source workbook and convert its 'pt_date' column
# to datetimes in the same step.
df = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江电量20231202.xlsx', sheet_name=1).assign(
    pt_date=lambda frame: pd.to_datetime(frame['pt_date'])
)

# Moving average

# Empty containers; nothing in the active code below fills them.
dict_big, dict_ok = {}, {}
|
||||||
|
|
||||||
|
# for city in df['city_name'].drop_duplicates():
|
||||||
|
#
|
||||||
|
# df_city1 = df[(df['city_name'] == city) & (df['county_name'].isnull())].set_index('pt_date').loc['2023-11']
|
||||||
|
# resut_df = pd.DataFrame({})
|
||||||
|
# index_level = []
|
||||||
|
# tq_list = []
|
||||||
|
# pred_list = []
|
||||||
|
# loss_list = []
|
||||||
|
# rate_list = []
|
||||||
|
# for level in df_city1.columns[2:]:
|
||||||
|
#
|
||||||
|
# index_level.append(level)
|
||||||
|
#
|
||||||
|
# df_moving_avg = pd.DataFrame(df_city1[:-3][level], index=df_city1[:-3].index)
|
||||||
|
# future = pd.date_range(start=df_city1.index[-3], periods=3, freq='D')
|
||||||
|
#
|
||||||
|
# for date in future:
|
||||||
|
# df_moving_avg.loc[date, level] = df_moving_avg[-3:].mean().values
|
||||||
|
# loss = (df_city1[level].tail(-3).sum() - df_moving_avg.tail(-3).sum()) / df_city1[level].sum()
|
||||||
|
# tq_list.append(df_city1[level].sum())
|
||||||
|
# pred_list.append(df_moving_avg[level].sum())
|
||||||
|
# loss_list.append(df_city1[level].sum()-df_moving_avg[level].sum())
|
||||||
|
# rate_list.append((df_city1[level].sum()-df_moving_avg[level].sum())/df_city1[level].sum())
|
||||||
|
# resut_df = pd.DataFrame({'同期电量':tq_list,'预测电量':pred_list,'偏差':loss_list,'偏差率':rate_list},index=index_level)
|
||||||
|
# with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\11月移动平均分压.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as writer:
|
||||||
|
# resut_df.to_excel(writer,sheet_name=f'{city[4:6]}')
|
||||||
|
|
||||||
|
# Add up the sheets of the moving-average workbook into one table, then
# recompute the deviation columns from the summed totals.
# The context manager closes the workbook handle, which was previously left
# open.
with pd.ExcelFile(r'C:\Users\鸽子\Desktop\11月移动平均分压.xlsx') as excel_file:
    # Sheet 0 is skipped; sheet 1 seeds the running total.
    df1 = pd.read_excel(excel_file, sheet_name=1)
    df1.set_index(df1.columns[0], inplace=True)

    for sheet in excel_file.sheet_names[2:]:
        df_sheet = pd.read_excel(excel_file, sheet_name=sheet)
        df_sheet.set_index(df_sheet.columns[0], inplace=True)
        # Align on row labels and treat a missing cell as 0. The in-place
        # `+=` reindexed the sum back to the first sheet's rows, silently
        # dropping rows that only appear in later sheets and turning rows
        # missing from a later sheet into NaN.
        df1 = df1.add(df_sheet, fill_value=0)

# The summed deviation columns are meaningless, so derive them again from
# the summed totals.
df1['偏差'] = df1['同期电量'] - df1['预测电量']
df1['偏差率'] = df1['偏差'] / df1['同期电量']
df1.to_excel('移动平均_11月分压汇总.xlsx')
print(df1)
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,42 @@
|
|||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
# Read sheet index 2 of the source workbook and parse its date column.
df = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江电量20231202.xlsx', sheet_name=2)
df['stat_date'] = pd.to_datetime(df['stat_date'])

# Moving average
# Only the city named in the first row of the sheet is processed.
city = df['city_name'].iloc[0]
print(city)

# November 2023 rows of that city, indexed by date. Sorted (stably) so the
# "last three rows" used below really are the three latest dates.
df_city1 = (
    df[df['city_name'] == city]
    .set_index('stat_date')
    .loc['2023-11']
    .sort_index(kind='stable')
)

forecast_days = 3  # trailing rows replaced by forecasts
window = 3         # most recent values averaged for each forecast

# Dates to forecast; identical for every column, so built once.
future = pd.date_range(start=df_city1.index[-forecast_days], periods=forecast_days, freq='D')

index_industry = []
tq_list = []
pred_list = []
# The first remaining column is skipped; each other column is forecast
# independently.
for industry in df_city1.columns[1:]:
    # Start from everything except the trailing rows, then append one
    # forecast per future date. Each forecast is the mean of the latest
    # `window` values, so earlier forecasts feed later ones.
    forecast = df_city1[industry].iloc[:-forecast_days].copy()
    for date in future:
        forecast.loc[date] = forecast.iloc[-window:].mean()

    index_industry.append(industry)
    tq_list.append(df_city1[industry].sum())
    pred_list.append(forecast.sum())

# One row per column: actual total, forecast-substituted total, their
# difference, and the difference as a share of the actual total.
resut_df = pd.DataFrame({'同期电量': tq_list, '预测电量': pred_list}, index=index_industry)
resut_df['偏差'] = resut_df['同期电量'] - resut_df['预测电量']
resut_df['偏差率'] = resut_df['偏差'] / resut_df['同期电量']
print(resut_df)
# NOTE(review): the output file name is fixed while `city` is taken from the
# data - confirm they refer to the same city.
resut_df.to_excel(r'C:\Users\鸽子\Desktop\移动平均_丽水_行业.xlsx')
|
||||||
|
|
||||||
|
# if loss.values >= 0.005:
|
||||||
|
# dict_big[industry] = loss.values[0]
|
||||||
|
# else:
|
||||||
|
# dict_ok[industry] = loss.values[0]
|
||||||
|
# print(len(dict_ok))
|
||||||
|
# print(len(dict_big))
|
Loading…
Reference in New Issue