|
|
|
@ -8,7 +8,7 @@ import numpy as np
|
|
|
|
|
def normal(data):
    """Flag the values of ``data`` that lie inside the 1.5 * IQR fences.

    The fences are computed from the quartiles reported by
    ``data.describe()``::

        high = Q3 + 1.5 * (Q3 - Q1)
        low  = Q1 - 1.5 * (Q3 - Q1)

    Parameters
    ----------
    data : pandas.Series
        Series whose ``describe()`` output contains the ``'25%'`` and
        ``'75%'`` entries (i.e. numeric data).

    Returns
    -------
    pandas.Series
        Boolean mask sharing ``data``'s index: ``True`` where
        ``low <= value <= high``, ``False`` otherwise. NaN entries compare
        ``False`` against both fences, so they are flagged ``False`` too.

    Raises
    ------
    KeyError
        If ``describe()`` does not report quartiles for ``data``.
    """
    # Summarise once: the quartiles do not change between uses, so there is
    # no reason to recompute the full describe() table for every term.
    summary = data.describe()
    q1 = summary['25%']
    q3 = summary['75%']
    iqr = q3 - q1

    high = q3 + 1.5 * iqr
    low = q1 - 1.5 * iqr

    # Return the mask rather than the filtered values so the result can be
    # passed directly as the condition of Series.where(); a single return
    # replaces the two stacked returns, of which the second was unreachable.
    return (data <= high) & (data >= low)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Input directory, relative to the current working directory; its entries are
# iterated by the `for city in os.listdir(file_dir)` loop, one entry per city.
# NOTE(review): the folder name reads as "Zhejiang per-city industry
# electricity data" — confirm the expected layout of the files inside it.
file_dir = './浙江各地市行业电量数据'
|
|
|
|
@ -18,14 +18,14 @@ for city in os.listdir(file_dir):
|
|
|
|
|
df_city['stat_date'] = pd.to_datetime(df_city['stat_date'])
|
|
|
|
|
list_goal = []
|
|
|
|
|
list_industry = []
|
|
|
|
|
result_dict = {}
|
|
|
|
|
for industry in df_city.columns[2:]:
|
|
|
|
|
s1 = df_city[['stat_date', industry]]
|
|
|
|
|
s1 = s1[(s1['stat_date'] >= '2022-09-30') & (s1['stat_date'] <= '2023-10-31')]
|
|
|
|
|
s1 = s1.loc[normal(s1[industry]).index]
|
|
|
|
|
s1.rename(columns={'stat_date': 'ds', industry: 'y'}, inplace=True)
|
|
|
|
|
df_train = s1[(s1['stat_date'] >= '2022-09-30') & (s1['stat_date'] <= '2023-10-31')].sort_values(by='stat_date')
|
|
|
|
|
df_train.rename(columns={'stat_date': 'ds', industry: 'y'}, inplace=True)
|
|
|
|
|
|
|
|
|
|
df_train = s1[(s1['ds'] >= '2022-08-31') & (s1['ds'] <= '2023-10-31')].sort_values(by='ds')
|
|
|
|
|
df_test = s1[(s1['ds'] >= '2022-08-31') & (s1['ds'] <= '2023-10-31')].sort_values(by='ds')
|
|
|
|
|
df_train['y'] = df_train['y'].where(normal(df_train['y']), other=np.nan).fillna(method='ffill')
|
|
|
|
|
# df_test = s1[(s1['ds'] >= '2022-08-31') & (s1['ds'] <= '2023-10-31')].sort_values(by='ds')
|
|
|
|
|
|
|
|
|
|
model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True)
|
|
|
|
|
model.add_country_holidays(country_name="CN")
|
|
|
|
@ -33,9 +33,16 @@ for city in os.listdir(file_dir):
|
|
|
|
|
future = model.make_future_dataframe(periods=3, freq='D')
|
|
|
|
|
|
|
|
|
|
predict = model.predict(future)
|
|
|
|
|
predict = predict[['ds', 'yhat']].set_index('ds')
|
|
|
|
|
print(city,industry)
|
|
|
|
|
print(predict.loc['2023-10'])
|
|
|
|
|
|
|
|
|
|
predict = predict[['ds', 'yhat']].set_index('ds').loc['2023-10'].rename(columns={'yhat':'售电量'})
|
|
|
|
|
df_train.rename(columns={'y':'售电量'},inplace=True)
|
|
|
|
|
result = pd.concat((df_train.set_index('ds').loc['2023-10'][:28],predict[-3:]))
|
|
|
|
|
result_dict[industry] = list(result['售电量'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\行业电量v1130.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as writer:
|
|
|
|
|
pd.DataFrame(result_dict,index=pd.date_range(start=f'2023-10-01', end=f'2023-10-31', freq='D').strftime('%Y-%m-%d')).to_excel(writer,sheet_name=city)
|
|
|
|
|
|
|
|
|
|
# df = predict.join(s1.set_index('ds')).loc['2023-8']
|
|
|
|
|
# df['偏差率'] = (df['y'] - df['yhat']) / df['y']
|
|
|
|
|