# Source: pytorch/北京安徽/北京行业电量_时间序列.py (75 lines, 3.3 KiB, Python)

from prophet import Prophet
import pandas as pd
import os
import numpy as np
def normal(data, k=1.5):
    """Return a boolean mask marking values inside the Tukey IQR fences.

    A value is "normal" when it lies in the closed interval
    ``[Q1 - k * IQR, Q3 + k * IQR]``, where ``Q1``/``Q3`` are the 25th/75th
    percentiles of ``data`` and ``IQR = Q3 - Q1``.

    Args:
        data: Numeric ``pandas.Series`` to screen for outliers.
        k: Fence multiplier applied to the IQR. The default of 1.5
            reproduces the original hard-coded behaviour.

    Returns:
        Boolean ``pandas.Series`` aligned with ``data``; ``True`` for values
        inside the fences. NaN entries compare as ``False``.
    """
    # Compute each quartile once instead of re-running describe() four times.
    q1 = data.quantile(0.25)
    q3 = data.quantile(0.75)
    iqr = q3 - q1
    high = q3 + k * iqr
    low = q1 - k * iqr
    return (data <= high) & (data >= low)
# ---------------------------------------------------------------------------
# Per-city, per-industry Prophet forecast of the last days of one month.
#
# For every city in the workbook and every industry column, the script:
#   1. takes the 2023 daily series and withholds the trailing `holdout_days`,
#   2. replaces IQR outliers in the training part with the previous value,
#   3. fits Prophet and forecasts the withheld days,
#   4. compares the monthly total (actuals + forecast tail) with the true
#      monthly total and writes one sheet per city to `output_file`.
# ---------------------------------------------------------------------------
excel_file = r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽行业.xlsx'
output_file = r'C:\Users\鸽子\Desktop\时间序列算法_北京行业_12月.xlsx'
holdout_days = 3          # trailing days withheld and forecast; must be >= 1
target_month = '2023-12'  # month whose totals are compared

df = pd.read_excel(excel_file, sheet_name=0)
for city in df['city_name'].drop_duplicates().dropna():
    # Copy the filtered rows so the datetime conversion below does not write
    # into a slice of `df` (avoids SettingWithCopyWarning / silent no-op).
    df_city = df[df['city_name'] == city].copy()
    df_city['stat_date'] = pd.to_datetime(df_city['stat_date'])
    list_real = []
    list_pred = []
    list_industry = []
    result_dict = {}
    # NOTE(review): assumes the first three columns are non-industry
    # metadata and every later column is one industry series -- confirm
    # against the workbook layout.
    for industry in df_city.columns[3:]:
        s1 = df_city[['stat_date', industry]]
        in_2023 = (s1['stat_date'] >= '2023-01-01') & (s1['stat_date'] <= '2023-12-31')
        # Chained, non-inplace rename: no mutation of a filtered slice.
        ds_train = (
            s1[in_2023]
            .sort_values(by='stat_date')
            .rename(columns={'stat_date': 'ds', industry: 'y'})
        )
        # Train on everything except the withheld trailing days.
        df_train = ds_train.iloc[:-holdout_days].copy()
        # Blank out IQR outliers, then carry the previous value forward.
        # `.ffill()` replaces the deprecated `fillna(method='ffill')`.
        df_train['y'] = df_train['y'].where(normal(df_train['y']), other=np.nan).ffill()

        model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True)
        model.add_country_holidays(country_name="CN")
        model.fit(df_train)
        future = model.make_future_dataframe(periods=holdout_days, freq='D')
        predict = model.predict(future)
        print(city[-6:], industry)

        predict = (
            predict[['ds', 'yhat']]
            .set_index('ds')
            .loc[target_month]
            .rename(columns={'yhat': '售电量'})
        )
        ds_train = ds_train.rename(columns={'y': '售电量'})
        # Actual values for the target month, computed once and reused.
        month_actual = ds_train.set_index('ds').loc[target_month]

        # Month series = actuals up to the holdout + forecast for the holdout.
        result = pd.concat((month_actual.iloc[:-holdout_days], predict.iloc[-holdout_days:]))
        result_dict[industry] = list(result['售电量'])
        # Select the column explicitly rather than assigning a one-column
        # DataFrame; values are aligned on the date index.
        result['真实值'] = month_actual['售电量']
        result = result[['真实值', '售电量']]
        result.columns = ['真实值', '预测值']

        list_industry.append(industry)
        list_real.append(result['真实值'].sum())
        list_pred.append(result['预测值'].sum())

    final_df = pd.DataFrame({'真实值': list_real, '预测值': list_pred}, index=list_industry)
    final_df['偏差'] = final_df['真实值'] - final_df['预测值']
    final_df['偏差率'] = final_df['偏差'] / final_df['真实值']
    final_df['偏差率'] = final_df['偏差率'].apply(lambda x: "{:.5%}".format(x))

    # Append mode needs an existing workbook, and `if_sheet_exists` is only
    # accepted in append mode, so create the file on the first write.
    if os.path.exists(output_file):
        writer_options = {'mode': 'a', 'if_sheet_exists': 'replace'}
    else:
        writer_options = {'mode': 'w'}
    with pd.ExcelWriter(output_file, engine='openpyxl', **writer_options) as writer:
        final_df.to_excel(writer, sheet_name=f'{city[-6:]}')
# df = predict.join(s1.set_index('ds')).loc['2023-8']
# df['偏差率'] = (df['y'] - df['yhat']) / df['y']
# df['goal'] = (df['y'] - df['yhat'])[-3:].sum() / df['y'].sum()
# list_goal.append((df['y'] - df['yhat'])[-3:].sum() / df['y'].sum())
# list_industry.append(industry)
# df = pd.DataFrame({'industry': list_industry, 'goal': list_goal})
# df.to_csv(fr'C:\Users\鸽子\Desktop\行业8月偏差\{city[:2]}_goal.csv', index=False, encoding='gbk')
# with open(r'C:\Users\鸽子\Desktop\goal_8.txt','a') as f:
# f.write(f'{city[:2]}\n')
# df['goal'].value_counts(bins=[-np.inf,-0.05, -0.01, -0.005, 0, 0.005, 0.01, 0.02, 0.05,np.inf], sort=False).to_csv(f,header=False,sep='\t')