You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

52 lines
2.3 KiB
Python

from prophet import Prophet
import pandas as pd
import os
import datetime
import numpy as np
def normal(data):
    """Return the entries of *data* that lie inside the 1.5 * IQR fences.

    The fences are ``Q1 - 1.5 * IQR`` (lower) and ``Q3 + 1.5 * IQR`` (upper),
    where ``Q1`` and ``Q3`` are the 25th and 75th percentiles of *data* and
    ``IQR = Q3 - Q1``. Values exactly on a fence are kept.

    Args:
        data: A numeric ``pandas.Series``.

    Returns:
        The sub-series of *data* within the fences, keeping the original
        index labels. NaN entries fail both comparisons and are dropped.
    """
    # One quantile call replaces six describe() calls; describe() derives its
    # '25%' / '75%' entries from the same quantile computation.
    q1, q3 = data.quantile([0.25, 0.75])
    iqr = q3 - q1
    high = q3 + 1.5 * iqr
    low = q1 - 1.5 * iqr
    return data[(data <= high) & (data >= low)]
file_dir = './浙江各地市行业电量数据'

# Inclusive window of history used to fit each model. The original code
# filtered twice ('2022-09-30' first, then '2022-08-31'); the second filter
# was a no-op, so the effective start date is the one kept here.
TRAIN_START = '2022-09-30'
TRAIN_END = '2023-10-31'
# Number of daily steps forecast beyond the last training date.
FORECAST_DAYS = 3
# Month (partial-string index into the prediction frame) printed per industry.
REPORT_MONTH = '2023-10'


def _forecast_industry(df_city, industry):
    """Fit Prophet on one industry column and return its ``yhat`` series.

    Args:
        df_city: Frame with a datetime ``stat_date`` column and one column
            per industry.
        industry: Name of the column in *df_city* to forecast.

    Returns:
        A frame indexed by ``ds`` with a single ``yhat`` column covering the
        training dates plus ``FORECAST_DAYS`` future days, or ``None`` when
        fewer than two rows survive the window and outlier filters (Prophet
        refuses to fit on fewer than two rows).
    """
    history = df_city[['stat_date', industry]]
    in_window = (history['stat_date'] >= TRAIN_START) & (history['stat_date'] <= TRAIN_END)
    history = history[in_window]
    # Drop IQR outliers (and NaNs) computed over the windowed values only.
    history = history.loc[normal(history[industry]).index]
    # Prophet requires the columns to be named 'ds' and 'y'; rename returns a
    # new frame, avoiding an in-place mutation of a derived slice.
    df_train = history.rename(columns={'stat_date': 'ds', industry: 'y'}).sort_values(by='ds')
    if len(df_train) < 2:
        return None

    # NOTE(review): daily_seasonality targets within-day cycles; with one
    # observation per day it may contribute nothing -- confirm it is intended.
    model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True)
    model.add_country_holidays(country_name="CN")
    model.fit(df_train)
    future = model.make_future_dataframe(periods=FORECAST_DAYS, freq='D')
    predict = model.predict(future)
    return predict[['ds', 'yhat']].set_index('ds')


# Each entry of file_dir is expected to be one Excel workbook.
# Sorted so the output order is deterministic (os.listdir order is arbitrary).
for city in sorted(os.listdir(file_dir)):
    city_path = os.path.join(file_dir, city)
    # Skip sub-directories and Office lock files ('~$name.xlsx'), which
    # read_excel cannot parse.
    if city.startswith('~$') or not os.path.isfile(city_path):
        continue
    df_city = pd.read_excel(city_path)
    # Keep only the first 10 characters of the stringified stat_date
    # (e.g. 'YYYY-MM-DD') before parsing it as a datetime.
    df_city['stat_date'] = pd.to_datetime(
        df_city['stat_date'].map(lambda x: str(x).strip()[:10])
    )
    # Columns from the third onwards are treated as industry series --
    # assumes the first two columns are non-industry fields; TODO confirm.
    for industry in df_city.columns[2:]:
        predict = _forecast_industry(df_city, industry)
        print(city, industry)
        if predict is None:
            print('skipped: fewer than 2 usable rows after filtering')
            continue
        print(predict.loc[REPORT_MONTH])
    # NOTE(review): an earlier revision back-tested every industry here. It
    # joined `predict` with the actual values for 2023-08, computed the
    # relative deviation (y - yhat) / y, and wrote a per-city CSV plus binned
    # deviation counts to a local desktop folder. That code was already
    # commented out and has been removed, along with the result lists it
    # used; restore it from history if the back-test is needed again.