You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

63 lines
2.8 KiB
Python

10 months ago
from prophet import Prophet
import pandas as pd
import os
import datetime
10 months ago
import numpy as np
def normal(data):
    """Return a boolean mask of the values lying inside the 1.5 * IQR fences.

    The lower fence is Q1 - 1.5 * IQR and the upper fence is Q3 + 1.5 * IQR,
    where Q1/Q3 are the 25th/75th percentiles of *data* and IQR = Q3 - Q1.

    Args:
        data: pandas Series of numeric values (the caller passes one
            DataFrame column).

    Returns:
        A boolean Series aligned with *data*: True where
        ``low <= value <= high``. NaN entries compare False.
    """
    # Read each quartile once instead of recomputing describe() four times.
    q1 = data.quantile(0.25)
    q3 = data.quantile(0.75)
    iqr = q3 - q1
    high = q3 + 1.5 * iqr
    low = q1 - 1.5 * iqr
    return (data <= high) & (data >= low)
10 months ago
10 months ago
# Forecast the last three days of November 2023 for every industry column of
# every workbook found in `file_dir`, and write one sheet per workbook holding
# the 27 observed days followed by the 3 forecast days.
file_dir = './浙江各地市行业电量数据'
# df = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江电量20231202.xlsx')

# Row labels shared by every output sheet: the 30 calendar days of
# November 2023. Loop-invariant, so it is built once up front.
november_days = pd.date_range(start='2023-11-01', end='2023-11-30', freq='D').strftime('%Y-%m-%d')
output_path = r'C:\Users\鸽子\Desktop\行业电量v1130.xlsx'

for city in os.listdir(file_dir):
    df_city = pd.read_excel(os.path.join(file_dir, city))
    # Keep only the first 10 characters of the stringified value (presumably
    # the YYYY-MM-DD part) before parsing it as a datetime.
    df_city['stat_date'] = df_city['stat_date'].map(lambda x: str(x).strip()[:10])
    df_city['stat_date'] = pd.to_datetime(df_city['stat_date'])
    # Only referenced by the disabled deviation analysis that is kept as
    # comments at the end of the file.
    list_goal = []
    list_industry = []

    # Maps industry column name -> 30 values (27 observed + 3 forecast).
    result_dict = {}
    # Every column from the fourth onward is treated as one time series.
    for industry in df_city.columns[3:]:
        s1 = df_city[['stat_date', industry]]
        in_window = (s1['stat_date'] >= '2022-09-30') & (s1['stat_date'] <= '2023-11-27')
        # Prophet expects the columns to be called 'ds' and 'y'.
        ds_train = (
            s1[in_window]
            .sort_values(by='stat_date')
            .rename(columns={'stat_date': 'ds', industry: 'y'})
        )
        df_train = ds_train.copy()

        # Blank out the values rejected by normal() and carry the previous
        # accepted value forward. Series.ffill() replaces the deprecated
        # fillna(method='ffill') spelling.
        df_train['y'] = df_train['y'].where(normal(df_train['y']), other=np.nan).ffill()

        # NOTE(review): stat_date is truncated to a date above, so the series
        # is at most daily; Prophet documents daily seasonality as meant for
        # sub-daily data - confirm daily_seasonality=True is intended.
        model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True)
        model.add_country_holidays(country_name="CN")
        model.fit(df_train)
        # Training stops at 2023-11-27, so three extra days reach 2023-11-30.
        future = model.make_future_dataframe(periods=3, freq='D')
        predict = model.predict(future)
        print(city[:2], industry)

        predict = (
            predict[['ds', 'yhat']]
            .set_index('ds')
            .loc['2023-11']
            .rename(columns={'yhat': '售电量'})
        )
        ds_train = ds_train.rename(columns={'y': '售电量'})

        # Observed (unfiltered) values for Nov 1-27 followed by the last three
        # forecast rows of November.
        # NOTE(review): this assumes every day of 2023-11-01..27 is present in
        # the workbook; otherwise the list is shorter than the 30-row index
        # used below and DataFrame construction fails.
        result = pd.concat((ds_train.set_index('ds').loc['2023-11'][:27], predict[-3:]))
        result_dict[industry] = list(result['售电量'])

    # Append mode (and if_sheet_exists) only works on an existing workbook, so
    # fall back to plain write mode the first time the file is created.
    workbook_exists = os.path.exists(output_path)
    with pd.ExcelWriter(
        output_path,
        mode='a' if workbook_exists else 'w',
        if_sheet_exists='replace' if workbook_exists else None,
        engine='openpyxl',
    ) as writer:
        # The sheet is named after the first two characters of the file name.
        pd.DataFrame(result_dict, index=november_days).to_excel(writer, sheet_name=city[:2])
10 months ago
# df = predict.join(s1.set_index('ds')).loc['2023-8']
# df['偏差率'] = (df['y'] - df['yhat']) / df['y']
# df['goal'] = (df['y'] - df['yhat'])[-3:].sum() / df['y'].sum()
# list_goal.append((df['y'] - df['yhat'])[-3:].sum() / df['y'].sum())
# list_industry.append(industry)
# df = pd.DataFrame({'industry': list_industry, 'goal': list_goal})
# df.to_csv(fr'C:\Users\鸽子\Desktop\行业8月偏差\{city[:2]}_goal.csv', index=False, encoding='gbk')
#
# with open(r'C:\Users\鸽子\Desktop\goal_8.txt','a') as f:
# f.write(f'{city[:2]}\n')
# df['goal'].value_counts(bins=[-np.inf,-0.05, -0.01, -0.005, 0, 0.005, 0.01, 0.02, 0.05,np.inf], sort=False).to_csv(f,header=False,sep='\t')