"""Forecast the last days of November 2023 industry electricity sales per city.

For every workbook found in ``file_dir`` the script fits one Prophet model per
industry column on the daily history between ``TRAIN_START`` and ``TRAIN_END``,
forecasts ``HORIZON_DAYS`` further days, and writes one sheet per workbook to
``OUTPUT_PATH``.  Each sheet holds the raw actual values for the part of the
target month that is covered by the history, followed by the forecast values.
"""
import datetime
import os

import numpy as np
import pandas as pd

file_dir = './浙江各地市行业电量数据'
# df = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江电量20231202.xlsx')

OUTPUT_PATH = r'C:\Users\鸽子\Desktop\行业电量v1130.xlsx'
TRAIN_START = '2022-09-30'
TRAIN_END = '2023-11-27'
TARGET_MONTH_START = '2023-11-01'
TARGET_MONTH_END = '2023-11-30'
HORIZON_DAYS = 3


def normal(data):
    """Return a boolean mask of the values lying inside the 1.5 * IQR fences.

    Args:
        data: numeric ``pandas.Series``.

    Returns:
        Boolean ``pandas.Series`` with the same index as ``data``; ``True``
        where ``Q1 - 1.5 * IQR <= value <= Q3 + 1.5 * IQR``.  NaN values
        compare as ``False``.
    """
    # Two quantile calls instead of four full ``describe()`` passes; both use
    # the same (linear interpolation) quartile definition.
    q1 = data.quantile(0.25)
    q3 = data.quantile(0.75)
    high = q3 + 1.5 * (q3 - q1)
    low = q1 - 1.5 * (q3 - q1)
    return (data <= high) & (data >= low)


def _load_city(path):
    """Read one workbook and parse its ``stat_date`` column to datetimes."""
    df_city = pd.read_excel(path)
    # Only the first 10 characters (``YYYY-MM-DD``) of the textual value are
    # kept before parsing, which discards any trailing time component.
    df_city['stat_date'] = df_city['stat_date'].map(lambda x: str(x).strip()[:10])
    df_city['stat_date'] = pd.to_datetime(df_city['stat_date'])
    return df_city


def _training_frame(df_city, industry):
    """Return the history of ``industry`` as a Prophet-style ``ds``/``y`` frame."""
    in_window = (df_city['stat_date'] >= TRAIN_START) & (df_city['stat_date'] <= TRAIN_END)
    history = df_city.loc[in_window, ['stat_date', industry]].sort_values(by='stat_date')
    # Non-inplace rename: renaming a filtered slice in place triggers pandas'
    # SettingWithCopy warnings.
    return history.rename(columns={'stat_date': 'ds', industry: 'y'})


def _fit_and_forecast(history):
    """Fit Prophet on ``history`` and return ``yhat`` indexed by date.

    Values outside the IQR fences (see ``normal``) are blanked and
    forward-filled before fitting; ``history`` itself is left untouched.
    """
    # Imported lazily so the helpers in this module stay importable (e.g. for
    # testing ``normal``) on machines without the Prophet toolchain.
    from prophet import Prophet

    cleaned = history.copy()
    # ``fillna(method='ffill')`` is deprecated in recent pandas; ``ffill()``
    # is the supported spelling.
    cleaned['y'] = cleaned['y'].where(normal(cleaned['y']), other=np.nan).ffill()

    model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True)
    model.add_country_holidays(country_name="CN")
    model.fit(cleaned)
    future = model.make_future_dataframe(periods=HORIZON_DAYS, freq='D')
    prediction = model.predict(future)
    return prediction.set_index('ds')['yhat']


def _month_series(history, yhat, target_days):
    """Combine raw actuals with the last ``HORIZON_DAYS`` forecast values.

    The result is aligned on ``target_days`` by date rather than by position,
    so a missing day shows up as NaN instead of shifting later values or
    producing a length mismatch when the output frame is built.
    """
    actual = history.set_index('ds')['y'].loc[target_days[0]:target_days[-1]]
    forecast = yhat.iloc[-HORIZON_DAYS:]
    return pd.concat([actual, forecast]).reindex(target_days)


def _open_writer(path):
    """Open ``path`` for sheet replacement, creating the workbook if needed."""
    if os.path.exists(path):
        return pd.ExcelWriter(path, mode='a', if_sheet_exists='replace', engine='openpyxl')
    # Append mode fails on a missing file, and ``if_sheet_exists`` is only
    # accepted in append mode, so the first run has to use plain write mode.
    return pd.ExcelWriter(path, mode='w', engine='openpyxl')


def main():
    """Forecast every industry of every city workbook and export the result."""
    target_days = pd.date_range(start=TARGET_MONTH_START, end=TARGET_MONTH_END, freq='D')

    for city in sorted(os.listdir(file_dir)):
        path = os.path.join(file_dir, city)
        # Skip sub-directories and the ``~$`` lock files Excel leaves next to
        # an open workbook; neither can be read as a workbook.
        if city.startswith('~$') or not os.path.isfile(path):
            continue

        df_city = _load_city(path)
        result_dict = {}
        # Industry columns start at the fourth column; the first three are
        # presumably identifying columns including ``stat_date`` -- TODO confirm.
        for industry in df_city.columns[3:]:
            history = _training_frame(df_city, industry)
            yhat = _fit_and_forecast(history)
            print(city[:2], industry)
            result_dict[industry] = _month_series(history, yhat, target_days)

        sheet = pd.DataFrame(result_dict, index=target_days)
        sheet.index = target_days.strftime('%Y-%m-%d')
        # The sheet is named after the first two characters of the file name
        # (presumably the city name -- verify against the data directory).
        with _open_writer(OUTPUT_PATH) as writer:
            sheet.to_excel(writer, sheet_name=city[:2])


if __name__ == "__main__":
    main()


# NOTE: legacy back-test of the August 2023 deviation, kept for reference only.
# It relied on per-city ``list_goal`` / ``list_industry`` accumulators that are
# no longer created by the live code above.
#
# df = predict.join(s1.set_index('ds')).loc['2023-8']
# df['偏差率'] = (df['y'] - df['yhat']) / df['y']
# df['goal'] = (df['y'] - df['yhat'])[-3:].sum() / df['y'].sum()
# list_goal.append((df['y'] - df['yhat'])[-3:].sum() / df['y'].sum())
# list_industry.append(industry)
# df = pd.DataFrame({'industry': list_industry, 'goal': list_goal})
# df.to_csv(fr'C:\Users\鸽子\Desktop\行业8月偏差\{city[:2]}_goal.csv', index=False, encoding='gbk')
#
# with open(r'C:\Users\鸽子\Desktop\goal_8.txt','a') as f:
#     f.write(f'{city[:2]}\n')
#     df['goal'].value_counts(bins=[-np.inf,-0.05, -0.01, -0.005, 0, 0.005, 0.01, 0.02, 0.05,np.inf], sort=False).to_csv(f,header=False,sep='\t')