"""Per-organisation short-term forecast of the '1-10kv' series with Prophet.

Reads the county-level CSV, removes IQR outliers from the '1-10kv' column of
each organisation, fits a Prophet model on everything except the last few
observations of the 2019-01-01 .. 2023-10-31 window, and prints the forecast
next to the observed values for October 2023.
"""
import math
import os

import matplotlib.pyplot as plt
import pandas as pd
from prophet import Prophet

pd.set_option('display.width', None)

CSV_PATH = r'C:\Users\鸽子\Desktop\浙江各区县数据(2).csv'
# Columns that are discarded right after loading.
DROPPED_COLUMNS = ['500kv(含330kv)及以上', '220kv', '110kv(含66kv)', '20kv', 'power_sal']
# Columns divided by 10000 before modelling.
SCALED_COLUMNS = ['1-10kv', '35kv', '0.4kv及以下']
SCALE_DIVISOR = 10000
# Upper bound on the number of organisations processed (the original script
# looped over exactly the first 30 distinct org_name values).
MAX_ORGS = 30
# Number of trailing observations held out of training, and number of days
# forecast past the end of the training data.
FORECAST_DAYS = 3
WINDOW_START = '2019-01-01'
WINDOW_END = '2023-10-31'


def normal(x):
    """Return the entries of *x* that lie inside the 1.5 * IQR fences.

    Args:
        x: numeric pandas Series.

    Returns:
        The sub-Series of *x* (original index preserved) whose values satisfy
        Q1 - 1.5 * IQR <= value <= Q3 + 1.5 * IQR. NaN values are dropped
        because they fail both comparisons.
    """
    # quantile() uses the same linear interpolation as describe(), without
    # computing the full summary six times.
    q1 = x.quantile(0.25)
    q3 = x.quantile(0.75)
    high = q3 + 1.5 * (q3 - q1)
    low = q1 - 1.5 * (q3 - q1)
    return x[(x <= high) & (x >= low)]


df = pd.read_csv(CSV_PATH)
# Header cells may carry stray whitespace.
df.columns = df.columns.map(lambda name: name.strip())
df = df.drop(columns=DROPPED_COLUMNS)
print(df.columns)
# Share of exact zeros per column, as a quick data-quality check.
print({col: (df[col] == 0).sum() / len(df) for col in df.columns})

# Distinct organisations in order of first appearance; computed once instead
# of on every iteration. Slicing avoids an IndexError when the file holds
# fewer than MAX_ORGS organisations.
org_names = df['org_name'].drop_duplicates().values

for i, org_name in enumerate(org_names[:MAX_ORGS]):
    # Work on an explicit copy so the scaling below never writes through a
    # view of `df` (and does not raise SettingWithCopyWarning).
    df_ct = df[df['org_name'] == org_name].copy()
    df_ct[SCALED_COLUMNS] = df_ct[SCALED_COLUMNS] / SCALE_DIVISOR

    s1 = df_ct[['日期', '1-10kv']].dropna(how='any')
    # Keep only the rows whose value survives the IQR outlier filter.
    s1 = s1.loc[normal(s1['1-10kv']).index]

    # Prophet requires the columns to be named `ds` (timestamp) and `y`
    # (value), and `ds` must be a datetime column.
    dd = s1.rename(columns={'日期': 'ds', '1-10kv': 'y'})
    dd['ds'] = pd.to_datetime(dd['ds'])

    # Train on the window minus its last FORECAST_DAYS rows. The held-out
    # rows are compared with the forecast through the join with `dd` below.
    in_window = (dd['ds'] >= WINDOW_START) & (dd['ds'] <= WINDOW_END)
    df_train = dd[in_window][:-FORECAST_DAYS]

    # Prophet cannot be fitted on fewer than two observations; skip this
    # organisation instead of aborting the whole run.
    if len(df_train) < 2:
        print(f'Skipping {org_name}: not enough training rows ({len(df_train)})')
        continue

    # The original author enabled yearly, weekly and daily seasonality on the
    # grounds that the data follows seasonal, weekly and daily patterns.
    model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True)
    # Use the Chinese public-holiday calendar; every other setting is default.
    model.add_country_holidays(country_name="CN")
    model.fit(df_train)

    # Extend the training dates by FORECAST_DAYS daily steps (3 days, not a
    # month as an earlier comment claimed).
    future = model.make_future_dataframe(periods=FORECAST_DAYS, freq='D')
    forecast = model.predict(future)
    # With additive components, yhat = trend + additive_terms, where
    # additive_terms is the sum of the seasonal components (weekly, yearly,
    # ...) plus the holiday effects when holidays are configured.

    # Keep only the point forecast, indexed by date.
    forecast = forecast[['ds', 'yhat']].set_index('ds')
    print(forecast)

    # Index-aligned join of forecast and observations; dropna() removes the
    # dates that lack either a forecast or an observed value.
    df_all = forecast.join(dd.set_index('ds')).dropna()
    print(df_all.loc['2023-10'])

    # Optional export: one sheet per organisation in a single workbook.
    # with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\output.xlsx', mode='a', if_sheet_exists='replace') as writer:
    #     df_all.to_excel(writer, sheet_name=f'Sheet{i+1}')

    # Optional plot of observed vs. predicted values.
    # df_all.plot()
    # plt.legend(['true', 'yhat'])
    # plt.show()