|
|
@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
import pandas as pd
|
|
|
|
|
|
|
|
from prophet import Prophet
|
|
|
|
|
|
|
|
import math
|
|
|
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# No fixed character width for printed frames: with None pandas auto-detects
# the terminal width when running in a terminal.
pd.options.display.width = None
|
|
|
|
|
|
|
|
def normal(x: pd.Series) -> pd.Series:
    """Return the values of *x* that lie inside the 1.5 * IQR fences.

    The fences are ``Q1 - 1.5 * IQR`` and ``Q3 + 1.5 * IQR``, where ``Q1`` and
    ``Q3`` are the 25th and 75th percentiles of *x* and ``IQR = Q3 - Q1``.
    Values exactly on a fence are kept. The result keeps the index labels of
    the surviving values, so ``result.index`` can be used to select the
    matching rows of a parent frame.

    Args:
        x: Numeric Series to filter.

    Returns:
        The subset of *x* within the fences. NaN entries are dropped because
        they fail both comparisons.
    """
    # Compute both quartiles once instead of calling describe() six times;
    # describe() derives its percentiles with the same default interpolation.
    q1, q3 = x.quantile([0.25, 0.75])
    iqr = q3 - q1
    high = q3 + 1.5 * iqr
    low = q1 - 1.5 * iqr
    return x[(x <= high) & (x >= low)]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Load the CSV, strip surrounding whitespace from every header (later code
# looks columns up by exact name), and discard the columns that are not used.
df = (
    pd.read_csv(r'C:\Users\鸽子\Desktop\浙江各区县数据(2).csv')
    .rename(columns=str.strip)
    .drop(columns=[
        '500kv(含330kv)及以上',
        '220kv',
        '110kv(含66kv)',
        '20kv',
        'power_sal',
    ])
)
print(df.columns)
# print(df.head())

# For each remaining column, print the fraction of rows whose value equals 0.
print({col: (df[col] == 0).sum() / len(df) for col in df.columns})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Keep only the rows belonging to the organisation in the first row of df.
# .iloc[0] selects that row by position, and .copy() makes df_ct an
# independent frame so the in-place scaling below cannot raise
# SettingWithCopyWarning or write through to df.
first_org = df['org_name'].iloc[0]
df_ct = df[df['org_name'] == first_org].copy()
print(df_ct.head())

# Divide the three voltage-level columns by 10000.
# NOTE(review): presumably a unit conversion - confirm the intended unit.
for column in ('1-10kv', '35kv', '0.4kv及以下'):
    df_ct[column] /= 10000

# Series to model: the date and the 1-10kv value. Rows with a missing value
# are dropped with a non-inplace dropna, so no slice of df_ct is mutated.
s1 = df_ct[['日期', '1-10kv']].dropna(how='any')
# Keep only the rows whose 1-10kv value is retained by normal().
s1 = s1.loc[normal(s1['1-10kv']).index]
print(s1)

# Optional visual check of the cleaned series:
# plt.plot(range(len(s1)),s1['1-10kv'])
# plt.show()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Prophet requires the timestamp column to be named "ds" and the target "y",
# and "ds" must hold datetime values, hence the rename plus pd.to_datetime.
dd = s1.rename(columns={'日期': 'ds', '1-10kv': 'y'}).assign(
    ds=lambda frame: pd.to_datetime(frame['ds'], format='%Y-%m-%d')
)

# Positional split: the first 1580 rows train the model and the remainder is
# the validation set; the forecast horizon is the following month.
# NOTE(review): df_test is not used anywhere in the visible code.
df_train = dd.iloc[:1580]
df_test = dd.iloc[1580:]

# The data is expected to follow seasonal, weekly and daily patterns, so all
# three seasonalities are enabled.
# NOTE(review): the forecast below uses freq='D'; confirm that daily
# seasonality is meaningful for data sampled once per day.
model = Prophet(
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=True,
)
# Use the Chinese holiday calendar; every other parameter keeps its default.
model.add_country_holidays(country_name="CN")
model.fit(df_train)

# make_future_dataframe tells the model how far ahead to predict and at what
# frequency: 30 daily periods, i.e. one month.
future = model.make_future_dataframe(periods=30, freq='D')

# Run the prediction; the result is returned as the forecast frame.
forecast = model.predict(future)
# forecast['additive_terms'] = forecast['weekly'] + forecast['yearly'], and
# forecast['yhat'] = forecast['trend'] + forecast['additive_terms'], so
# forecast['yhat'] = forecast['trend'] + forecast['weekly'] + forecast['yearly'].
# With holiday effects this becomes
# forecast['yhat'] = forecast['trend'] + forecast['weekly'] + forecast['yearly'] + forecast['holidays'].
# NOTE(review): daily seasonality is enabled above, so presumably a 'daily'
# term contributes as well - verify against forecast.columns.
print(forecast)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|