You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

55 lines
2.4 KiB
Python

11 months ago
import pandas as pd
from prophet import Prophet
import math
import matplotlib.pyplot as plt
# Let pandas auto-detect the terminal width when printing frames.
pd.options.display.width = None
def normal(x):
    """Return the entries of ``x`` that lie inside the 1.5 * IQR fences.

    The lower fence is ``Q1 - 1.5 * IQR`` and the upper fence is
    ``Q3 + 1.5 * IQR``, where ``Q1``/``Q3`` are the 25th/75th percentiles
    of ``x`` and ``IQR = Q3 - Q1``. Values on a fence are kept.

    Args:
        x: A numeric pandas Series.

    Returns:
        The sub-Series of ``x`` whose values are within the fences, with the
        original index labels preserved. NaN entries are never returned
        because they fail both comparisons.
    """
    # Compute both quartiles in one pass instead of calling describe()
    # repeatedly; these are the same values describe() reports as 25% / 75%.
    q1, q3 = x.quantile([0.25, 0.75])
    spread = 1.5 * (q3 - q1)
    high = q3 + spread
    low = q1 - spread
    return x[(x <= high) & (x >= low)]
# Read the raw CSV from the author's desktop.
df = pd.read_csv(r'C:\Users\鸽子\Desktop\浙江各区县数据(2).csv')
# Column headers may carry surrounding whitespace; strip it from every name.
df.columns = [name.strip() for name in df.columns]
# Discard the columns that the rest of the script does not use.
unused_columns = ['500kv(含330kv)及以上', '220kv', '110kv(含66kv)', '20kv', 'power_sal']
df = df.drop(columns=unused_columns)
print(df.columns)
# print(df.head())
# Report, per column, the fraction of rows whose value equals zero.
zero_ratio = {col: (df[col] == 0).sum() / len(df) for col in df.columns}
print(zero_ratio)
# Keep only the rows of the organisation that appears in the first row.
# .iloc[0] is positional, so it does not depend on the index holding label 0,
# and .copy() makes df_ct an independent frame so the in-place scaling below
# neither warns (SettingWithCopyWarning) nor depends on view/copy semantics.
df_ct = df[df['org_name'] == df['org_name'].iloc[0]].copy()
print(df_ct.head())
# Scale the three voltage-level columns down by a factor of 10,000.
df_ct['1-10kv'] = df_ct['1-10kv'] / 10000
df_ct['35kv'] = df_ct['35kv'] / 10000
df_ct['0.4kv及以下'] = df_ct['0.4kv及以下'] / 10000
# Take the date and 1-10kv columns and drop rows missing either value.
# dropna returns a new frame, avoiding an in-place edit of a column slice.
s1 = df_ct[['日期', '1-10kv']].dropna(how='any')
# Keep only the rows whose 1-10kv value survives the normal() filter.
s1 = s1.loc[normal(s1['1-10kv']).index]
print(s1)
# plt.plot(range(len(s1)),s1['1-10kv'])
# plt.show()
# Prophet expects its input columns to be named ``ds`` (date) and ``y``
# (value), and ``ds`` must be a datetime column, so rename the columns and
# convert the dates with pd.to_datetime.
dd = s1.rename(columns={'日期': 'ds', '1-10kv': 'y'}).assign(
    ds=lambda frame: pd.to_datetime(frame['ds'], format='%Y-%m-%d')
)
# Split the data into a training set (the first 1580 rows) and a validation
# set (the remaining rows); the forecast targets the following month.
df_train = dd.iloc[:1580]
df_test = dd.iloc[1580:]
# The series is expected to vary with the season, the week and the day, so
# all three seasonality components are switched on.
# NOTE(review): the data looks like one observation per day; confirm that
# daily_seasonality=True is actually wanted for such a series.
model = Prophet(
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=True,
)
# Use the Chinese holiday calendar; every other parameter keeps its default.
model.add_country_holidays(country_name="CN")
model.fit(df_train)
# make_future_dataframe tells the model how far ahead to forecast and at what
# frequency: here 30 daily periods, i.e. one month.
future = model.make_future_dataframe(periods=30, freq='D')
# Run the prediction; the result is returned as ``forecast``.
forecast = model.predict(future)
# forecast['additive_terms'] = forecast['weekly'] + forecast['yearly']
# and forecast['yhat'] = forecast['trend'] + forecast['additive_terms'],
# hence forecast['yhat'] = forecast['trend'] + forecast['weekly'] + forecast['yearly'].
# With a holiday component this becomes
# forecast['yhat'] = forecast['trend'] + forecast['weekly'] + forecast['yearly'] + forecast['holidays'].
print(forecast)