You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

74 lines
3.0 KiB
Python

11 months ago
import pandas as pd
from prophet import Prophet
import math
import matplotlib.pyplot as plt
# Let pandas choose the display width itself (None = auto-detect in a
# terminal) so the wide frames printed below are not wrapped early.
pd.set_option('display.width',None)
def normal(x, k=1.5):
    """Return the entries of ``x`` that lie inside the IQR outlier fences.

    The fences are ``Q1 - k * IQR`` and ``Q3 + k * IQR`` (both inclusive),
    where ``Q1``/``Q3`` are the 25th/75th percentiles of ``x`` and
    ``IQR = Q3 - Q1``.

    Args:
        x: Numeric ``pandas.Series`` to filter.
        k: Fence multiplier; the default 1.5 reproduces the original rule.

    Returns:
        The sub-series of ``x`` within the fences, keeping the original
        index labels. NaN entries never satisfy the comparison and are
        therefore dropped.
    """
    # One quantile call instead of recomputing the full describe() summary
    # four times just to read two quartiles.
    q1, q3 = x.quantile([0.25, 0.75])
    iqr = q3 - q1
    low = q1 - k * iqr
    high = q3 + k * iqr
    return x[(x <= high) & (x >= low)]
# Load the raw data set (the path points at the author's local machine).
df = pd.read_csv(r'C:\Users\鸽子\Desktop\浙江各区县数据(2).csv')
# Strip stray whitespace from the header cells so the column lookups below match.
df.columns = df.columns.str.strip()
# Drop the columns this analysis does not use.
df = df.drop(columns=['500kv(含330kv)及以上', '220kv', '110kv(含66kv)', '20kv', 'power_sal'])
print(df.columns)
# Fraction of exact-zero entries in each remaining column.
print({col: (df[col] == 0).sum() / len(df) for col in df.columns})

# Keep only the rows of the organisation found in the first row. ``iloc``
# is positional, so this does not depend on the index containing label 0.
# ``.copy()`` gives an independent frame, so the in-place scaling below is
# not chained assignment on a slice of ``df``.
first_org = df['org_name'].iloc[0]
df_ct = df[df['org_name'] == first_org].copy()
print(df_ct.head())
# Scale the three series down by a factor of 10000.
for col in ('1-10kv', '35kv', '0.4kv及以下'):
    df_ct[col] /= 10000

# Build the modelling series: date + '1-10kv', without missing values and
# without the points that fall outside the IQR fences computed by normal().
s1 = df_ct[['日期', '1-10kv']].dropna(how='any')
s1 = s1.loc[normal(s1['1-10kv']).index]
print(s1)

# Prophet expects the columns to be named ``ds`` (timestamp) and ``y`` (value),
# and ``ds`` must be a datetime column.
dd = s1.rename(columns={'日期': 'ds', '1-10kv': 'y'})
dd['ds'] = pd.to_datetime(dd['ds'])

# Hold out the last rows for validation and train on everything before them.
n_holdout = 3
df_train = dd[:-n_holdout]
df_test = dd[-n_holdout:]

# The author's rationale: the data is influenced by season, week and day,
# so all three seasonalities are switched on.
model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True)
# Use the Chinese holiday calendar; every other parameter keeps its default.
model.add_country_holidays(country_name="CN")
model.fit(df_train)

# Predict exactly on the held-out timestamps. Extending the training range by
# N calendar days only lines up with the test rows when the dates are
# contiguous daily; the outlier filter above can remove rows, and mismatched
# dates would be dropped by the join below without any warning.
forecast = model.predict(df_test[['ds']])
# Author's note on the forecast columns: yhat = trend + additive_terms, where
# additive_terms = weekly + yearly, plus holidays when a holiday calendar is set.

# Index both frames by ``ds`` and align prediction with observation.
df_test = df_test.set_index('ds')
forecast = forecast[['ds', 'yhat']].set_index('ds')
# join() aligns on the index; dropna() removes rows lacking either value.
df_all = forecast.join(df_test).dropna()
print(df_all)
# Plot the observed column first so the line order matches the legend labels
# (the joined frame is ordered [yhat, y], which mislabelled both lines).
df_all[['y', 'yhat']].plot()
plt.legend(['true', 'yhat'])
plt.show()
11 months ago