# NOTE: repository web-page residue stood here (a topic-selection notice and
# the file stats "111 lines, 4.9 KiB, Python"); it is not part of the script.
import numpy as np
11 months ago
import pandas as pd
from prophet import Prophet
import math
import matplotlib.pyplot as plt
10 months ago
import os
10 months ago
from openpyxl import Workbook
11 months ago
pd.set_option('display.width',None)
def normal(x: pd.Series) -> pd.Series:
    """Return the values of *x* that fall inside the 1.5 * IQR fences.

    The lower fence is ``Q1 - 1.5 * (Q3 - Q1)`` and the upper fence is
    ``Q3 + 1.5 * (Q3 - Q1)``; both fences are inclusive.  The returned
    Series keeps the original index labels of the surviving values, so
    callers can use ``.index`` to select the matching rows elsewhere.

    Args:
        x: Numeric Series to filter.

    Returns:
        The sub-Series of *x* whose values lie within the fences.  An empty
        input yields an empty result (comparisons against NaN are False).
    """
    # Compute each quartile once instead of rebuilding the full describe()
    # summary for every term of the fence expressions.
    q1 = x.quantile(0.25)
    q3 = x.quantile(0.75)
    spread = 1.5 * (q3 - q1)
    return x[(x <= q3 + spread) & (x >= q1 - spread)]
# ---------------------------------------------------------------------------
# Per-organisation forecast of the '0.4kv及以下' series with Prophet.
#
# For every organisation whose name ends in '供电公司', the script fits a
# Prophet model on the outlier-filtered history, predicts 3 further days,
# joins the predictions with the observed values and records a "goal" ratio
# for July 2023.  Organisations that are skipped (too little data, or no
# July 2023 rows) are collected in ``yc_org_list``.
# ---------------------------------------------------------------------------

HOLDOUT_DAYS = 3          # trailing rows withheld from training / forecast horizon
MIN_TRAIN_ROWS = 180      # organisations with this many training rows or fewer are skipped
WINDOW_START = '2022-01-01'
WINDOW_END = '2023-07-31'

df = pd.read_csv(r'C:\Users\鸽子\Desktop\浙江各区县数据(2).csv')
# Column headers may carry surrounding whitespace; normalise before use.
df.columns = df.columns.map(lambda x: x.strip())
df.drop(
    columns=['500kv(含330kv)及以上', '220kv', '110kv(含66kv)', '20kv', 'power_sal'],
    inplace=True,
)
print(df.columns)

# Share of exact zeros per column (zeros are treated as missing further down).
print({col: (df[col] == 0).sum() / len(df) for col in df.columns})

yc_org_list = []  # organisations skipped or failed
list_fl = []      # goal ratio per successfully evaluated organisation
list_org = []     # organisation names, kept aligned one-to-one with list_fl

for city in df[''].drop_duplicates():
    df_ct = df[df[''] == city]
    # wb = Workbook()
    # wb.save(fr'C:\Users\鸽子\Desktop\9月0.4kv区县预测\{city}.xlsx')

    for org in df_ct['org_name'].drop_duplicates():
        # Only organisations whose (stripped) name ends with '供电公司' are modelled.
        if org.strip()[-4:] != '供电公司':
            continue

        # Work on an explicit copy so the in-place scaling below never
        # writes through to (or warns about) the parent frame.
        df_org = df_ct[df_ct['org_name'] == org].copy()
        # NOTE(review): the 10000 divisor looks like a unit rescale — confirm.
        df_org['1-10kv'] /= 10000
        df_org['35kv'] /= 10000
        df_org['0.4kv及以下'] /= 10000

        # Treat zeros as missing readings and drop those rows.
        # (np.nan rather than np.NaN: the latter alias was removed in NumPy 2.0.)
        s1 = df_org[['日期', '0.4kv及以下']].replace(0, np.nan).dropna(how='any')
        # plt.plot(range(len(s1)),s1['1-10kv'])
        # plt.show()

        # Rename to the column names Prophet requires: ds (timestamp) and y (value).
        dd = s1.rename(columns={'日期': 'ds', '0.4kv及以下': 'y'})
        dd['ds'] = pd.to_datetime(dd['ds'])

        # Split the data: everything in the window except the last
        # HOLDOUT_DAYS rows is used for training.
        window = dd[(dd['ds'] >= WINDOW_START) & (dd['ds'] <= WINDOW_END)]
        df_train = window.iloc[:-HOLDOUT_DAYS]
        # Keep only the training rows whose y survives the IQR outlier filter.
        df_train = df_train.loc[normal(df_train['y']).index]
        if df_train.shape[0] <= MIN_TRAIN_ROWS:
            yc_org_list.append(org)
            continue

        # The data is assumed to follow yearly, weekly and daily patterns,
        # so all three seasonalities are switched on.
        model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True)
        # Use the Chinese holiday calendar; all other parameters stay at defaults.
        model.add_country_holidays(country_name="CN")
        model.fit(df_train)

        # make_future_dataframe tells the model how far ahead to predict and
        # at which frequency; it produces the timestamps to predict for.
        future = model.make_future_dataframe(periods=HOLDOUT_DAYS, freq='D')
        # Run the prediction; the result is returned as `forecast`.
        forecast = model.predict(future)
        # forecast['additive_terms'] = forecast['weekly'] + forecast['yearly']
        # and forecast['yhat'] = forecast['trend'] + forecast['additive_terms'],
        # hence forecast['yhat'] = forecast['trend'] + forecast['weekly'] + forecast['yearly'].
        # With holiday effects:
        # forecast['yhat'] = forecast['trend'] + forecast['weekly'] + forecast['yearly'] + forecast['holidays'].
        # print(forecast)

        # Keep only ds and the predicted value yhat, indexed by ds.
        forecast = forecast[['ds', 'yhat']].set_index('ds')
        # Join on the ds index with the observed values; dropna removes
        # rows that lack either a prediction or an observation.
        df_all = forecast.join(dd.set_index('ds')).dropna()
        df_all['org_name'] = org
        df_all['偏差率'] = (df_all['y'] - df_all['yhat']) / df_all['y']
        df_all.rename(columns={'y': '真实值', 'yhat': '预测值'}, inplace=True)
        df_all = df_all[['org_name', '真实值', '预测值', '偏差率']]

        try:
            # Partial-string lookup of the July 2023 rows; this is expected to
            # raise KeyError when the organisation has no rows in that month.
            result = df_all.loc['2023-7'].copy()
        except KeyError:
            yc_org_list.append(org)
            continue

        # goal = summed (actual - predicted) over the last HOLDOUT_DAYS rows
        # of July 2023, divided by the summed actuals of all July 2023 rows.
        goal = (result['真实值'] - result['预测值']).iloc[-HOLDOUT_DAYS:].sum() / result['真实值'].sum()
        result['goal'] = goal
        # Record the organisation only together with its goal so that
        # list_org and list_fl always have the same length.
        list_org.append(org)
        list_fl.append(goal)
        # with pd.ExcelWriter(fr'C:\Users\鸽子\Desktop\9月0.4kv区县预测\{city}.xlsx',mode='a',engine='openpyxl',if_sheet_exists='replace') as writer:
        #     result.to_excel(writer,sheet_name=f'{org}')

df = pd.DataFrame({'org': list_org, 'goal': list_fl})
print(df)
print(df['goal'].value_counts(bins=[-0.05, -0.01, -0.005, 0, 0.005, 0.01, 0.02, 0.05], sort=False))
# print(yc_org_list)

# # Create an ExcelWriter object
# with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\output.xlsx',mode='a',if_sheet_exists='replace') as writer:
#     # Write the separate sub-frames to different sheets of one Excel file
#     df_all.to_excel(writer, sheet_name=f'Sheet{i+1}')

# df_all.plot()
# # Set the legend labels
# plt.legend(['true', 'yhat'])
# plt.show()