You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

120 lines
5.0 KiB
Python

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import numpy as np
import pandas as pd
from prophet import Prophet
import math
import matplotlib.pyplot as plt
import os
from openpyxl import Workbook
# Remove the fixed display width; per the pandas docs, None lets pandas
# auto-detect the terminal width when printing frames.
pd.options.display.width = None
def normal(x):
    """Return a boolean mask of the values of *x* inside the 1.5 x IQR fences.

    Args:
        x: Numeric pandas Series.

    Returns:
        Boolean Series with the same index as *x*. An entry is True when
        ``Q1 - 1.5 * IQR <= value <= Q3 + 1.5 * IQR`` and False otherwise;
        NaN entries compare False.
    """
    # Compute each quartile once instead of re-running describe() per term.
    q1 = x.quantile(0.25)
    q3 = x.quantile(0.75)
    spread = 1.5 * (q3 - q1)
    high = q3 + spread
    low = q1 - spread
    return (x <= high) & (x >= low)
# Load the raw table and normalise it before forecasting.
df = pd.read_csv(r'C:\Users\鸽子\Desktop\浙江各区县数据(2).csv')
# Column headers may carry stray whitespace; strip it so lookups by name work.
df.columns = df.columns.str.strip()
# Rows without a city or county name cannot be attributed to any output sheet.
df = df.dropna(subset=['city_name', 'county_name'])
# DataFrame.info() prints its report itself and returns None, so it must not
# be wrapped in print() (that would emit an extra "None" line).
df.info()
print(df.columns)
# Share of exact-zero entries per column, as a {column: ratio} mapping.
print((df == 0).mean().to_dict())
# Module-level accumulators filled while the forecasting loop runs:
#   yc_org_list - county names recorded when a series is skipped for having
#                 too few training rows
#   list_fl     - per-forecast "goal" values reported in the final summary
#   list_org    - county names recorded for forecasts that were produced
#   list1       - county names recorded by the forecast-length check
yc_org_list, list_fl, list_org, list1 = [], [], [], []
# Start every city with a fresh workbook file; the forecasting loop later
# opens these files in append mode and adds one sheet per county.
output_dir = r'C:\Users\鸽子\Desktop\11月区县分压预测'
# Workbook.save() does not create missing folders, so make sure it exists.
os.makedirs(output_dir, exist_ok=True)
for city in df['city_name'].drop_duplicates():
    Workbook().save(fr'{output_dir}\{city}.xlsx')
# Training window (inclusive bounds) and reporting month (end exclusive).
train_start = pd.Timestamp('2023-01-01')
train_end = pd.Timestamp('2023-11-30')
report_start = pd.Timestamp('2023-11-01')
report_end = pd.Timestamp('2023-12-01')

# One pass per county, in order of first appearance in the table.
for org, df_org in df.groupby('county_name', sort=False):
    # Only names ending in '供电公司' are forecast.
    if not org.strip().endswith('供电公司'):
        continue
    city = df_org['city_name'].iloc[0]
    level_forecasts = []

    # NOTE(review): assumes the first three columns are identifier/date
    # columns and every later column is one series to forecast - confirm
    # against the CSV layout.
    for level in df_org.columns[3:]:
        # Zeros are treated as missing readings and dropped. Columns are
        # renamed to the names Prophet requires: ds (timestamp), y (value).
        dd = (
            df_org[['pt_date', level]]
            .replace(0, np.nan)
            .dropna(how='any')
            .rename(columns={'pt_date': 'ds', level: 'y'})
        )
        dd['ds'] = pd.to_datetime(dd['ds'].str.strip())
        dd = dd.drop_duplicates()

        # Keep only the training window; .copy() so the outlier clean-up
        # below writes to an independent frame rather than a slice.
        in_window = (dd['ds'] >= train_start) & (dd['ds'] <= train_end)
        df_train = dd[in_window].copy()
        if len(df_train) <= 90:
            # Too little history for this series: record the county and skip.
            yc_org_list.append(org)
            continue
        # Values outside the 1.5 x IQR fences are blanked and back-filled
        # from the next valid observation.
        df_train['y'] = df_train['y'].where(normal(df_train['y']), other=np.nan).bfill()

        # Yearly, weekly and daily seasonality are all enabled, Chinese public
        # holidays are added, and every other Prophet parameter stays default.
        model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True)
        model.add_country_holidays(country_name="CN")
        model.fit(df_train)
        # Extend the history by 4 daily steps and predict over all dates.
        future = model.make_future_dataframe(periods=4, freq='D')
        forecast = model.predict(future)[['ds', 'yhat']].set_index('ds').sort_index()

        # Restrict to the reporting month with a boolean mask, which yields
        # an empty frame (not a KeyError) when no row falls inside it.
        in_report = (forecast.index >= report_start) & (forecast.index < report_end)
        forecast = forecast[in_report].copy()

        # Replace the prediction with the observed value for the first 25
        # observed dates of the month. Assignment is aligned on the date
        # index, so missing days cannot shift values onto the wrong date.
        actual = dd.drop_duplicates(subset='ds').set_index('ds')['y'].sort_index()
        actual = actual[(actual.index >= report_start) & (actual.index < report_end)].iloc[:25]
        shared = forecast.index.intersection(actual.index)
        forecast.loc[shared, 'yhat'] = actual.loc[shared]

        # NOTE(review): forecast now holds a single month, so this length
        # check is always true; kept unchanged pending confirmation of intent.
        if len(forecast) < 334:
            list1.append(org)

        forecast.columns = [level]
        level_forecasts.append(forecast)
        list_org.append(org)

    # Combine all series of this county side by side on the date index.
    if level_forecasts:
        df_result = pd.concat(level_forecasts, axis=1).sort_index()
    else:
        df_result = pd.DataFrame({})
    # Append (or replace) this county's sheet in its city's workbook; the
    # workbook file must already exist because the writer opens in mode 'a'.
    with pd.ExcelWriter(fr'C:\Users\鸽子\Desktop\11月区县分压预测\{city}.xlsx',mode='a',engine='openpyxl',if_sheet_exists='replace') as writer:
        df_result.to_excel(writer,sheet_name=f'{org}')
# Counties that had a series skipped for insufficient training data.
print(yc_org_list)
# Build both columns as Series so that lists of different lengths are padded
# with NaN instead of raising ValueError (list_fl may hold fewer entries than
# list_org, or none at all).
df = pd.DataFrame({
    'org': pd.Series(list_org, dtype=object),
    'goal': pd.Series(list_fl, dtype=float),
})
print(df)
# Binned distribution of the goal values; skipped when there is nothing to bin.
if df['goal'].notna().any():
    print(df['goal'].value_counts(bins=[-0.05,-0.01,-0.005,0, 0.005, 0.01, 0.02,0.05],sort=False))
# # Create an ExcelWriter object
# with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\output.xlsx',mode='a',if_sheet_exists='replace') as writer:
# # Write each sub-table to its own worksheet of the same Excel file
# df_all.to_excel(writer, sheet_name=f'Sheet{i+1}')
# df_all.plot()
# # Set the legend shown in the top-left corner
# plt.legend(['true', 'yhat'])
# plt.show()