You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

122 lines
4.1 KiB
Python

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import xgboost as xgb
import pandas as pd
import os
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rcParams['font.sans-serif']=['kaiti']
pd.set_option('display.width',None)
def hf_season(x):
list1= []
for i in range(1,13):
if x.loc[f'2021-{i}'].mean() >= x.describe()['75%']:
list1.append(i)
return list1
def season(x):
if str(x)[5:7] in ('06','07','08','12','01','02'):
return 1
else:
return 0
def month(x):
if str(x)[5:7] in ('08','09','10','12','01','02'):
return 1
else:
return 0
def normal(nd):
high = nd.describe()['75%'] + 1.5*(nd.describe()['75%']-nd.describe()['25%'])
low = nd.describe()['25%'] - 1.5*(nd.describe()['75%']-nd.describe()['25%'])
return nd[(nd<high)&(nd>low)]
data = pd.read_excel(r'C:\python-project\pytorch3\入模数据\杭州数据.xlsx',index_col='dtdate')
data.index = pd.to_datetime(data.index,format='%Y-%m-%d')
data = data.loc[normal(data['售电量']).index]
# for i in range(1,13):
# plt.plot(range(len(data['售电量'][f'2022-{i}'])),data['售电量'][f'2022-{i}'])
# plt.show()
print(data['售电量']['2022-9'])
plt.plot(range(len(data['售电量']['2022-7'])),data['售电量']['2022-7'])
plt.plot(range(len(data['售电量']['2022-7']),len(data['售电量']['2022-7'])+len(data['售电量']['2023-7'])),data['售电量']['2023-7'])
# plt.plot(range(len(data['售电量'][['2022-9','2023-9']])),data['售电量'][['2022-9','2023-9']])
plt.show()
# print(hf_season(data.loc['2021']['售电量']))
data['month'] = data.index.strftime('%Y-%m-%d').str[6]
data['month'] = data['month'].astype('int')
data['season'] = data.index.map(season)
print(data.head(50))
df_eval = data.loc['2023-7']
df_train = data.loc['2021-1':'2023-6']
# df_train = df[500:850]
print(len(df_eval),len(df_train),len(data))
print(data.drop(columns='city_name').corr(method='pearson')['售电量'])
df_train = df_train[['tem_max','tem_min','24ST','rh','rh_max','prs','prs_max','prs_min','售电量','month','holiday','season']]
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
# high = df['售电量'].describe()['75%'] + 1.5*IQR
# low = df['售电量'].describe()['25%'] - 1.5*IQR
# print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] <= low)]))
#
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
X = df_train[['tem_max','tem_min','24ST','holiday','season']]
X_eval = df_eval[['tem_max','tem_min','24ST','holiday','season']]
y = df_train['售电量']
print(y.describe())
# best_goal = 1
# best_i = {}
# for i in range(400):
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.15,random_state=42)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train,y_train)
y_pred = model.predict(x_test)
result_test = pd.DataFrame({'test':y_test,'pred':y_pred},index=y_test.index)
# 指标打印
print(abs(y_test - y_pred).mean() / y_test.mean())
eval_pred = model.predict(X_eval)
result_eval = pd.DataFrame({'eval':df_eval['售电量'],'pred':eval_pred},index=df_eval['售电量'].index)
print((result_eval['eval'].sum()-result_eval['pred'].sum())/result_eval['eval'].sum())
goal = (result_eval['eval'][-3:].sum()-result_eval['pred'][-3:].sum())/result_eval['eval'].sum()
print('goal:',goal)
goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum()
print('goal2:',goal2)
print(result_eval)
print('r2:',r2_score(y_test,y_pred))
# if abs(goal) < best_goal:
# best_goal = abs(goal)
# best_i['best_i'] = i
# x = goal2
# print(best_i,best_goal,x)
# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\杭州.csv')
# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
# f.write(f'杭州月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# # 保存模型
# model.save_model('hangzhou.bin')
# loaded_model = xgb.XGBRegressor()
# loaded_model.load_model('hangzhou.bin')
# model.predict(X_eval)