You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

83 lines
2.7 KiB
Python

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import xgboost as xgb
import pandas as pd
import os
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
mpl.rcParams['font.sans-serif']=['kaiti']
import random
import matplotlib.pyplot as plt
def season(x):
    """Return 1 when *x* falls in June-August or December-February, else 0.

    Args:
        x: Either a date-like object exposing an integer ``month`` attribute
            (for example ``datetime.date`` or ``pandas.Timestamp``), or any
            value whose ``str()`` form looks like ``YYYY-MM-...`` so that
            characters 5-6 hold the zero-padded month.

    Returns:
        int: 1 for months 06, 07, 08, 12, 01 and 02; 0 for anything else
        (including values whose month cannot be determined).
    """
    month = getattr(x, 'month', None)
    # Prefer the real month number when available so the result does not
    # depend on how the object happens to render as a string.
    if isinstance(month, int) and not isinstance(month, bool):
        return 1 if month in (6, 7, 8, 12, 1, 2) else 0
    # Fallback for plain strings (and anything else): slice the month out of
    # an ISO-style 'YYYY-MM-DD...' rendering.
    if str(x)[5:7] in ('06', '07', '08', '12', '01', '02'):
        return 1
    return 0
# Flat training / evaluation script for the '售电量' target:
#   1. load the spreadsheet and derive the 'season' flag,
#   2. train an XGBoost regressor on 2021-01 .. 2023-08,
#   3. print error figures for a random hold-out and for 2023-09,
#   4. score a few hand-entered rows with the model stored in 'huzhou.bin'.
pd.set_option('display.width', None)

# Feature columns fed to the model (order matters for positional input) and
# the regression target column.
FEATURE_COLS = ['tem_max', 'tem_min', 'holiday', '24ST', 'season']
TARGET_COL = '售电量'

# The sheet is indexed by its 'dtdate' column, parsed as YYYY-MM-DD dates.
df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\湖州数据.xlsx', index_col='dtdate')
df.index = pd.to_datetime(df.index, format='%Y-%m-%d')
df['season'] = df.index.map(season)
print(df.head())

# Partial-string indexing on the DatetimeIndex: September 2023 is held back
# for evaluation, January 2021 through August 2023 is used for training.
df_eval = df.loc['2023-9']
df_train = df.loc['2021-1':'2023-8']
print(len(df_eval), len(df_train), len(df))
df_train = df_train[['tem_max', 'tem_min', 'holiday', '24ST', 'rh', 'prs', '售电量', 'season']]

# Optional IQR-based outlier filter on the target, currently disabled.
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
# high = df['售电量'].describe()['75%'] + 1.5*IQR
# low = df['售电量'].describe()['25%'] - 1.5*IQR
# print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] <= low)]))
#
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]

X = df_train[FEATURE_COLS]
X_eval = df_eval[FEATURE_COLS]
y = df_train[TARGET_COL]

# 10% random hold-out with a fixed seed so the split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=100)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
result_test = pd.DataFrame({'test': y_test, 'pred': y_pred}, index=y_test.index)

# Print the metric: mean absolute error divided by the mean of the hold-out target.
print(abs(y_test - y_pred).mean() / y_test.mean())

eval_pred = model.predict(X_eval)
result_eval = pd.DataFrame({'eval': df_eval[TARGET_COL], 'pred': eval_pred}, index=df_eval[TARGET_COL].index)

# Signed deviation (actual - predicted) over the last 3 rows of the evaluation
# month, expressed as a fraction of the whole month's actual total.
goal = (result_eval['eval'].iloc[-3:].sum() - result_eval['pred'].iloc[-3:].sum()) / result_eval['eval'].sum()
print(goal)
# Same ratio over the last 23 rows.
# NOTE(review): the disabled report line below labels this "9号-月底" (9th to
# month end); with 30 daily rows the last 23 start on the 8th - confirm the
# intended window.
goal2 = (result_eval['eval'].iloc[-23:].sum() - result_eval['pred'].iloc[-23:].sum()) / result_eval['eval'].sum()
print(goal2)

# Optional export of the evaluation results and deviation rates, currently disabled.
# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\湖州.csv')
# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
# f.write(f'湖州月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')

# Save the model (disabled; 'huzhou.bin' must already exist for the load below).
# model.save_model('huzhou.bin')
loaded_model = xgb.XGBRegressor()
loaded_model.load_model('huzhou.bin')

import numpy as np
# Hand-entered rows to score; columns are positional and are consumed in
# FEATURE_COLS order (tem_max, tem_min, holiday, 24ST, season).
# NOTE(review): the third column is a constant 23 and the fourth is 0/1, which
# looks like 'holiday' and '24ST' may be swapped relative to the training
# order - confirm before trusting these predictions.
X_eval = np.array([[22.2, 14.8, 23, 0, 0],
                   [23.4, 15.9, 23, 1, 0],
                   [22.5, 15.6, 23, 1, 0],
                   [23.8, 14.3, 23, 0, 0],
                   [23.9, 14.0, 23, 0, 0]])
# Predict with the model restored from disk; it was loaded above for exactly
# this purpose and would otherwise be unused.
print(loaded_model.predict(X_eval))