# Repository page metadata for this file: 117 lines, 4.0 KiB, Python; last changed 11 months ago.
import xgboost as xgb
import pandas as pd
import numpy as np
import os
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rcParams['font.sans-serif']=['kaiti']
pd.set_option('display.width',None)
def season(x):
    """Map a date to a coarse season code derived from its month.

    The month is taken from characters 5-6 of ``str(x)``, so ``x`` must
    render as ``YYYY-MM-...`` (for example an ISO date string or a
    ``datetime``/``Timestamp`` object).

    Returns:
        0 for April and October;
        1 for January-March, May, June, September, November and December;
        2 for everything else (July, August, or any value whose month
        field is not one of the strings above).
    """
    # Slice once; the original sliced the string again for every branch.
    month_field = str(x)[5:7]
    if month_field in ('04', '10'):
        return 0
    if month_field in ('01', '02', '03', '05', '06', '09', '11', '12'):
        return 1
    return 2
def month(x):
    """Return 1 when the month of ``x`` is in a fixed set of months, else 0.

    The month is taken from characters 5-6 of ``str(x)``, so ``x`` must
    render as ``YYYY-MM-...``. The flagged months are August, September,
    October, December, January and February.
    """
    return 1 if str(x)[5:7] in ('08', '09', '10', '12', '01', '02') else 0
def normal(nd):
    """Drop outliers from a numeric Series using 1.5 * IQR fences.

    Args:
        nd: numeric ``pandas.Series``.

    Returns:
        The elements of ``nd`` strictly between ``Q1 - 1.5 * IQR`` and
        ``Q3 + 1.5 * IQR``, with their original index labels. Both bounds
        are exclusive, so a series whose quartiles coincide (IQR == 0)
        yields an empty result.
    """
    # Read the two quartiles directly instead of rebuilding the whole
    # describe() summary for every lookup.
    q1 = nd.quantile(0.25)
    q3 = nd.quantile(0.75)
    iqr = q3 - q1
    high = q3 + 1.5 * iqr
    low = q1 - 1.5 * iqr
    return nd[(nd < high) & (nd > low)]
# --- Load the input data -------------------------------------------------
# The workbook is read from a directory that sits next to the current
# working directory (i.e. under the parent of os.getcwd()).
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
data = pd.read_excel(os.path.join(parent_dir, '入模数据/杭州.xlsx'), index_col='dtdate')
data.index = pd.to_datetime(data.index, format='%Y-%m-%d')

# Keep only the rows whose target column ('售电量') lies inside the IQR
# fences computed by normal().
data = data.loc[normal(data['售电量']).index]

# Disabled exploration, kept for reference: buckets each calendar month by
# comparing its mean target value with the overall 25% / 75% quantiles.
# list2 = []
# list0 = []
# list1 = []
# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'):
#     month_index = data.index.strftime('%Y-%m-%d').str[5:7] == f'{i}'
#     if data.loc[month_index]['售电量'].mean() >= data['售电量'].describe()['75%']:
#         list2.append(i)
#     elif data.loc[month_index]['售电量'].mean() <= data['售电量'].describe()['25%']:
#         list0.append(i)
#     else:
#         list1.append(i)
# print(list0,list1,list2)
# --- Feature engineering and train / evaluation frames --------------------
# Disabled alternative feature (single character of the formatted date):
# data['month'] = data.index.strftime('%Y-%m-%d').str[6]
# data['month'] = data['month'].astype('int')
data['season'] = data.index.map(season)

# Training window: positional slice from row 500 up to, but not including,
# the last row. `.iloc` makes the positional intent explicit.
df_train = data.iloc[500:-1]
# Disabled alternative: a label-based training window.
# df_train = data.loc['2021-01':'2023-08']

# Evaluation frame: every row dated October 2023.
# NOTE(review): if `data` extends into 2023-10, the positional training
# window above also contains these rows, so the evaluation figures would be
# computed on data the model was fitted on - confirm this is intended.
df_eval = data.loc['2023-10']

# Single source of truth for the model inputs, shared by train and eval.
feature_cols = ['tem_max', 'tem_min', '24ST', 'holiday', 'season']
X = df_train[feature_cols]
X_eval = df_eval[feature_cols]
y = df_train['售电量']
# --- Train and evaluate ----------------------------------------------------
# Disabled search loop, kept for reference: it ran the steps below 400 times
# and remembered the iteration with the smallest |goal| (see the disabled
# bookkeeping further down).
# best_goal = 1
# best_i = {}
# for i in range(400):

# 80/20 random split of the training window with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
result_test = pd.DataFrame({'test': y_test, 'pred': y_pred}, index=y_test.index)
# Print metrics: mean absolute error divided by the mean of the held-out target.
print(abs(y_test - y_pred).mean() / y_test.mean())

eval_pred = model.predict(X_eval)
result_eval = pd.DataFrame({'eval': df_eval['售电量'], 'pred': eval_pred}, index=df_eval['售电量'].index)
# goal: (actual - predicted) summed over the last 3 evaluation rows, as a
# fraction of the actual total over all evaluation rows.
# `.iloc` keeps the tail selection positional regardless of the index type.
goal = (result_eval['eval'].iloc[-3:].sum() - result_eval['pred'].iloc[-3:].sum()) / result_eval['eval'].sum()
print('goal:', goal)
# goal2: the same deviation over the last 23 evaluation rows.
goal2 = (result_eval['eval'].iloc[-23:].sum() - result_eval['pred'].iloc[-23:].sum()) / result_eval['eval'].sum()
print('goal2:', goal2)
print(result_eval)
print('r2:', r2_score(y_test, y_pred))

# Disabled bookkeeping for the search loop and result export, kept for
# reference. The export wrote result_eval to CSV and appended both deviation
# figures (goal, goal2) to a text file.
# # result_eval.to_csv('asda.csv',encoding='gbk')
# # if abs(goal) < best_goal:
# #     best_goal = abs(goal)
# #     best_i['best_i'] = i
# #     x = goal2
# # print(best_i,best_goal,x)
#
# # result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\杭州.csv')
# # with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
# #     f.write(f'杭州月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')

# Save the model (currently disabled).
# model.save_model('hangzhou.bin')
# --- Predict with a previously saved model ---------------------------------
# Disabled alternative: build the evaluation input from a weather workbook.
# X_eval = df_eval[['tem_max','tem_min','24ST','holiday','season']]
# df_eval = pd.read_excel(r'C:\Users\user\Desktop\浙江气象1027.xlsx')
# df_eval.columns = df_eval.columns.map(lambda x:x.strip())
# df_eval = df_eval[['city_name','dtdate','tem_max','tem_min']]
# df_eval['city_name'] = df_eval['city_name'].map(lambda x:x.strip())
# df_hangzhou = df_eval[df_eval['city_name']=='金华市'].sort_values(by='dtdate')

# NOTE(review): this reads 'hangzhou.bin' from the current working directory.
# The save_model call in this script is commented out, so the file has to
# exist from an earlier run, and it is not the model fitted above.
loaded_model = xgb.XGBRegressor()
loaded_model.load_model('hangzhou.bin')

# Three hand-entered feature rows.
# Presumably the columns follow the training order
# (tem_max, tem_min, 24ST, holiday, season) - TODO confirm against the
# features the saved model was trained on, since a bare array carries no
# column names.
X_eval = np.array([[24.19, 15.30, 23, 1, 0],
                   [25.1, 13.3, 23, 0, 0],
                   [26.1, 11.69, 23, 0, 0]])
print(loaded_model.predict(X_eval))