You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

149 lines
5.0 KiB
Python

11 months ago
import xgboost as xgb
import pandas as pd
11 months ago
import numpy as np
11 months ago
import os
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
# Route matplotlib's sans-serif family to the 'kaiti' font
# (presumably so Chinese labels render -- confirm the font is installed).
mpl.rcParams.update({'font.sans-serif': ['kaiti']})
# No fixed console width when pandas prints frames.
pd.options.display.width = None
11 months ago
def hf_season(x, year=2021):
    """Return the months of ``year`` whose mean reaches the series' 75th percentile.

    Args:
        x: Numeric pandas Series indexed by a ``DatetimeIndex``.
        year: Calendar year whose months are inspected. Defaults to 2021,
            the value that used to be hard-coded.

    Returns:
        list: Month numbers (1-12), ascending, whose mean value within
        ``year`` is >= the 75th percentile of the whole of ``x``. Months
        with no observations are skipped (they previously raised
        ``KeyError`` through the partial-string ``.loc`` lookup).
    """
    # Loop-invariant threshold: same value as x.describe()['75%'], computed once.
    threshold = x.quantile(0.75)
    in_year = x.index.year == year
    month_of_row = x.index.month

    high_months = []
    for month_no in range(1, 13):
        monthly = x[in_year & (month_of_row == month_no)]
        # Skip months without data instead of comparing a NaN mean.
        if not monthly.empty and monthly.mean() >= threshold:
            high_months.append(month_no)
    return high_months
def season(x):
    """Return 1 if the date falls in Jun-Aug or Dec-Feb, otherwise 0.

    The month is read from characters 5-6 of ``str(x)``, so ``x`` must
    render as ``YYYY-MM-...`` (e.g. a pandas Timestamp or an ISO date string).

    Args:
        x: Date-like value whose string form starts with ``YYYY-MM``.

    Returns:
        int: 1 for months 06, 07, 08, 12, 01, 02; 0 for any other month.
    """
    month_code = str(x)[5:7]
    return 1 if month_code in ('06', '07', '08', '12', '01', '02') else 0
def month(x):
    """Return 1 if the date falls in Aug-Oct or Dec-Feb, otherwise 0.

    The month is read from characters 5-6 of ``str(x)``, so ``x`` must
    render as ``YYYY-MM-...`` (e.g. a pandas Timestamp or an ISO date string).

    Args:
        x: Date-like value whose string form starts with ``YYYY-MM``.

    Returns:
        int: 1 for months 08, 09, 10, 12, 01, 02; 0 for any other month.
    """
    month_code = str(x)[5:7]
    return 1 if month_code in ('08', '09', '10', '12', '01', '02') else 0
def normal(nd):
    """Drop values lying on or outside the 1.5 * IQR fences.

    Args:
        nd: Numeric pandas Series.

    Returns:
        The elements of ``nd`` strictly between ``Q1 - 1.5*IQR`` and
        ``Q3 + 1.5*IQR``, with their original index labels.
    """
    # Quartiles computed once; these equal describe()['25%'] / ['75%'].
    q1 = nd.quantile(0.25)
    q3 = nd.quantile(0.75)
    iqr = q3 - q1
    high = q3 + 1.5 * iqr
    low = q1 - 1.5 * iqr
    # NOTE(review): bounds are exclusive, so a series with IQR == 0 is
    # filtered down to nothing -- confirm this is intended.
    return nd[(nd < high) & (nd > low)]
11 months ago
# Load the Hangzhou workbook, using the 'dtdate' column as the index.
data = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\杭州数据.xlsx', index_col='dtdate')
data.index = pd.to_datetime(data.index, format='%Y-%m-%d')
# Keep only the rows whose '售电量' value passes the IQR filter in normal().
data = data.loc[normal(data['售电量']).index]
# plt.plot(range(len(data['售电量']['2021':'2022'])),data['售电量']['2021':'2022'])
# plt.show()
# print(hf_season(data.loc['2021']['售电量']))
# Calendar month as an integer 1-12. The earlier code took the single character
# at position 6 of 'YYYY-MM-DD' (the month's last digit), which encoded
# October/November/December as 0/1/2.
data['month'] = data.index.month.astype('int')
# 0/1 flag produced by season() for every index timestamp.
data['season'] = data.index.map(season)
print(data.tail(50))
11 months ago
11 months ago
# Date-based split: Jan 2021 - Aug 2022 for training, Sep 2022 - Sep 2023 for evaluation.
df_train = data.loc['2021-1':'2022-8']
df_eval = data.loc['2022-9':'2023-9']
# df_train = df[500:850]
print(len(df_eval), len(df_train), len(data))

# Pearson correlation of every column (except 'city_name') with '售电量'.
print(
    data
    .drop(columns='city_name')
    .corr(method='pearson')
    ['售电量']
)
11 months ago
11 months ago
# Restrict the training frame to the candidate columns plus the target.
df_train = df_train.loc[:, [
    'tem_max', 'tem_min', '24ST', 'rh', 'rh_max',
    'prs', 'prs_max', 'prs_min', '售电量',
    'month', 'holiday', 'season',
]]
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
# high = df['售电量'].describe()['75%'] + 1.5*IQR
# low = df['售电量'].describe()['25%'] - 1.5*IQR
# print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] <= low)]))
#
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]

# Target first, then the same five model inputs for both windows.
y = df_train.loc[:, '售电量']
X = df_train.loc[:, ['tem_max', 'tem_min', '24ST', 'holiday', 'season']]
X_eval = df_eval.loc[:, ['tem_max', 'tem_min', '24ST', 'holiday', 'season']]

print(y.describe())
11 months ago
# best_goal = 1
# best_i = {}
# for i in range(400):
11 months ago
# Random 80/20 split of the training window; the fixed seed makes it repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the gradient-boosted regressor on the 80% portion and predict the rest.
model = xgb.XGBRegressor(
    max_depth=6,
    learning_rate=0.05,
    n_estimators=150,
)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
result_test = pd.DataFrame(
    {'test': y_test, 'pred': y_pred},
    index=y_test.index,
)
# Print the metric: mean absolute error relative to the mean of the held-out target.
print((y_test - y_pred).abs().mean() / y_test.mean())
11 months ago
# eval_pred = model.predict(X_eval)
#
# result_eval = pd.DataFrame({'eval':df_eval['售电量'],'pred':eval_pred},index=df_eval['售电量'].index)
#
# print((result_eval['eval'].sum()-result_eval['pred'].sum())/result_eval['eval'].sum())
#
# goal = (result_eval['eval'][-3:].sum()-result_eval['pred'][-3:].sum())/result_eval['eval'].sum()
# print('goal:',goal)
#
# goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum()
#
# print('goal2:',goal2)
# print(result_eval)
# print('r2:',r2_score(y_test,y_pred))
#
# # result_eval.to_csv('asda.csv',encoding='gbk')
# # if abs(goal) < best_goal:
# # best_goal = abs(goal)
# # best_i['best_i'] = i
# # x = goal2
# # print(best_i,best_goal,x)
#
#
#
# # result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\杭州.csv')
# # with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
# # f.write(f'杭州月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# Save the model
# model.save_model('hangzhou.bin')
11 months ago
11 months ago
# X_eval = df_eval[['tem_max','tem_min','24ST','holiday','season']]
# Load the weather workbook and normalise stray whitespace in its headers.
df_eval = pd.read_excel(r'C:\Users\user\Desktop\浙江气象1027.xlsx')
df_eval.columns = df_eval.columns.str.strip()
# .copy() gives an independent frame, so the column assignment below does not
# write into a slice of the original (avoids SettingWithCopyWarning).
df_eval = df_eval[['city_name','dtdate','tem_max','tem_min']].copy()
# .str.strip() passes missing city names through instead of raising on NaN.
df_eval['city_name'] = df_eval['city_name'].str.strip()
# NOTE(review): this filter used to select '金华市' (Jinhua) although the frame is
# named df_hangzhou and the rest of the script targets Hangzhou; changed to
# '杭州市' on the assumption of a copy-paste slip -- confirm the label in the workbook.
df_hangzhou = df_eval[df_eval['city_name']=='杭州市'].sort_values(by='dtdate')
11 months ago
11 months ago
# list2 = []
# list0 = []
# list1 = []
# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'):
# month_index = df.index.strftime('%Y-%m-%d').str[5:7] == f'{i}'
# if df.loc[month_index]['售电量'].mean() >= df['售电量'].describe()['75%']:
# list2.append(i)
# elif df.loc[month_index]['售电量'].mean() <= df['售电量'].describe()['25%']:
# list0.append(i)
# else:
# list1.append(i)
# print(list0,list1,list2)
11 months ago
print(df_hangzhou)

# Load the previously saved booster. The save_model('hangzhou.bin') call in this
# script is commented out, so the file has to exist from an earlier run; check
# up front and fail with an actionable message if it does not.
model_file = 'hangzhou.bin'
if not os.path.isfile(model_file):
    raise FileNotFoundError(
        f"Saved model '{model_file}' not found in {os.getcwd()!r}; "
        "train the model and call model.save_model('hangzhou.bin') first."
    )
loaded_model = xgb.XGBRegressor()
loaded_model.load_model(model_file)
# X_eval = np.array([[26.1,16.1,23,0,0],
# [24.5,14.6,23,1,0],
# [24.0,15.2,23,1,0],
# [22.7,14.9,23,0,0],
# [24.1,13.4,23,0,0]])
#
# print(loaded_model.predict(X_eval))
11 months ago