You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

104 lines
3.4 KiB
Python

import xgboost as xgb
import pandas as pd
import numpy as np
import os
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
# Put the KaiTi font first in matplotlib's sans-serif family
# (presumably so Chinese labels render in plots -- TODO confirm).
mpl.rcParams['font.sans-serif'] = ['kaiti']
# A width of None lets pandas auto-detect the terminal width when printing frames.
pd.set_option('display.width', None)
def season(x):
    """Return a three-level season code derived from the month of ``x``.

    The month is read as characters 5-6 of ``str(x)``, so ``x`` must render
    as ``YYYY-MM-...`` (ISO date strings and pandas timestamps both do).

    Args:
        x: A date-like value whose string form starts with ``YYYY-MM``.

    Returns:
        0 for April and October;
        1 for January-March, May, June, September, November and December;
        2 for everything else (July, August, and any value whose month
        field is not one of the strings listed above).

    NOTE(review): the codes look like a load-level bucketing (0 = low,
    2 = high) -- confirm with the author.
    """
    # Slice the month field once instead of once per branch.
    month_token = str(x)[5:7]
    if month_token in ('04', '10'):
        return 0
    if month_token in ('01', '02', '03', '05', '06', '09', '11', '12'):
        return 1
    return 2
def month(x):
    """Return a binary flag derived from the month of ``x``.

    The month is read as characters 5-6 of ``str(x)``, so ``x`` must render
    as ``YYYY-MM-...`` (ISO date strings and pandas timestamps both do).

    Args:
        x: A date-like value whose string form starts with ``YYYY-MM``.

    Returns:
        1 when the month is August, September, October, December, January
        or February; 0 otherwise (including values whose month field is
        not one of those strings).
    """
    month_token = str(x)[5:7]
    return 1 if month_token in ('08', '09', '10', '12', '01', '02') else 0
def normal(nd):
    """Keep only the values of ``nd`` that lie strictly inside the 1.5 * IQR fences.

    The fences are ``Q1 - 1.5 * (Q3 - Q1)`` and ``Q3 + 1.5 * (Q3 - Q1)``,
    where Q1 and Q3 are the 25th and 75th percentiles of ``nd``.

    Args:
        nd: A numeric pandas Series.

    Returns:
        The sub-Series of ``nd`` (original index labels preserved) whose
        values are strictly greater than the lower fence and strictly less
        than the upper fence. Values equal to a fence are dropped, and so
        are NaN values, because both comparisons are False for them.
    """
    # Read each quartile once; the original rebuilt the full describe()
    # summary four times to obtain these same two numbers.
    q1 = nd.quantile(0.25)
    q3 = nd.quantile(0.75)
    high = q3 + 1.5 * (q3 - q1)
    low = q1 - 1.5 * (q3 - q1)
    return nd[(nd < high) & (nd > low)]
# ---------------------------------------------------------------------------
# Load the daily data. The workbook is looked up one directory above the
# current working directory.
# ---------------------------------------------------------------------------
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
data = pd.read_excel(os.path.join(parent_dir, '入模数据/杭州.xlsx'))
data['dtdate'] = pd.to_datetime(data['dtdate'], format='%Y-%m-%d')
data['year'] = data['dtdate'].dt.year
data.set_index('dtdate', inplace=True)
# Keep only the rows whose target value lies inside the 1.5 * IQR fences.
data = data.loc[normal(data['售电量']).index]

# Disabled exploratory analysis, kept for reference: it buckets each calendar
# month by comparing the month's mean target value with the overall 25% / 75%
# quantiles. (Presumably this is how the month groups in season() were
# chosen -- TODO confirm.)
# list2 = []
# list0 = []
# list1 = []
# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'):
#     month_index = data.index.strftime('%Y-%m-%d').str[5:7] == f'{i}'
#     if data.loc[month_index]['售电量'].mean() >= data['售电量'].describe()['75%']:
#         list2.append(i)
#     elif data.loc[month_index]['售电量'].mean() <= data['售电量'].describe()['25%']:
#         list0.append(i)
#     else:
#         list1.append(i)
# print(list0,list1,list2)
data['season'] = data.index.map(season)

# ---------------------------------------------------------------------------
# Build the training and evaluation sets.
# ---------------------------------------------------------------------------
# Single source of truth for the model inputs; the column order must be the
# same for training, evaluation and the hand-entered forecast rows below.
feature_cols = ['tem_max', 'tem_min', '24ST', 'holiday', 'season', 'year']

df_train = data.iloc[-180:]       # last 180 rows (positional)
df_eval = data.loc['2023-12']     # every row dated December 2023
# NOTE(review): if the workbook ends in December 2023, df_train and df_eval
# overlap, so the scores computed on df_eval are partly in-sample -- confirm
# that this is intended.
print(df_train)
X = df_train[feature_cols]
X_eval = df_eval[feature_cols]
y = df_train['售电量']
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# ---------------------------------------------------------------------------
# Fit the model and report hold-out metrics.
# ---------------------------------------------------------------------------
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
result_test = pd.DataFrame({'test': y_test, 'pred': y_pred}, index=y_test.index)
# Metric: mean absolute error on the hold-out split divided by the mean actual.
print(abs(y_test - y_pred).mean() / y_test.mean())

# ---------------------------------------------------------------------------
# Score the December 2023 rows.
# ---------------------------------------------------------------------------
eval_pred = model.predict(X_eval)
result_eval = pd.DataFrame({'eval': df_eval['售电量'], 'pred': eval_pred}, index=df_eval['售电量'].index)
eval_total = result_eval['eval'].sum()
# Signed error summed over the last 3 (resp. 23) evaluation rows, expressed
# as a fraction of the actual total over ALL evaluation rows.
# NOTE(review): numerator and denominator cover different row ranges --
# confirm this is the intended definition.
goal = (result_eval['eval'].iloc[-3:].sum() - result_eval['pred'].iloc[-3:].sum()) / eval_total
print('goal:', goal)
goal2 = (result_eval['eval'].iloc[-23:].sum() - result_eval['pred'].iloc[-23:].sum()) / eval_total
print('goal2:', goal2)
print(result_eval)
print('r2:', r2_score(y_test, y_pred))

# ---------------------------------------------------------------------------
# Forecast the four remaining days of the month from hand-entered features.
# ---------------------------------------------------------------------------
forecast_dates = ['2023-12-28', '2023-12-29', '2023-12-30', '2023-12-31']
# Rows follow feature_cols; labelling the columns keeps the inputs aligned
# with the names the model was trained on instead of relying on position.
X_future = pd.DataFrame(
    [
        [13.8, 1.4, 0, 0, 1, 2023],
        [10.1, 2.7, 0, 0, 1, 2023],
        [15.3, 6.3, 0, 3, 1, 2023],
        [12.2, 1.8, 0, 3, 1, 2023],
    ],
    columns=feature_cols,
    index=forecast_dates,
)
# Predict once and reuse the array for both the printout and the export.
future_pred = model.predict(X_future)
print(future_pred)

# Actual December values followed by the four forecast days, in one column.
result = pd.concat((result_eval['eval'], pd.Series(future_pred, index=forecast_dates)))
result = result.to_frame('预测值')
# Normalise every index label (timestamps and strings alike) to 'YYYY-MM-DD'.
result.index = result.index.map(lambda x: str(x)[:10])
print(result)

# mode='a' appends to a workbook that must already exist; an existing sheet
# with the same name is replaced.
with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\市区日电量预测_1229.xlsx', mode='a', if_sheet_exists='replace',
                    engine='openpyxl') as writer:
    result.to_excel(writer, sheet_name='杭州')