You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

104 lines
3.4 KiB
Python

11 months ago
import xgboost as xgb
import pandas as pd
11 months ago
import numpy as np
11 months ago
import os
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
10 months ago
# Put the 'kaiti' font first in matplotlib's sans-serif family list
# (presumably so Chinese text renders in figures - confirm the font is
# installed; no plotting call is visible in this part of the file).
mpl.rcParams['font.sans-serif'] = ['kaiti']
# Set pandas' display width option to None rather than a fixed number of
# characters; this affects how the DataFrames printed below are laid out.
pd.set_option('display.width', None)
11 months ago
11 months ago
def season(x):
    """Map a date to a three-level season code based on its month.

    The month is taken from characters 5-6 of ``str(x)``, so ``x`` must
    stringify in ``YYYY-MM-...`` form (ISO date strings do; so do
    ``datetime`` objects and pandas ``Timestamp`` values).

    Args:
        x: Date-like value whose string form starts with ``YYYY-MM``.

    Returns:
        0 for April and October; 1 for January-March, May, June,
        September, November and December; 2 for everything else (July,
        August, or any value whose month field is not one of the above).
    """
    mm = str(x)[5:7]
    if mm in ('04', '10'):
        return 0
    if mm in ('01', '02', '03', '05', '06', '09', '11', '12'):
        return 1
    # Fallback: '07', '08' and any unrecognised month text land here.
    return 2
10 months ago
11 months ago
def month(x):
    """Return a binary flag derived from the month of a date.

    The month is taken from characters 5-6 of ``str(x)``, so ``x`` must
    stringify in ``YYYY-MM-...`` form.

    Args:
        x: Date-like value whose string form starts with ``YYYY-MM``.

    Returns:
        1 when the month is August, September, October, December, January
        or February; 0 otherwise (including unrecognised month text).
    """
    mm = str(x)[5:7]
    return 1 if mm in ('08', '09', '10', '12', '01', '02') else 0
10 months ago
11 months ago
def normal(nd):
    """Drop outliers from a Series using the 1.5 * IQR fences.

    Args:
        nd: pandas Series whose ``describe()`` output contains the
            ``'25%'`` and ``'75%'`` entries.

    Returns:
        The elements of ``nd`` strictly between ``Q1 - 1.5 * IQR`` and
        ``Q3 + 1.5 * IQR``, with their original index labels. Values equal
        to a fence are excluded, so a constant Series yields an empty
        result.
    """
    # describe() is computed once; both quartiles are read from the same
    # summary instead of recomputing it for every term.
    summary = nd.describe()
    q1, q3 = summary['25%'], summary['75%']
    iqr = q3 - q1
    high = q3 + 1.5 * iqr
    low = q1 - 1.5 * iqr
    return nd[(nd < high) & (nd > low)]
11 months ago
10 months ago
# Load the modelling sheet from '<parent of the current working directory>/入模数据'.
# NOTE: the location depends on where the script is launched from, not on
# where this file lives.
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
data = pd.read_excel(os.path.join(parent_dir, '入模数据/杭州.xlsx'))
data['dtdate'] = pd.to_datetime(data['dtdate'], format='%Y-%m-%d')
data['year'] = data['dtdate'].dt.year

# Remove rows whose 售电量 falls outside the 1.5 * IQR fences. The filter is
# applied while the frame still has the unique row index from read_excel:
# selecting by date label after set_index would duplicate rows (or bring a
# dropped outlier back) whenever a date occurs more than once.
kept_rows = normal(data['售电量']).index
data = data.loc[kept_rows]

# Index by date so later code can slice by month string (e.g. '2023-12').
data = data.set_index('dtdate')
11 months ago
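# Disabled exploratory snippet: sorts the twelve months into three lists by
# comparing each month's mean 售电量 with the overall quartiles
# (list0: mean <= 25%, list2: mean >= 75%, list1: everything in between).
# Presumably the origin of the month groups used in season() - confirm.
# list2 = []
# list0 = []
# list1 = []
# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'):
#     month_index = data.index.strftime('%Y-%m-%d').str[5:7] == f'{i}'
#     if data.loc[month_index]['售电量'].mean() >= data['售电量'].describe()['75%']:
#         list2.append(i)
#     elif data.loc[month_index]['售电量'].mean() <= data['售电量'].describe()['25%']:
#         list0.append(i)
#     else:
#         list1.append(i)
# print(list0,list1,list2)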
11 months ago
11 months ago
# Tag every row with the season code derived from its date index.
data['season'] = data.index.map(season)

# Model inputs, listed once so the training and evaluation frames cannot
# drift apart. The order here is the column order the model is fitted on.
feature_cols = ['tem_max', 'tem_min', '24ST', 'holiday', 'season', 'year']

# Training window: the last 180 rows of the frame (the most recent days if
# the sheet is in date order). Evaluation window: every row dated 2023-12.
# NOTE(review): if the last 180 rows reach into December 2023, the
# evaluation rows are also part of the training pool, so figures computed
# on df_eval are not out-of-sample - confirm this is intended.
df_train = data.iloc[-180:]
df_eval = data.loc['2023-12']
print(df_train)

X = df_train[feature_cols]
X_eval = df_eval[feature_cols]
y = df_train['售电量']

# Random 80/20 split of the training window; the seed is fixed so the split
# is reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
result_test = pd.DataFrame({'test': y_test, 'pred': y_pred}, index=y_test.index)

# Metric printout: mean absolute error on the test split divided by the
# mean of the true test values.
print(abs(y_test - y_pred).mean() / y_test.mean())
11 months ago
def _tail_gap_share(frame, days):
    """Return (actual - predicted) over the last *days* rows, divided by the
    sum of the 'eval' column over the whole frame.

    Args:
        frame: DataFrame with 'eval' (actual) and 'pred' (predicted) columns.
        days: Number of trailing rows to compare.
    """
    tail = frame.iloc[-days:]
    return (tail['eval'].sum() - tail['pred'].sum()) / frame['eval'].sum()


# Predict the evaluation rows and put actual and predicted values side by side.
eval_pred = model.predict(X_eval)
result_eval = pd.DataFrame(
    {'eval': df_eval['售电量'], 'pred': eval_pred},
    index=df_eval.index,
)

# Signed gap over the last 3 rows, as a share of the whole evaluation total.
goal = _tail_gap_share(result_eval, 3)
print('goal:', goal)

# Same measure over the last 23 rows.
goal2 = _tail_gap_share(result_eval, 23)
print('goal2:', goal2)

print(result_eval)
# R^2 is reported on the random test split, not on the evaluation rows.
print('r2:', r2_score(y_test, y_pred))
10 months ago
# Hand-entered feature rows for the four days to forecast. Column order
# follows the feature list used above in this file:
# tem_max, tem_min, 24ST, holiday, season, year.
X_eval = np.array([
    [13.8, 1.4, 0, 0, 1, 2023],
    [10.1, 2.7, 0, 0, 1, 2023],
    [15.3, 6.3, 0, 3, 1, 2023],
    [12.2, 1.8, 0, 3, 1, 2023],
])

# Predict once and reuse the array for both the printout and the export.
manual_pred = model.predict(X_eval)
print(manual_pred)

forecast = pd.Series(
    manual_pred,
    index=['2023-12-28', '2023-12-29', '2023-12-30', '2023-12-31'],
)

# The exported column holds the actual values of the evaluation rows
# followed by the four predicted values, all under the single header '预测值'.
result = pd.concat((result_eval['eval'], forecast)).to_frame('预测值')
# Normalise every index label to its first 10 characters (YYYY-MM-DD).
result.index = result.index.map(lambda x: str(x)[:10])
print(result)

output_path = r'C:\Users\鸽子\Desktop\市区日电量预测_1229.xlsx'
# Append mode (with sheet replacement) only works on an existing workbook;
# when the file is missing, create it in write mode instead of failing.
writer_options = {'engine': 'openpyxl'}
if os.path.exists(output_path):
    writer_options.update(mode='a', if_sheet_exists='replace')
else:
    writer_options['mode'] = 'w'
with pd.ExcelWriter(output_path, **writer_options) as writer:
    result.to_excel(writer, sheet_name='杭州')