You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
pytorch/区域电量19年至今数据.py

174 lines
6.2 KiB
Python

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import xgboost as xgb
import pandas as pd
import os
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
import datetime
import math
from sklearn.preprocessing import LabelEncoder
# Select the 'kaiti' typeface for sans-serif text in matplotlib figures
# (presumably so the Chinese labels used in this script render correctly).
mpl.rcParams.update({'font.sans-serif': ['kaiti']})
# Remove the fixed display-width limit pandas applies when printing frames.
pd.options.display.width = None
def season(x):
    """Return the season code (0, 1 or 2) for a date-like value.

    The month is read from characters 5-6 of ``str(x)``, so *x* must
    stringify as ``YYYY-MM-...`` (for example a ``datetime.date``, a pandas
    ``Timestamp`` or an ISO date string).

    Returns:
        2 for July and August, 0 for April, May and October, 1 for every
        other month, and ``None`` when no two-digit month is found at that
        position.

    NOTE(review): the codes presumably rank months by electricity demand
    (2 = highest) -- confirm against how the grouping was derived.
    """
    # Slice the month once instead of re-stringifying x in every branch.
    month = str(x)[5:7]
    if month in ('07', '08'):
        return 2
    if month in ('01', '02', '03', '06', '09', '11', '12'):
        return 1
    if month in ('04', '05', '10'):
        return 0
    # Input did not look like a date; keep the original "no value" result.
    return None
def normal(nd):
    """Return the index labels of the values in *nd* that are not outliers.

    A value is kept when it lies strictly between the fences
    ``Q1 - 1.5 * IQR`` and ``Q3 + 1.5 * IQR``, where Q1/Q3 are the 25% and
    75% entries of ``nd.describe()``.

    Args:
        nd: a numeric pandas Series.

    Returns:
        The pandas Index of the rows whose value is inside both fences.
    """
    # describe() is computed once; the original recomputed it four times.
    stats = nd.describe()
    q1 = stats['25%']
    q3 = stats['75%']
    iqr = q3 - q1
    high = q3 + 1.5 * iqr
    low = q1 - 1.5 * iqr
    return nd[(nd < high) & (nd > low)].index
# df = pd.read_excel(r'C:\Users\鸽子\Desktop\杭州19年至今日电量及气象数据.xlsx',sheet_name=0)
# df_elec = pd.read_excel(r'C:\Users\鸽子\Desktop\杭州19年至今日电量及气象数据.xlsx',sheet_name=1)
# df_elec.columns = df_elec.columns.map(lambda x:x.strip())
# df_elec['售电量'] = df_elec['售电量']/10000
# df.columns = df.columns.map(lambda x:x.strip())
# df = df[['dtdate','tem_max','tem_min']]
# # print(df.head())
# # print(df_elec.head())
#
# merge_df = pd.merge(df_elec,df,left_on='pt_date',right_on='dtdate')[['pt_date','tem_max','tem_min','售电量']]
# merge_df.set_index('pt_date',inplace=True)
# merge_df.index = pd.to_datetime(merge_df.index,format='%Y%m%d')
#
#
# merge_df['month'] = merge_df.index.strftime('%Y-%m-%d').str[5:7]
# merge_df['month'] = merge_df['month'].astype('int')
# merge_df.to_csv('杭州入模数据.csv',encoding='gbk')
# Load the prepared modelling table (GBK-encoded CSV), keep one row per
# pt_date, and use that column as a datetime index.
data = (
    pd.read_csv(r'杭州入模数据.csv', encoding='gbk')
    .drop_duplicates(subset='pt_date')
    .set_index('pt_date')
)
data.index = pd.to_datetime(data.index)
# Quick visual check of the July 2023 rows.
print(data.loc['2023-07'])
def jq(y, x):
    """Approximate the calendar date on which a solar term begins.

    Args:
        y: Gregorian year.
        x: zero-based index of the solar term within the year; the caller
            in this file uses 0..23, with 0 the first term of January.

    Returns:
        A ``datetime.date`` obtained by adding the truncated day offset
        given by the closed-form approximation to the epoch 1899-12-31.
    """
    # Offset in days from the epoch: mean year length times the years since
    # 1900, plus a per-term step with a sinusoidal correction.
    day_offset = (365.242 * (y - 1900) + 6.2 + 15.22 * x
                  - 1.9 * math.sin(0.262 * x))
    epoch = datetime.date(1899, 12, 31)
    return epoch + datetime.timedelta(days=int(day_offset))
# print(jq(2020,0))
# Names of the 24 solar terms, ordered to match the term index passed to
# jq(): index 0 is 小寒 and index 23 is 冬至.
jq_list = ['小寒', '大寒', '立春', '雨水', '惊蛰', '春分', '清明', '谷雨', '立夏', '小满', '芒种', '夏至', '小暑', '大暑', '立秋', '处暑', '白露', '秋分', '寒露', '霜降', '立冬', '小雪', '大雪', '冬至']

# Map 'YYYY-MM-DD' -> solar-term name for the first day of every term in
# the years 2019 through 2023.
jq_dict = {
    jq(year, idx).strftime('%Y-%m-%d'): name
    for year in range(2019, 2024)
    for idx, name in enumerate(jq_list)
}

# Tag the rows that fall exactly on a term's first day; all other rows are
# NaN after the lookup.
data['24ST'] = data.index
data['24ST'] = data['24ST'].astype('string').map(jq_dict)
# Carry each term forward until the next one starts. Rows before the first
# term of 2019 belong to the 冬至 that began the previous December.
# Assign the result back instead of using fillna(method=..., inplace=True)
# on a column selection: `method=` is deprecated, and the chained in-place
# update does not write through to `data` under pandas Copy-on-Write.
data['24ST'] = data['24ST'].ffill().fillna('冬至')
# Replace the solar-term names in '24ST' with integer codes so the column
# can be used as a numeric model feature.
# NOTE(review): LabelEncoder presumably numbers labels by sorted order, not
# by calendar order -- confirm that is acceptable for the model.
le = LabelEncoder()
data['24ST'] = le.fit_transform(data['24ST'])
# Keep only the rows whose 售电量 lies strictly inside the 1.5*IQR fences
# computed by normal().
data = data.loc[normal(data['售电量'])]
# Derive the season code from each row's date index via season().
data['season'] = data.index.map(season)
# Sanity output: target distribution after filtering, then the full frame.
print(data['售电量'].describe())
print(data)
# list2 = []
# list0 = []
# list1 = []
# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'):
# month_index = df.index.strftime('%Y-%m-%d').str[5:7] == f'{i}'
# if df.loc[month_index]['售电量'].mean() >= df['售电量'].describe()['75%']:
# list2.append(i)
# elif df.loc[month_index]['售电量'].mean() <= df['售电量'].describe()['25%']:
# list0.append(i)
# else:
# list1.append(i)
# print(list0,list1,list2)
# data = pd.read_excel(r'C:\python-project\pytorch3\入模数据\杭州数据.xlsx',index_col='dtdate')
# data.index = pd.to_datetime(data.index,format='%Y-%m-%d')
# data = data.loc[normal(data['售电量']).index]
# plt.plot(range(len(data['售电量']['2021':'2022'])),data['售电量']['2021':'2022'])
# plt.show()
# # print(hf_season(data.loc['2021']['售电量']))
# data['month'] = data.index.strftime('%Y-%m-%d').str[6]
# data['month'] = data['month'].astype('int')
# data['season'] = data.index.map(season)
# print(data.head(50))
#
# Hold out September 2023 for evaluation; everything from January 2019
# through August 2023 is used for training.
df_eval = data.loc['2023-9']
df_train = data.loc['2019-1':'2023-8']
print(len(df_train),len(df_eval))
# Plot the rows of the whole 2019-01..2023-09 span against their position.
# NOTE(review): the full frame is passed as y, so every column is drawn,
# not only 售电量 -- confirm this is intended.
plt.plot(range(len(data.loc['2019-1':'2023-9'])),data.loc['2019-1':'2023-9'])
plt.show()
# df_train = df[500:850]
# Row counts again (evaluation, training, all rows after outlier filtering).
print(len(df_eval),len(df_train),len(data))
# Pearson correlation of every column with the target 售电量.
print(data.corr(method='pearson')['售电量'])
# Restrict the training frame to the model features plus the target.
df_train = df_train[['tem_max','tem_min','24ST','售电量','season']]
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
# high = df['售电量'].describe()['75%'] + 1.5*IQR
# low = df['售电量'].describe()['25%'] - 1.5*IQR
# print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] <= low)]))
#
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
# Feature matrices for training and evaluation use the same four columns in
# the same order; y is the training target.
X = df_train[['tem_max','tem_min','season','24ST']]
X_eval = df_eval[['tem_max','tem_min','season','24ST']]
y = df_train['售电量']
print(y.describe())
# best_goal = 1
# best_i = {}
# for i in range(400):
# Random 80/20 split of the training period (fixed seed for repeatability).
# NOTE(review): the split is random rather than chronological, so test rows
# are interleaved in time with training rows -- confirm this is intended.
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train,y_train)
y_pred = model.predict(x_test)
# Actual vs. predicted values for the held-out 20%, aligned on date.
result_test = pd.DataFrame({'test':y_test,'pred':y_pred},index=y_test.index)
# Print metrics: mean absolute error divided by the mean actual value.
print(abs(y_test - y_pred).mean() / y_test.mean())
# Predict the evaluation month and pair predictions with the actual values.
eval_pred = model.predict(X_eval)
result_eval = pd.DataFrame({'eval':df_eval['售电量'],'pred':eval_pred},index=df_eval['售电量'].index)
# Relative deviation of the predicted total from the actual total.
print((result_eval['eval'].sum()-result_eval['pred'].sum())/result_eval['eval'].sum())
# Deviation accumulated over the last 3 rows, divided by the actual total of
# the whole evaluation period.
# NOTE(review): the denominator is the full-period sum, not the 3-row sum,
# and the window counts rows, so rows dropped by the outlier filter shift
# it -- confirm both are intended.
goal = (result_eval['eval'][-3:].sum()-result_eval['pred'][-3:].sum())/result_eval['eval'].sum()
print('goal:',goal)
# Same measure over the last 23 rows of the evaluation period.
goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum()
print('goal2:',goal2)
print(result_eval)
# Coefficient of determination on the held-out 20% split.
print('r2:',r2_score(y_test,y_pred))
# if abs(goal) < best_goal:
# best_goal = abs(goal)
# best_i['best_i'] = i
# x = goal2
# print(best_i,best_goal,x)
# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\杭州.csv')
# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
# f.write(f'杭州月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# # 保存模型
# model.save_model('hangzhou.bin')
# loaded_model = xgb.XGBRegressor()
# loaded_model.load_model('hangzhou.bin')
# model.predict(X_eval)