输出预测结果

main
鸽子 1 year ago
parent f8969d4f06
commit dfaab93f61

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

@ -0,0 +1,48 @@
import pandas as pd
import math
import datetime
import chinese_calendar as cc
# Load the per-city weather workbook and normalise the columns that are later
# used as merge keys (city name and date).
df = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江各地市气象数据 .xlsx')
# Header cells may carry stray whitespace; strip it so columns can be looked up by name.
df.columns = df.columns.map(str.strip)
# Parse the compact YYYYMMDD values and keep them as date strings; the solar-term
# lookup built later is keyed by '%Y-%m-%d' strings.
# The original re-stripped these strings afterwards, which was a no-op and is dropped.
df['dtdate'] = pd.to_datetime(df['dtdate'], format='%Y%m%d').astype('str')
# .str.strip() leaves missing cells as NaN instead of raising like a per-value
# lambda would. The last character is then dropped.
# NOTE(review): presumably the dropped character is a trailing city suffix so the
# names match the electricity workbook - confirm against the data.
df['city_name'] = df['city_name'].str.strip().str[:-1]
def holiday_work(x):
    """Return the holiday flag for date ``x``.

    Args:
        x: A date-like value accepted by ``chinese_calendar.is_workday``.

    Returns:
        0 when ``x`` is a working day, 1 otherwise.

    The original asked ``cc.is_holiday`` in a second branch and fell through
    to an implicit ``None`` if neither branch matched. The chinese_calendar
    documentation describes the two predicates as complementary, so a single
    lookup is enough and the result is always 0 or 1.
    """
    return 0 if cc.is_workday(x) else 1
def jq(y, x):
    """Approximate the date of solar term number ``x`` in year ``y``.

    Args:
        y: Calendar year.
        x: Index of the solar term; the script calls this with 0..23.

    Returns:
        A ``datetime.date`` obtained by adding the truncated day offset of an
        empirical formula to 1899-12-31.
    """
    day_offset = (
        365.242 * (y - 1900)
        + 6.2
        + 15.22 * x
        - 1.9 * math.sin(0.262 * x)
    )
    base_day = datetime.date(1899, 12, 31)
    # int() truncates the fractional day, exactly as the one-line form did.
    return base_day + datetime.timedelta(days=int(day_offset))
# print(jq(2023,1))
# Names of the 24 solar terms; position i is paired with jq(year, i) below.
jq_list = ['小寒', '大寒', '立春', '雨水', '惊蛰', '春分', '清明', '谷雨', '立夏', '小满', '芒种', '夏至', '小暑', '大暑', '立秋', '处暑', '白露', '秋分', '寒露', '霜降', '立冬', '小雪', '大雪', '冬至']
# Map the 'YYYY-MM-DD' start date of every term in the covered year(s) to its name.
jq_dict = {
    jq(year, idx).strftime('%Y-%m-%d'): term
    for year in range(2023, 2024)
    for idx, term in enumerate(jq_list)
}
# print(jq_dict)

# Only the start day of each term matches; the days in between are NaN until filled.
df['24ST'] = df['dtdate'].map(jq_dict)
# DataFrame.ffill replaces the deprecated fillna(method='ffill') with the same result.
# NOTE(review): this forward-fills every column, not just '24ST', and depends on
# row order - confirm that is intended.
df.ffill(inplace=True)
# Rows before the first matched term are still NaN; label them '秋分'.
# Assigning the result back replaces the chained in-place fillna, which does not
# reliably update the frame under pandas Copy-on-Write.
df['24ST'] = df['24ST'].fillna('秋分')
df['holiday'] = pd.to_datetime(df['dtdate']).map(holiday_work)
print(df)

# From here on jq_dict is rebound to the term-name -> integer-code table used to
# encode the '24ST' column.
jq_dict = {'冬至': 0, '处暑': 1, '夏至': 2, '大寒': 3, '大暑': 4, '大雪': 5, '寒露': 6, '小寒': 7, '小暑': 8, '小满': 9, '小雪': 10, '惊蛰': 11, '春分': 12, '清明': 13, '白露': 14, '秋分': 15, '立冬': 16, '立夏': 17, '立春': 18, '立秋': 19, '芒种': 20, '谷雨': 21, '雨水': 22, '霜降': 23}

# Load the daily electricity workbook (first sheet) and normalise its merge keys
# the same way as the weather frame.
df_elec = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江省各地市日电量及分压数据21-23年.xlsx', sheet_name=0)
df_elec.columns = df_elec.columns.map(lambda x: x.strip())
print(df_elec)
df_elec['stat_date'] = pd.to_datetime(df_elec['stat_date']).astype(str)
df_elec['地市'] = df_elec['地市'].map(lambda x: x.strip())
df_elec['stat_date'] = df_elec['stat_date'].map(lambda x: x.strip())

# Left join keeps every weather row; rows without electricity data get NaN.
df_final = pd.merge(df, df_elec, left_on=['city_name', 'dtdate'], right_on=['地市', 'stat_date'], how='left')
df_final.drop(columns=['stat_date', '地市'], inplace=True)
df_final.rename(columns={'power_sal': '售电量'}, inplace=True)
df_final['24ST'] = df_final['24ST'].map(jq_dict)
# Scale the sales figure down by 10,000.
df_final['售电量'] /= 10000
print(df_final)

# Keep only the output columns and write one workbook per city.
df_final = df_final[['dtdate', 'city_name', 'tem_max', 'tem_min', 'holiday', '24ST', '售电量']]
for city in df_final['city_name'].drop_duplicates():
    df_city = df_final[df_final['city_name'] == city]
    df_city.to_excel(fr'C:\Users\鸽子\Desktop\追加\{city} .xlsx', index=False)

Binary file not shown.

@ -46,14 +46,14 @@ data = data.loc[normal(data['售电量']).index]
data['season'] = data.index.map(season)
df_eval = data.loc['2023-9']
df_eval = data.loc['2023-10']
# df_train = data.loc['2021-1':'2023-8']
df_train = data[450:900]
df_train = data[450:-1]
df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']]
X = df_train[['tem_max','tem_min','holiday','24ST','season']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
X = df_train[['tem_max','tem_min','24ST','holiday','season']]
X_eval = df_eval[['tem_max','tem_min','24ST','holiday','season']]
y = df_train['售电量']
# best_goal = 1
# best_i = {}
@ -85,7 +85,7 @@ print(goal2)
# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
# f.write(f'丽水月末3天偏差率{goal},9号-月底偏差率:{goal2}')
# # 保存模型
# model.save_model('lishui.bin')
model.save_model('lishui.bin')
import numpy as np
loaded_model = xgb.XGBRegressor()
loaded_model.load_model('lishui.bin')

@ -41,14 +41,14 @@ data = data.loc[normal(data['售电量']).index]
data['season'] = data.index.map(season)
df_eval = data.loc['2023-8']
df_eval = data.loc['2023-10']
df_train = data[500:850]
df_train = data[500:-1]
df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']]
X = df_train[['tem_max','tem_min','holiday','24ST','season']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
X = df_train[['tem_max','tem_min','24ST','holiday','season']]
X_eval = df_eval[['tem_max','tem_min','24ST','holiday','season']]
y = df_train['售电量']
# best_goal = 1
# best_i = {}
@ -72,6 +72,7 @@ goal = (result_eval['eval'][-3:].sum()-result_eval['pred'][-3:].sum())/result_ev
print(goal)
goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum()
print(goal2)
print(result_eval)
# if abs(goal) < best_goal:
# best_goal = abs(goal)
# best_i['best_i'] = i

@ -41,8 +41,8 @@ data = data.loc[normal(data['售电量']).index]
# print(list0,list1,list2)
data['season'] = data.index.map(season)
df_eval = data.loc['2023-08']
df_train = data.iloc[450:900]
df_eval = data.loc['2023-10']
df_train = data.iloc[450:-1]
# df_train = data.loc['2021-01':'2023-07']
@ -50,8 +50,8 @@ df_train = data.iloc[450:900]
df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']]
X = df_train[['tem_max','tem_min','holiday','24ST','season']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
X = df_train[['tem_max','tem_min','24ST','holiday','season']]
X_eval = df_eval[['tem_max','tem_min','24ST','holiday','season']]
y = df_train['售电量']
# best_goal = 1

@ -40,15 +40,17 @@ data = data.loc[normal(data['售电量']).index]
data['season'] = data.index.map(season)
df_eval = data.loc['2023-09']
df_eval = data.loc['2023-10']
# df_train = data.loc['2021-01':'2023-08']
df_train = data[450:900]
# df_train = data[450:900]
df_train = data.loc['2022-01':'2023-10'][:-1]
df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']]
X = df_train[['tem_max','tem_min','holiday','24ST','season']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
X = df_train[['tem_max','tem_min','24ST','holiday','season']]
X_eval = df_eval[['tem_max','tem_min','24ST','holiday','season']]
y = df_train['售电量']
# best_goal = 1
@ -72,6 +74,7 @@ print(goal)
goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum()
print(goal2)
print(result_eval)
# if abs(goal) < best_goal :
# best_goal = abs(goal)
# best_i['best_i'] = i
@ -91,3 +94,5 @@ X_eval = np.array([
[25.8,14.3,23,0,0]])
print(model.predict(X_eval))

@ -50,10 +50,10 @@ data = data.loc[normal(data['售电量']).index]
data['season'] = data.index.map(season)
df_train = data[500:850]
df_train = data[500:-1]
# df_train = data.loc['2021-01':'2023-08']
df_eval = data.loc['2023-9']
df_eval = data.loc['2023-10']
X = df_train[['tem_max','tem_min','24ST','holiday','season']]
X_eval = df_eval[['tem_max','tem_min','24ST','holiday','season']]

@ -43,16 +43,16 @@ data = data.loc[normal(data['售电量']).index]
data['season'] = data.index.map(season)
df_eval = data.loc['2023-9']
df_eval = data.loc['2023-10']
# df_train = data.loc['2021-1':'2023-8']
df_train = data[450:900]
df_train = data[450:-1]
df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']]
X = df_train[['tem_max','tem_min','holiday','24ST','season']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
X = df_train[['tem_max','tem_min','24ST','holiday','season']]
X_eval = df_eval[['tem_max','tem_min','24ST','holiday','season']]
y = df_train['售电量']
# best_goal = 1

@ -44,14 +44,14 @@ data = data.loc[normal(data['售电量']).index]
data['season'] = data.index.map(season)
df_eval = data.loc['2023-9']
df_eval = data.loc['2023-10']
# df_train = data.loc['2022-6':'2023-8']
df_train = data[450:900]
df_train = data[450:-1]
df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']]
X = df_train[['tem_max','tem_min','holiday','24ST','season']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
X = df_train[['tem_max','tem_min','24ST','holiday','season']]
X_eval = df_eval[['tem_max','tem_min','24ST','holiday','season']]
y = df_train['售电量']
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=158)
@ -72,7 +72,7 @@ goal = (result_eval['eval'][-3:].sum()-result_eval['pred'][-3:].sum())/result_ev
print(goal)
goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum()
print(goal2)
print(result_eval)
# 保存模型
# model.save_model('huzhou.bin')
loaded_model = xgb.XGBRegressor()
@ -80,6 +80,7 @@ loaded_model.load_model('huzhou.bin')
import numpy as np
X_eval = np.array([[23.89,15.5,23,1,0],
[24.5,13.30,23,0,0],
[25.39,13.5,23,0,0]])
[25.39,13.5,23,0,0]
])
print(model.predict(X_eval))

@ -42,9 +42,9 @@ data = data.loc[normal(data['售电量']).index]
# print(list0,list1,list2)
data['season'] = data.index.map(season)
df_eval = data.loc['2023-9']
df_eval = data.loc['2023-10']
# df_train = data.loc['2021-1':'2023-8']
df_train = data[450:900]
df_train = data[450:-1]
print(len(df_eval),len(df_train),len(data))
@ -52,8 +52,8 @@ df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']]
X = df_train[['tem_max','tem_min','holiday','24ST','season']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
X = df_train[['tem_max','tem_min','24ST','holiday','season']]
X_eval = df_eval[['tem_max','tem_min','24ST','holiday','season']]
y = df_train['售电量']
# best_goal = 1
@ -84,7 +84,7 @@ print(goal2)
# best_i['best_i'] = i
# x = goal2
# print(best_i,best_goal,x)
print(result_eval)
# 保存模型
# model.save_model('shaoxing.bin')
loaded_model = xgb.XGBRegressor()

@ -41,13 +41,13 @@ data = data.loc[normal(data['售电量']).index]
# print(list0,list1,list2)
data['season'] = data.index.map(season)
df_eval = data.loc['2023-9']
df_eval = data.loc['2023-10']
# df_train = data.loc['2021-1':'2023-8']
df_train = data.iloc[450:900]
df_train = data.iloc[450:-1]
df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']]
X = df_train[['tem_max','tem_min','holiday','24ST','season']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
X = df_train[['tem_max','tem_min','24ST','holiday','season']]
X_eval = df_eval[['tem_max','tem_min','24ST','holiday','season']]
y = df_train['售电量']
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=42)
@ -70,9 +70,9 @@ print(goal)
goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum()
print(goal2)
print(result_eval)
# model.save_model('zhoushan.bin')
model.save_model('zhoushan.bin')
loaded_model = xgb.XGBRegressor()
loaded_model.load_model('zhoushan.bin')
import numpy as np

@ -41,14 +41,14 @@ data = data.loc[normal(data['售电量']).index]
data['season'] = data.index.map(season)
df_eval = data.loc['2023-08']
df_train = data.iloc[450:900]
df_eval = data.loc['2023-10']
df_train = data.iloc[450:-1]
df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']]
X = df_train[['tem_max','tem_min','holiday','24ST','season']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
X = df_train[['tem_max','tem_min','24ST','holiday','season']]
X_eval = df_eval[['tem_max','tem_min','24ST','holiday','season']]
y = df_train['售电量']
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)
@ -66,7 +66,7 @@ print(goal)
goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum()
print(goal2)
print(result_eval)
# 保存模型
# model.save_model('quzhou.bin')
loaded_model = xgb.XGBRegressor()

@ -41,15 +41,14 @@ data = data.loc[normal(data['售电量']).index]
# print(list0,list1,list2)
data['season'] = data.index.map(season)
df_eval = data.loc['2023-09']
# df_train = data.loc['2021-01':'2023-08']
df_train = data.iloc[450:900]
df_eval = data.loc['2023-10']
# df_train = data.loc['2021-01':'2023-10'][:-3]
df_train = data.iloc[450:-1]
df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']]
X = df_train[['tem_max','tem_min','holiday','24ST','season']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
X = df_train[['tem_max','tem_min','24ST','holiday','season']]
X_eval = df_eval[['tem_max','tem_min','24ST','holiday','season']]
y = df_train['售电量']
# best_goal = 1
@ -74,6 +73,8 @@ print(goal)
goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum()
print(goal2)
print(result_eval)
# if abs(goal) < best_goal :
# best_goal = abs(goal)
# best_i['best_i'] = i

@ -2,8 +2,6 @@ import numpy as np
import pandas as pd
import torch
from torch import nn
from multiprocessing import Pool
import matplotlib.pyplot as plt
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
DAYS_FOR_TRAIN = 10
@ -54,80 +52,79 @@ def data_preprocessing(data):
return data
# 拼接数据集
# file_dir = r'C:\Users\user\Desktop\浙江各地市行业电量数据'
# excel = os.listdir(file_dir)[0]
# data = pd.read_excel(os.path.join(file_dir, excel), sheet_name=0, index_col='stat_date')
# data.drop(columns='地市',inplace=True)
# data = data_preprocessing(data)
#
# df = data[data.columns[0]]
# df.dropna(inplace = True)
# dataset_x, dataset_y = create_dataset(df, DAYS_FOR_TRAIN)
#
# for level in data.columns[1:]:
# df = data[level]
# df.dropna(inplace=True)
# x, y = create_dataset(df, DAYS_FOR_TRAIN)
# dataset_x = np.concatenate((dataset_x, x))
# dataset_y = np.concatenate((dataset_y, y))
#
#
# for excel in os.listdir(file_dir)[1:]:
#
# data = pd.read_excel(os.path.join(file_dir,excel), sheet_name=0,index_col='stat_date')
# data.drop(columns='地市', inplace=True)
# data = data_preprocessing(data)
#
# for level in data.columns:
# df = data[level]
# df.dropna(inplace=True)
# x,y = create_dataset(df,DAYS_FOR_TRAIN)
# dataset_x = np.concatenate((dataset_x,x))
# dataset_y = np.concatenate((dataset_y,y))
#
#
# print(dataset_x.shape,dataset_y.shape)
file_dir = r'C:\Users\user\Desktop\浙江各地市行业电量数据'
excel = os.listdir(file_dir)[0]
data = pd.read_excel(os.path.join(file_dir, excel), sheet_name=0, index_col='stat_date')
data.drop(columns='地市',inplace=True)
data = data_preprocessing(data)
df = data[data.columns[0]]
df.dropna(inplace = True)
dataset_x, dataset_y = create_dataset(df, DAYS_FOR_TRAIN)
for level in data.columns[1:]:
df = data[level]
df.dropna(inplace=True)
x, y = create_dataset(df, DAYS_FOR_TRAIN)
dataset_x = np.concatenate((dataset_x, x))
dataset_y = np.concatenate((dataset_y, y))
for excel in os.listdir(file_dir)[1:]:
data = pd.read_excel(os.path.join(file_dir,excel), sheet_name=0,index_col='stat_date')
data.drop(columns='地市', inplace=True)
data = data_preprocessing(data)
for level in data.columns:
df = data[level]
df.dropna(inplace=True)
x,y = create_dataset(df,DAYS_FOR_TRAIN)
dataset_x = np.concatenate((dataset_x,x))
dataset_y = np.concatenate((dataset_y,y))
print(dataset_x.shape,dataset_y.shape)
# # 训练
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#
# # 标准化到0~1
# max_value = np.max(dataset_x)
# min_value = np.min(dataset_x)
# dataset_x = (dataset_x - min_value) / (max_value - min_value)
# dataset_y = (dataset_y - min_value) / (max_value - min_value)
# print('max_value:',max_value,'min_value:',min_value)
# # 划分训练集和测试集
# train_size = int(len(dataset_x)*0.7)
# train_x = dataset_x[:train_size]
# train_y = dataset_y[:train_size]
#
max_value = np.max(dataset_x)
min_value = np.min(dataset_x)
dataset_x = (dataset_x - min_value) / (max_value - min_value)
dataset_y = (dataset_y - min_value) / (max_value - min_value)
print('max_value:',max_value,'min_value:',min_value)
# 划分训练集和测试集
train_size = int(len(dataset_x)*0.7)
train_x = dataset_x[:train_size]
train_y = dataset_y[:train_size]
# # 将数据改变形状RNN 读入的数据维度是 (seq_size, batch_size, feature_size)
# train_x = train_x.reshape(-1, 1, DAYS_FOR_TRAIN)
# train_y = train_y.reshape(-1, 1, 5)
#
train_x = train_x.reshape(-1, 1, DAYS_FOR_TRAIN)
train_y = train_y.reshape(-1, 1, 5)
# # 转为pytorch的tensor对象
# train_x = torch.from_numpy(train_x).to(device).type(torch.float32)
# train_y = torch.from_numpy(train_y).to(device).type(torch.float32)
#
train_x = torch.from_numpy(train_x).to(device).type(torch.float32)
train_y = torch.from_numpy(train_y).to(device).type(torch.float32)
model = LSTM_Regression(DAYS_FOR_TRAIN, 32, output_size=5, num_layers=2).to(device) # 导入模型并设置模型的参数输入输出层、隐藏层等
#
#
# # train_loss = []
# # loss_function = nn.MSELoss()
# # optimizer = torch.optim.Adam(model.parameters(), lr=0.005, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
# # for i in range(1500):
# # out = model(train_x)
# # loss = loss_function(out, train_y)
# # loss.backward()
# # optimizer.step()
# # optimizer.zero_grad()
# # train_loss.append(loss.item())
# # if i % 100 == 0:
# # print(f'epoch {i+1}: loss:{loss}')
#
# # 保存/读取模型
# # torch.save(model.state_dict(),'hy5.pth')
#
train_loss = []
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.005, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
for i in range(1500):
out = model(train_x)
loss = loss_function(out, train_y)
loss.backward()
optimizer.step()
optimizer.zero_grad()
train_loss.append(loss.item())
if i % 100 == 0:
print(f'epoch {i+1}: loss:{loss}')
# 保存/读取模型
torch.save(model.state_dict(),'hy5.pth')
# model.load_state_dict(torch.load('hy5.pth'))
# # for test
# model = model.eval() # 转换成测试模式

Loading…
Cancel
Save