You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

134 lines
4.3 KiB
Python

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import numpy as np
import pandas as pd
import torch
from torch import nn
import os
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader,TensorDataset
class LSTM_Regression(nn.Module):
    """Stacked LSTM followed by a linear layer applied at every time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        output_size: Number of values produced per time step.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, output_size=1, num_layers=2):
        super().__init__()
        # batch_first keeps its default (False): inputs are laid out as
        # (seq_len, batch, input_size).
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, _x):
        """Return per-step predictions.

        Args:
            _x: Input of shape (seq_len, batch, input_size).

        Returns:
            Tensor of shape (seq_len, batch, output_size).
        """
        hidden_seq, _ = self.lstm(_x)  # (seq_len, batch, hidden_size)
        # nn.Linear operates on the last dimension and broadcasts over the
        # leading ones, so flattening to 2-D and reshaping back is unnecessary
        # (and `.view` would fail on a non-contiguous tensor).
        return self.fc(hidden_seq)
def create_dataset(data, days_for_train=5) -> "tuple[np.ndarray, np.ndarray]":
    """Build sliding-window samples and their next-step targets.

    Sample ``k`` is ``data[k:k + days_for_train]`` and its target is
    ``data[k + days_for_train]``.

    Args:
        data: Indexable, sliceable sequence supporting ``len()``.
        days_for_train: Number of consecutive items in each input window.

    Returns:
        ``(dataset_x, dataset_y)`` as NumPy arrays. Both are empty when
        ``data`` has no more than ``days_for_train`` items.

    Raises:
        ValueError: If ``days_for_train`` is negative.
    """
    if days_for_train < 0:
        raise ValueError("days_for_train must be non-negative")
    n_samples = len(data) - days_for_train
    # range() of a non-positive count is empty, so short inputs yield no samples.
    starts = range(n_samples)
    windows = [data[start:start + days_for_train] for start in starts]
    targets = [data[start + days_for_train] for start in starts]
    return np.array(windows), np.array(targets)
def inverse_transform_col(scaler, y, n_col):
    """Undo the scaler's transform for a single column.

    ``scaler`` was fitted on a matrix X with several features; ``y`` holds
    scaled values of one of those features and ``n_col`` is that feature's
    column index in X.

    Args:
        scaler: Fitted scaler exposing per-column ``min_`` and ``scale_``.
        y: Scaled values (NumPy array or pandas Series). Not modified.
        n_col: Index of the column of X that ``y`` corresponds to.

    Returns:
        A new object of the same kind as ``y`` holding
        ``(y - min_[n_col]) / scale_[n_col]``.
    """
    offset = scaler.min_[n_col]
    factor = scaler.scale_[n_col]
    if y.dtype.kind in "iub":
        # In-place true division cannot write floats back into an integer or
        # boolean buffer, so promote first (astype returns a fresh copy).
        restored = y.astype(float)
    else:
        restored = y.copy()
    restored -= offset
    restored /= factor
    return restored
# Use the GPU when one is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# NOTE(review): absolute, machine-specific path; the script only runs where
# this workbook exists.
data = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\杭州数据.xlsx',index_col='dtdate')
print(data.columns)
# Column headers may carry stray whitespace; strip it before selecting by name.
data.columns = data.columns.map(str.strip)
data = data.drop(columns='city_name')
# Scale every column to the 0~1 range.
scaler = MinMaxScaler()
df_normalized = pd.DataFrame(scaler.fit_transform(data), columns=data.columns,index=data.index).astype(float)
# Split into training and evaluation sets: '售电量' is the target column,
# every other column is an input feature.
feature_df = df_normalized.drop(columns='售电量')
label_series = df_normalized['售电量']
x_all = feature_df.loc['2021-1':'2023-9']
y_all = label_series.loc['2021-1':'2023-9']
x_train = feature_df.loc['2021-1':'2023-8']
y_train = label_series.loc['2021-1':'2023-8']
# Held-out month. Named eval_df so the builtin `eval` is not shadowed.
eval_df = df_normalized.loc['2023-9']
x_eval = eval_df.drop(columns='售电量')
y_eval = eval_df['售电量']
# Reshape for the recurrent network, which reads
# (seq_size, batch_size, feature_size). The feature count is taken from the
# data instead of being hard-coded.
n_features = x_train.shape[1]
x_train = np.array(x_train.values).reshape(-1, 1, n_features)
y_train = np.array(y_train.values).reshape(-1, 1, 1)
# Convert to float32 PyTorch tensors on the target device (cast first so only
# float32 data is transferred).
x_train = torch.from_numpy(x_train).type(torch.float32).to(device)
y_train = torch.from_numpy(y_train).type(torch.float32).to(device)
x_eval = torch.from_numpy(x_eval.values).type(torch.float32).to(device)
# Build the model: n_features inputs, 16 hidden units, 1 output, 2 LSTM layers.
model = LSTM_Regression(n_features, 16, output_size=1, num_layers=2).to(device)
train_loss = []
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.005, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
# NOTE(review): the DataLoader stacks 3 shuffled rows along the first
# dimension, giving (3, 1, n_features) batches, and the model reads that first
# dimension as seq_size. Each training "sequence" is therefore 3 randomly
# chosen rows - confirm this is intended.
ds = TensorDataset(x_train,y_train)
dl = DataLoader(ds,batch_size=3,shuffle=True)
# Train for 300 epochs, taking one optimiser step per mini-batch.
# NOTE(review): the progress message includes the batch index, so logging is
# assumed to happen per batch - confirm against the intended nesting.
for i in range(300):
    for j, (batch_x, batch_y) in enumerate(dl):
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)
        loss = loss_function(model(batch_x), batch_y)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        train_loss.append(loss.item())
        if i % 100 != 0:
            continue
        # Only epochs 1, 101 and 201 are logged.
        print(f'epoch:{i+1}{j}次loss:{loss}')
# Save the trained weights, then read them back from disk.
torch.save(model.state_dict(), 'lstm.pth')
# torch.save(model.state_dict(),os.path.join(model_save_dir,model_file))
model.load_state_dict(torch.load('lstm.pth'))
# Evaluation on the held-out month.
model = model.eval()  # switch to inference mode
# model.load_state_dict(torch.load(os.path.join(model_save_dir,model_file)))  # load saved parameters
# (seq_size, batch_size, feature_size); the feature count comes from the data.
x_eval = x_eval.reshape(-1, 1, x_eval.shape[-1])
# No gradients are needed for prediction, so skip building the autograd graph.
with torch.no_grad():
    pred_test = model(x_eval)  # model output: (seq_size, batch_size, output_size)
pred_test = pred_test.reshape(-1).cpu().numpy()
print(x_eval.shape,pred_test.shape)
# Undo the scaling using the target's real position among the scaled columns
# rather than assuming it is the last one.
target_col = data.columns.get_loc('售电量')
pred_test = inverse_transform_col(scaler,pred_test,target_col)
y_eval = inverse_transform_col(scaler,y_eval,target_col)
# Metric: mean absolute error relative to the mean actual value.
print(abs(pred_test - y_eval).mean() /y_eval.mean())
result_eight = pd.DataFrame({'pred_test': pred_test, 'real': y_eval})
# Difference between predicted and actual totals over the last 3 rows,
# relative to the actual total over the whole evaluation period.
last_pred_sum = result_eight['pred_test'].iloc[-3:].sum()
last_real_sum = result_eight['real'].iloc[-3:].sum()
target = (last_pred_sum - last_real_sum) / result_eight['real'].sum()
print(result_eight)
print(target)