import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from torch import nn
import os
from torch.utils.data import TensorDataset,DataLoader
import datetime
torch.manual_seed(42)
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"  # work around "OMP: Error #15: Initializing libiomp5md.dll, but found libiomp5md.dll already initialized."
pd.set_option('display.width',None)
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=3):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers)
        self.fc1 = nn.Linear(hidden_size, 64)
        self.fc2 = nn.Linear(64, 128)
        self.fc3 = nn.Linear(128, output_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout()  # defined but not applied in forward()

    def forward(self, x):
        # x: (seq_len, batch, input_size), nn.LSTM's default batch_first=False layout
        output, _ = self.lstm(x)
        s, b, h = output.shape
        output = output.reshape(-1, h)          # flatten to (seq_len * batch, hidden_size)
        output = self.relu(self.fc1(output))
        output = self.relu(self.fc2(output))
        output = self.fc3(output)               # (seq_len * batch, output_size)
        return output
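# Quick shape check (illustrative only; the _check model and dummy tensor below are
# not part of the pipeline): with the layout above, an input of shape
# (seq_len, batch, 27) yields (seq_len * batch, 3) predictions.
_check = LSTM(input_size=27, hidden_size=16, output_size=3)
print(_check(torch.zeros(5, 2, 27)).shape)  # expected: torch.Size([10, 3])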
def create_data(df_level, volt_level):
    """Turn each calendar month into one sample: a 27-day input window and the
    month's last 3 days as the target."""
    dataset_x = []
    dataset_y = []
    # group by month
    grouped = df_level.groupby(df_level['stat_date'].dt.to_period('M'))
    # iterate over each month's data; the length checks are mutually exclusive,
    # so every month contributes at most one sample
    for name, group in grouped:
        values = group[volt_level].values
        if len(group) == 31:
            dataset_x.append(list(values[1:28]))
            dataset_y.append(list(values[-3:]))
        elif len(group) == 30:
            dataset_x.append(list(values[:27]))
            dataset_y.append(list(values[-3:]))
        elif len(group) == 28:
            # pad 28-day Februaries with copies of the first day
            fst = values[0]
            dataset_x.append([fst, fst, fst] + list(values[1:25]))
            dataset_y.append(list(values[-3:]))
        else:
            # 29-day months, padded with two copies of the first day
            fst = values[0]
            if len([fst, fst] + list(values[1:26])) != 27:
                break  # incomplete month: stop collecting samples
            dataset_x.append([fst, fst] + list(values[1:26]))
            dataset_y.append(list(values[-3:]))
    return np.array(dataset_x), np.array(dataset_y)
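# Illustrative example with synthetic data (the DataFrame below is made up, not read
# from the real Excel files): a single 30-day month produces one 27-day input window
# and that month's last 3 days as the target.
_demo = pd.DataFrame({'stat_date': pd.date_range('2023-04-01', periods=30, freq='D'),
                      'demo_kv': np.arange(30, dtype=float)})
_demo_x, _demo_y = create_data(_demo, 'demo_kv')
print(_demo_x.shape, _demo_y.shape)  # expected: (1, 27) (1, 3)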
# Build the dataset from the per-city Excel files
file_dir = './浙江各地市分电压日电量数据'  # daily electricity consumption by voltage level for each city in Zhejiang
print(os.listdir(file_dir))
city1 = os.listdir(file_dir)[0]
df_city = pd.read_excel(os.path.join(file_dir, city1)).drop(columns='地市')  # drop the city-name column
df_city = df_city[['stat_date', '1-10kv', '110kv(含66kv)', '35kv']]
df_city[['1-10kv', '110kv(含66kv)', '35kv']] /= 10000
df_city.stat_date = pd.to_datetime(df_city.stat_date)
# First city: seed the arrays with the '1-10kv' level ...
volt_level = '1-10kv'
df_level = df_city[['stat_date', volt_level]]
dataset_x, dataset_y = create_data(df_level, volt_level)
# ... then append its remaining voltage levels
for volt_level in df_city.columns[2:]:
    df_level = df_city[['stat_date', volt_level]]
    x, y = create_data(df_level, volt_level)
    dataset_x = np.concatenate([dataset_x, x])
    dataset_y = np.concatenate([dataset_y, y])
# Remaining cities: all three voltage levels
for excel in os.listdir(file_dir)[1:]:
    df_city = pd.read_excel(os.path.join(file_dir, excel)).drop(columns='地市')
    df_city = df_city[['stat_date', '1-10kv', '110kv(含66kv)', '35kv']]
    df_city[['1-10kv', '110kv(含66kv)', '35kv']] /= 10000
    df_city.stat_date = pd.to_datetime(df_city.stat_date)
    for volt_level in df_city.columns[1:]:
        df_level = df_city[['stat_date', volt_level]]
        x, y = create_data(df_level, volt_level)
        dataset_x = np.concatenate([dataset_x, x])
        dataset_y = np.concatenate([dataset_y, y])
print(dataset_x.shape, dataset_y.shape)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Min-max scale to the 0-1 range (statistics taken from dataset_x)
max_value = np.max(dataset_x)
min_value = np.min(dataset_x)
dataset_x = (dataset_x - min_value) / (max_value - min_value)
dataset_y = (dataset_y - min_value) / (max_value - min_value)
print(max_value,min_value)
print(np.max(dataset_x),np.min(dataset_x),np.max(dataset_y),np.min(dataset_y))
# Split into training and test sets (first 80% / last 20%, no shuffling)
train_size = int(len(dataset_x)*0.8)
train_x = dataset_x[:train_size]
train_y = dataset_y[:train_size]
eval_x = dataset_x[train_size:]
eval_y = dataset_y[train_size:]
# Reshape: nn.LSTM (batch_first=False) expects (seq_size, batch_size, feature_size)
train_x = train_x.reshape(-1, 1, 27)
train_y = train_y.reshape(-1, 1, 3)
eval_x = eval_x.reshape(-1, 1, 27)
eval_y = eval_y.reshape(-1, 1, 3)
# Convert to PyTorch tensors on the target device
train_x = torch.from_numpy(train_x).to(device).type(torch.float32)
train_y = torch.from_numpy(train_y).to(device).type(torch.float32)
eval_x = torch.from_numpy(eval_x).to(device).type(torch.float32)
eval_y = torch.from_numpy(eval_y).to(device).type(torch.float32)
train_ds = TensorDataset(train_x,train_y)
train_dl = DataLoader(train_ds,batch_size=2,shuffle=True, drop_last=True)
eval_ds = TensorDataset(eval_x,eval_y)
eval_dl = DataLoader(eval_ds,batch_size=4,drop_last=True)
model = LSTM(27, 16, output_size=3, num_layers=3).to(device)  # instantiate the model: 27 input features, hidden size 16, 3 outputs
train_loss = []
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
min_loss = 1
# Training loop (commented out because the weights below are loaded from dy3.pth):
# for i in range(200):
#     model.train()
#     for j, (x, y) in enumerate(train_dl):
#         x, y = x.to(device), y.to(device)
#         out = model(x)
#         loss = loss_function(out, y)
#         loss.backward()
#         optimizer.step()
#         optimizer.zero_grad()
#         train_loss.append(loss.item())
#         # if (i+1) % 100 == 0:
#         #     print(f'epoch {i+1}/1500 loss:{round(loss.item(),5)}')
#         if (j + 1) % 10 == 0:
#             print(f'epoch {i+1}/200 step {j+1}/{len(train_dl)} loss:{loss}')
#     test_running_loss = 0
#     model.eval()
#     with torch.no_grad():
#         for x, y in eval_dl:
#             pred = model(x)
#             loss = loss_function(pred, y)
#             test_running_loss += loss.item()
#     test_loss = test_running_loss / len(eval_dl)
#     if test_loss < min_loss:
#         min_loss = test_loss
#         best_model_weight = model.state_dict()
#     print(f'epoch {i+1} test_loss:{test_loss}')
#
# total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
# print(f"Total parameters in the LSTM model: {total_params}")
# # Save the best model weights
# torch.save(best_model_weight, 'dy3.pth')
# Load the trained model weights
model = LSTM(27, 16, output_size=3, num_layers=3).to(device)
model.load_state_dict(torch.load('dy3.pth'))
# for test
dataset_x = dataset_x.reshape(-1, 1, 27) # (seq_size, batch_size, feature_size)
dataset_x = torch.from_numpy(dataset_x).to(device).type(torch.float32)
pred_test = model(dataset_x)  # predict on the full dataset (training + test)
# model output has shape (seq_size * batch_size, output_size)
pred_test = pred_test.view(-1).cpu().detach().numpy()
plt.plot(pred_test.reshape(-1), 'r', label='prediction')
plt.plot(dataset_y.reshape(-1), 'b', label='real')
plt.plot((train_size*3, train_size*3), (0, 1), 'g--')  # divider: training data on the left, test data on the right
plt.legend(loc='best')
plt.show()
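# To read predictions in the original units, the min-max scaling can be inverted
# (sketch; _pred_orig and _real_orig are illustrative names, and recall the raw
# values were also divided by 10000 when the Excel files were loaded, so multiply
# by 10000 as well to fully undo the preprocessing):
_pred_orig = pred_test * (max_value - min_value) + min_value
_real_orig = dataset_y.reshape(-1) * (max_value - min_value) + min_value
print(_pred_orig[:3], _real_orig[:3])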