You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

204 lines
7.4 KiB
Python

import numpy as np
import pandas as pd
import torch
from torch import nn
from multiprocessing import Pool
import matplotlib.pyplot as plt
import os
from torch.utils.data import DataLoader,TensorDataset
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE" # Works around "OMP: Error #15: Initializing libiomp5md.dll, but found libiomp5md.dll already initialized."
# Number of consecutive days in one input window; also used as the LSTM input size below.
DAYS_FOR_TRAIN = 10
# Seed PyTorch's random number generator so repeated runs start from the same state.
torch.manual_seed(42)
class LSTM_Regression(nn.Module):
    """Stacked LSTM followed by a linear layer applied at every time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        output_size: Number of values produced per time step.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, output_size=1, num_layers=2):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, _x):
        """Run the LSTM over ``_x`` and project every step to ``output_size``.

        Args:
            _x: Input tensor of shape ``(seq_len, batch, input_size)``.

        Returns:
            Tensor of shape ``(seq_len, batch, output_size)``.
        """
        hidden, _ = self.lstm(_x)  # (seq_len, batch, hidden_size)
        # nn.Linear acts on the last dimension and keeps the leading ones, so
        # no manual flatten / un-flatten around it is required.
        return self.fc(hidden)
def create_dataset(data, days_for_train=5, days_for_predict=3) -> "tuple[np.ndarray, np.ndarray]":
    """Cut a series into sliding (input window, target window) pairs.

    Sample ``i`` uses ``data[i : i + days_for_train]`` as input and the
    ``days_for_predict`` values that immediately follow it as target.

    The window count is ``len(data) - days_for_train - days_for_predict + 1``;
    the earlier ``range(len(data) - days_for_train - 3)`` stopped one step
    early and never produced the last complete window.

    Args:
        data: 1-D sequence (list, ``np.ndarray`` or ``pd.Series``); converted
            with ``np.asarray`` so slicing is always positional.
        days_for_train: Length of each input window.
        days_for_predict: Length of each target window (previously fixed at 3).

    Returns:
        ``(dataset_x, dataset_y)`` with shapes
        ``(n_samples, days_for_train)`` and ``(n_samples, days_for_predict)``.
        Both are empty arrays when ``data`` is too short for a single sample.
    """
    values = np.asarray(data)
    n_samples = len(values) - days_for_train - days_for_predict + 1
    dataset_x, dataset_y = [], []
    for start in range(max(n_samples, 0)):
        split = start + days_for_train
        dataset_x.append(values[start:split])
        dataset_y.append(values[split:split + days_for_predict])
    return np.array(dataset_x), np.array(dataset_y)
def normal(nd):
    """Drop outliers from a numeric Series using the 1.5 * IQR rule.

    Values outside ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]`` are removed. The
    fences are inclusive so a constant series (IQR == 0) is kept instead of
    being emptied. NaN entries never satisfy the comparison and are dropped.

    Args:
        nd: Numeric ``pd.Series``.

    Returns:
        The subset of ``nd`` inside the fences, with its original index labels.
    """
    # Two quantile calls replace four full describe() passes.
    q1 = nd.quantile(0.25)
    q3 = nd.quantile(0.75)
    margin = 1.5 * (q3 - q1)
    low, high = q1 - margin, q3 + margin
    return nd[(nd >= low) & (nd <= high)]
def data_preprocessing(data, start='2021-01', end='2023-08', trim_tail=3):
    """Clean a raw table of daily readings, one column per series.

    Steps, in order:
      1. strip whitespace from the column names;
      2. parse the index as datetimes and sort it;
      3. keep rows from ``start`` to ``end`` and drop the last ``trim_tail``;
      4. drop columns in which at least half of the remaining rows are 0;
      5. cast to float and replace outliers (as decided by ``normal``) by NaN.

    The input frame is not modified; work is done on a copy, which also avoids
    in-place edits on a chained slice (pandas SettingWithCopy behaviour).

    Args:
        data: DataFrame whose index can be parsed by ``pd.to_datetime`` and
            whose column names are strings.
        start: First date label kept (partial date strings are accepted).
        end: Last date label kept.
        trim_tail: Number of trailing rows removed after the date filter.

    Returns:
        A new float DataFrame indexed by sorted datetimes.
    """
    data = data.copy()
    data.columns = data.columns.map(str.strip)
    data.index = pd.to_datetime(data.index)
    data = data.sort_index()

    data = data.loc[start:end]
    if trim_tail:
        # Guarded because ``iloc[:-0]`` would select nothing.
        data = data.iloc[:-trim_tail]

    # Drop columns that are zero in at least half of the rows.
    mostly_zero = [col for col in data.columns if (data[col] == 0).mean() >= 0.5]
    data = data.drop(columns=mostly_zero)

    data = data.astype(float)
    for col in data.columns:
        # Assignment aligns on the index, so removed outliers become NaN.
        data[col] = normal(data[col])
    return data
11 months ago
# ---------------------------------------------------------------------------
# Training pipeline. Everything except the `device` and `model` assignments is
# disabled; it is kept as a record of how 'best_dy3.pth' was produced.
# ---------------------------------------------------------------------------
# Assemble the data set
# file_dir = r'./浙江各地市分电压日电量数据'
# excel = os.listdir(file_dir)[0]
# data = pd.read_excel(os.path.join(file_dir, excel), sheet_name=0, index_col='stat_date')
# data.drop(columns='地市',inplace=True)
# data = data_preprocessing(data)
#
# df = data[data.columns[0]]
# df.dropna(inplace = True)
# dataset_x, dataset_y = create_dataset(df, DAYS_FOR_TRAIN)
#
# for level in data.columns[1:]:
#     df = data[level]
#     df.dropna(inplace=True)
#     x, y = create_dataset(df, DAYS_FOR_TRAIN)
#     dataset_x = np.concatenate((dataset_x, x))
#     dataset_y = np.concatenate((dataset_y, y))
#
#
# for excel in os.listdir(file_dir)[1:]:
#
#     data = pd.read_excel(os.path.join(file_dir,excel), sheet_name=0,index_col='stat_date')
#     data.drop(columns='地市', inplace=True)
#     data = data_preprocessing(data)
#
#     for level in data.columns:
#         df = data[level]
#         df.dropna(inplace=True)
#         x,y = create_dataset(df,DAYS_FOR_TRAIN)
#         dataset_x = np.concatenate((dataset_x,x))
#         dataset_y = np.concatenate((dataset_y,y))
# # Training
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#
# # Min-max scale to 0~1
# max_value = np.max(dataset_x)
# min_value = np.min(dataset_x)
# dataset_x = (dataset_x - min_value) / (max_value - min_value)
# dataset_y = (dataset_y - min_value) / (max_value - min_value)
# #
# # print(max_value,min_value)
# # # Split into training and evaluation sets
# train_size = int(len(dataset_x)*0.7)
# train_x = dataset_x[:train_size]
# train_y = dataset_y[:train_size]
# eval_x = dataset_x[train_size:]
# eval_y = dataset_y[train_size:]
# # Reshape: the RNN expects (seq_size, batch_size, feature_size)
# train_x = train_x.reshape(-1, 1, DAYS_FOR_TRAIN)
# train_y = train_y.reshape(-1, 1, 3)
# eval_x = eval_x.reshape(-1, 1, DAYS_FOR_TRAIN)
# eval_y = eval_y.reshape(-1, 1, 3)
#
# # Convert to PyTorch tensors
# train_x = torch.from_numpy(train_x).to(device).type(torch.float32)
# train_y = torch.from_numpy(train_y).to(device).type(torch.float32)
# eval_x = torch.from_numpy(eval_x).to(device).type(torch.float32)
# eval_y = torch.from_numpy(eval_y).to(device).type(torch.float32)
# Build the network: one DAYS_FOR_TRAIN-long window per step in, 3 values per
# step out, 32 hidden units, 2 stacked LSTM layers.
model = LSTM_Regression(DAYS_FOR_TRAIN, 32, output_size=3, num_layers=2).to(device)
# loss_function = nn.MSELoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=0.005, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
#
# min_loss = 1
# for i in range(2500):
#     model.train()
#     out = model(train_x)
#     loss = loss_function(out, train_y)
#     loss.backward()
#     optimizer.step()
#     optimizer.zero_grad()
#
#     model.eval()
#     with torch.no_grad():
#         pred = model(eval_x)
#         eval_loss = loss_function(pred,eval_y)
#     if eval_loss <= min_loss:
#         min_loss = eval_loss
#         # NOTE(review): state_dict() returns references to the live tensors,
#         # so this does not snapshot the best weights; use
#         # copy.deepcopy(model.state_dict()) before re-enabling this code.
#         best_param = model.state_dict()
#
#     if (i+1) % 100 == 0:
#         # NOTE(review): the message says 1500 but the loop runs 2500 epochs.
#         print(f'epoch {i+1}/1500 loss:{round(loss.item(),5)}')
#
# # Save the model
# torch.save(best_param,'best_dy3.pth')
# for test
# model = model.eval()
#
# dataset_x = dataset_x.reshape(-1, 1, DAYS_FOR_TRAIN) # (seq_size, batch_size, feature_size)
# dataset_x = torch.from_numpy(dataset_x).to(device).type(torch.float32)
#
# pred_test = model(dataset_x)
# # Model output is (seq_size, batch_size, output_size)
# pred_test = pred_test.view(-1)
# pred_test = np.concatenate((np.zeros(DAYS_FOR_TRAIN), pred_test.cpu().detach().numpy()))
#
# plt.plot(pred_test.reshape(-1), 'r', label='prediction')
# plt.plot(dataset_y.reshape(-1), 'b', label='real')
# plt.plot((train_size*3, train_size*3), (0, 1), 'g--') # divider: training data on the left, evaluation output on the right
# plt.legend(loc='best')
# plt.show()
# ---------------------------------------------------------------------------
# Inference: load the saved weights and write a three-value forecast per city.
# ---------------------------------------------------------------------------
# Min-max scaling constants applied to the inputs and inverted on the outputs.
# NOTE(review): presumably the values printed during training — confirm they
# match the data 'best_dy3.pth' was trained on.
max_value, min_value = 192751288.47, 0.0

# map_location=device loads the checkpoint on whichever device the model uses,
# so the same line works on CPU-only and CUDA machines.
# NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
model.load_state_dict(torch.load('best_dy3.pth', map_location=device))
model.eval()  # inference mode

# file_dir = r'./浙江各地市分电压日电量数据'
df = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江电量20231127.xlsx', sheet_name=1)
# Keep only rows without a county name.
df = df[df['county_name'].isnull()]

for city in df['city_name'].drop_duplicates():
    df_city = (
        df[df['city_name'] == city]
        .drop(columns=['county_name', '500kv(含330kv)以上'])
        .set_index('pt_date')
        .sort_index()
    )
    # df_city.drop(columns=[i for i in df_city.columns if (df_city[i] == 0).sum() / len(df_city) >= 0.5], inplace=True)
    result_dict = {}
    # The first remaining column is skipped; presumably it is 'city_name' — TODO confirm.
    for level in df_city.columns[1:]:
        x, _ = create_dataset(df_city[level], DAYS_FOR_TRAIN)
        if len(x) == 0:
            # Too few rows to build a single window; the LSTM cannot run on an
            # empty sequence, so skip this series instead of crashing.
            print(f'skipped {city} / {level}: not enough rows for one window')
            continue
        x = (x - min_value) / (max_value - min_value)
        # All windows are fed as one sequence with batch size 1:
        # (seq_size, batch_size, feature_size).
        x = x.reshape(-1, 1, DAYS_FOR_TRAIN)
        x = torch.from_numpy(x).type(torch.float32).to(device)
        with torch.no_grad():  # no gradients are needed for prediction
            pred = model(x).reshape(-1)
        pred = pred * (max_value - min_value) + min_value
        # The last three values are the model output for the final window.
        result = pred.cpu().numpy()[-3:]
        result_dict[level] = list(result)

    df1 = pd.DataFrame(result_dict, index=['2023-11-28', '2023-11-29', '2023-11-30'])
    df1.to_excel(fr'C:\Users\鸽子\Desktop\11月分压电量预测28-30\{city} .xlsx')
    print(result_dict)
# Print evaluation metrics (disabled)
# print(abs(pred - df[-3:]).mean() / df[-3:].mean())
# result_eight = pd.DataFrame({'pred': np.round(pred,1),'real': df[-3:]})
# target = (result_eight['pred'].sum() - result_eight['real'].sum()) / df[-31:].sum()
# result_eight['loss_rate'] = round(target, 5)
# result_eight['level'] = level
# list_app.append(result_eight)
# print(target)
# print(result_eight)
# final_df = pd.concat(list_app,ignore_index=True)
# final_df.to_csv('市行业电量.csv',encoding='gbk')
# print(final_df)