|
|
|
@ -2,6 +2,27 @@ import os
|
|
|
|
|
import numpy as np
|
|
|
|
|
import pandas as pd
|
|
|
|
|
from sklearn.preprocessing import MinMaxScaler
|
|
|
|
|
import torch
|
|
|
|
|
from torch import nn
|
|
|
|
|
from torch.utils.data import DataLoader, TensorDataset
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
|
|
|
|
|
class LSTM(nn.Module):
    """Stacked LSTM followed by a two-layer fully connected regression head.

    Args:
        input_size: Number of features per time step fed to the LSTM.
        hidden_size: Size of the LSTM hidden state.
        output_size: Number of values produced for each flattened position.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers):
        super().__init__()
        # nn.LSTM is created with its defaults, i.e. batch_first=False.
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers)
        self.fc1 = nn.Linear(hidden_size, 128)
        self.fc2 = nn.Linear(128, output_size)
        # A single, consistently named activation: the attribute used to be
        # registered as `ReLu` while forward() looked up `ReLU`, which raised
        # AttributeError on every call. ReLU holds no parameters, so the
        # rename does not change the state_dict keys.
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Run the LSTM and map every position to `output_size` values.

        Args:
            x: Input tensor whose last dimension is `input_size`. With the
                default batch_first=False a 3-D input is interpreted by
                nn.LSTM as (seq_len, batch, input_size).
                NOTE(review): confirm callers really pass time-major data;
                batch-first input would need batch_first=True.

        Returns:
            Tensor of shape (N, output_size), where N is the product of all
            leading dimensions of the LSTM output.
        """
        x, _ = self.lstm(x)
        # Collapse every leading dimension so the linear layers see one row
        # per (time step, sample) pair.
        hidden = x.shape[-1]
        x = x.reshape(-1, hidden)
        output = self.relu(self.dropout(self.fc1(x)))
        return self.fc2(output)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def normal(data):
|
|
|
|
@ -63,7 +84,7 @@ dataset_x, dataset_y = create_dataset(df_city_industry)
|
|
|
|
|
# Extend the dataset with the samples of every city except the first one.
# NOTE(review): the first city is skipped by [1:], presumably because its
# samples were used to initialise dataset_x / dataset_y before this loop —
# confirm against the preceding code.
for city in df['地市'].drop_duplicates()[1:]:
    df_city_industry = df[df['地市'] == city][industry]
    x, y = create_dataset(df_city_industry)
    # Concatenate exactly once per city; running the statement twice would
    # append every city's samples to the dataset two times.
    dataset_x, dataset_y = np.concatenate([dataset_x, x]), np.concatenate([dataset_y, y])
|
|
|
|
|
|
|
|
|
|
for industry in df.columns[2:][1:]:
|
|
|
|
|
for city in df['地市'].drop_duplicates():
|
|
|
|
@ -73,4 +94,61 @@ for industry in df.columns[2:][1:]:
|
|
|
|
|
|
|
|
|
|
print(dataset_x.shape, dataset_y.shape)

# Split in stored order: the first 70% of the samples are used for training,
# the remainder for evaluation (no shuffling happens before the split).
train_size = int(0.7 * len(dataset_x))
x_train, y_train = dataset_x[:train_size], dataset_y[:train_size]
x_eval, y_eval = dataset_x[train_size:], dataset_y[train_size:]
x_train, y_train = torch.from_numpy(x_train).type(torch.float32), torch.from_numpy(y_train).type(torch.float32)
x_eval, y_eval = torch.from_numpy(x_eval).type(torch.float32), torch.from_numpy(y_eval).type(torch.float32)

ds = TensorDataset(x_train, y_train)
dl = DataLoader(ds, batch_size=128, shuffle=True, drop_last=True)
eval_ds = TensorDataset(x_eval, y_eval)
# Keep the final partial batch for evaluation: dropping it would ignore up to
# 255 samples and would leave the loader empty (len(eval_dl) == 0) whenever
# the evaluation split holds fewer than 256 samples.
eval_dl = DataLoader(eval_ds, batch_size=256, drop_last=False)
|
|
|
|
|
|
|
|
|
|
# Prefer the GPU when CUDA is available, otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# 10 input features, 64 hidden units, 3 outputs, two stacked LSTM layers.
model = LSTM(input_size=10, hidden_size=64, output_size=3, num_layers=2)
model = model.to(device)

# Mean squared error objective optimised with Adam.
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)
|
|
|
|
|
|
|
|
|
|
# Train for 10 epochs, evaluate after each one and remember the parameters
# that reached the lowest mean evaluation loss.
min_loss = float('inf')  # any finite loss of the first epoch counts as an improvement
best_parameters = None
for epoch in range(10):
    # The evaluation phase below puts the model in eval mode; switch back so
    # dropout is active in every training epoch, not only the first one.
    model.train()
    for step, (x, y) in enumerate(dl):
        x, y = x.to(device), y.to(device)
        pred = model(x)
        loss = loss_fn(pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (step + 1) % 1000 == 0:
            # Print the loss as a plain float; rounding the tensor to a whole
            # number would hide every loss below 0.5.
            print(f'epoch{epoch+1}: train_step{step}/{len(dl)} train_loss:{loss.item():.6f}\n')

    model.eval()
    batch_loss = 0.0
    with torch.no_grad():
        for x, y in eval_dl:
            x, y = x.to(device), y.to(device)
            pred = model(x)
            # Accumulate Python floats rather than device tensors.
            batch_loss += loss_fn(pred, y).item()
    eval_loss = batch_loss / len(eval_dl)
    print(f'epoch{epoch+1}: eval_loss:{eval_loss}')

    if eval_loss < min_loss:
        min_loss = eval_loss
        # state_dict() hands out references to the live parameter tensors, so
        # they must be cloned; otherwise later optimizer steps would silently
        # overwrite the "best" snapshot with the final weights.
        best_parameters = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }

# Fall back to the final weights if no epoch ever improved on min_loss
# (e.g. the evaluation loss was NaN throughout).
if best_parameters is None:
    best_parameters = model.state_dict()
torch.save(best_parameters, 'best_3.pth')
|
|
|
|
|
|
|
|
|
|
# Rebuild the network, load the saved checkpoint and predict over the whole
# dataset (training and evaluation parts together).
model = LSTM(10, 64, 3, num_layers=2).to(device)
# map_location lets a checkpoint written on a GPU load on a CPU-only machine.
model.load_state_dict(torch.load('best_3.pth', map_location=device))
# A freshly constructed module is in training mode; switch to eval mode so
# dropout does not randomly zero activations while predicting.
model.eval()
dataset_x = dataset_x.reshape(-1, 1, 10)
dataset_x = torch.from_numpy(dataset_x).type(torch.float32).to(device)
with torch.no_grad():  # inference only, no autograd graph needed
    pred = model(dataset_x).reshape(-1)
# NOTE(review): the 10 leading zeros shift the prediction curve by 10 points
# relative to dataset_y, which is plotted without any padding — confirm this
# offset is intended.
pred = np.concatenate((np.zeros(10), pred.cpu().detach().numpy()))

plt.plot(pred, 'r', label='prediction')
plt.plot(dataset_y.reshape(-1), 'b', label='real')
# Dividing line: training data on the left, test-data output on the right.
# The x position is train_size*3 because the flattened curves hold 3 values
# per sample (the model is built with an output size of 3).
plt.plot((train_size*3, train_size*3), (0, 1), 'g--')
plt.legend(loc='best')
plt.show()
|