get
parent ddbf3e5d61
commit 544ac6add4

@ -1,4 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="C:\anaconda\envs\pytorch" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="pytorch_gpu" project-jdk-type="Python SDK" />
</project>

@ -2,7 +2,7 @@
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="C:\anaconda\envs\pytorch" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="pytorch_gpu" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

@ -0,0 +1,160 @@
# encoding:utf-8
'''
A BP (back-propagation) neural network implemented in Python.
'''
import random
import numpy as np


def sigmoid(x):
    '''
    Activation function.
    '''
    return 1.0 / (1.0 + np.exp(-x))


def sigmoid_prime(x):
    return sigmoid(x) * (1 - sigmoid(x))


class BPNNRegression:
    '''
    A regression network differs from a classification network in that
    1. the output layer is not passed through the activation function, and
    2. the update terms for the output-layer w and b change accordingly.
    '''
    def __init__(self, sizes):
        # network architecture
        self.num_layers = len(sizes)
        self.sizes = sizes
        # initialize the biases: every node in every layer except the input layer
        # gets a bias drawn from the standard normal distribution
        self.biases = [np.random.randn(n, 1) for n in sizes[1:]]
        # randomly initialize the weight of every connection, also from the standard normal distribution
        self.weights = [np.random.randn(r, c)
                        for c, r in zip(sizes[:-1], sizes[1:])]

    def feed_forward(self, a):
        '''
        Forward pass: compute the value of the output neurons.
        '''
        for i, b, w in zip(range(len(self.biases)), self.biases, self.weights):
            # the output neurons are not passed through the activation function
            if i == len(self.biases) - 1:
                a = np.dot(w, a) + b
                break
            a = sigmoid(np.dot(w, a) + b)
        return a

    def MSGD(self, training_data, epochs, mini_batch_size, eta, error=0.01):
        '''
        Mini-batch stochastic gradient descent.
        '''
        n = len(training_data)
        for j in range(epochs):
            # shuffle the training set
            random.shuffle(training_data)
            # split the training set into mini-batches
            mini_batchs = [training_data[k:k + mini_batch_size]
                           for k in range(0, n, mini_batch_size)]
            # update w and b with each mini-batch
            for mini_batch in mini_batchs:
                self.updata_WB_by_mini_batch(mini_batch, eta)
            # error after this epoch
            err_epoch = self.evaluate(training_data)
            if j % 100 == 0:  # print progress every 100 epochs
                print("Epoch {0} Error {1}".format(j, err_epoch))
            if err_epoch < error:
                break
            # if test_data:
            #     print("Epoch {0}: {1} / {2}".format(j, self.evaluate(test_data), n_test))
            # else:
            #     print("Epoch {0}".format(j))
        return err_epoch

    def updata_WB_by_mini_batch(self, mini_batch, eta):
        '''
        Update w and b from one mini-batch.
        mini_batch: the mini-batch of training samples
        eta: learning rate
        '''
        # accumulators for the partial derivatives of b and w; same shapes as biases and weights, initialized to 0
        batch_par_b = [np.zeros(b.shape) for b in self.biases]
        batch_par_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            # compute the partial derivatives of w and b from each sample's input x and target y
            delta_b, delta_w = self.back_propagation(x, y)
            # accumulate delta_b and delta_w
            batch_par_b = [bb + dbb for bb, dbb in zip(batch_par_b, delta_b)]
            batch_par_w = [bw + dbw for bw, dbw in zip(batch_par_w, delta_w)]
        # update b and w with the accumulated derivatives;
        # because a mini-batch is used, eta is divided by the mini-batch size
        self.weights = [w - (eta / len(mini_batch)) * dw
                        for w, dw in zip(self.weights, batch_par_w)]
        self.biases = [b - (eta / len(mini_batch)) * db
                       for b, db in zip(self.biases, batch_par_b)]

    def back_propagation(self, x, y):
        '''
        Back-propagate the error of one sample to get the updates of w and b.
        x: input vector
        y: target vector
        '''
        delta_b = [np.zeros(b.shape) for b in self.biases]
        delta_w = [np.zeros(w.shape) for w in self.weights]
        # forward pass to obtain the output of every neuron
        a = x  # neuron output
        # store the output of every layer
        activations = [x]
        # store the pre-activation input z of every layer except the input layer
        zs = []
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, a) + b
            zs.append(z)
            a = sigmoid(z)  # layer output
            activations.append(a)
        # -------------
        activations[-1] = zs[-1]  # the output layer is linear, so use z as its output
        # -------------
        # delta of the output layer;
        # unlike classification, it is not multiplied by the derivative of the activation
        # delta = self.cost_function(activations[-1], y) * sigmoid_prime(zs[-1])
        delta = self.cost_function(activations[-1], y)  # regression version
        # -------------
        delta_b[-1] = delta
        delta_w[-1] = np.dot(delta, activations[-2].T)
        for lev in range(2, self.num_layers):
            # walk backwards through the layers, hence the -lev index;
            # the delta of layer l is computed from the delta of layer l + 1
            z = zs[-lev]
            zp = sigmoid_prime(z)
            delta = np.dot(self.weights[-lev + 1].T, delta) * zp
            delta_b[-lev] = delta
            delta_w[-lev] = np.dot(delta, activations[-lev - 1].T)
        return (delta_b, delta_w)

    def evaluate(self, train_data):
        test_result = [[self.feed_forward(x), y]
                       for x, y in train_data]
        return np.sum([0.5 * (x - y) ** 2 for (x, y) in test_result])

    def predict(self, test_input):
        test_result = [self.feed_forward(x)
                       for x in test_input]
        return test_result

    def cost_function(self, output_a, y):
        '''
        Derivative of the quadratic loss with respect to the output.
        '''
        return (output_a - y)
    pass
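A minimal usage sketch for the class above (not part of the commit): it builds a 2-8-1 network on a toy dataset of column-vector (x, y) pairs, which is the shape back_propagation and evaluate expect.

import numpy as np
# toy data: learn y = x1 + x2; each sample is a (2, 1) input column and a (1, 1) target
rng = np.random.default_rng(0)
train = [(x.reshape(2, 1), np.array([[x.sum()]])) for x in rng.random((200, 2))]
net = BPNNRegression([2, 8, 1])      # 2 inputs, one hidden layer of 8 neurons, 1 output
net.MSGD(train, epochs=500, mini_batch_size=20, eta=0.1, error=1e-3)
print(net.predict([np.array([[0.3], [0.4]])]))   # the target value here is 0.7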

@ -3,37 +3,33 @@ import pandas as pd
import matplotlib.pyplot as plt
import torch
from sklearn import preprocessing
from torch.utils.data import DataLoader,TensorDataset
data = pd.read_excel(r'C:\python-project\pytorch3\入模数据\杭州数据.xlsx',index_col='dtdate')
data = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\杭州数据.xlsx',index_col='dtdate')
print(data.columns)
x = np.array(data.drop(columns=['售电量','city_name']).loc['2021-1':'2023-6'])
y = np.array(data['售电量'].loc['2021-1':'2023-6'])
y = np.array(data['售电量'])   # build the label vector used to check the training results
x = data.drop(columns=['售电量','city_name'])   # drop the label column from the feature set
# df.drop(label, axis=0)
# label: the column(s) or row(s) to drop; pass a list to drop several at once
# axis: 0 drops rows, 1 drops columns; the default is 0
fea_train = np.array(x)   # convert to an ndarray
# standardize the features: (x - mean μ) / std σ, centering the data around the origin to speed up training
input_features = preprocessing.StandardScaler().fit_transform(np.array(x))   # fit computes the mean and std, transform applies them
# min-max normalize y
min = np.min(y)
max = np.max(y)
y = (y - min)/(max - min)
x_eval = torch.from_numpy(data.drop(columns=['售电量','city_name']).loc['2023-7'].values).type(torch.float32)
y_eval = torch.from_numpy(data['售电量'].loc['2023-7'].values).type(torch.float32)
ds = TensorDataset(torch.from_numpy(x),torch.from_numpy(y))
dl = DataLoader(ds,batch_size=12,shuffle=True,drop_last=True)
input_features = preprocessing.StandardScaler().fit_transform(fea_train)   # fit computes the mean and std, transform applies them
# min-max normalize y to keep gradients from exploding
y = (y - np.min(y))/(np.max(y) - np.min(y))
print(y)
# set the number of network inputs, hidden-layer neurons and outputs
input_size = input_features.shape[1]   # number of input features
hidden_size = 64
output_size =1
# np.shape[1]
# 0 indexes rows, 1 indexes columns; the default is 0
# here every row is a sample and every column a feature, so shape[0] is the sample count and shape[1] the feature count
hidden_size = 64   # the hidden layers have 64 neurons each
output_size = 1   # a single output feature
batch_size = 32   # number of samples per mini-batch
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")   # train on the GPU when one is available
@ -44,21 +40,24 @@ my_nn = torch.nn.Sequential(
    torch.nn.ReLU().to(device),
    torch.nn.Linear(hidden_size, hidden_size).to(device),  # second layer → third layer
    torch.nn.ReLU().to(device),
    torch.nn.Linear(hidden_size, output_size)
    torch.nn.Linear(hidden_size, hidden_size).to(device),  # third layer → fourth layer
    torch.nn.ReLU().to(device),
    torch.nn.Linear(hidden_size, output_size).to(device)   # fourth layer → output layer
).to(device)
cost = torch.nn.MSELoss().to(device)
optimizer = torch.optim.Adam(my_nn.parameters(), lr=0.0001)
optimizer = torch.optim.Adam(my_nn.parameters(), lr=0.001)
# train the network
losses = []
for i in range(1000):
for i in range(300):
batch_loss = []
    # train with mini-batches
for X,y in dl:
X,y = X.to(device).type(torch.float32),y.to(device).type(torch.float32)
prediction = my_nn(X)
loss = cost(y, prediction)
    for start in range(0, len(input_features), batch_size):
        end = start + batch_size if start + batch_size < len(input_features) else len(input_features)
        x_train = torch.tensor(input_features[start:end], dtype=torch.float32, requires_grad=True).to(device)
        y_train = torch.tensor(y[start:end], dtype=torch.float32, requires_grad=True).to(device)
        prediction = my_nn(x_train).squeeze(-1)  # squeeze (batch, 1) to (batch,) so MSELoss compares matching shapes
        loss = cost(y_train, prediction)
optimizer.zero_grad()
loss.backward(retain_graph=True)
optimizer.step()
@ -66,16 +65,17 @@ for i in range(1000):
if i % 10 == 0:
losses.append(np.mean(batch_loss))
print(losses)
print(i, np.mean(batch_loss))
# save the model
# torch.save(my_nn, 'BP.pt')
# plot the loss curve
# dev_x = [i * 10 for i in range(20)]
# plt.xlabel('step count')
# plt.ylabel('loss')
# plt.xlim((0, 200))
# plt.ylim((0, 1000))
# plt.plot(dev_x, losses)
# plt.show()
dev_x = [i * 10 for i in range(len(losses))]  # one x value per recorded loss (a loss is appended every 10 epochs)
plt.xlabel('step count')
plt.ylabel('loss')
plt.xlim((0, 200))
plt.ylim((0, 1000))
plt.plot(dev_x, losses)
plt.show()
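A hedged sketch (not in the commit) of scoring the July 2023 hold-out with the trained network. It assumes the names data, fea_train, my_nn, device, min and max from the script above, and it refits a StandardScaler on the training features because the original fit_transform call does not keep the scaler object.

scaler = preprocessing.StandardScaler().fit(fea_train)        # same statistics as the training features
x_eval = scaler.transform(data.drop(columns=['售电量', 'city_name']).loc['2023-7'])
x_eval = torch.tensor(x_eval, dtype=torch.float32).to(device)
with torch.no_grad():
    pred = my_nn(x_eval).squeeze(-1).cpu().numpy()
pred = pred * (max - min) + min                               # undo the min-max scaling of y
print(pred[:5])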

@ -41,7 +41,7 @@ def inverse_transform_col(scaler,y,n_col):
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data = pd.read_excel(r'C:\python-project\pytorch3\入模数据\杭州数据.xlsx',index_col='dtdate')
data = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\杭州数据.xlsx',index_col='dtdate')
print(data.columns)
data.columns = data.columns.map(lambda x: x.strip())
data.drop(columns='city_name',inplace=True)
@ -73,16 +73,17 @@ y_train = torch.from_numpy(y_train).to(device).type(torch.float32)
x_eval = torch.from_numpy(x_eval.values).to(device).type(torch.float32)
model = LSTM_Regression(13, 32, output_size=1, num_layers=3).to(device)  # build the model and set its parameters (input/output sizes, hidden layers, etc.)
model = LSTM_Regression(13, 16, output_size=1, num_layers=2).to(device)  # build the model and set its parameters (input/output sizes, hidden layers, etc.)
train_loss = []
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
optimizer = torch.optim.Adam(model.parameters(), lr=0.005, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
ds = TensorDataset(x_train,y_train)
dl = DataLoader(ds,batch_size=32,shuffle=True)
dl = DataLoader(ds,batch_size=3,shuffle=True)
for i in range(2500):
for i in range(300):
for j,(x,y) in enumerate(dl):
x,y = x.to(device),y.to(device)
out = model(x)
@ -91,7 +92,7 @@ for i in range(2500):
optimizer.step()
optimizer.zero_grad()
train_loss.append(loss.item())
if j%10 == 0:
if i%100 == 0:
print(f'epoch:{i+1}{j}次loss:{loss}')
# save the model
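A small evaluation sketch (not in the commit) for the LSTM above: it assumes x_eval from earlier in this script holds the same 13 features the model was trained on, reshaped to the (seq_len, batch, input_size) layout the network expects.

model.eval()
with torch.no_grad():
    eval_pred = model(x_eval.reshape(-1, 1, 13)).view(-1).cpu().numpy()
print(eval_pred[:5])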

@ -5,15 +5,23 @@ from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
def season(x):
if str(x)[5:7] in ('06','07','08','12','01','02'):
return 1
else:
return 0
mpl.rcParams['font.sans-serif']=['kaiti']
pd.set_option('display.width',None)
data = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\丽水数据.xlsx',index_col='dtdate')
data.index = pd.to_datetime(data.index,format='%Y-%m-%d')
plt.plot(range(len(data)),data['售电量'])
plt.show()
data['season'] = data.index.map(season)
# plt.plot(range(len(data)),data['售电量'])
# plt.show()
print(data.head())
df_eval = data.loc['2023-9']
@ -22,7 +30,7 @@ df_train = data.loc['2021-1':'2023-8']
print(len(df_eval),len(df_train),len(data))
df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量']]
df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']]
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
# high = df['售电量'].describe()['75%'] + 1.5*IQR
@ -31,14 +39,14 @@ df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量'
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
X = df_train[['tem_max','tem_min','holiday','24ST','rh','prs']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','rh','prs']]
X = df_train[['tem_max','tem_min','holiday','24ST','season']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
y = df_train['售电量']
# best_goal = 1
# best_i = {}
# for i in range(200):
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.15,random_state=176)
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.15,random_state=42)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train,y_train)
@ -62,13 +70,18 @@ goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result
# print(best_i,best_goal)
result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\丽水.csv')
with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
f.write(f'丽水月末3天偏差率{goal},9号-月底偏差率:{goal2}')
# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
# f.write(f'丽水月末3天偏差率{goal},9号-月底偏差率:{goal2}')
# # save the model
# model.save_model('lishui.bin')
# loaded_model = xgb.XGBRegressor()
# loaded_model.load_model('lishui.bin')
# model.predict(X_eval)
import numpy as np
loaded_model = xgb.XGBRegressor()
loaded_model.load_model('lishui.bin')
X_eval = np.array([
[22.5,15.4,23,0,0],
[22.3,15.5,23,1,0],
[20.0,15.7,23,1,0],
[22.0,15.0,23,0,0],
[23.6,13.9,23,0,0]])
print(model.predict(X_eval))
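A short sketch (not part of the commit) of the save / load round trip that the commented-out lines above hint at; it assumes the fitted model from this script and simply checks that a reloaded booster reproduces its predictions on the hand-built X_eval array.

model.save_model('lishui.bin')                 # same file name the script loads above
reloaded = xgb.XGBRegressor()
reloaded.load_model('lishui.bin')
assert np.allclose(model.predict(X_eval), reloaded.predict(X_eval))
print(reloaded.predict(X_eval))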

@ -5,24 +5,30 @@ from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
def season(x):
if str(x)[5:7] in ('06','07','08','12','01','02'):
return 1
else:
return 0
mpl.rcParams['font.sans-serif']=['kaiti']
pd.set_option('display.width',None)
data = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\台州数据(1).xlsx',index_col='dtdate')
data.index = pd.to_datetime(data.index,format='%Y-%m-%d')
plt.plot(range(len(data)),data['售电量'])
plt.show()
data['season'] = data.index.map(season)
# plt.plot(range(len(data)),data['售电量'])
# plt.show()
print(data.head())
df_eval = data.loc['2023-9']
df_eval = data.loc['2023-8']
# df_train = data.loc['2021-1':'2023-7']
df_train = data[500:850]
print(len(df_eval),len(df_train),len(data))
df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量']]
df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']]
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
@ -33,8 +39,8 @@ df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量'
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
X = df_train[['tem_max','tem_min','holiday','24ST','rh','prs']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','rh','prs']]
X = df_train[['tem_max','tem_min','holiday','24ST','season']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
y = df_train['售电量']
best_goal = 1
best_i = {}
@ -64,13 +70,19 @@ goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result
# print(best_i,best_goal)
result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\台州.csv')
with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
f.write(f'台州月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# save the model
model.save_model('taizhou.bin')
#
# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\台州.csv')
# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
# f.write(f'台州月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# # save the model
# model.save_model('taizhou.bin')
import numpy as np
loaded_model = xgb.XGBRegressor()
loaded_model.load_model('taizhou.bin')
model.predict(X_eval)
X_eval = np.array([[25.1,16.8,23,0,0],
[22.8,16.3,23,1,0],
[22.7,14.6,23,1,0],
[22.5,14.4,23,0,0],
[22.6,15.6,23,0,0]])
print(model.predict(X_eval))

@ -5,6 +5,11 @@ from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
def season(x):
if str(x)[5:7] in ('06','07','08','12','01','02'):
return 1
else:
return 0
mpl.rcParams['font.sans-serif']=['kaiti']
@ -17,7 +22,7 @@ df.set_index('dtdate',inplace=True)
plt.plot(range(len(df)),df['售电量'])
plt.show()
print(df.head())
df['season'] = df.index.map(season)
df_eval = df[df.index.str[:7]=='2023-08']
# df_train = df[(df.index.str[:7]!='2023-08')&(df.index.str[:7]!='2023-09')]
@ -26,7 +31,7 @@ print(len(df_eval),len(df_train),len(df))
df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量']]
df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']]
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
@ -37,8 +42,8 @@ df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量'
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
X = df_train[['tem_max','tem_min','holiday','24ST','rh','prs']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','rh','prs']]
X = df_train[['tem_max','tem_min','holiday','24ST','season']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
y = df_train['售电量']
# best_goal = 1
@ -63,6 +68,7 @@ goal = (result_eval['eval'][-3:].sum()-result_eval['pred'][-3:].sum())/result_ev
# print(goal)
goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum()
print(goal,goal2)
# print(goal2)
# if abs(goal) < best_goal :
# best_goal = abs(goal)
@ -73,11 +79,17 @@ goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result
result_eval.to_csv(r'C:\Users\user\Desktop\8月各地市日电量预测结果\嘉兴.csv')
with open(r'C:\Users\user\Desktop\8月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
f.write(f'嘉兴月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# result_eval.to_csv(r'C:\Users\user\Desktop\8月各地市日电量预测结果\嘉兴.csv')
# with open(r'C:\Users\user\Desktop\8月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
# f.write(f'嘉兴月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# save the model
model.save_model('jiaxing.bin')
# model.save_model('jiaxing.bin')
loaded_model = xgb.XGBRegressor()
loaded_model.load_model('jiaxing.bin')
model.predict(X_eval)
import numpy as np
X_eval = np.array([[23.4,16.1,23,0,0],
[23.3,16.0,23,1,0],
[22.0,15.8,23,1,0],
[23.8,15.7,23,0,0],
[24.1,15.3,23,0,0]])
print(model.predict(X_eval))

@ -5,6 +5,11 @@ from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
def season(x):
if str(x)[5:7] in ('06','07','08','12','01','02'):
return 1
else:
return 0
mpl.rcParams['font.sans-serif']=['kaiti']
@ -13,6 +18,7 @@ pd.set_option('display.width',None)
df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\宁波数据.xlsx',index_col='dtdate')
df.index = pd.to_datetime(df.index,format='%Y-%m-%d')
df['season'] = df.index.map(season)
plt.plot(range(len(df)),df['售电量'])
plt.show()
print(df.head())
@ -25,7 +31,7 @@ print(len(df_eval),len(df_train),len(df))
df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量']]
df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']]
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
@ -36,8 +42,8 @@ df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量'
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
X = df_train[['tem_max','tem_min','holiday','24ST','rh','prs']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','rh','prs']]
X = df_train[['tem_max','tem_min','holiday','24ST','season']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
y = df_train['售电量']
# best_goal = 1
@ -71,13 +77,19 @@ print(goal2)
# print(best_i,best_goal,x)
result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\宁波.csv')
with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
f.write(f'宁波月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\宁波.csv')
# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
# f.write(f'宁波月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# save the model
model.save_model('ningbo.bin')
# model.save_model('ningbo.bin')
import numpy as np
loaded_model = xgb.XGBRegressor()
loaded_model.load_model('ningbo.bin')
model.predict(X_eval)
X_eval = np.array([[23.3,15.6,23,0,0],
[22.5,16.0,23,1,0],
[23.4,16.4,23,1,0],
[20.8,15.3,23,0,0],
[23.6,14.0,23,0,0]])
print(model.predict(X_eval))

@ -1,5 +1,6 @@
import xgboost as xgb
import pandas as pd
import numpy as np
import os
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
@ -34,7 +35,7 @@ def normal(nd):
return nd[(nd<high)&(nd>low)]
data = pd.read_excel(r'C:\python-project\pytorch3\入模数据\杭州数据.xlsx',index_col='dtdate')
data = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\杭州数据.xlsx',index_col='dtdate')
data.index = pd.to_datetime(data.index,format='%Y-%m-%d')
data = data.loc[normal(data['售电量']).index]
# plt.plot(range(len(data['售电量']['2021':'2022'])),data['售电量']['2021':'2022'])
@ -45,7 +46,7 @@ data = data.loc[normal(data['售电量']).index]
data['month'] = data.index.strftime('%Y-%m-%d').str[6]
data['month'] = data['month'].astype('int')
data['season'] = data.index.map(season)
print(data.head(50))
print(data.tail(50))
df_eval = data.loc['2022-9':'2023-9']
df_train = data.loc['2021-1':'2022-8']
@ -82,36 +83,53 @@ result_test = pd.DataFrame({'test':y_test,'pred':y_pred},index=y_test.index)
# print the metrics
print(abs(y_test - y_pred).mean() / y_test.mean())
eval_pred = model.predict(X_eval)
result_eval = pd.DataFrame({'eval':df_eval['售电量'],'pred':eval_pred},index=df_eval['售电量'].index)
print((result_eval['eval'].sum()-result_eval['pred'].sum())/result_eval['eval'].sum())
goal = (result_eval['eval'][-3:].sum()-result_eval['pred'][-3:].sum())/result_eval['eval'].sum()
print('goal:',goal)
goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum()
print('goal2:',goal2)
print(result_eval)
print('r2:',r2_score(y_test,y_pred))
# eval_pred = model.predict(X_eval)
#
# result_eval = pd.DataFrame({'eval':df_eval['售电量'],'pred':eval_pred},index=df_eval['售电量'].index)
#
# print((result_eval['eval'].sum()-result_eval['pred'].sum())/result_eval['eval'].sum())
#
# goal = (result_eval['eval'][-3:].sum()-result_eval['pred'][-3:].sum())/result_eval['eval'].sum()
# print('goal:',goal)
#
# goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum()
#
# print('goal2:',goal2)
# print(result_eval)
# print('r2:',r2_score(y_test,y_pred))
#
# # result_eval.to_csv('asda.csv',encoding='gbk')
# # if abs(goal) < best_goal:
# # best_goal = abs(goal)
# # best_i['best_i'] = i
# # x = goal2
# # print(best_i,best_goal,x)
#
#
#
# # result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\杭州.csv')
# # with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
# # f.write(f'杭州月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# save the model
# model.save_model('hangzhou.bin')
# result_eval.to_csv('asda.csv',encoding='gbk')
# if abs(goal) < best_goal:
# best_goal = abs(goal)
# best_i['best_i'] = i
# x = goal2
# print(best_i,best_goal,x)
# X_eval = df_eval[['tem_max','tem_min','24ST','holiday','season']]
df_eval = pd.read_excel(r'C:\Users\user\Desktop\浙江气象1027.xlsx')
df_eval.columns = df_eval.columns.map(lambda x:x.strip())
df_eval = df_eval[['city_name','dtdate','tem_max','tem_min']]
df_eval['city_name'] = df_eval['city_name'].map(lambda x:x.strip())
df_hangzhou = df_eval[df_eval['city_name']=='金华市'].sort_values(by='dtdate')
print(df_hangzhou)
# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\杭州.csv')
# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
# f.write(f'杭州月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# # save the model
# model.save_model('hangzhou.bin')
# loaded_model = xgb.XGBRegressor()
# loaded_model.load_model('hangzhou.bin')
# model.predict(X_eval)
loaded_model = xgb.XGBRegressor()
loaded_model.load_model('hangzhou.bin')
# X_eval = np.array([[26.1,16.1,23,0,0],
# [24.5,14.6,23,1,0],
# [24.0,15.2,23,1,0],
# [22.7,14.9,23,0,0],
# [24.1,13.4,23,0,0]])
#
# print(loaded_model.predict(X_eval))

@ -5,6 +5,11 @@ from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
def season(x):
if str(x)[5:7] in ('06','07','08','12','01','02'):
return 1
else:
return 0
mpl.rcParams['font.sans-serif']=['kaiti']
@ -13,6 +18,7 @@ pd.set_option('display.width',None)
df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\温州数据(1).xlsx',index_col='dtdate')
df.index = pd.to_datetime(df.index,format='%Y-%m-%d')
df['season'] = df.index.map(season)
plt.plot(range(len(df)),df['售电量'])
plt.show()
print(df.head())
@ -25,7 +31,7 @@ print(len(df_eval),len(df_train),len(df))
df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量']]
df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']]
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
@ -36,15 +42,15 @@ df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量'
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
X = df_train[['tem_max','tem_min','holiday','24ST','rh','prs']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','rh','prs']]
X = df_train[['tem_max','tem_min','holiday','24ST','season']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
y = df_train['售电量']
# best_goal = 1
# best_i = {}
# for i in range(400):
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=304)
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=42)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train,y_train)
@ -70,11 +76,19 @@ print(goal2)
# print(best_i,best_goal,x)
result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\温州.csv')
with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
f.write(f'温州月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\温州.csv')
# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
# f.write(f'温州月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# save the model
model.save_model('wenzhou.bin')
# model.save_model('wenzhou.bin')
loaded_model = xgb.XGBRegressor()
loaded_model.load_model('wenzhou.bin')
model.predict(X_eval)
import numpy as np
X_eval = np.array([[24.8,17.9,23,0,0],
[23.1,15.4,23,1,0],
[22.2,16.0,23,1,0],
[22.1,14.9,23,0,0],
[23.5,14.3,23,0,0]])
print(model.predict(X_eval))

@ -7,12 +7,17 @@ import matplotlib as mpl
mpl.rcParams['font.sans-serif']=['kaiti']
import random
import matplotlib.pyplot as plt
def season(x):
if str(x)[5:7] in ('06','07','08','12','01','02'):
return 1
else:
return 0
pd.set_option('display.width',None)
df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\湖州数据.xlsx',index_col='dtdate')
df.index = pd.to_datetime(df.index,format='%Y-%m-%d')
df['season'] = df.index.map(season)
print(df.head())
df_eval = df.loc['2023-9']
@ -20,7 +25,7 @@ df_train = df.loc['2021-1':'2023-8']
print(len(df_eval),len(df_train),len(df))
df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量']]
df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']]
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
@ -31,8 +36,8 @@ df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量'
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
X = df_train[['tem_max','tem_min','holiday','24ST','rh','prs']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','rh','prs']]
X = df_train[['tem_max','tem_min','holiday','24ST','season']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
y = df_train['售电量']
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=100)
@ -58,14 +63,20 @@ print(goal2)
result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\湖州.csv')
with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
f.write(f'湖州月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\湖州.csv')
# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
# f.write(f'湖州月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# save the model
# # save the model
#
# model.save_model('huzhou.bin')
# loaded_model = xgb.XGBRegressor()
# loaded_model.load_model('huzhou.bin')
# model.predict(X_eval)
loaded_model = xgb.XGBRegressor()
loaded_model.load_model('huzhou.bin')
import numpy as np
X_eval = np.array([[22.2,14.8,23,0,0],
[23.4,15.9,23,1,0],
[22.5,15.6,23,1,0],
[23.8,14.3,23,0,0],
[23.9,14.0,23,0,0]])
print(model.predict(X_eval))

@ -5,6 +5,11 @@ from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
def season(x):
if str(x)[5:7] in ('06','07','08','12','01','02'):
return 1
else:
return 0
mpl.rcParams['font.sans-serif']=['kaiti']
@ -13,7 +18,7 @@ pd.set_option('display.width',None)
df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\绍兴数据(1).xlsx',index_col='dtdate')
df.index = pd.to_datetime(df.index ,format='%Y-%m-%d')
df['season'] = df.index.map(season)
plt.plot(range(len(df)),df['售电量'])
plt.show()
print(df.head())
@ -26,7 +31,7 @@ print(len(df_eval),len(df_train),len(df))
df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量']]
df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']]
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
@ -37,8 +42,8 @@ df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量'
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
X = df_train[['tem_max','tem_min','holiday','24ST','rh','prs']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','rh','prs']]
X = df_train[['tem_max','tem_min','holiday','24ST','season']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
y = df_train['售电量']
@ -46,7 +51,7 @@ y = df_train['售电量']
# best_i = {}
# for i in range(400):
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=253)
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=42)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train,y_train)
@ -74,14 +79,20 @@ print(goal2)
result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\绍兴.csv')
with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
f.write(f'绍兴月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\绍兴.csv')
# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
# f.write(f'绍兴月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# save the model
model.save_model('shaoxing.bin')
loaded_model = xgb.XGBRegressor()
loaded_model.load_model('shaoxing.bin')
model.predict(X_eval)
import numpy as np
X_eval = np.array([[24.7,15.9,23,0,0],
[24.4,16.2,23,1,0],
[23.6,15.1,23,1,0],
[24.7,15.3,23,0,0],
[24.9,14.6,23,0,0]])
print(model.predict(X_eval))

@ -7,13 +7,17 @@ import matplotlib as mpl
mpl.rcParams['font.sans-serif']=['kaiti']
import random
import matplotlib.pyplot as plt
def season(x):
if str(x)[5:7] in ('06','07','08','12','01','02'):
return 1
else:
return 0
pd.set_option('display.width',None)
df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\舟山数据(1).xlsx',index_col='dtdate')
df.index = pd.to_datetime(df.index,format='%Y-%m-%d')
df['season'] = df.index.map(season)
print(df.head())
df_eval = df.loc['2023-9']
@ -21,7 +25,7 @@ df_train = df.loc['2021-1':'2023-8']
print(len(df_eval),len(df_train),len(df))
df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量']]
df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']]
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
@ -32,11 +36,11 @@ df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量'
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
X = df_train[['tem_max','tem_min','holiday','24ST','rh','prs']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','rh','prs']]
X = df_train[['tem_max','tem_min','holiday','24ST','season']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
y = df_train['售电量']
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=100)
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=158)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train,y_train)
@ -57,11 +61,17 @@ print(goal)
goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum()
print(goal2)
result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\舟山.csv')
with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
f.write(f'舟山月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\舟山.csv')
# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
# f.write(f'舟山月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
model.save_model('zhoushan.bin')
loaded_model = xgb.XGBRegressor()
loaded_model.load_model('zhoushan.bin')
model.predict(X_eval)
import numpy as np
X_eval = np.array([[22.6,18.7,23,0,0],
[21.6,17.9,23,1,0],
[21.9,18.2,23,1,0],
[20.7,18.2,23,0,0],
[22.3,18.0,23,0,0]])
print(model.predict(X_eval))

@ -5,6 +5,12 @@ from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
def season(x):
if str(x)[5:7] in ('06','07','08','12','01','02'):
return 1
else:
return 0
mpl.rcParams['font.sans-serif']=['kaiti']
@ -14,6 +20,7 @@ pd.set_option('display.width',None)
df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\衢州数据.xlsx')
df['dtdate'] = pd.to_datetime(df['dtdate'],format='%Y-%m-%d').astype('string')
df.set_index('dtdate',inplace=True)
df['season'] = df.index.map(season)
plt.plot(range(len(df)),df['售电量'])
plt.show()
print(df.head())
@ -23,13 +30,13 @@ print(df.head())
# df_train = df[(df.index.str[:7]!='2023-09')&(df.index.str!='2023-08-29')&(df.index.str!='2023-08-30')&(df.index.str!='2023-08-31')]
df_eval = df[df.index.str[:7]=='2023-07']
# df_train = df[(df.index.str[:7]!='2023-08')&(df.index.str[:7]!='2023-09')]
df_train = df[450:900]
df_train = df[(df.index.str[:7]!='2023-08')&(df.index.str[:7]!='2023-09')]
# df_train = df[450:900]
# max_8,min_8 = df_eval['售电量'].max(),df_eval['售电量'].min()
print(len(df_eval),len(df_train),len(df))
df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量']]
df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']]
IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
high = df['售电量'].describe()['75%'] + 1.5*IQR
@ -39,11 +46,11 @@ print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] <
df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
X = df_train[['tem_max','tem_min','holiday','24ST','rh','prs']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','rh','prs']]
X = df_train[['tem_max','tem_min','holiday','24ST','season']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
y = df_train['售电量']
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=102)
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.1, n_estimators=150)
model.fit(x_train,y_train)
y_pred = model.predict(x_test)
@ -62,17 +69,24 @@ print(goal2)
# print((result_eval['eval'].sum()-result_eval['pred'].sum())/result_eval['eval'].sum())
# print((result_eval['eval'].sum()-(result_eval['eval'][:-3].sum()+result_eval['pred'][-3:].sum()))/result_eval['eval'].sum())
#
result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\衢州.csv')
with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
f.write(f'衢州月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# #
# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\衢州.csv')
# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
# f.write(f'衢州月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
#
# save the model
# model.save_model('quzhou.bin')
# loaded_model = xgb.XGBRegressor()
# loaded_model.load_model('quzhou.bin')
# model.predict(X_eval)
loaded_model = xgb.XGBRegressor()
loaded_model.load_model('quzhou.bin')
import numpy as np
X_eval = np.array([[24.0,15.6,23,0,0],
[24.2,17.1,23,1,0],
[22.6,16.7,23,1,0],
[23.5,15.5,23,0,0],
[24.5,13.9,23,0,0]])
print(model.predict(X_eval))
# from sklearn.ensemble import RandomForestRegressor

@ -5,6 +5,11 @@ from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
def season(x):
if str(x)[5:7] in ('06','07','08','12','01','02'):
return 1
else:
return 0
mpl.rcParams['font.sans-serif']=['kaiti']
@ -14,19 +19,20 @@ pd.set_option('display.width',None)
df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\金华数据.xlsx')
df['dtdate'] = pd.to_datetime(df['dtdate'],format='%Y-%m-%d').astype('string')
df.set_index('dtdate',inplace=True)
df['season'] = df.index.map(season)
plt.plot(range(len(df)),df['售电量'])
plt.show()
print(df.head())
df_eval = df[df.index.str[:7]=='2023-09']
# df_train = df[(df.index.str[:7]!='2023-08')&(df.index.str[:7]!='2023-09')]
df_train = df[500:850]
df_train = df[(df.index.str[:7]!='2023-08')&(df.index.str[:7]!='2023-09')]
# df_train = df[500:850]
print(len(df_eval),len(df_train),len(df))
df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量']]
df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']]
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
@ -37,15 +43,15 @@ df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量'
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
X = df_train[['tem_max','tem_min','holiday','24ST','rh','prs']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','rh','prs']]
X = df_train[['tem_max','tem_min','holiday','24ST','season']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
y = df_train['售电量']
# best_goal = 1
# best_i = {}
# for i in range(400):
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=396)
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=142)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train,y_train)
@ -70,13 +76,19 @@ print(goal2)
# x = goal2
# print(best_i,best_goal,x)
result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\金华.csv')
with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
f.write(f'金华月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\金华.csv')
# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
# f.write(f'金华月末3天偏差率{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# # save the model
# model.save_model('jinhua.bin')
# loaded_model = xgb.XGBRegressor()
# loaded_model.load_model('jinhua.bin')
# model.predict(X_eval)
loaded_model = xgb.XGBRegressor()
loaded_model.load_model('jinhua.bin')
import numpy as np
X_eval = np.array([[26.0,15.0,23,0,0],
[24.9,17.6,23,1,0],
[24.0,17.7,23,1,0],
[24.8,15.8,23,0,0],
[25.2,14.0,23,0,0]])
print(model.predict(X_eval))

@ -0,0 +1,16 @@
import torch
import pandas as pd
from 电压等级_输出为5 import LSTM_Regression
from 电压等级_输出为5 import create_dataset
model = LSTM_Regression(10, 32, output_size=5, num_layers=2)
model.load_state_dict(torch.load('dy5.pth'))
df_eval = pd.read_excel(r'C:\Users\user\Desktop\浙江各地市分电压日电量数据\杭州.xlsx',index_col=' stat_date ')
df_eval.columns = df_eval.columns.map(lambda x:x.strip())
df_eval.index = pd.to_datetime(df_eval.index)
x,y = create_dataset(df_eval.loc['2023-10']['10kv以下'],10)
x = x.reshape(-1,1,10)
print(x.shape,y.shape)
x = torch.from_numpy(x).type(torch.float32)
print(model(x),y)
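One caveat worth a sketch (an assumption, not taken from the commit): the training script later in this diff scales its windows to 0~1 with a global min_value / max_value before fitting, so if dy5.pth was produced that way, the raw windows built here would need the same scaling, roughly:

# min_value / max_value are assumed to be carried over from training; they are not defined in this file
x_scaled = (x - min_value) / (max_value - min_value)
with torch.no_grad():
    pred = model(x_scaled).view(-1, 5) * (max_value - min_value) + min_value
print(pred[:3], y[:3])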

@ -17,8 +17,8 @@ class LSTM_Regression(nn.Module):
self.fc = nn.Linear(hidden_size, output_size)
def forward(self, _x):
x, _ = self.lstm(_x) # _x is input, size (seq_len, batch, input_size)
s, b, h = x.shape # x is output, size (seq_len, batch, hidden_size)
        x, _ = self.lstm(_x)  # _x is input, size (seq_len, batch, input_size); the last dimension is the per-sample feature size (10 here)
        s, b, h = x.shape     # x is output, size (seq_len, batch, hidden_size); after the LSTM the last dimension is the hidden size
        x = x.view(s * b, h)
        x = self.fc(x)
        x = x.view(s, b, -1)  # reshape back to (seq_len, batch, output_size)
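A shape walk-through (illustration only; the sizes are assumptions, e.g. DAYS_FOR_TRAIN = 10, hidden_size = 32, output_size = 5) of the reshaping done in forward above:

import torch
import torch.nn as nn

lstm = nn.LSTM(10, 32, num_layers=2)   # input_size=10, hidden_size=32, batch_first=False
fc = nn.Linear(32, 5)                  # hidden_size -> output_size
_x = torch.randn(7, 1, 10)             # (seq_len=7, batch=1, input_size=10)
x, _ = lstm(_x)                        # -> (7, 1, 32)
s, b, h = x.shape
x = fc(x.view(s * b, h))               # flatten to (7, 32), project to (7, 5)
print(x.view(s, b, -1).shape)          # torch.Size([7, 1, 5])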
@ -52,110 +52,133 @@ def data_preprocessing(data):
return data
# concatenate the datasets
file_dir = r'C:\Users\鸽子\Desktop\浙江各地市分电压日电量数据'
excel = os.listdir(file_dir)[0]
if __name__ == '__main__':
    # concatenate the datasets
file_dir = r'C:\Users\user\Desktop\浙江各地市分电压日电量数据'
excel = os.listdir(file_dir)[0]
data = pd.read_excel(os.path.join(file_dir, excel), sheet_name=0, index_col=' stat_date ')
data = pd.read_excel(os.path.join(file_dir, excel), sheet_name=0, index_col=' stat_date ')
data = data_preprocessing(data)
data = data_preprocessing(data)
df = data[data.columns[0]]
df.dropna(inplace = True)
dataset_x, dataset_y = create_dataset(df, DAYS_FOR_TRAIN)
for level in data.columns[1:]:
df = data[level]
df.dropna(inplace=True)
x, y = create_dataset(df, DAYS_FOR_TRAIN)
dataset_x = np.concatenate((dataset_x, x))
dataset_y = np.concatenate((dataset_y, y))
for excel in os.listdir(file_dir)[1:]:
data = pd.read_excel(os.path.join(file_dir,excel), sheet_name=0,index_col=' stat_date ')
data = data_preprocessing(data)
for level in data.columns:
df = data[level]
df.dropna(inplace=True)
x,y = create_dataset(df,DAYS_FOR_TRAIN)
dataset_x = np.concatenate((dataset_x,x))
dataset_y = np.concatenate((dataset_y,y))
print(dataset_x,dataset_y,dataset_x.shape,dataset_y.shape)
# training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# scale the data to the 0~1 range
max_value = np.max(dataset_x)
min_value = np.min(dataset_x)
dataset_x = (dataset_x - min_value) / (max_value - min_value)
dataset_y = (dataset_y - min_value) / (max_value - min_value)
# split into training and test sets
train_size = int(len(dataset_x)*0.7)
train_x = dataset_x[:train_size]
train_y = dataset_y[:train_size]
# reshape the data: the RNN expects input of shape (seq_size, batch_size, feature_size)
train_x = train_x.reshape(-1, 1, DAYS_FOR_TRAIN)
train_y = train_y.reshape(-1, 1, 5)
# convert to pytorch tensors
train_x = torch.from_numpy(train_x).to(device).type(torch.float32)
train_y = torch.from_numpy(train_y).to(device).type(torch.float32)
model = LSTM_Regression(DAYS_FOR_TRAIN, 32, output_size=5, num_layers=2).to(device)  # build the model and set its parameters (input/output sizes, hidden layers, etc.)
train_loss = []
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.005, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
# for i in range(1500):
# out = model(train_x)
# loss = loss_function(out, train_y)
# loss.backward()
# optimizer.step()
# optimizer.zero_grad()
# train_loss.append(loss.item())
# # print(loss)
# # save the model
# torch.save(model.state_dict(),'dy5.pth')
model.load_state_dict(torch.load('dy5.pth'))
# for test
model = model.eval()  # switch to evaluation mode
# model.load_state_dict(torch.load(os.path.join(model_save_dir,model_file)))  # load the saved parameters
dataset_x = dataset_x.reshape(-1, 1, DAYS_FOR_TRAIN) # (seq_size, batch_size, feature_size)
dataset_x = torch.from_numpy(dataset_x).to(device).type(torch.float32)
pred_test = model(dataset_x)  # run the model on the full dataset
# model output: (seq_size, batch_size, output_size)
pred_test = pred_test.view(-1)
pred_test = np.concatenate((np.zeros(DAYS_FOR_TRAIN), pred_test.cpu().detach().numpy()))
# plt.plot(pred_test.reshape(-1), 'r', label='prediction')
# plt.plot(dataset_y.reshape(-1), 'b', label='real')
# plt.plot((train_size*5, train_size*5), (0, 1), 'g--')  # dividing line: training data on the left, test predictions on the right
# plt.legend(loc='best')
# plt.show()
df = data[data.columns[0]]
df.dropna(inplace = True)
dataset_x, dataset_y = create_dataset(df, DAYS_FOR_TRAIN)
for level in data.columns[1:]:
df = data[level]
df.dropna(inplace=True)
x, y = create_dataset(df, DAYS_FOR_TRAIN)
dataset_x = np.concatenate((dataset_x, x))
dataset_y = np.concatenate((dataset_y, y))
for excel in os.listdir(file_dir)[1:]:
data = pd.read_excel(os.path.join(file_dir,excel), sheet_name=0,index_col=' stat_date ')
data = data_preprocessing(data)
for level in data.columns:
df = data[level]
df.dropna(inplace=True)
x,y = create_dataset(df,DAYS_FOR_TRAIN)
dataset_x = np.concatenate((dataset_x,x))
dataset_y = np.concatenate((dataset_y,y))
print(dataset_x,dataset_y,dataset_x.shape,dataset_y.shape)
    # training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # scale the data to the 0~1 range
max_value = np.max(dataset_x)
min_value = np.min(dataset_x)
dataset_x = (dataset_x - min_value) / (max_value - min_value)
dataset_y = (dataset_y - min_value) / (max_value - min_value)
    # split into training and test sets
    train_size = int(len(dataset_x)*0.7)  # cast to int so it can be used as a slice index below
train_x = dataset_x[:train_size]
train_y = dataset_y[:train_size]
    # reshape the data: the RNN expects input of shape (seq_size, batch_size, feature_size)
train_x = train_x.reshape(-1, 1, DAYS_FOR_TRAIN)
train_y = train_y.reshape(-1, 1, 5)
    # convert to pytorch tensors
train_x = torch.from_numpy(train_x).to(device)
train_y = torch.from_numpy(train_y).to(device)
    model = LSTM_Regression(DAYS_FOR_TRAIN, 32, output_size=3, num_layers=2).to(device)  # build the model and set its parameters (input/output sizes, hidden layers, etc.)
train_loss = []
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.005, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
for i in range(1500):
out = model(train_x)
loss = loss_function(out, train_y)
loss.backward()
optimizer.step()
optimizer.zero_grad()
train_loss.append(loss.item())
# print(loss)
    # save the model
torch.save(model.state_dict(),'dy5.pth')
# for test
    model = model.eval()  # switch to evaluation mode
    # model.load_state_dict(torch.load(os.path.join(model_save_dir,model_file)))  # load the saved parameters
dataset_x = dataset_x.reshape(-1, 1, DAYS_FOR_TRAIN) # (seq_size, batch_size, feature_size)
dataset_x = torch.from_numpy(dataset_x).to(device)
    pred_test = model(dataset_x)  # run the model on the full dataset
    # model output: (seq_size, batch_size, output_size)
pred_test = pred_test.view(-1)
pred_test = np.concatenate((np.zeros(DAYS_FOR_TRAIN), pred_test.cpu().detach().numpy()))
plt.plot(pred_test, 'r', label='prediction')
plt.plot(df, 'b', label='real')
    plt.plot((train_size, train_size), (0, 1), 'g--')  # dividing line: training data on the left, test predictions on the right
plt.legend(loc='best')
plt.show()
    # build the test set
# result_list = []
    # roll forward from the real-data window x to predict the next 3 days
# x = torch.from_numpy(df[-14:-4]).to(device)
# pred = model(x.reshape(-1,1,DAYS_FOR_TRAIN)).view(-1).detach().numpy()
df_eval = pd.read_excel(r'C:\Users\user\Desktop\浙江各地市分电压日电量数据\杭州.xlsx',index_col=' stat_date ')
df_eval.columns = df_eval.columns.map(lambda x:x.strip())
df_eval.index = pd.to_datetime(df_eval.index)
x,y = create_dataset(df_eval.loc['2023-7']['10kv以下'],10)
x = (x - min_value) / (max_value - min_value)
x = x.reshape(-1,1,10)
x = torch.from_numpy(x).type(torch.float32).to(device)
pred = model(x)
    # undo the min-max normalization
# pred = pred * (max_value - min_value) + min_value
pred = pred * (max_value - min_value) + min_value
# df = df * (max_value - min_value) + min_value
# print(pred)
    # # print the metrics
print(pred,y)
df = pd.DataFrame({'real':y.reshape(-1),'pred':pred.view(-1).cpu().detach().numpy()})
df.to_csv('7月预测.csv',encoding='gbk')
    # print the metrics
# print(abs(pred - df[-3:]).mean() / df[-3:].mean())
# result_eight = pd.DataFrame({'pred': np.round(pred,1),'real': df[-3:]})
# target = (result_eight['pred'].sum() - result_eight['real'].sum()) / df[-31:].sum()
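create_dataset itself sits above the hunk shown here. For readers of the diff, a sliding-window helper consistent with how it is called in this file (10-day input windows and, judging by the (-1, 1, 5) reshape of the targets, 5-day-ahead targets) might look roughly like the sketch below; the exact horizon is an assumption.

import numpy as np

def create_dataset_sketch(series, days_for_train=10, horizon=5):
    '''Sketch only: turn a 1-D series into (input window, next-horizon values) pairs.'''
    values = np.asarray(series, dtype='float32')
    xs, ys = [], []
    for i in range(len(values) - days_for_train - horizon + 1):
        xs.append(values[i:i + days_for_train])                              # 10-day input window
        ys.append(values[i + days_for_train:i + days_for_train + horizon])   # next 5 days
    return np.array(xs), np.array(ys)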
