diff --git a/.idea/misc.xml b/.idea/misc.xml
index 3141537..695b918 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,4 +1,4 @@
-
+
\ No newline at end of file
diff --git a/.idea/pytorch2.iml b/.idea/pytorch2.iml
index 719cec4..5cfdc49 100644
--- a/.idea/pytorch2.iml
+++ b/.idea/pytorch2.iml
@@ -2,7 +2,7 @@
-
+
\ No newline at end of file
diff --git a/BPNN.py b/BPNN.py
new file mode 100644
index 0000000..2e79530
--- /dev/null
+++ b/BPNN.py
@@ -0,0 +1,160 @@
+# encoding:utf-8
+
+'''
+BP神经网络Python实现
+'''
+
+import random
+import numpy as np
+
+
+def sigmoid(x):
+    '''
+    Logistic activation 1 / (1 + e^(-x)); works element-wise on NumPy arrays.
+    '''
+    return 1.0 / (np.exp(-x) + 1.0)
+
+
+def sigmoid_prime(x):  # derivative of the logistic function: s(x) * (1 - s(x))
+    return (1 - sigmoid(x)) * sigmoid(x)
+
+
+class BPNNRegression:
+    '''
+    Fully connected back-propagation network for regression.
+    Unlike the classification variant, the output layer is linear (no
+    activation), so the output-layer delta is simply (output - target).
+    '''
+
+    def __init__(self, sizes):
+        '''sizes: number of neurons in each layer, input layer first.'''
+        # Network structure
+        self.num_layers = len(sizes)
+        self.sizes = sizes
+
+        # One bias column per non-input layer, sampled from a standard normal
+        self.biases = [np.random.randn(n, 1) for n in sizes[1:]]
+        # One (next_layer, previous_layer) weight matrix per pair of adjacent layers
+        self.weights = [np.random.randn(r, c)
+                        for c, r in zip(sizes[:-1], sizes[1:])]
+
+    def feed_forward(self, a):
+        '''
+        Propagate the input column vector a through the network and return the output.
+        '''
+        for i, b, w in zip(range(len(self.biases)), self.biases, self.weights):
+            # The output layer is linear: skip the activation function
+            if i == len(self.biases) - 1:
+                a = np.dot(w, a) + b
+                break
+            a = sigmoid(np.dot(w, a) + b)
+        return a
+
+    def MSGD(self, training_data, epochs, mini_batch_size, eta, error=0.01):
+        '''
+        Mini-batch stochastic gradient descent with early stopping.
+        training_data: sequence of (x, y) pairs; a copy is shuffled, the caller's order is kept.
+        epochs: maximum passes; mini_batch_size: samples per update; eta: learning rate.
+        Returns the summed error after the last epoch run (the initial error if epochs == 0).
+        '''
+        data = list(training_data)  # shuffle a private copy, not the caller's list
+        n = len(data)
+        err_epoch = self.evaluate(data)  # keeps the return value defined when no epoch runs
+        for j in range(epochs):
+            # Randomise the sample order for this epoch
+            random.shuffle(data)
+            # Split the shuffled samples into mini-batches
+            mini_batchs = [data[k:k + mini_batch_size]
+                           for k in range(0, n, mini_batch_size)]
+            # Update w and b once per mini-batch
+            for mini_batch in mini_batchs:
+                self.updata_WB_by_mini_batch(mini_batch, eta)
+
+            # Error over the whole training set after this epoch
+            err_epoch = self.evaluate(data)
+            if j % 100 == 0:  # log every 100th epoch (`//` logged epochs 0-99 only)
+                print("Epoch {0} Error {1}".format(j, err_epoch))
+            if err_epoch < error:
+                break
+        return err_epoch
+
+    def updata_WB_by_mini_batch(self, mini_batch, eta):
+        '''
+        Apply one gradient-descent step using the gradient averaged over a mini-batch.
+        mini_batch: list of (x, y) training pairs
+        eta: learning rate
+        '''
+        if not mini_batch:  # nothing to average; also avoids dividing eta by zero
+            return
+        # Gradient accumulators shaped like biases / weights, starting at zero
+        batch_par_b = [np.zeros(b.shape) for b in self.biases]
+        batch_par_w = [np.zeros(w.shape) for w in self.weights]
+
+        for x, y in mini_batch:
+            # Per-sample gradients of the cost with respect to b and w
+            delta_b, delta_w = self.back_propagation(x, y)
+            # Accumulate them over the mini-batch
+            batch_par_b = [bb + dbb for bb, dbb in zip(batch_par_b, delta_b)]
+            batch_par_w = [bw + dbw for bw, dbw in zip(batch_par_w, delta_w)]
+        # Step against the accumulated gradient; dividing eta by the batch
+        # size turns the accumulated sum into a mean
+        self.weights = [w - (eta / len(mini_batch)) * dw
+                        for w, dw in zip(self.weights, batch_par_w)]
+        self.biases = [b - (eta / len(mini_batch)) * db
+                       for b, db in zip(self.biases, batch_par_b)]
+
+    def back_propagation(self, x, y):
+        '''
+        Compute the per-sample gradients of the cost with respect to every b and w.
+        x: input column vector, expected shape (sizes[0], 1)
+        y: target column vector, expected shape (sizes[-1], 1)
+        Returns (delta_b, delta_w), lists laid out like self.biases / self.weights.
+        '''
+        delta_b = [np.zeros(b.shape) for b in self.biases]
+        delta_w = [np.zeros(w.shape) for w in self.weights]
+
+        # Forward pass
+        a = x  # activation of the current layer
+        # Activations of every layer, input included
+        activations = [x]
+        # Weighted inputs z of every non-input layer
+        zs = []
+        for b, w in zip(self.biases, self.weights):
+            z = np.dot(w, a) + b
+            zs.append(z)
+            a = sigmoid(z)
+            activations.append(a)
+        # Regression: the output layer is linear, so its activation is z itself
+        activations[-1] = zs[-1]
+        # Output-layer delta. Unlike classification it is not multiplied by
+        # sigmoid_prime(z), because the output layer has no activation
+        delta = self.cost_function(activations[-1], y)
+        delta_b[-1] = delta
+        delta_w[-1] = np.dot(delta, activations[-2].T)
+        for lev in range(2, self.num_layers):
+            # Walk backwards: -lev indexes the lev-th layer from the end, and
+            # its delta is derived from the delta of the layer after it
+            z = zs[-lev]
+            zp = sigmoid_prime(z)
+            delta = np.dot(self.weights[-lev + 1].T, delta) * zp
+            delta_b[-lev] = delta
+            delta_w[-lev] = np.dot(delta, activations[-lev - 1].T)
+        return (delta_b, delta_w)
+
+    def evaluate(self, train_data):
+        '''
+        Return the summed squared error 0.5 * (output - y) ** 2 over all (x, y) pairs.
+        '''
+        test_result = [[self.feed_forward(x), y]
+                       for x, y in train_data]
+        return np.sum([0.5 * (x - y) ** 2 for (x, y) in test_result])
+
+    def predict(self, test_input):
+        '''Return the list of network outputs, one per input in test_input.'''
+        return [self.feed_forward(x) for x in test_input]
+
+    def cost_function(self, output_a, y):
+        '''
+        Derivative of the cost 0.5 * (output_a - y) ** 2 with respect to output_a.
+        '''
+        return (output_a - y)
diff --git a/bp神经网络.py b/bp神经网络.py
index dd3197f..4dd32e5 100644
--- a/bp神经网络.py
+++ b/bp神经网络.py
@@ -3,37 +3,33 @@ import pandas as pd
import matplotlib.pyplot as plt
import torch
from sklearn import preprocessing
-from torch.utils.data import DataLoader,TensorDataset
-
-
-data = pd.read_excel(r'C:\python-project\pytorch3\入模数据\杭州数据.xlsx',index_col='dtdate')
+data = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\杭州数据.xlsx',index_col='dtdate')
print(data.columns)
-x = np.array(data.drop(columns=['售电量','city_name']).loc['2021-1':'2023-6'])
-y = np.array(data['售电量'].loc['2021-1':'2023-6'])
+y = np.array(data['售电量']) # 制作标签,用于比对训练结果
+x = data.drop(columns=['售电量','city_name']) # 在特征数据集中去掉label
+# df.drop(label, axis=0)
+# label:要删除的列或者行,如果要删除多个,传入列表
+# axis:轴的方向,0为行,1为列,默认为0
+fea_train = np.array(x) # 转换为ndarray格式
# 数据标准化操作:(x-均值μ) / 标准差σ ,使数据关于原点对称,提升训练效率
-input_features = preprocessing.StandardScaler().fit_transform(np.array(x)) # fit:求出均值和标准差 transform:求解
-
-# y归一化
-min = np.min(y)
-max = np.max(y)
-y = (y - min)/(max - min)
-
-x_eval = torch.from_numpy(data.drop(columns=['售电量','city_name']).loc['2023-7'].values).type(torch.float32)
-y_eval = torch.from_numpy(data['售电量'].loc['2023-7'].values).type(torch.float32)
-
-ds = TensorDataset(torch.from_numpy(x),torch.from_numpy(y))
-dl = DataLoader(ds,batch_size=12,shuffle=True,drop_last=True)
+input_features = preprocessing.StandardScaler().fit_transform(fea_train) # fit:求出均值和标准差 transform:求解
+# y归一化 防止梯度爆炸
+y = (y - np.min(y))/(np.max(y) - np.min(y))
+print(y)
# 设定神经网络的输入参数、隐藏层神经元、输出参数的个数
input_size = input_features.shape[1] # 设定输入特征个数
-
-hidden_size = 64
-output_size =1
+# np.shape[1]
+# 0为行,1为列,默认为0
+# 在此表格中因为每行为各样本的值,每列为不同的特征分类,所以此处0表示样本数,1表示特征数
+hidden_size = 64 # 设定隐藏层包含64个神经元
+output_size = 1 # 设定输出特征个数为1
+batch_size = 32 # 每一批迭代的特征数量
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # 选择使用GPU训练
@@ -44,21 +40,24 @@ my_nn = torch.nn.Sequential(
torch.nn.ReLU().to(device),
torch.nn.Linear(hidden_size, hidden_size).to(device), # 第二层 → 第三层
torch.nn.ReLU().to(device),
- torch.nn.Linear(hidden_size, output_size)
+ torch.nn.Linear(hidden_size, hidden_size).to(device), # 第三层 → 第四层
+ torch.nn.ReLU().to(device),
+ torch.nn.Linear(hidden_size, output_size).to(device) # 第四层 → 输出层
).to(device)
cost = torch.nn.MSELoss().to(device)
-optimizer = torch.optim.Adam(my_nn.parameters(), lr=0.0001)
+optimizer = torch.optim.Adam(my_nn.parameters(), lr=0.001)
# 训练网络
losses = []
-for i in range(1000):
+for i in range(300):
batch_loss = []
# 采用MINI-Batch的方法进行训练
- for X,y in dl:
- X,y = X.to(device).type(torch.float32),y.to(device).type(torch.float32)
-
- prediction = my_nn(X)
- loss = cost(y, prediction)
+    for start in range(0, len(input_features), batch_size):
+        end = min(start + batch_size, len(input_features))  # last batch may be shorter
+        x_train = torch.tensor(input_features[start:end], dtype=torch.float32).to(device)
+        y_train = torch.tensor(y[start:end], dtype=torch.float32).reshape(-1, 1).to(device)  # (B, 1) like the network output
+        prediction = my_nn(x_train)
+        loss = cost(prediction, y_train)  # same shapes, so MSELoss does not broadcast (B,) against (B, 1)
optimizer.zero_grad()
loss.backward(retain_graph=True)
optimizer.step()
@@ -66,16 +65,17 @@ for i in range(1000):
if i % 10 == 0:
losses.append(np.mean(batch_loss))
+ print(losses)
print(i, np.mean(batch_loss))
# 保存模型
# torch.save(my_nn, 'BP.pt')
# 绘制图像
-# dev_x = [i * 10 for i in range(20)]
-# plt.xlabel('step count')
-# plt.ylabel('loss')
-# plt.xlim((0, 200))
-# plt.ylim((0, 1000))
-# plt.plot(dev_x, losses)
-# plt.show()
+dev_x = [i * 10 for i in range(len(losses))]  # one x per recorded loss (a loss is stored every 10 epochs)
+plt.xlabel('step count')
+plt.ylabel('loss')
+plt.xlim((0, max(dev_x, default=0) + 10))  # follow the number of epochs actually run
+plt.ylim((0, max(losses, default=1.0) * 1.1))  # target is normalised, so a fixed 0-1000 range would flatten the curve
+plt.plot(dev_x, losses)
+plt.show()
diff --git a/各地级市日电量模型/hangzhou.bin b/各地级市日电量模型/hangzhou.bin
index d8fc03f..3df6ddc 100644
Binary files a/各地级市日电量模型/hangzhou.bin and b/各地级市日电量模型/hangzhou.bin differ
diff --git a/各地级市日电量模型/huzhou.bin b/各地级市日电量模型/huzhou.bin
index 59128bb..a60096c 100644
Binary files a/各地级市日电量模型/huzhou.bin and b/各地级市日电量模型/huzhou.bin differ
diff --git a/各地级市日电量模型/jiaxing.bin b/各地级市日电量模型/jiaxing.bin
index 598c0a6..3375d37 100644
Binary files a/各地级市日电量模型/jiaxing.bin and b/各地级市日电量模型/jiaxing.bin differ
diff --git a/各地级市日电量模型/jinhua.bin b/各地级市日电量模型/jinhua.bin
index 29a3cb7..e35d168 100644
Binary files a/各地级市日电量模型/jinhua.bin and b/各地级市日电量模型/jinhua.bin differ
diff --git a/各地级市日电量模型/lishui.bin b/各地级市日电量模型/lishui.bin
index 887f183..6c4be98 100644
Binary files a/各地级市日电量模型/lishui.bin and b/各地级市日电量模型/lishui.bin differ
diff --git a/各地级市日电量模型/lstm单日预测.py b/各地级市日电量模型/lstm单日预测.py
index 2d8575c..fdebc7a 100644
--- a/各地级市日电量模型/lstm单日预测.py
+++ b/各地级市日电量模型/lstm单日预测.py
@@ -41,7 +41,7 @@ def inverse_transform_col(scaler,y,n_col):
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-data = pd.read_excel(r'C:\python-project\pytorch3\入模数据\杭州数据.xlsx',index_col='dtdate')
+data = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\杭州数据.xlsx',index_col='dtdate')
print(data.columns)
data.columns = data.columns.map(lambda x: x.strip())
data.drop(columns='city_name',inplace=True)
@@ -73,16 +73,17 @@ y_train = torch.from_numpy(y_train).to(device).type(torch.float32)
x_eval = torch.from_numpy(x_eval.values).to(device).type(torch.float32)
-model = LSTM_Regression(13, 32, output_size=1, num_layers=3).to(device) # 导入模型并设置模型的参数输入输出层、隐藏层等
+model = LSTM_Regression(13, 16, output_size=1, num_layers=2).to(device) # 导入模型并设置模型的参数输入输出层、隐藏层等
train_loss = []
loss_function = nn.MSELoss()
-optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
+optimizer = torch.optim.Adam(model.parameters(), lr=0.005, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
ds = TensorDataset(x_train,y_train)
-dl = DataLoader(ds,batch_size=32,shuffle=True)
+dl = DataLoader(ds,batch_size=3,shuffle=True)
-for i in range(2500):
+
+for i in range(300):
for j,(x,y) in enumerate(dl):
x,y = x.to(device),y.to(device)
out = model(x)
@@ -91,7 +92,7 @@ for i in range(2500):
optimizer.step()
optimizer.zero_grad()
train_loss.append(loss.item())
- if j%10 == 0:
+ if i%100 == 0:
print(f'epoch:{i+1} 第{j}次loss:{loss}')
# 保存模型
diff --git a/各地级市日电量模型/ningbo.bin b/各地级市日电量模型/ningbo.bin
index fbcd6ba..fdf04d4 100644
Binary files a/各地级市日电量模型/ningbo.bin and b/各地级市日电量模型/ningbo.bin differ
diff --git a/各地级市日电量模型/quzhou.bin b/各地级市日电量模型/quzhou.bin
index 9429ba1..5a4d40d 100644
Binary files a/各地级市日电量模型/quzhou.bin and b/各地级市日电量模型/quzhou.bin differ
diff --git a/各地级市日电量模型/shaoxing.bin b/各地级市日电量模型/shaoxing.bin
index 4dddc7e..9220a7b 100644
Binary files a/各地级市日电量模型/shaoxing.bin and b/各地级市日电量模型/shaoxing.bin differ
diff --git a/各地级市日电量模型/wenzhou.bin b/各地级市日电量模型/wenzhou.bin
index 381c4c6..a112774 100644
Binary files a/各地级市日电量模型/wenzhou.bin and b/各地级市日电量模型/wenzhou.bin differ
diff --git a/各地级市日电量模型/zhoushan.bin b/各地级市日电量模型/zhoushan.bin
index e24c578..549ebf4 100644
Binary files a/各地级市日电量模型/zhoushan.bin and b/各地级市日电量模型/zhoushan.bin differ
diff --git a/各地级市日电量模型/丽水.py b/各地级市日电量模型/丽水.py
index 39c694c..a3bbbf5 100644
--- a/各地级市日电量模型/丽水.py
+++ b/各地级市日电量模型/丽水.py
@@ -5,15 +5,23 @@ from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
+
+def season(x):
+    '''Return 1 when characters 5:7 of str(x) (the month, if x prints as YYYY-MM-DD...)
+    are 06-08 or 12-02, otherwise 0.'''
+    in_flagged_month = str(x)[5:7] in ('06','07','08','12','01','02')
+    return 1 if in_flagged_month else 0
+
+
mpl.rcParams['font.sans-serif']=['kaiti']
pd.set_option('display.width',None)
data = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\丽水数据.xlsx',index_col='dtdate')
data.index = pd.to_datetime(data.index,format='%Y-%m-%d')
-
-plt.plot(range(len(data)),data['售电量'])
-plt.show()
+data['season'] = data.index.map(season)
+# plt.plot(range(len(data)),data['售电量'])
+# plt.show()
print(data.head())
df_eval = data.loc['2023-9']
@@ -22,7 +30,7 @@ df_train = data.loc['2021-1':'2023-8']
print(len(df_eval),len(df_train),len(data))
-df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量']]
+df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']]
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
# high = df['售电量'].describe()['75%'] + 1.5*IQR
@@ -31,14 +39,14 @@ df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量'
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
-X = df_train[['tem_max','tem_min','holiday','24ST','rh','prs']]
-X_eval = df_eval[['tem_max','tem_min','holiday','24ST','rh','prs']]
+X = df_train[['tem_max','tem_min','holiday','24ST','season']]
+X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
y = df_train['售电量']
# best_goal = 1
# best_i = {}
# for i in range(200):
-x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.15,random_state=176)
+x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.15,random_state=42)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train,y_train)
@@ -62,13 +70,18 @@ goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result
# print(best_i,best_goal)
-
-result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\丽水.csv')
-with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
- f.write(f'丽水月末3天偏差率:{goal},9号-月底偏差率:{goal2}')
+# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
+# f.write(f'丽水月末3天偏差率:{goal},9号-月底偏差率:{goal2}')
# # 保存模型
# model.save_model('lishui.bin')
-# loaded_model = xgb.XGBRegressor()
-# loaded_model.load_model('lishui.bin')
-# model.predict(X_eval)
+import numpy as np
+loaded_model = xgb.XGBRegressor()
+loaded_model.load_model('lishui.bin')
+X_eval = pd.DataFrame([  # NOTE(review): rows look like (tem_max, tem_min, 24ST, holiday, season) -- confirm
+    [22.5,15.4,23,0,0],
+    [22.3,15.5,23,1,0],
+    [20.0,15.7,23,1,0],
+    [22.0,15.0,23,0,0],
+    [23.6,13.9,23,0,0]], columns=['tem_max','tem_min','24ST','holiday','season'])
+print(model.predict(X_eval[list(X.columns)]))  # reorder to the training column order (holiday before 24ST)
diff --git a/各地级市日电量模型/台州.py b/各地级市日电量模型/台州.py
index 5cd00c9..16a208a 100644
--- a/各地级市日电量模型/台州.py
+++ b/各地级市日电量模型/台州.py
@@ -5,24 +5,30 @@ from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
+
+def season(x):
+    '''Return 1 when characters 5:7 of str(x) (the month, if x prints as YYYY-MM-DD...)
+    are 06-08 or 12-02, otherwise 0.'''
+    in_flagged_month = str(x)[5:7] in ('06','07','08','12','01','02')
+    return 1 if in_flagged_month else 0
mpl.rcParams['font.sans-serif']=['kaiti']
pd.set_option('display.width',None)
data = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\台州数据(1).xlsx',index_col='dtdate')
data.index = pd.to_datetime(data.index,format='%Y-%m-%d')
-
-plt.plot(range(len(data)),data['售电量'])
-plt.show()
+data['season'] = data.index.map(season)
+# plt.plot(range(len(data)),data['售电量'])
+# plt.show()
print(data.head())
-df_eval = data.loc['2023-9']
+df_eval = data.loc['2023-8']
# df_train = data.loc['2021-1':'2023-7']
df_train = data[500:850]
print(len(df_eval),len(df_train),len(data))
-df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量']]
+df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']]
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
@@ -33,8 +39,8 @@ df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量'
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
-X = df_train[['tem_max','tem_min','holiday','24ST','rh','prs']]
-X_eval = df_eval[['tem_max','tem_min','holiday','24ST','rh','prs']]
+X = df_train[['tem_max','tem_min','holiday','24ST','season']]
+X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
y = df_train['售电量']
best_goal = 1
best_i = {}
@@ -64,13 +70,19 @@ goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result
# print(best_i,best_goal)
-
-result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\台州.csv')
-with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
- f.write(f'台州月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
-# 保存模型
-model.save_model('taizhou.bin')
+#
+# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\台州.csv')
+# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
+# f.write(f'台州月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
+# # 保存模型
+# model.save_model('taizhou.bin')
+import numpy as np
loaded_model = xgb.XGBRegressor()
loaded_model.load_model('taizhou.bin')
-model.predict(X_eval)
+X_eval = pd.DataFrame([[25.1,16.8,23,0,0],  # NOTE(review): rows look like (tem_max, tem_min, 24ST, holiday, season) -- confirm
+    [22.8,16.3,23,1,0],
+    [22.7,14.6,23,1,0],
+    [22.5,14.4,23,0,0],
+    [22.6,15.6,23,0,0]], columns=['tem_max','tem_min','24ST','holiday','season'])
+print(model.predict(X_eval[list(X.columns)]))  # reorder to the training column order (holiday before 24ST)
diff --git a/各地级市日电量模型/嘉兴.py b/各地级市日电量模型/嘉兴.py
index 33a1881..1be7f4c 100644
--- a/各地级市日电量模型/嘉兴.py
+++ b/各地级市日电量模型/嘉兴.py
@@ -5,6 +5,11 @@ from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
+def season(x):
+    '''Return 1 when characters 5:7 of str(x) (the month, if x prints as YYYY-MM-DD...)
+    are 06-08 or 12-02, otherwise 0.'''
+    in_flagged_month = str(x)[5:7] in ('06','07','08','12','01','02')
+    return 1 if in_flagged_month else 0
mpl.rcParams['font.sans-serif']=['kaiti']
@@ -17,7 +22,7 @@ df.set_index('dtdate',inplace=True)
plt.plot(range(len(df)),df['售电量'])
plt.show()
print(df.head())
-
+df['season'] = df.index.map(season)
df_eval = df[df.index.str[:7]=='2023-08']
# df_train = df[(df.index.str[:7]!='2023-08')&(df.index.str[:7]!='2023-09')]
@@ -26,7 +31,7 @@ print(len(df_eval),len(df_train),len(df))
-df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量']]
+df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']]
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
@@ -37,8 +42,8 @@ df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量'
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
-X = df_train[['tem_max','tem_min','holiday','24ST','rh','prs']]
-X_eval = df_eval[['tem_max','tem_min','holiday','24ST','rh','prs']]
+X = df_train[['tem_max','tem_min','holiday','24ST','season']]
+X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
y = df_train['售电量']
# best_goal = 1
@@ -63,6 +68,7 @@ goal = (result_eval['eval'][-3:].sum()-result_eval['pred'][-3:].sum())/result_ev
# print(goal)
goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum()
+print(goal,goal2)
# print(goal2)
# if abs(goal) < best_goal :
# best_goal = abs(goal)
@@ -73,11 +79,17 @@ goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result
-result_eval.to_csv(r'C:\Users\user\Desktop\8月各地市日电量预测结果\嘉兴.csv')
-with open(r'C:\Users\user\Desktop\8月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
- f.write(f'嘉兴月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
+# result_eval.to_csv(r'C:\Users\user\Desktop\8月各地市日电量预测结果\嘉兴.csv')
+# with open(r'C:\Users\user\Desktop\8月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
+# f.write(f'嘉兴月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# 保存模型
-model.save_model('jiaxing.bin')
+# model.save_model('jiaxing.bin')
loaded_model = xgb.XGBRegressor()
loaded_model.load_model('jiaxing.bin')
-model.predict(X_eval)
\ No newline at end of file
+import numpy as np
+X_eval = pd.DataFrame([[23.4,16.1,23,0,0],  # NOTE(review): rows look like (tem_max, tem_min, 24ST, holiday, season) -- confirm
+    [23.3,16.0,23,1,0],
+    [22.0,15.8,23,1,0],
+    [23.8,15.7,23,0,0],
+    [24.1,15.3,23,0,0]], columns=['tem_max','tem_min','24ST','holiday','season'])
+print(model.predict(X_eval[list(X.columns)]))  # reorder to the training column order (holiday before 24ST)
\ No newline at end of file
diff --git a/各地级市日电量模型/宁波.py b/各地级市日电量模型/宁波.py
index eb46c5b..86d1569 100644
--- a/各地级市日电量模型/宁波.py
+++ b/各地级市日电量模型/宁波.py
@@ -5,6 +5,11 @@ from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
+def season(x):
+    '''Return 1 when characters 5:7 of str(x) (the month, if x prints as YYYY-MM-DD...)
+    are 06-08 or 12-02, otherwise 0.'''
+    in_flagged_month = str(x)[5:7] in ('06','07','08','12','01','02')
+    return 1 if in_flagged_month else 0
mpl.rcParams['font.sans-serif']=['kaiti']
@@ -13,6 +18,7 @@ pd.set_option('display.width',None)
df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\宁波数据.xlsx',index_col='dtdate')
df.index = pd.to_datetime(df.index,format='%Y-%m-%d')
+df['season'] = df.index.map(season)
plt.plot(range(len(df)),df['售电量'])
plt.show()
print(df.head())
@@ -25,7 +31,7 @@ print(len(df_eval),len(df_train),len(df))
-df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量']]
+df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']]
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
@@ -36,8 +42,8 @@ df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量'
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
-X = df_train[['tem_max','tem_min','holiday','24ST','rh','prs']]
-X_eval = df_eval[['tem_max','tem_min','holiday','24ST','rh','prs']]
+X = df_train[['tem_max','tem_min','holiday','24ST','season']]
+X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
y = df_train['售电量']
# best_goal = 1
@@ -71,13 +77,19 @@ print(goal2)
# print(best_i,best_goal,x)
-
-result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\宁波.csv')
-with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
- f.write(f'宁波月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
+# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\宁波.csv')
+# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
+# f.write(f'宁波月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# 保存模型
-model.save_model('ningbo.bin')
+# model.save_model('ningbo.bin')
+import numpy as np
loaded_model = xgb.XGBRegressor()
loaded_model.load_model('ningbo.bin')
-model.predict(X_eval)
+
+X_eval = pd.DataFrame([[23.3,15.6,23,0,0],  # NOTE(review): rows look like (tem_max, tem_min, 24ST, holiday, season) -- confirm
+    [22.5,16.0,23,1,0],
+    [23.4,16.4,23,1,0],
+    [20.8,15.3,23,0,0],
+    [23.6,14.0,23,0,0]], columns=['tem_max','tem_min','24ST','holiday','season'])
+print(model.predict(X_eval[list(X.columns)]))  # reorder to the training column order (holiday before 24ST)
diff --git a/各地级市日电量模型/杭州.py b/各地级市日电量模型/杭州.py
index 450296e..4d85119 100644
--- a/各地级市日电量模型/杭州.py
+++ b/各地级市日电量模型/杭州.py
@@ -1,5 +1,6 @@
import xgboost as xgb
import pandas as pd
+import numpy as np
import os
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
@@ -34,7 +35,7 @@ def normal(nd):
return nd[(ndlow)]
-data = pd.read_excel(r'C:\python-project\pytorch3\入模数据\杭州数据.xlsx',index_col='dtdate')
+data = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\杭州数据.xlsx',index_col='dtdate')
data.index = pd.to_datetime(data.index,format='%Y-%m-%d')
data = data.loc[normal(data['售电量']).index]
# plt.plot(range(len(data['售电量']['2021':'2022'])),data['售电量']['2021':'2022'])
@@ -45,7 +46,7 @@ data = data.loc[normal(data['售电量']).index]
data['month'] = data.index.strftime('%Y-%m-%d').str[6]
data['month'] = data['month'].astype('int')
data['season'] = data.index.map(season)
-print(data.head(50))
+print(data.tail(50))
df_eval = data.loc['2022-9':'2023-9']
df_train = data.loc['2021-1':'2022-8']
@@ -82,36 +83,53 @@ result_test = pd.DataFrame({'test':y_test,'pred':y_pred},index=y_test.index)
# 指标打印
print(abs(y_test - y_pred).mean() / y_test.mean())
-eval_pred = model.predict(X_eval)
-
-result_eval = pd.DataFrame({'eval':df_eval['售电量'],'pred':eval_pred},index=df_eval['售电量'].index)
-
-print((result_eval['eval'].sum()-result_eval['pred'].sum())/result_eval['eval'].sum())
-
-goal = (result_eval['eval'][-3:].sum()-result_eval['pred'][-3:].sum())/result_eval['eval'].sum()
-print('goal:',goal)
-
-goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum()
-
-print('goal2:',goal2)
-print(result_eval)
-print('r2:',r2_score(y_test,y_pred))
+# eval_pred = model.predict(X_eval)
+#
+# result_eval = pd.DataFrame({'eval':df_eval['售电量'],'pred':eval_pred},index=df_eval['售电量'].index)
+#
+# print((result_eval['eval'].sum()-result_eval['pred'].sum())/result_eval['eval'].sum())
+#
+# goal = (result_eval['eval'][-3:].sum()-result_eval['pred'][-3:].sum())/result_eval['eval'].sum()
+# print('goal:',goal)
+#
+# goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum()
+#
+# print('goal2:',goal2)
+# print(result_eval)
+# print('r2:',r2_score(y_test,y_pred))
+#
+# # result_eval.to_csv('asda.csv',encoding='gbk')
+# # if abs(goal) < best_goal:
+# # best_goal = abs(goal)
+# # best_i['best_i'] = i
+# # x = goal2
+# # print(best_i,best_goal,x)
+#
+#
+#
+# # result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\杭州.csv')
+# # with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
+# # f.write(f'杭州月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
+# 保存模型
+# model.save_model('hangzhou.bin')
-# result_eval.to_csv('asda.csv',encoding='gbk')
- # if abs(goal) < best_goal:
- # best_goal = abs(goal)
- # best_i['best_i'] = i
- # x = goal2
-# print(best_i,best_goal,x)
+# X_eval = df_eval[['tem_max','tem_min','24ST','holiday','season']]
+df_eval = pd.read_excel(r'C:\Users\user\Desktop\浙江气象1027.xlsx')  # province-wide weather forecast sheet
+df_eval.columns = df_eval.columns.map(lambda x:x.strip())  # header cells carry stray whitespace
+df_eval = df_eval[['city_name','dtdate','tem_max','tem_min']]
+df_eval['city_name'] = df_eval['city_name'].map(lambda x:x.strip())
+df_hangzhou = df_eval[df_eval['city_name']=='杭州市'].sort_values(by='dtdate')  # NOTE(review): was '金华市' in the Hangzhou script -- confirm
+print(df_hangzhou)
-# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\杭州.csv')
-# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
-# f.write(f'杭州月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
-# # 保存模型
-# model.save_model('hangzhou.bin')
-# loaded_model = xgb.XGBRegressor()
-# loaded_model.load_model('hangzhou.bin')
-# model.predict(X_eval)
+loaded_model = xgb.XGBRegressor()
+loaded_model.load_model('hangzhou.bin')
+# X_eval = np.array([[26.1,16.1,23,0,0],
+# [24.5,14.6,23,1,0],
+# [24.0,15.2,23,1,0],
+# [22.7,14.9,23,0,0],
+# [24.1,13.4,23,0,0]])
+#
+# print(loaded_model.predict(X_eval))
diff --git a/各地级市日电量模型/温州.py b/各地级市日电量模型/温州.py
index 69183f4..8c72a08 100644
--- a/各地级市日电量模型/温州.py
+++ b/各地级市日电量模型/温州.py
@@ -5,6 +5,11 @@ from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
+def season(x):
+    '''Return 1 when characters 5:7 of str(x) (the month, if x prints as YYYY-MM-DD...)
+    are 06-08 or 12-02, otherwise 0.'''
+    in_flagged_month = str(x)[5:7] in ('06','07','08','12','01','02')
+    return 1 if in_flagged_month else 0
mpl.rcParams['font.sans-serif']=['kaiti']
@@ -13,6 +18,7 @@ pd.set_option('display.width',None)
df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\温州数据(1).xlsx',index_col='dtdate')
df.index = pd.to_datetime(df.index,format='%Y-%m-%d')
+df['season'] = df.index.map(season)
plt.plot(range(len(df)),df['售电量'])
plt.show()
print(df.head())
@@ -25,7 +31,7 @@ print(len(df_eval),len(df_train),len(df))
-df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量']]
+df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']]
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
@@ -36,15 +42,15 @@ df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量'
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
-X = df_train[['tem_max','tem_min','holiday','24ST','rh','prs']]
-X_eval = df_eval[['tem_max','tem_min','holiday','24ST','rh','prs']]
+X = df_train[['tem_max','tem_min','holiday','24ST','season']]
+X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
y = df_train['售电量']
# best_goal = 1
# best_i = {}
# for i in range(400):
-x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=304)
+x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=42)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train,y_train)
@@ -70,11 +76,19 @@ print(goal2)
# print(best_i,best_goal,x)
-result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\温州.csv')
-with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
- f.write(f'温州月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
+# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\温州.csv')
+# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
+# f.write(f'温州月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# 保存模型
-model.save_model('wenzhou.bin')
+# model.save_model('wenzhou.bin')
+
loaded_model = xgb.XGBRegressor()
+
loaded_model.load_model('wenzhou.bin')
-model.predict(X_eval)
\ No newline at end of file
+import numpy as np
+X_eval = pd.DataFrame([[24.8,17.9,23,0,0],  # NOTE(review): rows look like (tem_max, tem_min, 24ST, holiday, season) -- confirm
+    [23.1,15.4,23,1,0],
+    [22.2,16.0,23,1,0],
+    [22.1,14.9,23,0,0],
+    [23.5,14.3,23,0,0]], columns=['tem_max','tem_min','24ST','holiday','season'])
+print(model.predict(X_eval[list(X.columns)]))  # reorder to the training column order (holiday before 24ST)
\ No newline at end of file
diff --git a/各地级市日电量模型/湖州.py b/各地级市日电量模型/湖州.py
index e015a06..07d7d53 100644
--- a/各地级市日电量模型/湖州.py
+++ b/各地级市日电量模型/湖州.py
@@ -7,12 +7,17 @@ import matplotlib as mpl
mpl.rcParams['font.sans-serif']=['kaiti']
import random
import matplotlib.pyplot as plt
-
+def season(x):
+    '''Return 1 when characters 5:7 of str(x) (the month, if x prints as YYYY-MM-DD...)
+    are 06-08 or 12-02, otherwise 0.'''
+    in_flagged_month = str(x)[5:7] in ('06','07','08','12','01','02')
+    return 1 if in_flagged_month else 0
pd.set_option('display.width',None)
df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\湖州数据.xlsx',index_col='dtdate')
df.index = pd.to_datetime(df.index,format='%Y-%m-%d')
+df['season'] = df.index.map(season)
print(df.head())
df_eval = df.loc['2023-9']
@@ -20,7 +25,7 @@ df_train = df.loc['2021-1':'2023-8']
print(len(df_eval),len(df_train),len(df))
-df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量']]
+df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']]
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
@@ -31,8 +36,8 @@ df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量'
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
-X = df_train[['tem_max','tem_min','holiday','24ST','rh','prs']]
-X_eval = df_eval[['tem_max','tem_min','holiday','24ST','rh','prs']]
+X = df_train[['tem_max','tem_min','holiday','24ST','season']]
+X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
y = df_train['售电量']
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=100)
@@ -58,14 +63,20 @@ print(goal2)
-result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\湖州.csv')
-with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
- f.write(f'湖州月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
+# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\湖州.csv')
+# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
+# f.write(f'湖州月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
+
+# 保存模型
-# # 保存模型
-#
# model.save_model('huzhou.bin')
-# loaded_model = xgb.XGBRegressor()
-# loaded_model.load_model('huzhou.bin')
-# model.predict(X_eval)
+loaded_model = xgb.XGBRegressor()
+loaded_model.load_model('huzhou.bin')
+import numpy as np
+X_eval = pd.DataFrame([[22.2,14.8,23,0,0],  # NOTE(review): rows look like (tem_max, tem_min, 24ST, holiday, season) -- confirm
+    [23.4,15.9,23,1,0],
+    [22.5,15.6,23,1,0],
+    [23.8,14.3,23,0,0],
+    [23.9,14.0,23,0,0]], columns=['tem_max','tem_min','24ST','holiday','season'])
+print(model.predict(X_eval[list(X.columns)]))  # reorder to the training column order (holiday before 24ST)
diff --git a/各地级市日电量模型/绍兴.py b/各地级市日电量模型/绍兴.py
index fabe943..0dbdc6e 100644
--- a/各地级市日电量模型/绍兴.py
+++ b/各地级市日电量模型/绍兴.py
@@ -5,6 +5,11 @@ from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
+def season(x):
+    '''Return 1 when characters 5:7 of str(x) (the month, if x prints as YYYY-MM-DD...)
+    are 06-08 or 12-02, otherwise 0.'''
+    in_flagged_month = str(x)[5:7] in ('06','07','08','12','01','02')
+    return 1 if in_flagged_month else 0
mpl.rcParams['font.sans-serif']=['kaiti']
@@ -13,7 +18,7 @@ pd.set_option('display.width',None)
df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\绍兴数据(1).xlsx',index_col='dtdate')
df.index = pd.to_datetime(df.index ,format='%Y-%m-%d')
-
+df['season'] = df.index.map(season)
plt.plot(range(len(df)),df['售电量'])
plt.show()
print(df.head())
@@ -26,7 +31,7 @@ print(len(df_eval),len(df_train),len(df))
-df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量']]
+df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']]
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
@@ -37,8 +42,8 @@ df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量'
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
-X = df_train[['tem_max','tem_min','holiday','24ST','rh','prs']]
-X_eval = df_eval[['tem_max','tem_min','holiday','24ST','rh','prs']]
+X = df_train[['tem_max','tem_min','holiday','24ST','season']]
+X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
y = df_train['售电量']
@@ -46,7 +51,7 @@ y = df_train['售电量']
# best_i = {}
# for i in range(400):
-x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=253)
+x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=42)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train,y_train)
@@ -74,14 +79,20 @@ print(goal2)
-result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\绍兴.csv')
-with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
- f.write(f'绍兴月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
+# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\绍兴.csv')
+# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
+# f.write(f'绍兴月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# 保存模型
model.save_model('shaoxing.bin')
loaded_model = xgb.XGBRegressor()
loaded_model.load_model('shaoxing.bin')
-model.predict(X_eval)
+import numpy as np
+# Five hand-entered feature rows. A bare ndarray carries no column names, so the
+# values are presumably matched by position to the order X was built with:
+# tem_max, tem_min, holiday, 24ST, season.
+# NOTE(review): the third value (23) therefore lands in `holiday` and the 0/1
+# flag in `24ST` -- confirm these two columns are not swapped.
+X_eval = np.array([[24.7,15.9,23,0,0],
+                   [24.4,16.2,23,1,0],
+                   [23.6,15.1,23,1,0],
+                   [24.7,15.3,23,0,0],
+                   [24.9,14.6,23,0,0]])
+# Predict with the model reloaded from 'shaoxing.bin' so the save/load
+# round-trip just above is actually exercised.
+print(loaded_model.predict(X_eval))
diff --git a/各地级市日电量模型/舟山.py b/各地级市日电量模型/舟山.py
index 3bd2dab..8535146 100644
--- a/各地级市日电量模型/舟山.py
+++ b/各地级市日电量模型/舟山.py
@@ -7,13 +7,17 @@ import matplotlib as mpl
mpl.rcParams['font.sans-serif']=['kaiti']
import random
import matplotlib.pyplot as plt
-
+def season(x):
+    """Return 1 if characters 5-6 of str(x) (the MM of 'YYYY-MM-DD') are Jun-Aug or Dec-Feb, else 0."""
+    month = str(x)[5:7]
+    flagged_months = ('06', '07', '08', '12', '01', '02')
+    return int(month in flagged_months)
pd.set_option('display.width',None)
df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\舟山数据(1).xlsx',index_col='dtdate')
df.index = pd.to_datetime(df.index,format='%Y-%m-%d')
-
+df['season'] = df.index.map(season)
print(df.head())
df_eval = df.loc['2023-9']
@@ -21,7 +25,7 @@ df_train = df.loc['2021-1':'2023-8']
print(len(df_eval),len(df_train),len(df))
-df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量']]
+df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']]
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
@@ -32,11 +36,11 @@ df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量'
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
-X = df_train[['tem_max','tem_min','holiday','24ST','rh','prs']]
-X_eval = df_eval[['tem_max','tem_min','holiday','24ST','rh','prs']]
+X = df_train[['tem_max','tem_min','holiday','24ST','season']]
+X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
y = df_train['售电量']
-x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=100)
+x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=158)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train,y_train)
@@ -57,11 +61,17 @@ print(goal)
goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum()
print(goal2)
-result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\舟山.csv')
-with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
- f.write(f'舟山月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
+# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\舟山.csv')
+# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
+# f.write(f'舟山月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
model.save_model('zhoushan.bin')
loaded_model = xgb.XGBRegressor()
loaded_model.load_model('zhoushan.bin')
-model.predict(X_eval)
+import numpy as np
+# Five hand-entered feature rows. A bare ndarray carries no column names, so the
+# values are presumably matched by position to the order X was built with:
+# tem_max, tem_min, holiday, 24ST, season.
+# NOTE(review): the third value (23) therefore lands in `holiday` and the 0/1
+# flag in `24ST` -- confirm these two columns are not swapped.
+X_eval = np.array([[22.6,18.7,23,0,0],
+                   [21.6,17.9,23,1,0],
+                   [21.9,18.2,23,1,0],
+                   [20.7,18.2,23,0,0],
+                   [22.3,18.0,23,0,0]])
+# Predict with the model reloaded from 'zhoushan.bin' so the save/load
+# round-trip just above is actually exercised.
+print(loaded_model.predict(X_eval))
diff --git a/各地级市日电量模型/衢州.py b/各地级市日电量模型/衢州.py
index a1e7b93..948779a 100644
--- a/各地级市日电量模型/衢州.py
+++ b/各地级市日电量模型/衢州.py
@@ -5,6 +5,12 @@ from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
+def season(x):
+    """Return 1 if characters 5-6 of str(x) (the MM of 'YYYY-MM-DD') are Jun-Aug or Dec-Feb, else 0."""
+    month = str(x)[5:7]
+    flagged_months = ('06', '07', '08', '12', '01', '02')
+    return int(month in flagged_months)
+
mpl.rcParams['font.sans-serif']=['kaiti']
@@ -14,6 +20,7 @@ pd.set_option('display.width',None)
df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\衢州数据.xlsx')
df['dtdate'] = pd.to_datetime(df['dtdate'],format='%Y-%m-%d').astype('string')
df.set_index('dtdate',inplace=True)
+df['season'] = df.index.map(season)
plt.plot(range(len(df)),df['售电量'])
plt.show()
print(df.head())
@@ -23,13 +30,13 @@ print(df.head())
# df_train = df[(df.index.str[:7]!='2023-09')&(df.index.str!='2023-08-29')&(df.index.str!='2023-08-30')&(df.index.str!='2023-08-31')]
df_eval = df[df.index.str[:7]=='2023-07']
-# df_train = df[(df.index.str[:7]!='2023-08')&(df.index.str[:7]!='2023-09')]
-df_train = df[450:900]
+df_train = df[(df.index.str[:7]!='2023-08')&(df.index.str[:7]!='2023-09')]
+# df_train = df[450:900]
# max_8,min_8 = df_eval['售电量'].max(),df_eval['售电量'].min()
print(len(df_eval),len(df_train),len(df))
-df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量']]
+df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']]
IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
high = df['售电量'].describe()['75%'] + 1.5*IQR
@@ -39,11 +46,11 @@ print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] <
df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
-X = df_train[['tem_max','tem_min','holiday','24ST','rh','prs']]
-X_eval = df_eval[['tem_max','tem_min','holiday','24ST','rh','prs']]
+X = df_train[['tem_max','tem_min','holiday','24ST','season']]
+X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
y = df_train['售电量']
-x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=102)
+x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.1, n_estimators=150)
model.fit(x_train,y_train)
y_pred = model.predict(x_test)
@@ -62,17 +69,24 @@ print(goal2)
# print((result_eval['eval'].sum()-result_eval['pred'].sum())/result_eval['eval'].sum())
# print((result_eval['eval'].sum()-(result_eval['eval'][:-3].sum()+result_eval['pred'][-3:].sum()))/result_eval['eval'].sum())
-#
-result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\衢州.csv')
-with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
- f.write(f'衢州月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
+# #
+# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\衢州.csv')
+# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
+# f.write(f'衢州月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
#
# 保存模型
# model.save_model('quzhou.bin')
-# loaded_model = xgb.XGBRegressor()
-# loaded_model.load_model('quzhou.bin')
-# model.predict(X_eval)
+# NOTE(review): loaded_model is restored from 'quzhou.bin' but never used; the
+# prediction below goes through the in-memory `model`. The save_model call above
+# is commented out, so the file on disk is whatever an earlier run wrote.
+loaded_model = xgb.XGBRegressor()
+loaded_model.load_model('quzhou.bin')
+import numpy as np
+# Five hand-entered feature rows. X is built from the columns tem_max, tem_min,
+# holiday, 24ST, season in that order. NOTE(review): the third value (23) would
+# then land in `holiday` -- confirm holiday/24ST are not swapped.
+X_eval = np.array([[24.0,15.6,23,0,0],
+ [24.2,17.1,23,1,0],
+ [22.6,16.7,23,1,0],
+ [23.5,15.5,23,0,0],
+ [24.5,13.9,23,0,0]])
+
+print(model.predict(X_eval))
# from sklearn.ensemble import RandomForestRegressor
diff --git a/各地级市日电量模型/金华.py b/各地级市日电量模型/金华.py
index aa7061b..b99fe5e 100644
--- a/各地级市日电量模型/金华.py
+++ b/各地级市日电量模型/金华.py
@@ -5,6 +5,11 @@ from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
+def season(x):
+    """Return 1 if characters 5-6 of str(x) (the MM of 'YYYY-MM-DD') are Jun-Aug or Dec-Feb, else 0."""
+    month = str(x)[5:7]
+    flagged_months = ('06', '07', '08', '12', '01', '02')
+    return int(month in flagged_months)
mpl.rcParams['font.sans-serif']=['kaiti']
@@ -14,19 +19,20 @@ pd.set_option('display.width',None)
df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\金华数据.xlsx')
df['dtdate'] = pd.to_datetime(df['dtdate'],format='%Y-%m-%d').astype('string')
df.set_index('dtdate',inplace=True)
+df['season'] = df.index.map(season)
plt.plot(range(len(df)),df['售电量'])
plt.show()
print(df.head())
df_eval = df[df.index.str[:7]=='2023-09']
-# df_train = df[(df.index.str[:7]!='2023-08')&(df.index.str[:7]!='2023-09')]
-df_train = df[500:850]
+df_train = df[(df.index.str[:7]!='2023-08')&(df.index.str[:7]!='2023-09')]
+# df_train = df[500:850]
print(len(df_eval),len(df_train),len(df))
-df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量']]
+df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']]
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
@@ -37,15 +43,15 @@ df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量'
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
-X = df_train[['tem_max','tem_min','holiday','24ST','rh','prs']]
-X_eval = df_eval[['tem_max','tem_min','holiday','24ST','rh','prs']]
+X = df_train[['tem_max','tem_min','holiday','24ST','season']]
+X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
y = df_train['售电量']
# best_goal = 1
# best_i = {}
# for i in range(400):
-x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=396)
+x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=142)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train,y_train)
@@ -70,13 +76,19 @@ print(goal2)
# x = goal2
# print(best_i,best_goal,x)
-result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\金华.csv')
-with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
- f.write(f'金华月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
+# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\金华.csv')
+# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f:
+# f.write(f'金华月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n')
# # 保存模型
# model.save_model('jinhua.bin')
-# loaded_model = xgb.XGBRegressor()
-# loaded_model.load_model('jinhua.bin')
-# model.predict(X_eval)
+# NOTE(review): loaded_model is restored from 'jinhua.bin' but never used; the
+# prediction below goes through the in-memory `model`. The save_model call above
+# is commented out, so the file on disk is whatever an earlier run wrote.
+loaded_model = xgb.XGBRegressor()
+loaded_model.load_model('jinhua.bin')
+import numpy as np
+# Five hand-entered feature rows. X is built from the columns tem_max, tem_min,
+# holiday, 24ST, season in that order. NOTE(review): the third value (23) would
+# then land in `holiday` -- confirm holiday/24ST are not swapped.
+X_eval = np.array([[26.0,15.0,23,0,0],
+ [24.9,17.6,23,1,0],
+ [24.0,17.7,23,1,0],
+ [24.8,15.8,23,0,0],
+ [25.2,14.0,23,0,0]])
+print(model.predict(X_eval))
diff --git a/浙江电压等级电量/测试.py b/浙江电压等级电量/测试.py
new file mode 100644
index 0000000..e1ed8e8
--- /dev/null
+++ b/浙江电压等级电量/测试.py
@@ -0,0 +1,27 @@
+# Smoke test: reload the saved LSTM weights and print the model output next to
+# the real values for the '10kv以下' column of the Hangzhou workbook, October 2023.
+import torch
+import pandas as pd
+from 电压等级_输出为5 import LSTM_Regression
+from 电压等级_输出为5 import create_dataset
+
+# Mirrors the training script's LSTM_Regression(DAYS_FOR_TRAIN, 32, output_size=5,
+# num_layers=2); assumes DAYS_FOR_TRAIN == 10 -- TODO confirm.
+model = LSTM_Regression(10, 32, output_size=5, num_layers=2)
+# map_location lets a checkpoint that was saved from a CUDA device load on a CPU-only host.
+model.load_state_dict(torch.load('dy5.pth', map_location='cpu'))
+model.eval()  # evaluation mode for inference
+
+df_eval = pd.read_excel(r'C:\Users\user\Desktop\浙江各地市分电压日电量数据\杭州.xlsx',index_col=' stat_date ')
+df_eval.columns = df_eval.columns.map(lambda x:x.strip())
+df_eval.index = pd.to_datetime(df_eval.index)
+
+x,y = create_dataset(df_eval.loc['2023-10']['10kv以下'],10)
+# NOTE(review): the training script min-max scales its inputs before fitting, but no
+# scaling is applied here and its min_value/max_value are not available in this
+# script -- confirm whether the printed predictions are on the intended scale.
+x = x.reshape(-1,1,10)
+print(x.shape,y.shape)
+x = torch.from_numpy(x).type(torch.float32)
+with torch.no_grad():  # inference only; no autograd graph needed
+    print(model(x),y)
\ No newline at end of file
diff --git a/浙江电压等级电量/电压等级_输出为5.py b/浙江电压等级电量/电压等级_输出为5.py
index 9767442..7ffbbac 100644
--- a/浙江电压等级电量/电压等级_输出为5.py
+++ b/浙江电压等级电量/电压等级_输出为5.py
@@ -17,8 +17,8 @@ class LSTM_Regression(nn.Module):
self.fc = nn.Linear(hidden_size, output_size)
def forward(self, _x):
- x, _ = self.lstm(_x) # _x is input, size (seq_len, batch, input_size)
- s, b, h = x.shape # x is output, size (seq_len, batch, hidden_size)
+ x, _ = self.lstm(_x) # _x is input, size (seq_len, batch, input_size) 一批多少条样本 多少批样本 每一个样本的输入特征大小(10)
+ s, b, h = x.shape # x is output, size (seq_len, batch, hidden_size) 经过lstm计算后输出为隐藏层大小
x = x.view(s * b, h)
x = self.fc(x)
x = x.view(s, b, -1) # 把形状改回来
@@ -52,110 +52,133 @@ def data_preprocessing(data):
return data
-# 拼接数据集
-file_dir = r'C:\Users\鸽子\Desktop\浙江各地市分电压日电量数据'
-excel = os.listdir(file_dir)[0]
+if __name__ == '__main__':
+ # Script entry point: build one dataset from every workbook in file_dir,
+ # min-max scale it, reload the saved weights and run the model on the full set.
+ # Build the combined dataset
+ file_dir = r'C:\Users\user\Desktop\浙江各地市分电压日电量数据'
+ excel = os.listdir(file_dir)[0]
-data = pd.read_excel(os.path.join(file_dir, excel), sheet_name=0, index_col=' stat_date ')
+ data = pd.read_excel(os.path.join(file_dir, excel), sheet_name=0, index_col=' stat_date ')
-data = data_preprocessing(data)
+ data = data_preprocessing(data)
+
+ # The first column of the first workbook seeds dataset_x / dataset_y ...
+ df = data[data.columns[0]]
+ df.dropna(inplace = True)
+ dataset_x, dataset_y = create_dataset(df, DAYS_FOR_TRAIN)
+
+ # ... and the remaining columns of that workbook are appended to it.
+ for level in data.columns[1:]:
+ df = data[level]
+ df.dropna(inplace=True)
+ x, y = create_dataset(df, DAYS_FOR_TRAIN)
+ dataset_x = np.concatenate((dataset_x, x))
+ dataset_y = np.concatenate((dataset_y, y))
+
+
+ # Every column of every other workbook is appended the same way.
+ for excel in os.listdir(file_dir)[1:]:
+ data = pd.read_excel(os.path.join(file_dir,excel), sheet_name=0,index_col=' stat_date ')
+ data = data_preprocessing(data)
+
+ for level in data.columns:
+ df = data[level]
+ df.dropna(inplace=True)
+ x,y = create_dataset(df,DAYS_FOR_TRAIN)
+ dataset_x = np.concatenate((dataset_x,x))
+ dataset_y = np.concatenate((dataset_y,y))
+
+ print(dataset_x,dataset_y,dataset_x.shape,dataset_y.shape)
+
+ # Training
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+ # Min-max scale to 0~1; dataset_y is scaled with the min/max of dataset_x
+ max_value = np.max(dataset_x)
+ min_value = np.min(dataset_x)
+ dataset_x = (dataset_x - min_value) / (max_value - min_value)
+ dataset_y = (dataset_y - min_value) / (max_value - min_value)
+
+ # Split into training and test sets (first 70% of the samples are for training)
+ train_size = int(len(dataset_x)*0.7)
+
+ train_x = dataset_x[:train_size]
+ train_y = dataset_y[:train_size]
+
+ # Reshape the data; the RNN reads input of shape (seq_size, batch_size, feature_size)
+ train_x = train_x.reshape(-1, 1, DAYS_FOR_TRAIN)
+ train_y = train_y.reshape(-1, 1, 5)
+
+ # Convert to PyTorch tensors
+ train_x = torch.from_numpy(train_x).to(device).type(torch.float32)
+ train_y = torch.from_numpy(train_y).to(device).type(torch.float32)
+
+ model = LSTM_Regression(DAYS_FOR_TRAIN, 32, output_size=5, num_layers=2).to(device) # Build the model and set its input/output/hidden-layer sizes
+
+
+ train_loss = []
+ loss_function = nn.MSELoss()
+ optimizer = torch.optim.Adam(model.parameters(), lr=0.005, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
+
+ # Training loop is disabled; the weights are read from 'dy5.pth' below instead.
+ # for i in range(1500):
+ # out = model(train_x)
+ # loss = loss_function(out, train_y)
+ # loss.backward()
+ # optimizer.step()
+ # optimizer.zero_grad()
+ # train_loss.append(loss.item())
+ # # print(loss)
+ # # Save the model
+ # torch.save(model.state_dict(),'dy5.pth')
+
+
+ # NOTE(review): no map_location is passed -- confirm the checkpoint loads on CPU-only hosts.
+ model.load_state_dict(torch.load('dy5.pth'))
+
+ # for test
+ model = model.eval() # switch to evaluation mode
+ # model.load_state_dict(torch.load(os.path.join(model_save_dir,model_file))) # load parameters
+ dataset_x = dataset_x.reshape(-1, 1, DAYS_FOR_TRAIN) # (seq_size, batch_size, feature_size)
+ dataset_x = torch.from_numpy(dataset_x).to(device).type(torch.float32)
+
+ pred_test = model(dataset_x) # run on the full dataset_x
+
+ # Model output is (seq_size, batch_size, output_size)
+ pred_test = pred_test.view(-1)
+ pred_test = np.concatenate((np.zeros(DAYS_FOR_TRAIN), pred_test.cpu().detach().numpy()))
+
+ # plt.plot(pred_test.reshape(-1), 'r', label='prediction')
+ # plt.plot(dataset_y.reshape(-1), 'b', label='real')
+ # plt.plot((train_size*5, train_size*5), (0, 1), 'g--') # divider: training data on the left, test-data output on the right
+ # plt.legend(loc='best')
+ # plt.show()
-df = data[data.columns[0]]
-df.dropna(inplace = True)
-dataset_x, dataset_y = create_dataset(df, DAYS_FOR_TRAIN)
-for level in data.columns[1:]:
- df = data[level]
- df.dropna(inplace=True)
- x, y = create_dataset(df, DAYS_FOR_TRAIN)
- dataset_x = np.concatenate((dataset_x, x))
- dataset_y = np.concatenate((dataset_y, y))
-for excel in os.listdir(file_dir)[1:]:
- data = pd.read_excel(os.path.join(file_dir,excel), sheet_name=0,index_col=' stat_date ')
- data = data_preprocessing(data)
- for level in data.columns:
- df = data[level]
- df.dropna(inplace=True)
- x,y = create_dataset(df,DAYS_FOR_TRAIN)
- dataset_x = np.concatenate((dataset_x,x))
- dataset_y = np.concatenate((dataset_y,y))
-
-print(dataset_x,dataset_y,dataset_x.shape,dataset_y.shape)
-
-# 训练
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-
-# 标准化到0~1
-max_value = np.max(dataset_x)
-min_value = np.min(dataset_x)
-dataset_x = (dataset_x - min_value) / (max_value - min_value)
-dataset_y = (dataset_y - min_value) / (max_value - min_value)
-
-# 划分训练集和测试集
-train_size = len(dataset_x)*0.7
-train_x = dataset_x[:train_size]
-train_y = dataset_y[:train_size]
-
-# 将数据改变形状,RNN 读入的数据维度是 (seq_size, batch_size, feature_size)
-train_x = train_x.reshape(-1, 1, DAYS_FOR_TRAIN)
-train_y = train_y.reshape(-1, 1, 5)
-
-# 转为pytorch的tensor对象
-train_x = torch.from_numpy(train_x).to(device)
-train_y = torch.from_numpy(train_y).to(device)
-
-model = LSTM_Regression(DAYS_FOR_TRAIN, 32, output_size=3, num_layers=2).to(device) # 导入模型并设置模型的参数输入输出层、隐藏层等
-
-
-train_loss = []
-loss_function = nn.MSELoss()
-optimizer = torch.optim.Adam(model.parameters(), lr=0.005, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
-for i in range(1500):
- out = model(train_x)
- loss = loss_function(out, train_y)
- loss.backward()
- optimizer.step()
- optimizer.zero_grad()
- train_loss.append(loss.item())
- # print(loss)
-# 保存模型
-torch.save(model.state_dict(),'dy5.pth')
-
-
-# for test
-model = model.eval() # 转换成测试模式
-# model.load_state_dict(torch.load(os.path.join(model_save_dir,model_file))) # 读取参数
-dataset_x = dataset_x.reshape(-1, 1, DAYS_FOR_TRAIN) # (seq_size, batch_size, feature_size)
-dataset_x = torch.from_numpy(dataset_x).to(device)
-
-pred_test = model(dataset_x) # 全量训练集
-# 模型输出 (seq_size, batch_size, output_size)
-pred_test = pred_test.view(-1)
-pred_test = np.concatenate((np.zeros(DAYS_FOR_TRAIN), pred_test.cpu().detach().numpy()))
-
-plt.plot(pred_test, 'r', label='prediction')
-plt.plot(df, 'b', label='real')
-plt.plot((train_size, train_size), (0, 1), 'g--') # 分割线 左边是训练数据 右边是测试数据的输出
-plt.legend(loc='best')
-plt.show()
# 创建测试集
# result_list = []
# 以x为基础实际数据,滚动预测未来3天
-# x = torch.from_numpy(df[-14:-4]).to(device)
-# pred = model(x.reshape(-1,1,DAYS_FOR_TRAIN)).view(-1).detach().numpy()
+ # Evaluation on the Hangzhou workbook, July 2023. Kept inside the __main__
+ # guard: it depends on min_value, max_value, model and device, which only
+ # exist when this file runs as a script, so at module level it raises
+ # NameError as soon as another module imports LSTM_Regression from here.
+ df_eval = pd.read_excel(r'C:\Users\user\Desktop\浙江各地市分电压日电量数据\杭州.xlsx',index_col=' stat_date ')
+ df_eval.columns = df_eval.columns.map(lambda x:x.strip())
+ df_eval.index = pd.to_datetime(df_eval.index)
+
+ x,y = create_dataset(df_eval.loc['2023-7']['10kv以下'],10)
+
+ # Apply the same min-max scaling that dataset_x went through.
+ x = (x - min_value) / (max_value - min_value)
+ x = x.reshape(-1,1,10)
+
+ x = torch.from_numpy(x).type(torch.float32).to(device)
+ pred = model(x)
# 反归一化
-# pred = pred * (max_value - min_value) + min_value
+ pred = pred * (max_value - min_value) + min_value
# df = df * (max_value - min_value) + min_value
-# print(pred)
-# # 打印指标
+ print(pred,y)
+ # Flattened real values next to flattened predictions, written out as CSV.
+ df = pd.DataFrame({'real':y.reshape(-1),'pred':pred.view(-1).cpu().detach().numpy()})
+ df.to_csv('7月预测.csv',encoding='gbk')
+
+# 打印指标
# print(abs(pred - df[-3:]).mean() / df[-3:].mean())
# result_eight = pd.DataFrame({'pred': np.round(pred,1),'real': df[-3:]})
# target = (result_eight['pred'].sum() - result_eight['real'].sum()) / df[-31:].sum()