diff --git a/.idea/misc.xml b/.idea/misc.xml index 3141537..695b918 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,4 +1,4 @@ - + \ No newline at end of file diff --git a/.idea/pytorch2.iml b/.idea/pytorch2.iml index 719cec4..5cfdc49 100644 --- a/.idea/pytorch2.iml +++ b/.idea/pytorch2.iml @@ -2,7 +2,7 @@ - + \ No newline at end of file diff --git a/BPNN.py b/BPNN.py new file mode 100644 index 0000000..2e79530 --- /dev/null +++ b/BPNN.py @@ -0,0 +1,160 @@ +# encoding:utf-8 + +''' +BP神经网络Python实现 +''' + +import random +import numpy as np + + +def sigmoid(x): + ''' + 激活函数 + ''' + return 1.0 / (1.0 + np.exp(-x)) + + +def sigmoid_prime(x): + return sigmoid(x) * (1 - sigmoid(x)) + + +class BPNNRegression: + ''' + 神经网络回归与分类的差别在于: + 1. 输出层不需要再经过激活函数 + 2. 输出层的 w 和 b 更新量计算相应更改 + ''' + + def __init__(self, sizes): + + # 神经网络结构 + self.num_layers = len(sizes) + self.sizes = sizes + + # 初始化偏差,除输入层外, 其它每层每个节点都生成一个 biase 值(0-1) + self.biases = [np.random.randn(n, 1) for n in sizes[1:]] + # 随机生成每条神经元连接的 weight 值(0-1) + self.weights = [np.random.randn(r, c) + for c, r in zip(sizes[:-1], sizes[1:])] + + def feed_forward(self, a): + ''' + 前向传输计算输出神经元的值 + ''' + for i, b, w in zip(range(len(self.biases)), self.biases, self.weights): + # 输出神经元不需要经过激励函数 + if i == len(self.biases) - 1: + a = np.dot(w, a) + b + break + a = sigmoid(np.dot(w, a) + b) + return a + + def MSGD(self, training_data, epochs, mini_batch_size, eta, error=0.01): + ''' + 小批量随机梯度下降法 + ''' + n = len(training_data) + for j in range(epochs): + # 随机打乱训练集顺序 + random.shuffle(training_data) + # 根据小样本大小划分子训练集集合 + mini_batchs = [training_data[k:k + mini_batch_size] + for k in range(0, n, mini_batch_size)] + # 利用每一个小样本训练集更新 w 和 b + for mini_batch in mini_batchs: + self.updata_WB_by_mini_batch(mini_batch, eta) + + # 迭代一次后结果 + err_epoch = self.evaluate(training_data) + if j // 100 == 0: + print("Epoch {0} Error {1}".format(j, err_epoch)) + if err_epoch < error: + break + # if test_data: + # print("Epoch {0}: {1} / {2}".format(j, self.evaluate(test_data), n_test)) + # else: + # print("Epoch {0}".format(j)) + return err_epoch + + def updata_WB_by_mini_batch(self, mini_batch, eta): + ''' + 利用小样本训练集更新 w 和 b + mini_batch: 小样本训练集 + eta: 学习率 + ''' + # 创建存储迭代小样本得到的 b 和 w 偏导数空矩阵,大小与 biases 和 weights 一致,初始值为 0 + batch_par_b = [np.zeros(b.shape) for b in self.biases] + batch_par_w = [np.zeros(w.shape) for w in self.weights] + + for x, y in mini_batch: + # 根据小样本中每个样本的输入 x, 输出 y, 计算 w 和 b 的偏导 + delta_b, delta_w = self.back_propagation(x, y) + # 累加偏导 delta_b, delta_w + batch_par_b = [bb + dbb for bb, dbb in zip(batch_par_b, delta_b)] + batch_par_w = [bw + dbw for bw, dbw in zip(batch_par_w, delta_w)] + # 根据累加的偏导值 delta_b, delta_w 更新 b, w + # 由于用了小样本,因此 eta 需除以小样本长度 + self.weights = [w - (eta / len(mini_batch)) * dw + for w, dw in zip(self.weights, batch_par_w)] + self.biases = [b - (eta / len(mini_batch)) * db + for b, db in zip(self.biases, batch_par_b)] + + def back_propagation(self, x, y): + ''' + 利用误差后向传播算法对每个样本求解其 w 和 b 的更新量 + x: 输入神经元,行向量 + y: 输出神经元,行向量 + + ''' + delta_b = [np.zeros(b.shape) for b in self.biases] + delta_w = [np.zeros(w.shape) for w in self.weights] + + # 前向传播,求得输出神经元的值 + a = x # 神经元输出值 + # 存储每个神经元输出 + activations = [x] + # 存储经过 sigmoid 函数计算的神经元的输入值,输入神经元除外 + zs = [] + for b, w in zip(self.biases, self.weights): + z = np.dot(w, a) + b + zs.append(z) + a = sigmoid(z) # 输出神经元 + activations.append(a) + # ------------- + activations[-1] = zs[-1] # 更改神经元输出结果 + # ------------- + # 求解输出层δ + # 与分类问题不同,Delta计算不需要乘以神经元输入的倒数 + # delta = self.cost_function(activations[-1], y) * sigmoid_prime(zs[-1]) + delta = self.cost_function(activations[-1], y) # 更改后 + # ------------- + delta_b[-1] = delta + delta_w[-1] = np.dot(delta, activations[-2].T) + for lev in range(2, self.num_layers): + # 从倒数第1层开始更新,因此需要采用-lev + # 利用 lev + 1 层的 δ 计算 l 层的 δ + z = zs[-lev] + zp = sigmoid_prime(z) + delta = np.dot(self.weights[-lev + 1].T, delta) * zp + delta_b[-lev] = delta + delta_w[-lev] = np.dot(delta, activations[-lev - 1].T) + return (delta_b, delta_w) + + def evaluate(self, train_data): + test_result = [[self.feed_forward(x), y] + for x, y in train_data] + return np.sum([0.5 * (x - y) ** 2 for (x, y) in test_result]) + + def predict(self, test_input): + test_result = [self.feed_forward(x) + for x in test_input] + return test_result + + def cost_function(self, output_a, y): + ''' + 损失函数 + ''' + return (output_a - y) + + pass diff --git a/bp神经网络.py b/bp神经网络.py index dd3197f..4dd32e5 100644 --- a/bp神经网络.py +++ b/bp神经网络.py @@ -3,37 +3,33 @@ import pandas as pd import matplotlib.pyplot as plt import torch from sklearn import preprocessing -from torch.utils.data import DataLoader,TensorDataset - - -data = pd.read_excel(r'C:\python-project\pytorch3\入模数据\杭州数据.xlsx',index_col='dtdate') +data = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\杭州数据.xlsx',index_col='dtdate') print(data.columns) -x = np.array(data.drop(columns=['售电量','city_name']).loc['2021-1':'2023-6']) -y = np.array(data['售电量'].loc['2021-1':'2023-6']) +y = np.array(data['售电量']) # 制作标签,用于比对训练结果 +x = data.drop(columns=['售电量','city_name']) # 在特征数据集中去掉label +# df.drop(label, axis=0) +# label:要删除的列或者行,如果要删除多个,传入列表 +# axis:轴的方向,0为行,1为列,默认为0 +fea_train = np.array(x) # 转换为ndarray格式 # 数据标准化操作:(x-均值μ) / 标准差σ ,使数据关于原点对称,提升训练效率 -input_features = preprocessing.StandardScaler().fit_transform(np.array(x)) # fit:求出均值和标准差 transform:求解 - -# y归一化 -min = np.min(y) -max = np.max(y) -y = (y - min)/(max - min) - -x_eval = torch.from_numpy(data.drop(columns=['售电量','city_name']).loc['2023-7'].values).type(torch.float32) -y_eval = torch.from_numpy(data['售电量'].loc['2023-7'].values).type(torch.float32) - -ds = TensorDataset(torch.from_numpy(x),torch.from_numpy(y)) -dl = DataLoader(ds,batch_size=12,shuffle=True,drop_last=True) +input_features = preprocessing.StandardScaler().fit_transform(fea_train) # fit:求出均值和标准差 transform:求解 +# y归一化 防止梯度爆炸 +y = (y - np.min(y))/(np.max(y) - np.min(y)) +print(y) # 设定神经网络的输入参数、隐藏层神经元、输出参数的个数 input_size = input_features.shape[1] # 设定输入特征个数 - -hidden_size = 64 -output_size =1 +# np.shape[1] +# 0为行,1为列,默认为0 +# 在此表格中因为每行为各样本的值,每列为不同的特征分类,所以此处0表示样本数,1表示特征数 +hidden_size = 64 # 设定隐藏层包含64个神经元 +output_size = 1 # 设定输出特征个数为1 +batch_size = 32 # 每一批迭代的特征数量 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # 选择使用GPU训练 @@ -44,21 +40,24 @@ my_nn = torch.nn.Sequential( torch.nn.ReLU().to(device), torch.nn.Linear(hidden_size, hidden_size).to(device), # 第二层 → 第三层 torch.nn.ReLU().to(device), - torch.nn.Linear(hidden_size, output_size) + torch.nn.Linear(hidden_size, hidden_size).to(device), # 第三层 → 第四层 + torch.nn.ReLU().to(device), + torch.nn.Linear(hidden_size, output_size).to(device) # 第四层 → 输出层 ).to(device) cost = torch.nn.MSELoss().to(device) -optimizer = torch.optim.Adam(my_nn.parameters(), lr=0.0001) +optimizer = torch.optim.Adam(my_nn.parameters(), lr=0.001) # 训练网络 losses = [] -for i in range(1000): +for i in range(300): batch_loss = [] # 采用MINI-Batch的方法进行训练 - for X,y in dl: - X,y = X.to(device).type(torch.float32),y.to(device).type(torch.float32) - - prediction = my_nn(X) - loss = cost(y, prediction) + for start in range(0, len(input_features), batch_size): + end = start + batch_size if start + batch_size < len(input_features) else len(input_features) + x_train = torch.tensor(input_features[start:end], dtype=torch.float32, requires_grad=True).to(device) + y_train = torch.tensor(y[start:end], dtype=torch.float32, requires_grad=True).to(device) + prediction = my_nn(x_train) + loss = cost(y_train, prediction) optimizer.zero_grad() loss.backward(retain_graph=True) optimizer.step() @@ -66,16 +65,17 @@ for i in range(1000): if i % 10 == 0: losses.append(np.mean(batch_loss)) + print(losses) print(i, np.mean(batch_loss)) # 保存模型 # torch.save(my_nn, 'BP.pt') # 绘制图像 -# dev_x = [i * 10 for i in range(20)] -# plt.xlabel('step count') -# plt.ylabel('loss') -# plt.xlim((0, 200)) -# plt.ylim((0, 1000)) -# plt.plot(dev_x, losses) -# plt.show() +dev_x = [i * 10 for i in range(20)] +plt.xlabel('step count') +plt.ylabel('loss') +plt.xlim((0, 200)) +plt.ylim((0, 1000)) +plt.plot(dev_x, losses) +plt.show() diff --git a/各地级市日电量模型/hangzhou.bin b/各地级市日电量模型/hangzhou.bin index d8fc03f..3df6ddc 100644 Binary files a/各地级市日电量模型/hangzhou.bin and b/各地级市日电量模型/hangzhou.bin differ diff --git a/各地级市日电量模型/huzhou.bin b/各地级市日电量模型/huzhou.bin index 59128bb..a60096c 100644 Binary files a/各地级市日电量模型/huzhou.bin and b/各地级市日电量模型/huzhou.bin differ diff --git a/各地级市日电量模型/jiaxing.bin b/各地级市日电量模型/jiaxing.bin index 598c0a6..3375d37 100644 Binary files a/各地级市日电量模型/jiaxing.bin and b/各地级市日电量模型/jiaxing.bin differ diff --git a/各地级市日电量模型/jinhua.bin b/各地级市日电量模型/jinhua.bin index 29a3cb7..e35d168 100644 Binary files a/各地级市日电量模型/jinhua.bin and b/各地级市日电量模型/jinhua.bin differ diff --git a/各地级市日电量模型/lishui.bin b/各地级市日电量模型/lishui.bin index 887f183..6c4be98 100644 Binary files a/各地级市日电量模型/lishui.bin and b/各地级市日电量模型/lishui.bin differ diff --git a/各地级市日电量模型/lstm单日预测.py b/各地级市日电量模型/lstm单日预测.py index 2d8575c..fdebc7a 100644 --- a/各地级市日电量模型/lstm单日预测.py +++ b/各地级市日电量模型/lstm单日预测.py @@ -41,7 +41,7 @@ def inverse_transform_col(scaler,y,n_col): device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') -data = pd.read_excel(r'C:\python-project\pytorch3\入模数据\杭州数据.xlsx',index_col='dtdate') +data = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\杭州数据.xlsx',index_col='dtdate') print(data.columns) data.columns = data.columns.map(lambda x: x.strip()) data.drop(columns='city_name',inplace=True) @@ -73,16 +73,17 @@ y_train = torch.from_numpy(y_train).to(device).type(torch.float32) x_eval = torch.from_numpy(x_eval.values).to(device).type(torch.float32) -model = LSTM_Regression(13, 32, output_size=1, num_layers=3).to(device) # 导入模型并设置模型的参数输入输出层、隐藏层等 +model = LSTM_Regression(13, 16, output_size=1, num_layers=2).to(device) # 导入模型并设置模型的参数输入输出层、隐藏层等 train_loss = [] loss_function = nn.MSELoss() -optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0) +optimizer = torch.optim.Adam(model.parameters(), lr=0.005, betas=(0.9, 0.999), eps=1e-08, weight_decay=0) ds = TensorDataset(x_train,y_train) -dl = DataLoader(ds,batch_size=32,shuffle=True) +dl = DataLoader(ds,batch_size=3,shuffle=True) -for i in range(2500): + +for i in range(300): for j,(x,y) in enumerate(dl): x,y = x.to(device),y.to(device) out = model(x) @@ -91,7 +92,7 @@ for i in range(2500): optimizer.step() optimizer.zero_grad() train_loss.append(loss.item()) - if j%10 == 0: + if i%100 == 0: print(f'epoch:{i+1} 第{j}次loss:{loss}') # 保存模型 diff --git a/各地级市日电量模型/ningbo.bin b/各地级市日电量模型/ningbo.bin index fbcd6ba..fdf04d4 100644 Binary files a/各地级市日电量模型/ningbo.bin and b/各地级市日电量模型/ningbo.bin differ diff --git a/各地级市日电量模型/quzhou.bin b/各地级市日电量模型/quzhou.bin index 9429ba1..5a4d40d 100644 Binary files a/各地级市日电量模型/quzhou.bin and b/各地级市日电量模型/quzhou.bin differ diff --git a/各地级市日电量模型/shaoxing.bin b/各地级市日电量模型/shaoxing.bin index 4dddc7e..9220a7b 100644 Binary files a/各地级市日电量模型/shaoxing.bin and b/各地级市日电量模型/shaoxing.bin differ diff --git a/各地级市日电量模型/wenzhou.bin b/各地级市日电量模型/wenzhou.bin index 381c4c6..a112774 100644 Binary files a/各地级市日电量模型/wenzhou.bin and b/各地级市日电量模型/wenzhou.bin differ diff --git a/各地级市日电量模型/zhoushan.bin b/各地级市日电量模型/zhoushan.bin index e24c578..549ebf4 100644 Binary files a/各地级市日电量模型/zhoushan.bin and b/各地级市日电量模型/zhoushan.bin differ diff --git a/各地级市日电量模型/丽水.py b/各地级市日电量模型/丽水.py index 39c694c..a3bbbf5 100644 --- a/各地级市日电量模型/丽水.py +++ b/各地级市日电量模型/丽水.py @@ -5,15 +5,23 @@ from sklearn.metrics import r2_score from sklearn.model_selection import train_test_split import matplotlib as mpl import matplotlib.pyplot as plt + +def season(x): + if str(x)[5:7] in ('06','07','08','12','01','02'): + return 1 + else: + return 0 + + mpl.rcParams['font.sans-serif']=['kaiti'] pd.set_option('display.width',None) data = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\丽水数据.xlsx',index_col='dtdate') data.index = pd.to_datetime(data.index,format='%Y-%m-%d') - -plt.plot(range(len(data)),data['售电量']) -plt.show() +data['season'] = data.index.map(season) +# plt.plot(range(len(data)),data['售电量']) +# plt.show() print(data.head()) df_eval = data.loc['2023-9'] @@ -22,7 +30,7 @@ df_train = data.loc['2021-1':'2023-8'] print(len(df_eval),len(df_train),len(data)) -df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量']] +df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']] # IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%'] # high = df['售电量'].describe()['75%'] + 1.5*IQR @@ -31,14 +39,14 @@ df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量' # df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)] -X = df_train[['tem_max','tem_min','holiday','24ST','rh','prs']] -X_eval = df_eval[['tem_max','tem_min','holiday','24ST','rh','prs']] +X = df_train[['tem_max','tem_min','holiday','24ST','season']] +X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']] y = df_train['售电量'] # best_goal = 1 # best_i = {} # for i in range(200): -x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.15,random_state=176) +x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.15,random_state=42) model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150) model.fit(x_train,y_train) @@ -62,13 +70,18 @@ goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result # print(best_i,best_goal) - -result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\丽水.csv') -with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: - f.write(f'丽水月末3天偏差率:{goal},9号-月底偏差率:{goal2}') +# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: +# f.write(f'丽水月末3天偏差率:{goal},9号-月底偏差率:{goal2}') # # 保存模型 # model.save_model('lishui.bin') -# loaded_model = xgb.XGBRegressor() -# loaded_model.load_model('lishui.bin') -# model.predict(X_eval) +import numpy as np +loaded_model = xgb.XGBRegressor() +loaded_model.load_model('lishui.bin') +X_eval = np.array([ + [22.5,15.4,23,0,0], + [22.3,15.5,23,1,0], + [20.0,15.7,23,1,0], + [22.0,15.0,23,0,0], +[23.6,13.9,23,0,0]]) +print(model.predict(X_eval)) diff --git a/各地级市日电量模型/台州.py b/各地级市日电量模型/台州.py index 5cd00c9..16a208a 100644 --- a/各地级市日电量模型/台州.py +++ b/各地级市日电量模型/台州.py @@ -5,24 +5,30 @@ from sklearn.metrics import r2_score from sklearn.model_selection import train_test_split import matplotlib as mpl import matplotlib.pyplot as plt + +def season(x): + if str(x)[5:7] in ('06','07','08','12','01','02'): + return 1 + else: + return 0 mpl.rcParams['font.sans-serif']=['kaiti'] pd.set_option('display.width',None) data = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\台州数据(1).xlsx',index_col='dtdate') data.index = pd.to_datetime(data.index,format='%Y-%m-%d') - -plt.plot(range(len(data)),data['售电量']) -plt.show() +data['season'] = data.index.map(season) +# plt.plot(range(len(data)),data['售电量']) +# plt.show() print(data.head()) -df_eval = data.loc['2023-9'] +df_eval = data.loc['2023-8'] # df_train = data.loc['2021-1':'2023-7'] df_train = data[500:850] print(len(df_eval),len(df_train),len(data)) -df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量']] +df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']] # IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%'] @@ -33,8 +39,8 @@ df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量' # df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)] -X = df_train[['tem_max','tem_min','holiday','24ST','rh','prs']] -X_eval = df_eval[['tem_max','tem_min','holiday','24ST','rh','prs']] +X = df_train[['tem_max','tem_min','holiday','24ST','season']] +X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']] y = df_train['售电量'] best_goal = 1 best_i = {} @@ -64,13 +70,19 @@ goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result # print(best_i,best_goal) - -result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\台州.csv') -with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: - f.write(f'台州月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n') -# 保存模型 -model.save_model('taizhou.bin') +# +# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\台州.csv') +# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: +# f.write(f'台州月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n') +# # 保存模型 +# model.save_model('taizhou.bin') +import numpy as np loaded_model = xgb.XGBRegressor() loaded_model.load_model('taizhou.bin') -model.predict(X_eval) +X_eval = np.array([[25.1,16.8,23,0,0], + [22.8,16.3,23,1,0], + [22.7,14.6,23,1,0], + [22.5,14.4,23,0,0], + [22.6,15.6,23,0,0]]) +print(model.predict(X_eval)) diff --git a/各地级市日电量模型/嘉兴.py b/各地级市日电量模型/嘉兴.py index 33a1881..1be7f4c 100644 --- a/各地级市日电量模型/嘉兴.py +++ b/各地级市日电量模型/嘉兴.py @@ -5,6 +5,11 @@ from sklearn.metrics import r2_score from sklearn.model_selection import train_test_split import matplotlib as mpl import matplotlib.pyplot as plt +def season(x): + if str(x)[5:7] in ('06','07','08','12','01','02'): + return 1 + else: + return 0 mpl.rcParams['font.sans-serif']=['kaiti'] @@ -17,7 +22,7 @@ df.set_index('dtdate',inplace=True) plt.plot(range(len(df)),df['售电量']) plt.show() print(df.head()) - +df['season'] = df.index.map(season) df_eval = df[df.index.str[:7]=='2023-08'] # df_train = df[(df.index.str[:7]!='2023-08')&(df.index.str[:7]!='2023-09')] @@ -26,7 +31,7 @@ print(len(df_eval),len(df_train),len(df)) -df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量']] +df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']] # IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%'] @@ -37,8 +42,8 @@ df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量' # df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)] -X = df_train[['tem_max','tem_min','holiday','24ST','rh','prs']] -X_eval = df_eval[['tem_max','tem_min','holiday','24ST','rh','prs']] +X = df_train[['tem_max','tem_min','holiday','24ST','season']] +X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']] y = df_train['售电量'] # best_goal = 1 @@ -63,6 +68,7 @@ goal = (result_eval['eval'][-3:].sum()-result_eval['pred'][-3:].sum())/result_ev # print(goal) goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum() +print(goal,goal2) # print(goal2) # if abs(goal) < best_goal : # best_goal = abs(goal) @@ -73,11 +79,17 @@ goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result -result_eval.to_csv(r'C:\Users\user\Desktop\8月各地市日电量预测结果\嘉兴.csv') -with open(r'C:\Users\user\Desktop\8月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: - f.write(f'嘉兴月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n') +# result_eval.to_csv(r'C:\Users\user\Desktop\8月各地市日电量预测结果\嘉兴.csv') +# with open(r'C:\Users\user\Desktop\8月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: +# f.write(f'嘉兴月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n') # 保存模型 -model.save_model('jiaxing.bin') +# model.save_model('jiaxing.bin') loaded_model = xgb.XGBRegressor() loaded_model.load_model('jiaxing.bin') -model.predict(X_eval) \ No newline at end of file +import numpy as np +X_eval = np.array([[23.4,16.1,23,0,0], + [23.3,16.0,23,1,0], + [22.0,15.8,23,1,0], + [23.8,15.7,23,0,0], + [24.1,15.3,23,0,0]]) +print(model.predict(X_eval)) \ No newline at end of file diff --git a/各地级市日电量模型/宁波.py b/各地级市日电量模型/宁波.py index eb46c5b..86d1569 100644 --- a/各地级市日电量模型/宁波.py +++ b/各地级市日电量模型/宁波.py @@ -5,6 +5,11 @@ from sklearn.metrics import r2_score from sklearn.model_selection import train_test_split import matplotlib as mpl import matplotlib.pyplot as plt +def season(x): + if str(x)[5:7] in ('06','07','08','12','01','02'): + return 1 + else: + return 0 mpl.rcParams['font.sans-serif']=['kaiti'] @@ -13,6 +18,7 @@ pd.set_option('display.width',None) df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\宁波数据.xlsx',index_col='dtdate') df.index = pd.to_datetime(df.index,format='%Y-%m-%d') +df['season'] = df.index.map(season) plt.plot(range(len(df)),df['售电量']) plt.show() print(df.head()) @@ -25,7 +31,7 @@ print(len(df_eval),len(df_train),len(df)) -df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量']] +df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']] # IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%'] @@ -36,8 +42,8 @@ df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量' # df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)] -X = df_train[['tem_max','tem_min','holiday','24ST','rh','prs']] -X_eval = df_eval[['tem_max','tem_min','holiday','24ST','rh','prs']] +X = df_train[['tem_max','tem_min','holiday','24ST','season']] +X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']] y = df_train['售电量'] # best_goal = 1 @@ -71,13 +77,19 @@ print(goal2) # print(best_i,best_goal,x) - -result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\宁波.csv') -with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: - f.write(f'宁波月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n') +# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\宁波.csv') +# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: +# f.write(f'宁波月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n') # 保存模型 -model.save_model('ningbo.bin') +# model.save_model('ningbo.bin') +import numpy as np loaded_model = xgb.XGBRegressor() loaded_model.load_model('ningbo.bin') -model.predict(X_eval) + +X_eval = np.array([[23.3,15.6,23,0,0], + [22.5,16.0,23,1,0], + [23.4,16.4,23,1,0], + [20.8,15.3,23,0,0], + [23.6,14.0,23,0,0]]) +print(model.predict(X_eval)) diff --git a/各地级市日电量模型/杭州.py b/各地级市日电量模型/杭州.py index 450296e..4d85119 100644 --- a/各地级市日电量模型/杭州.py +++ b/各地级市日电量模型/杭州.py @@ -1,5 +1,6 @@ import xgboost as xgb import pandas as pd +import numpy as np import os from sklearn.metrics import r2_score from sklearn.model_selection import train_test_split @@ -34,7 +35,7 @@ def normal(nd): return nd[(ndlow)] -data = pd.read_excel(r'C:\python-project\pytorch3\入模数据\杭州数据.xlsx',index_col='dtdate') +data = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\杭州数据.xlsx',index_col='dtdate') data.index = pd.to_datetime(data.index,format='%Y-%m-%d') data = data.loc[normal(data['售电量']).index] # plt.plot(range(len(data['售电量']['2021':'2022'])),data['售电量']['2021':'2022']) @@ -45,7 +46,7 @@ data = data.loc[normal(data['售电量']).index] data['month'] = data.index.strftime('%Y-%m-%d').str[6] data['month'] = data['month'].astype('int') data['season'] = data.index.map(season) -print(data.head(50)) +print(data.tail(50)) df_eval = data.loc['2022-9':'2023-9'] df_train = data.loc['2021-1':'2022-8'] @@ -82,36 +83,53 @@ result_test = pd.DataFrame({'test':y_test,'pred':y_pred},index=y_test.index) # 指标打印 print(abs(y_test - y_pred).mean() / y_test.mean()) -eval_pred = model.predict(X_eval) - -result_eval = pd.DataFrame({'eval':df_eval['售电量'],'pred':eval_pred},index=df_eval['售电量'].index) - -print((result_eval['eval'].sum()-result_eval['pred'].sum())/result_eval['eval'].sum()) - -goal = (result_eval['eval'][-3:].sum()-result_eval['pred'][-3:].sum())/result_eval['eval'].sum() -print('goal:',goal) - -goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum() - -print('goal2:',goal2) -print(result_eval) -print('r2:',r2_score(y_test,y_pred)) +# eval_pred = model.predict(X_eval) +# +# result_eval = pd.DataFrame({'eval':df_eval['售电量'],'pred':eval_pred},index=df_eval['售电量'].index) +# +# print((result_eval['eval'].sum()-result_eval['pred'].sum())/result_eval['eval'].sum()) +# +# goal = (result_eval['eval'][-3:].sum()-result_eval['pred'][-3:].sum())/result_eval['eval'].sum() +# print('goal:',goal) +# +# goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum() +# +# print('goal2:',goal2) +# print(result_eval) +# print('r2:',r2_score(y_test,y_pred)) +# +# # result_eval.to_csv('asda.csv',encoding='gbk') +# # if abs(goal) < best_goal: +# # best_goal = abs(goal) +# # best_i['best_i'] = i +# # x = goal2 +# # print(best_i,best_goal,x) +# +# +# +# # result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\杭州.csv') +# # with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: +# # f.write(f'杭州月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n') +# 保存模型 +# model.save_model('hangzhou.bin') -# result_eval.to_csv('asda.csv',encoding='gbk') - # if abs(goal) < best_goal: - # best_goal = abs(goal) - # best_i['best_i'] = i - # x = goal2 -# print(best_i,best_goal,x) +# X_eval = df_eval[['tem_max','tem_min','24ST','holiday','season']] +df_eval = pd.read_excel(r'C:\Users\user\Desktop\浙江气象1027.xlsx') +df_eval.columns = df_eval.columns.map(lambda x:x.strip()) +df_eval = df_eval[['city_name','dtdate','tem_max','tem_min']] +df_eval['city_name'] = df_eval['city_name'].map(lambda x:x.strip()) +df_hangzhou = df_eval[df_eval['city_name']=='金华市'].sort_values(by='dtdate') +print(df_hangzhou) -# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\杭州.csv') -# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: -# f.write(f'杭州月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n') -# # 保存模型 -# model.save_model('hangzhou.bin') -# loaded_model = xgb.XGBRegressor() -# loaded_model.load_model('hangzhou.bin') -# model.predict(X_eval) +loaded_model = xgb.XGBRegressor() +loaded_model.load_model('hangzhou.bin') +# X_eval = np.array([[26.1,16.1,23,0,0], +# [24.5,14.6,23,1,0], +# [24.0,15.2,23,1,0], +# [22.7,14.9,23,0,0], +# [24.1,13.4,23,0,0]]) +# +# print(loaded_model.predict(X_eval)) diff --git a/各地级市日电量模型/温州.py b/各地级市日电量模型/温州.py index 69183f4..8c72a08 100644 --- a/各地级市日电量模型/温州.py +++ b/各地级市日电量模型/温州.py @@ -5,6 +5,11 @@ from sklearn.metrics import r2_score from sklearn.model_selection import train_test_split import matplotlib as mpl import matplotlib.pyplot as plt +def season(x): + if str(x)[5:7] in ('06','07','08','12','01','02'): + return 1 + else: + return 0 mpl.rcParams['font.sans-serif']=['kaiti'] @@ -13,6 +18,7 @@ pd.set_option('display.width',None) df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\温州数据(1).xlsx',index_col='dtdate') df.index = pd.to_datetime(df.index,format='%Y-%m-%d') +df['season'] = df.index.map(season) plt.plot(range(len(df)),df['售电量']) plt.show() print(df.head()) @@ -25,7 +31,7 @@ print(len(df_eval),len(df_train),len(df)) -df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量']] +df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']] # IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%'] @@ -36,15 +42,15 @@ df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量' # df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)] -X = df_train[['tem_max','tem_min','holiday','24ST','rh','prs']] -X_eval = df_eval[['tem_max','tem_min','holiday','24ST','rh','prs']] +X = df_train[['tem_max','tem_min','holiday','24ST','season']] +X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']] y = df_train['售电量'] # best_goal = 1 # best_i = {} # for i in range(400): -x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=304) +x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=42) model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150) model.fit(x_train,y_train) @@ -70,11 +76,19 @@ print(goal2) # print(best_i,best_goal,x) -result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\温州.csv') -with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: - f.write(f'温州月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n') +# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\温州.csv') +# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: +# f.write(f'温州月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n') # 保存模型 -model.save_model('wenzhou.bin') +# model.save_model('wenzhou.bin') + loaded_model = xgb.XGBRegressor() + loaded_model.load_model('wenzhou.bin') -model.predict(X_eval) \ No newline at end of file +import numpy as np +X_eval = np.array([[24.8,17.9,23,0,0], + [23.1,15.4,23,1,0], + [22.2,16.0,23,1,0], + [22.1,14.9,23,0,0], + [23.5,14.3,23,0,0]]) +print(model.predict(X_eval)) \ No newline at end of file diff --git a/各地级市日电量模型/湖州.py b/各地级市日电量模型/湖州.py index e015a06..07d7d53 100644 --- a/各地级市日电量模型/湖州.py +++ b/各地级市日电量模型/湖州.py @@ -7,12 +7,17 @@ import matplotlib as mpl mpl.rcParams['font.sans-serif']=['kaiti'] import random import matplotlib.pyplot as plt - +def season(x): + if str(x)[5:7] in ('06','07','08','12','01','02'): + return 1 + else: + return 0 pd.set_option('display.width',None) df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\湖州数据.xlsx',index_col='dtdate') df.index = pd.to_datetime(df.index,format='%Y-%m-%d') +df['season'] = df.index.map(season) print(df.head()) df_eval = df.loc['2023-9'] @@ -20,7 +25,7 @@ df_train = df.loc['2021-1':'2023-8'] print(len(df_eval),len(df_train),len(df)) -df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量']] +df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']] # IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%'] @@ -31,8 +36,8 @@ df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量' # df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)] -X = df_train[['tem_max','tem_min','holiday','24ST','rh','prs']] -X_eval = df_eval[['tem_max','tem_min','holiday','24ST','rh','prs']] +X = df_train[['tem_max','tem_min','holiday','24ST','season']] +X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']] y = df_train['售电量'] x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=100) @@ -58,14 +63,20 @@ print(goal2) -result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\湖州.csv') -with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: - f.write(f'湖州月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n') +# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\湖州.csv') +# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: +# f.write(f'湖州月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n') + +# 保存模型 -# # 保存模型 -# # model.save_model('huzhou.bin') -# loaded_model = xgb.XGBRegressor() -# loaded_model.load_model('huzhou.bin') -# model.predict(X_eval) +loaded_model = xgb.XGBRegressor() +loaded_model.load_model('huzhou.bin') +import numpy as np +X_eval = np.array([[22.2,14.8,23,0,0], + [23.4,15.9,23,1,0], + [22.5,15.6,23,1,0], + [23.8,14.3,23,0,0], + [23.9,14.0,23,0,0]]) +print(model.predict(X_eval)) diff --git a/各地级市日电量模型/绍兴.py b/各地级市日电量模型/绍兴.py index fabe943..0dbdc6e 100644 --- a/各地级市日电量模型/绍兴.py +++ b/各地级市日电量模型/绍兴.py @@ -5,6 +5,11 @@ from sklearn.metrics import r2_score from sklearn.model_selection import train_test_split import matplotlib as mpl import matplotlib.pyplot as plt +def season(x): + if str(x)[5:7] in ('06','07','08','12','01','02'): + return 1 + else: + return 0 mpl.rcParams['font.sans-serif']=['kaiti'] @@ -13,7 +18,7 @@ pd.set_option('display.width',None) df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\绍兴数据(1).xlsx',index_col='dtdate') df.index = pd.to_datetime(df.index ,format='%Y-%m-%d') - +df['season'] = df.index.map(season) plt.plot(range(len(df)),df['售电量']) plt.show() print(df.head()) @@ -26,7 +31,7 @@ print(len(df_eval),len(df_train),len(df)) -df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量']] +df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']] # IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%'] @@ -37,8 +42,8 @@ df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量' # df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)] -X = df_train[['tem_max','tem_min','holiday','24ST','rh','prs']] -X_eval = df_eval[['tem_max','tem_min','holiday','24ST','rh','prs']] +X = df_train[['tem_max','tem_min','holiday','24ST','season']] +X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']] y = df_train['售电量'] @@ -46,7 +51,7 @@ y = df_train['售电量'] # best_i = {} # for i in range(400): -x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=253) +x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=42) model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150) model.fit(x_train,y_train) @@ -74,14 +79,20 @@ print(goal2) -result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\绍兴.csv') -with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: - f.write(f'绍兴月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n') +# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\绍兴.csv') +# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: +# f.write(f'绍兴月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n') # 保存模型 model.save_model('shaoxing.bin') loaded_model = xgb.XGBRegressor() loaded_model.load_model('shaoxing.bin') -model.predict(X_eval) +import numpy as np +X_eval = np.array([[24.7,15.9,23,0,0], + [24.4,16.2,23,1,0], + [23.6,15.1,23,1,0], + [24.7,15.3,23,0,0], + [24.9,14.6,23,0,0]]) +print(model.predict(X_eval)) diff --git a/各地级市日电量模型/舟山.py b/各地级市日电量模型/舟山.py index 3bd2dab..8535146 100644 --- a/各地级市日电量模型/舟山.py +++ b/各地级市日电量模型/舟山.py @@ -7,13 +7,17 @@ import matplotlib as mpl mpl.rcParams['font.sans-serif']=['kaiti'] import random import matplotlib.pyplot as plt - +def season(x): + if str(x)[5:7] in ('06','07','08','12','01','02'): + return 1 + else: + return 0 pd.set_option('display.width',None) df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\舟山数据(1).xlsx',index_col='dtdate') df.index = pd.to_datetime(df.index,format='%Y-%m-%d') - +df['season'] = df.index.map(season) print(df.head()) df_eval = df.loc['2023-9'] @@ -21,7 +25,7 @@ df_train = df.loc['2021-1':'2023-8'] print(len(df_eval),len(df_train),len(df)) -df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量']] +df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']] # IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%'] @@ -32,11 +36,11 @@ df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量' # df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)] -X = df_train[['tem_max','tem_min','holiday','24ST','rh','prs']] -X_eval = df_eval[['tem_max','tem_min','holiday','24ST','rh','prs']] +X = df_train[['tem_max','tem_min','holiday','24ST','season']] +X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']] y = df_train['售电量'] -x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=100) +x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=158) model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150) model.fit(x_train,y_train) @@ -57,11 +61,17 @@ print(goal) goal2 = (result_eval['eval'][-23:].sum()-result_eval['pred'][-23:].sum())/result_eval['eval'].sum() print(goal2) -result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\舟山.csv') -with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: - f.write(f'舟山月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n') +# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\舟山.csv') +# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: +# f.write(f'舟山月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n') model.save_model('zhoushan.bin') loaded_model = xgb.XGBRegressor() loaded_model.load_model('zhoushan.bin') -model.predict(X_eval) +import numpy as np +X_eval = np.array([[22.6,18.7,23,0,0], + [21.6,17.9,23,1,0], + [21.9,18.2,23,1,0], + [20.7,18.2,23,0,0], + [22.3,18.0,23,0,0]]) +print(model.predict(X_eval)) diff --git a/各地级市日电量模型/衢州.py b/各地级市日电量模型/衢州.py index a1e7b93..948779a 100644 --- a/各地级市日电量模型/衢州.py +++ b/各地级市日电量模型/衢州.py @@ -5,6 +5,12 @@ from sklearn.metrics import r2_score from sklearn.model_selection import train_test_split import matplotlib as mpl import matplotlib.pyplot as plt +def season(x): + if str(x)[5:7] in ('06','07','08','12','01','02'): + return 1 + else: + return 0 + mpl.rcParams['font.sans-serif']=['kaiti'] @@ -14,6 +20,7 @@ pd.set_option('display.width',None) df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\衢州数据.xlsx') df['dtdate'] = pd.to_datetime(df['dtdate'],format='%Y-%m-%d').astype('string') df.set_index('dtdate',inplace=True) +df['season'] = df.index.map(season) plt.plot(range(len(df)),df['售电量']) plt.show() print(df.head()) @@ -23,13 +30,13 @@ print(df.head()) # df_train = df[(df.index.str[:7]!='2023-09')&(df.index.str!='2023-08-29')&(df.index.str!='2023-08-30')&(df.index.str!='2023-08-31')] df_eval = df[df.index.str[:7]=='2023-07'] -# df_train = df[(df.index.str[:7]!='2023-08')&(df.index.str[:7]!='2023-09')] -df_train = df[450:900] +df_train = df[(df.index.str[:7]!='2023-08')&(df.index.str[:7]!='2023-09')] +# df_train = df[450:900] # max_8,min_8 = df_eval['售电量'].max(),df_eval['售电量'].min() print(len(df_eval),len(df_train),len(df)) -df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量']] +df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']] IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%'] high = df['售电量'].describe()['75%'] + 1.5*IQR @@ -39,11 +46,11 @@ print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] < df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)] -X = df_train[['tem_max','tem_min','holiday','24ST','rh','prs']] -X_eval = df_eval[['tem_max','tem_min','holiday','24ST','rh','prs']] +X = df_train[['tem_max','tem_min','holiday','24ST','season']] +X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']] y = df_train['售电量'] -x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=102) +x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42) model = xgb.XGBRegressor(max_depth=6, learning_rate=0.1, n_estimators=150) model.fit(x_train,y_train) y_pred = model.predict(x_test) @@ -62,17 +69,24 @@ print(goal2) # print((result_eval['eval'].sum()-result_eval['pred'].sum())/result_eval['eval'].sum()) # print((result_eval['eval'].sum()-(result_eval['eval'][:-3].sum()+result_eval['pred'][-3:].sum()))/result_eval['eval'].sum()) -# -result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\衢州.csv') -with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: - f.write(f'衢州月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n') +# # +# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\衢州.csv') +# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: +# f.write(f'衢州月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n') # # 保存模型 # model.save_model('quzhou.bin') -# loaded_model = xgb.XGBRegressor() -# loaded_model.load_model('quzhou.bin') -# model.predict(X_eval) +loaded_model = xgb.XGBRegressor() +loaded_model.load_model('quzhou.bin') +import numpy as np +X_eval = np.array([[24.0,15.6,23,0,0], + [24.2,17.1,23,1,0], + [22.6,16.7,23,1,0], + [23.5,15.5,23,0,0], + [24.5,13.9,23,0,0]]) + +print(model.predict(X_eval)) # from sklearn.ensemble import RandomForestRegressor diff --git a/各地级市日电量模型/金华.py b/各地级市日电量模型/金华.py index aa7061b..b99fe5e 100644 --- a/各地级市日电量模型/金华.py +++ b/各地级市日电量模型/金华.py @@ -5,6 +5,11 @@ from sklearn.metrics import r2_score from sklearn.model_selection import train_test_split import matplotlib as mpl import matplotlib.pyplot as plt +def season(x): + if str(x)[5:7] in ('06','07','08','12','01','02'): + return 1 + else: + return 0 mpl.rcParams['font.sans-serif']=['kaiti'] @@ -14,19 +19,20 @@ pd.set_option('display.width',None) df = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\金华数据.xlsx') df['dtdate'] = pd.to_datetime(df['dtdate'],format='%Y-%m-%d').astype('string') df.set_index('dtdate',inplace=True) +df['season'] = df.index.map(season) plt.plot(range(len(df)),df['售电量']) plt.show() print(df.head()) df_eval = df[df.index.str[:7]=='2023-09'] -# df_train = df[(df.index.str[:7]!='2023-08')&(df.index.str[:7]!='2023-09')] -df_train = df[500:850] +df_train = df[(df.index.str[:7]!='2023-08')&(df.index.str[:7]!='2023-09')] +# df_train = df[500:850] print(len(df_eval),len(df_train),len(df)) -df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量']] +df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']] # IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%'] @@ -37,15 +43,15 @@ df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量' # df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)] -X = df_train[['tem_max','tem_min','holiday','24ST','rh','prs']] -X_eval = df_eval[['tem_max','tem_min','holiday','24ST','rh','prs']] +X = df_train[['tem_max','tem_min','holiday','24ST','season']] +X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']] y = df_train['售电量'] # best_goal = 1 # best_i = {} # for i in range(400): -x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=396) +x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=142) model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150) model.fit(x_train,y_train) @@ -70,13 +76,19 @@ print(goal2) # x = goal2 # print(best_i,best_goal,x) -result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\金华.csv') -with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: - f.write(f'金华月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n') +# result_eval.to_csv(r'C:\Users\user\Desktop\9月各地市日电量预测结果\金华.csv') +# with open(r'C:\Users\user\Desktop\9月各地市日电量预测结果\偏差率.txt','a',encoding='utf-8') as f: +# f.write(f'金华月末3天偏差率:{round(goal,5)},9号-月底偏差率:{round(goal2,5)}\n') # # 保存模型 # model.save_model('jinhua.bin') -# loaded_model = xgb.XGBRegressor() -# loaded_model.load_model('jinhua.bin') -# model.predict(X_eval) +loaded_model = xgb.XGBRegressor() +loaded_model.load_model('jinhua.bin') +import numpy as np +X_eval = np.array([[26.0,15.0,23,0,0], + [24.9,17.6,23,1,0], + [24.0,17.7,23,1,0], + [24.8,15.8,23,0,0], + [25.2,14.0,23,0,0]]) +print(model.predict(X_eval)) diff --git a/浙江电压等级电量/测试.py b/浙江电压等级电量/测试.py new file mode 100644 index 0000000..e1ed8e8 --- /dev/null +++ b/浙江电压等级电量/测试.py @@ -0,0 +1,16 @@ +import torch +import pandas as pd +from 电压等级_输出为5 import LSTM_Regression +from 电压等级_输出为5 import create_dataset +model = LSTM_Regression(10, 32, output_size=5, num_layers=2) +model.load_state_dict(torch.load('dy5.pth')) + +df_eval = pd.read_excel(r'C:\Users\user\Desktop\浙江各地市分电压日电量数据\杭州.xlsx',index_col=' stat_date ') +df_eval.columns = df_eval.columns.map(lambda x:x.strip()) +df_eval.index = pd.to_datetime(df_eval.index) + +x,y = create_dataset(df_eval.loc['2023-10']['10kv以下'],10) +x = x.reshape(-1,1,10) +print(x.shape,y.shape) +x = torch.from_numpy(x).type(torch.float32) +print(model(x),y) \ No newline at end of file diff --git a/浙江电压等级电量/电压等级_输出为5.py b/浙江电压等级电量/电压等级_输出为5.py index 9767442..7ffbbac 100644 --- a/浙江电压等级电量/电压等级_输出为5.py +++ b/浙江电压等级电量/电压等级_输出为5.py @@ -17,8 +17,8 @@ class LSTM_Regression(nn.Module): self.fc = nn.Linear(hidden_size, output_size) def forward(self, _x): - x, _ = self.lstm(_x) # _x is input, size (seq_len, batch, input_size) - s, b, h = x.shape # x is output, size (seq_len, batch, hidden_size) + x, _ = self.lstm(_x) # _x is input, size (seq_len, batch, input_size) 一批多少条样本 多少批样本 每一个样本的输入特征大小(10) + s, b, h = x.shape # x is output, size (seq_len, batch, hidden_size) 经过lstm计算后输出为隐藏层大小 x = x.view(s * b, h) x = self.fc(x) x = x.view(s, b, -1) # 把形状改回来 @@ -52,110 +52,133 @@ def data_preprocessing(data): return data -# 拼接数据集 -file_dir = r'C:\Users\鸽子\Desktop\浙江各地市分电压日电量数据' -excel = os.listdir(file_dir)[0] +if __name__ == '__main__': + # 拼接数据集 + file_dir = r'C:\Users\user\Desktop\浙江各地市分电压日电量数据' + excel = os.listdir(file_dir)[0] -data = pd.read_excel(os.path.join(file_dir, excel), sheet_name=0, index_col=' stat_date ') + data = pd.read_excel(os.path.join(file_dir, excel), sheet_name=0, index_col=' stat_date ') -data = data_preprocessing(data) + data = data_preprocessing(data) + + df = data[data.columns[0]] + df.dropna(inplace = True) + dataset_x, dataset_y = create_dataset(df, DAYS_FOR_TRAIN) + + for level in data.columns[1:]: + df = data[level] + df.dropna(inplace=True) + x, y = create_dataset(df, DAYS_FOR_TRAIN) + dataset_x = np.concatenate((dataset_x, x)) + dataset_y = np.concatenate((dataset_y, y)) + + + for excel in os.listdir(file_dir)[1:]: + data = pd.read_excel(os.path.join(file_dir,excel), sheet_name=0,index_col=' stat_date ') + data = data_preprocessing(data) + + for level in data.columns: + df = data[level] + df.dropna(inplace=True) + x,y = create_dataset(df,DAYS_FOR_TRAIN) + dataset_x = np.concatenate((dataset_x,x)) + dataset_y = np.concatenate((dataset_y,y)) + + print(dataset_x,dataset_y,dataset_x.shape,dataset_y.shape) + + # 训练 + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + + # 标准化到0~1 + max_value = np.max(dataset_x) + min_value = np.min(dataset_x) + dataset_x = (dataset_x - min_value) / (max_value - min_value) + dataset_y = (dataset_y - min_value) / (max_value - min_value) + + # 划分训练集和测试集 + train_size = int(len(dataset_x)*0.7) + + train_x = dataset_x[:train_size] + train_y = dataset_y[:train_size] + + # 将数据改变形状,RNN 读入的数据维度是 (seq_size, batch_size, feature_size) + train_x = train_x.reshape(-1, 1, DAYS_FOR_TRAIN) + train_y = train_y.reshape(-1, 1, 5) + + # 转为pytorch的tensor对象 + train_x = torch.from_numpy(train_x).to(device).type(torch.float32) + train_y = torch.from_numpy(train_y).to(device).type(torch.float32) + + model = LSTM_Regression(DAYS_FOR_TRAIN, 32, output_size=5, num_layers=2).to(device) # 导入模型并设置模型的参数输入输出层、隐藏层等 + + + train_loss = [] + loss_function = nn.MSELoss() + optimizer = torch.optim.Adam(model.parameters(), lr=0.005, betas=(0.9, 0.999), eps=1e-08, weight_decay=0) + + # for i in range(1500): + # out = model(train_x) + # loss = loss_function(out, train_y) + # loss.backward() + # optimizer.step() + # optimizer.zero_grad() + # train_loss.append(loss.item()) + # # print(loss) + # # 保存模型 + # torch.save(model.state_dict(),'dy5.pth') + + + model.load_state_dict(torch.load('dy5.pth')) + + # for test + model = model.eval() # 转换成测试模式 + # model.load_state_dict(torch.load(os.path.join(model_save_dir,model_file))) # 读取参数 + dataset_x = dataset_x.reshape(-1, 1, DAYS_FOR_TRAIN) # (seq_size, batch_size, feature_size) + dataset_x = torch.from_numpy(dataset_x).to(device).type(torch.float32) + + pred_test = model(dataset_x) # 全量训练集 + + # 模型输出 (seq_size, batch_size, output_size) + pred_test = pred_test.view(-1) + pred_test = np.concatenate((np.zeros(DAYS_FOR_TRAIN), pred_test.cpu().detach().numpy())) + + # plt.plot(pred_test.reshape(-1), 'r', label='prediction') + # plt.plot(dataset_y.reshape(-1), 'b', label='real') + # plt.plot((train_size*5, train_size*5), (0, 1), 'g--') # 分割线 左边是训练数据 右边是测试数据的输出 + # plt.legend(loc='best') + # plt.show() -df = data[data.columns[0]] -df.dropna(inplace = True) -dataset_x, dataset_y = create_dataset(df, DAYS_FOR_TRAIN) -for level in data.columns[1:]: - df = data[level] - df.dropna(inplace=True) - x, y = create_dataset(df, DAYS_FOR_TRAIN) - dataset_x = np.concatenate((dataset_x, x)) - dataset_y = np.concatenate((dataset_y, y)) -for excel in os.listdir(file_dir)[1:]: - data = pd.read_excel(os.path.join(file_dir,excel), sheet_name=0,index_col=' stat_date ') - data = data_preprocessing(data) - for level in data.columns: - df = data[level] - df.dropna(inplace=True) - x,y = create_dataset(df,DAYS_FOR_TRAIN) - dataset_x = np.concatenate((dataset_x,x)) - dataset_y = np.concatenate((dataset_y,y)) - -print(dataset_x,dataset_y,dataset_x.shape,dataset_y.shape) - -# 训练 -device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - -# 标准化到0~1 -max_value = np.max(dataset_x) -min_value = np.min(dataset_x) -dataset_x = (dataset_x - min_value) / (max_value - min_value) -dataset_y = (dataset_y - min_value) / (max_value - min_value) - -# 划分训练集和测试集 -train_size = len(dataset_x)*0.7 -train_x = dataset_x[:train_size] -train_y = dataset_y[:train_size] - -# 将数据改变形状,RNN 读入的数据维度是 (seq_size, batch_size, feature_size) -train_x = train_x.reshape(-1, 1, DAYS_FOR_TRAIN) -train_y = train_y.reshape(-1, 1, 5) - -# 转为pytorch的tensor对象 -train_x = torch.from_numpy(train_x).to(device) -train_y = torch.from_numpy(train_y).to(device) - -model = LSTM_Regression(DAYS_FOR_TRAIN, 32, output_size=3, num_layers=2).to(device) # 导入模型并设置模型的参数输入输出层、隐藏层等 - - -train_loss = [] -loss_function = nn.MSELoss() -optimizer = torch.optim.Adam(model.parameters(), lr=0.005, betas=(0.9, 0.999), eps=1e-08, weight_decay=0) -for i in range(1500): - out = model(train_x) - loss = loss_function(out, train_y) - loss.backward() - optimizer.step() - optimizer.zero_grad() - train_loss.append(loss.item()) - # print(loss) -# 保存模型 -torch.save(model.state_dict(),'dy5.pth') - - -# for test -model = model.eval() # 转换成测试模式 -# model.load_state_dict(torch.load(os.path.join(model_save_dir,model_file))) # 读取参数 -dataset_x = dataset_x.reshape(-1, 1, DAYS_FOR_TRAIN) # (seq_size, batch_size, feature_size) -dataset_x = torch.from_numpy(dataset_x).to(device) - -pred_test = model(dataset_x) # 全量训练集 -# 模型输出 (seq_size, batch_size, output_size) -pred_test = pred_test.view(-1) -pred_test = np.concatenate((np.zeros(DAYS_FOR_TRAIN), pred_test.cpu().detach().numpy())) - -plt.plot(pred_test, 'r', label='prediction') -plt.plot(df, 'b', label='real') -plt.plot((train_size, train_size), (0, 1), 'g--') # 分割线 左边是训练数据 右边是测试数据的输出 -plt.legend(loc='best') -plt.show() # 创建测试集 # result_list = [] # 以x为基础实际数据,滚动预测未来3天 -# x = torch.from_numpy(df[-14:-4]).to(device) -# pred = model(x.reshape(-1,1,DAYS_FOR_TRAIN)).view(-1).detach().numpy() +df_eval = pd.read_excel(r'C:\Users\user\Desktop\浙江各地市分电压日电量数据\杭州.xlsx',index_col=' stat_date ') +df_eval.columns = df_eval.columns.map(lambda x:x.strip()) +df_eval.index = pd.to_datetime(df_eval.index) + +x,y = create_dataset(df_eval.loc['2023-7']['10kv以下'],10) + +x = (x - min_value) / (max_value - min_value) +x = x.reshape(-1,1,10) + +x = torch.from_numpy(x).type(torch.float32).to(device) +pred = model(x) # 反归一化 -# pred = pred * (max_value - min_value) + min_value +pred = pred * (max_value - min_value) + min_value # df = df * (max_value - min_value) + min_value -# print(pred) -# # 打印指标 +print(pred,y) +df = pd.DataFrame({'real':y.reshape(-1),'pred':pred.view(-1).cpu().detach().numpy()}) +df.to_csv('7月预测.csv',encoding='gbk') + +# 打印指标 # print(abs(pred - df[-3:]).mean() / df[-3:].mean()) # result_eight = pd.DataFrame({'pred': np.round(pred,1),'real': df[-3:]}) # target = (result_eight['pred'].sum() - result_eight['real'].sum()) / df[-31:].sum()