输出预测结果

1 year ago · 71bc236f76
parent e5ecbf7bb8
commit 71bc236f76
35 changed files with 442 additions and 105 deletions
--- a/各地级市日电量模型/hangzhou.bin
+++ b/各地级市日电量模型/hangzhou.bin
--- a/各地级市日电量模型/huzhou.bin
+++ b/各地级市日电量模型/huzhou.bin
--- a/各地级市日电量模型/jiaxing.bin
+++ b/各地级市日电量模型/jiaxing.bin
--- a/各地级市日电量模型/quzhou.bin
+++ b/各地级市日电量模型/quzhou.bin
--- a/各地级市日电量模型/taizhou.bin
+++ b/各地级市日电量模型/taizhou.bin
--- a/各地级市日电量模型/wenzhou.bin
+++ b/各地级市日电量模型/wenzhou.bin
--- a/各地级市日电量模型/zhoushan.bin
+++ b/各地级市日电量模型/zhoushan.bin
--- a/各地级市日电量模型/丽水.py
+++ b/各地级市日电量模型/丽水.py
@ -26,8 +26,11 @@ def normal(nd):


 parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
-data = pd.read_excel(os.path.join(parent_dir, '入模数据/丽水.xlsx'), index_col='dtdate')
-data.index = pd.to_datetime(data.index, format='%Y-%m-%d')
+data = pd.read_excel(os.path.join(parent_dir, '入模数据/丽水.xlsx'))
+data['dtdate'] = pd.to_datetime(data['dtdate'],format='%Y-%m-%d')
+data['year'] = data['dtdate'].dt.year
+# data.index = pd.to_datetime(data.index, format='%Y-%m-%d')
+data.set_index('dtdate',inplace=True)
 data = data.loc[normal(data['售电量']).index]

 # list2 = []
@ -51,10 +54,12 @@ df_eval = data.loc['2023-11']
 # df_train = data.loc['2021-1':'2023-8']
 df_train = data[450:-1]
 # df_train = data.loc['2022-4':'2023-9'][:-3]
-df_train = df_train[['tem_max', 'tem_min', 'holiday', '24ST', '售电量', 'season']]
+df_train = df_train[['tem_max', 'tem_min', 'holiday', '24ST', '售电量', 'season','year']]

-X = df_train[['tem_max', 'tem_min', '24ST', 'holiday', 'season']]
-X_eval = df_eval[['tem_max', 'tem_min', '24ST', 'holiday', 'season']]
+print(df_train.corr()['售电量'])
+
+X = df_train[['tem_max', 'tem_min', '24ST', 'holiday', 'season','year']]
+X_eval = df_eval[['tem_max', 'tem_min', '24ST', 'holiday', 'season','year']]
 y = df_train['售电量']
 # best_goal = 1
 # best_i = {}
@ -85,9 +90,9 @@ print(goal2)
 import numpy as np

 X_eval = np.array([
-    [17.2, 5.7, 10, 0, 0],
-    [21.2, 4.3, 10, 0, 0],
-    [11.5, 6.6, 10, 0, 0]
+    [17.2, 5.7, 10, 0, 0,2023],
+    [21.2, 4.3, 10, 0, 0,2023],
+    [11.5, 6.6, 10, 0, 0,2023]
 ])
 print(model.predict(X_eval))
 result = model.predict(X_eval)
--- a/各地级市日电量模型/台州.py
+++ b/各地级市日电量模型/台州.py
@ -22,8 +22,11 @@ def normal(nd):
    return nd[(nd<high)&(nd>low)]

 parent_dir = os.path.abspath(os.path.join(os.getcwd(),os.pardir))
-data = pd.read_excel(os.path.join(parent_dir,'入模数据/台州.xlsx'),index_col='dtdate')
-data.index = pd.to_datetime(data.index,format='%Y-%m-%d')
+data = pd.read_excel(os.path.join(parent_dir,'入模数据/台州.xlsx'))
+data['dtdate'] = pd.to_datetime(data['dtdate'],format='%Y-%m-%d')
+data['year'] = data['dtdate'].dt.year
+# data.index = pd.to_datetime(data.index, format='%Y-%m-%d')
+data.set_index('dtdate',inplace=True)
 data = data.loc[normal(data['售电量']).index]

 # list2 = []
@ -47,10 +50,10 @@ df_train = data[500:-1]
 # df_train = data[500:][:-3]
 print(df_train)

-df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']]
+df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season','year']]

-X = df_train[['tem_max','tem_min','24ST','holiday','season']]
-X_eval = df_eval[['tem_max','tem_min','24ST','holiday','season']]
+X = df_train[['tem_max','tem_min','24ST','holiday','season','year']]
+X_eval = df_eval[['tem_max','tem_min','24ST','holiday','season','year']]
 y = df_train['售电量']
 # best_goal = 1
 # best_i = {}
@ -85,9 +88,9 @@ model.save_model('taizhou.bin')
 import numpy as np

 X_eval = np.array([
-    [18.8, 6.2, 10, 0, 0],
-    [21.7, 6.5, 10, 0, 0],
-    [14.3, 8.4, 10, 0, 0]
+    [18.8, 6.2, 10, 0, 0,2023],
+    [21.7, 6.5, 10, 0, 0,2023],
+    [14.3, 8.4, 10, 0, 0,2023]
 ])
 print(model.predict(X_eval))
 result = model.predict(X_eval)
--- a/各地级市日电量模型/嘉兴.py
+++ b/各地级市日电量模型/嘉兴.py
@ -26,8 +26,11 @@ def normal(nd):


 parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
-data = pd.read_excel(os.path.join(parent_dir, '入模数据/嘉兴.xlsx'), index_col='dtdate')
-data.index = pd.to_datetime(data.index, format='%Y-%m-%d')
+data = pd.read_excel(os.path.join(parent_dir, '入模数据/嘉兴.xlsx'))
+data['dtdate'] = pd.to_datetime(data['dtdate'],format='%Y-%m-%d')
+data['year'] = data['dtdate'].dt.year
+# data.index = pd.to_datetime(data.index, format='%Y-%m-%d')
+data.set_index('dtdate',inplace=True)
 data = data.loc[normal(data['售电量']).index]

 # list2 = []
@ -47,12 +50,13 @@ data['season'] = data.index.map(season)
 df_eval = data.loc['2023-11']
 df_train = data.iloc[450:-1]
 # df_train = data[450:][:-3]
+
 print(df_train)

-df_train = df_train[['tem_max', 'tem_min', 'holiday', '24ST', '售电量', 'season']]
+df_train = df_train[['tem_max', 'tem_min', 'holiday', '24ST', '售电量', 'season','year']]

-X = df_train[['tem_max', 'tem_min', '24ST', 'holiday', 'season']]
-X_eval = df_eval[['tem_max', 'tem_min', '24ST', 'holiday', 'season']]
+X = df_train[['tem_max', 'tem_min', '24ST', 'holiday', 'season','year']]
+X_eval = df_eval[['tem_max', 'tem_min', '24ST', 'holiday', 'season','year']]
 y = df_train['售电量']

 # best_goal = 1
@ -89,9 +93,9 @@ loaded_model.load_model('jiaxing.bin')
 import numpy as np

 X_eval = np.array([
-    [14.5, 7.7, 10, 0, 1],
-    [18.2, 7.8, 10, 0, 1],
-    [11.9, 6.6, 10, 0, 1]
+    [14.5, 7.7, 10, 0, 1,2023],
+    [21.6, 10, 10, 0, 1,2023],
+    [11.9, 6.6, 10, 0, 1,2023]
 ])
 print(model.predict(X_eval))
 result = model.predict(X_eval)
--- a/各地级市日电量模型/宁波.py
+++ b/各地级市日电量模型/宁波.py
@ -26,8 +26,11 @@ def normal(nd):


 parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
-data = pd.read_excel(os.path.join(parent_dir, '入模数据/宁波.xlsx'), index_col='dtdate')
-data.index = pd.to_datetime(data.index, format='%Y-%m-%d')
+data = pd.read_excel(os.path.join(parent_dir, '入模数据/宁波.xlsx'))
+data['dtdate'] = pd.to_datetime(data['dtdate'],format='%Y-%m-%d')
+data['year'] = data['dtdate'].dt.year
+# data.index = pd.to_datetime(data.index, format='%Y-%m-%d')
+data.set_index('dtdate',inplace=True)
 data = data.loc[normal(data['售电量']).index]

 # list2 = []
@ -50,10 +53,10 @@ df_eval = data.loc['2023-11']

 df_train = data.loc['2022-01':'2023-11']

-df_train = df_train[['tem_max', 'tem_min', 'holiday', '24ST', '售电量', 'season']]
+df_train = df_train[['tem_max', 'tem_min', 'holiday', '24ST', '售电量', 'season','year']]

-X = df_train[['tem_max', 'tem_min', '24ST', 'holiday', 'season']]
-X_eval = df_eval[['tem_max', 'tem_min', '24ST', 'holiday', 'season']]
+X = df_train[['tem_max', 'tem_min', '24ST', 'holiday', 'season','year']]
+X_eval = df_eval[['tem_max', 'tem_min', '24ST', 'holiday', 'season','year']]
 y = df_train['售电量']

 # best_goal = 1
@ -86,9 +89,9 @@ import numpy as np

 X_eval = np.array([

-    [16.5, 6.8, 10, 0, 1],
-    [21.7, 6.8, 10, 0, 1],
-    [13, 8.8, 10, 0, 1]
+    [16.5, 6.8, 10, 0, 1,2023],
+    [21.7, 6.8, 10, 0, 1,2023],
+    [13, 8.8, 10, 0, 1,2023]
 ])
 print(model.predict(X_eval))
 result = model.predict(X_eval)
--- a/各地级市日电量模型/杭州.py
+++ b/各地级市日电量模型/杭州.py
@ -34,8 +34,11 @@ def normal(nd):


 parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
-data = pd.read_excel(os.path.join(parent_dir, '入模数据/杭州.xlsx'), index_col='dtdate')
-data.index = pd.to_datetime(data.index, format='%Y-%m-%d')
+data = pd.read_excel(os.path.join(parent_dir, '入模数据/杭州.xlsx'))
+data['dtdate'] = pd.to_datetime(data['dtdate'],format='%Y-%m-%d')
+data['year'] = data['dtdate'].dt.year
+# data.index = pd.to_datetime(data.index, format='%Y-%m-%d')
+data.set_index('dtdate',inplace=True)
 data = data.loc[normal(data['售电量']).index]

 # list2 = []
@ -61,8 +64,8 @@ df_train = data[500:-1]
 df_eval = data.loc['2023-11']

 print(df_train)
-X = df_train[['tem_max', 'tem_min', '24ST', 'holiday', 'season']]
-X_eval = df_eval[['tem_max', 'tem_min', '24ST', 'holiday', 'season']]
+X = df_train[['tem_max', 'tem_min', '24ST', 'holiday', 'season','year']]
+X_eval = df_eval[['tem_max', 'tem_min', '24ST', 'holiday', 'season','year']]
 y = df_train['售电量']

 # best_goal = 1
@ -115,9 +118,9 @@ model.save_model('hangzhou.bin')
 loaded_model = xgb.XGBRegressor()
 loaded_model.load_model('hangzhou.bin')
 X_eval = np.array([
-    [17.2, 5.7, 10, 0, 0],
-    [21.2, 4.3, 10, 0, 0],
-    [11.5, 6.6, 10, 0, 0]
+    [17.2, 5.7, 10, 0, 0,2023],
+    [21.2, 4.3, 10, 0, 0,2023],
+    [11.5, 6.6, 10, 0, 0,2023]
 ])
 print(model.predict(X_eval))
 result = model.predict(X_eval)
--- a/各地级市日电量模型/温州.py
+++ b/各地级市日电量模型/温州.py
@ -26,8 +26,11 @@ def normal(nd):


 parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
-data = pd.read_excel(os.path.join(parent_dir, '入模数据/温州.xlsx'), index_col='dtdate')
-data.index = pd.to_datetime(data.index, format='%Y-%m-%d')
+data = pd.read_excel(os.path.join(parent_dir, '入模数据/温州.xlsx'))
+data['dtdate'] = pd.to_datetime(data['dtdate'],format='%Y-%m-%d')
+data['year'] = data['dtdate'].dt.year
+# data.index = pd.to_datetime(data.index, format='%Y-%m-%d')
+data.set_index('dtdate',inplace=True)
 data = data.loc[normal(data['售电量']).index]

 # list2 = []
@ -52,10 +55,10 @@ df_train = data[450:]
 # df_train = data[450:][:-3]
 print(df_train)

-df_train = df_train[['tem_max', 'tem_min', 'holiday', '24ST', '售电量', 'season']]
+df_train = df_train[['tem_max', 'tem_min', 'holiday', '24ST', '售电量', 'season','year']]

-X = df_train[['tem_max', 'tem_min', '24ST', 'holiday', 'season']]
-X_eval = df_eval[['tem_max', 'tem_min', '24ST', 'holiday', 'season']]
+X = df_train[['tem_max', 'tem_min', '24ST', 'holiday', 'season','year']]
+X_eval = df_eval[['tem_max', 'tem_min', '24ST', 'holiday', 'season','year']]
 y = df_train['售电量']

 # best_goal = 1
@ -95,9 +98,9 @@ loaded_model.load_model('wenzhou.bin')
 import numpy as np

 X_eval = np.array([
-    [19.8, 6.6, 10, 0, 1],
-    [22, 6.1, 10, 0, 1],
-    [18.5, 10.1, 10, 0, 1]
+    [19.8, 6.6, 10, 0, 1,2023],
+    [22, 6.1, 10, 0, 1,2023],
+    [18.5, 10.1, 10, 0, 1,2023]
 ])
 print(model.predict(X_eval))
 result = model.predict(X_eval)
--- a/各地级市日电量模型/湖州.py
+++ b/各地级市日电量模型/湖州.py
@ -27,8 +27,11 @@ def normal(nd):


 parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
-data = pd.read_excel(os.path.join(parent_dir, '入模数据/湖州.xlsx'), index_col='dtdate')
-data.index = pd.to_datetime(data.index, format='%Y-%m-%d')
+data = pd.read_excel(os.path.join(parent_dir, '入模数据/湖州.xlsx'))
+data['dtdate'] = pd.to_datetime(data['dtdate'],format='%Y-%m-%d')
+data['year'] = data['dtdate'].dt.year
+# data.index = pd.to_datetime(data.index, format='%Y-%m-%d')
+data.set_index('dtdate',inplace=True)
 data = data.loc[normal(data['售电量']).index]

 # list2 = []
@ -52,10 +55,10 @@ df_eval = data.loc['2023-11']
 df_train = data[450:-1]
 # df_train = data[450:][:-3]
 print(df_train)
-df_train = df_train[['tem_max', 'tem_min', 'holiday', '24ST', '售电量', 'season']]
+df_train = df_train[['tem_max', 'tem_min', 'holiday', '24ST', '售电量', 'season','year']]

-X = df_train[['tem_max', 'tem_min', '24ST', 'holiday', 'season']]
-X_eval = df_eval[['tem_max', 'tem_min', '24ST', 'holiday', 'season']]
+X = df_train[['tem_max', 'tem_min', '24ST', 'holiday', 'season','year']]
+X_eval = df_eval[['tem_max', 'tem_min', '24ST', 'holiday', 'season','year']]
 y = df_train['售电量']

 x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=158)
@ -84,9 +87,9 @@ import numpy as np

 X_eval = np.array([

-    [14.9, 7.1, 10, 0, 1],
-    [17.7, 6.6, 10, 0, 1],
-    [10.3, 5.8, 10, 0, 1]
+    [14.9, 7.1, 10, 0, 1,2023],
+    [17.7, 6.6, 10, 0, 1,2023],
+    [10.3, 5.8, 10, 0, 1,2023]
 ])
 print(model.predict(X_eval))
 result = model.predict(X_eval)
--- a/各地级市日电量模型/绍兴.py
+++ b/各地级市日电量模型/绍兴.py
@ -26,8 +26,11 @@ def normal(nd):


 parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
-data = pd.read_excel(os.path.join(parent_dir, '入模数据/绍兴.xlsx'), index_col='dtdate')
-data.index = pd.to_datetime(data.index, format='%Y-%m-%d')
+data = pd.read_excel(os.path.join(parent_dir, '入模数据/绍兴.xlsx'))
+data['dtdate'] = pd.to_datetime(data['dtdate'],format='%Y-%m-%d')
+data['year'] = data['dtdate'].dt.year
+# data.index = pd.to_datetime(data.index, format='%Y-%m-%d')
+data.set_index('dtdate',inplace=True)
 data = data.loc[normal(data['售电量']).index]

 # list2 = []
@ -51,10 +54,10 @@ df_train = data[450:]
 # df_train = data[450:][:-3]
 print(df_train)

-df_train = df_train[['tem_max', 'tem_min', 'holiday', '24ST', '售电量', 'season']]
+df_train = df_train[['tem_max', 'tem_min', 'holiday', '24ST', '售电量', 'season','year']]

-X = df_train[['tem_max', 'tem_min', '24ST', 'holiday', 'season']]
-X_eval = df_eval[['tem_max', 'tem_min', '24ST', 'holiday', 'season']]
+X = df_train[['tem_max', 'tem_min', '24ST', 'holiday', 'season','year']]
+X_eval = df_eval[['tem_max', 'tem_min', '24ST', 'holiday', 'season','year']]
 y = df_train['售电量']

 x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
@ -84,9 +87,9 @@ print(result_eval)
 import numpy as np

 X_eval = np.array([
-    [17.4, 6.6, 10, 0, 0],
-    [21.2, 7, 10, 0, 0],
-    [12.1, 7.3, 10, 0, 0]
+    [17.4, 6.6, 10, 0, 0,2023],
+    [21.2, 7, 10, 0, 0,2023],
+    [12.1, 7.3, 10, 0, 0,2023]
 ])
 print(model.predict(X_eval))
 result = model.predict(X_eval)
--- a/各地级市日电量模型/舟山.py
+++ b/各地级市日电量模型/舟山.py
@ -25,10 +25,11 @@ def normal(nd):


 parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
-data = pd.read_excel(os.path.join(parent_dir, '入模数据/舟山.xlsx'), index_col='dtdate')
-data.index = pd.to_datetime(data.index, format='%Y-%m-%d')
+data = pd.read_excel(os.path.join(parent_dir, '入模数据/舟山.xlsx'))
+data['dtdate'] = pd.to_datetime(data['dtdate'],format='%Y-%m-%d')
+data['year'] = data['dtdate'].dt.year
+data.set_index('dtdate',inplace=True)
 data = data.loc[normal(data['售电量']).index]
-
 # list2 = []
 # list0 = []
 # list1 = []
@ -49,9 +50,9 @@ df_eval = data.loc['2023-11']
 df_train = data.iloc[450:]
 # df_train = data.iloc[450:][:-3]

-df_train = df_train[['tem_max', 'tem_min', 'holiday', '24ST', '售电量', 'season']]
-X = df_train[['tem_max', 'tem_min', '24ST', 'holiday', 'season']]
-X_eval = df_eval[['tem_max', 'tem_min', '24ST', 'holiday', 'season']]
+df_train = df_train[['tem_max', 'tem_min', 'holiday', '24ST', '售电量', 'season','year']]
+X = df_train[['tem_max', 'tem_min', '24ST', 'holiday', 'season','year']]
+X_eval = df_eval[['tem_max', 'tem_min', '24ST', 'holiday', 'season','year']]
 y = df_train['售电量']

 x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
@ -80,9 +81,9 @@ loaded_model.load_model('zhoushan.bin')
 import numpy as np

 X_eval = np.array([
-    [14.7, 11.4, 10, 0, 1],
-    [19.4, 11.8, 10, 0, 1],
-    [14.9, 9.4, 10, 0, 1]
+    [14.7, 11.4, 10, 0, 1,2023],
+    [19.4, 11.8, 10, 0, 1,2023],
+    [14.9, 9.4, 10, 0, 1,2023]
 ])
 print(model.predict(X_eval))
 result = model.predict(X_eval)
--- a/各地级市日电量模型/衢州.py
+++ b/各地级市日电量模型/衢州.py
@ -21,8 +21,11 @@ def normal(nd):
    return nd[(nd<high)&(nd>low)]

 parent_dir = os.path.abspath(os.path.join(os.getcwd(),os.pardir))
-data = pd.read_excel(os.path.join(parent_dir,'入模数据/衢州.xlsx'),index_col='dtdate')
-data.index = pd.to_datetime(data.index,format='%Y-%m-%d')
+data = pd.read_excel(os.path.join(parent_dir,'入模数据/衢州.xlsx'))
+data['dtdate'] = pd.to_datetime(data['dtdate'],format='%Y-%m-%d')
+data['year'] = data['dtdate'].dt.year
+
+data.set_index('dtdate',inplace=True)
 data = data.loc[normal(data['售电量']).index]

 # list2 = []
@ -46,10 +49,10 @@ df_train = data.iloc[450:]
 # df_train = data.iloc[450:-3]


-df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']]
+df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season','year']]

-X = df_train[['tem_max','tem_min','24ST','holiday','season']]
-X_eval = df_eval[['tem_max','tem_min','24ST','holiday','season']]
+X = df_train[['tem_max','tem_min','24ST','holiday','season','year']]
+X_eval = df_eval[['tem_max','tem_min','24ST','holiday','season','year']]
 y = df_train['售电量']

 x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)
@ -74,9 +77,9 @@ loaded_model = xgb.XGBRegressor()
 loaded_model.load_model('quzhou.bin')
 import numpy as np
 X_eval = np.array([
-    [18.7, 7, 10, 0, 1],
-    [20.2, 6.5, 10, 0, 1],
-    [11.2, 8, 10, 0, 1]
+    [18.7, 7, 10, 0, 1,2023],
+    [20.2, 6.5, 10, 0, 1,2023],
+    [11.2, 8, 10, 0, 1,2023]
 ])
 print(model.predict(X_eval))
 result = model.predict(X_eval)
--- a/文档处理.py
+++ b/文档处理.py
@ -89,22 +89,113 @@ file_dir = r'C:\Users\鸽子\Desktop\11月区县分压预测'
 #                             if_sheet_exists='replace') as writer:
 #             df_result.to_excel(writer, sheet_name=f'{sheet}')

-df = pd.read_excel('C:\python-project\p1031\浙江行业电量\浙江各地市行业电量数据\台州.xlsx').set_index('stat_date')
-print(df.columns)
+# df = pd.read_excel('C:\python-project\p1031\浙江行业电量\浙江各地市行业电量数据\台州.xlsx').set_index('stat_date')
+# print(df.columns)
 import matplotlib.pyplot as plt
 import matplotlib as mpl
 import matplotlib.dates as mdates
-date_rng = pd.date_range(start=df['4.有色金属矿采选业'].index[0], end=df['4.有色金属矿采选业'].index[-1], freq='D')
-mpl.rcParams['font.sans-serif']=['kaiti']
-print(df['4.有色金属矿采选业'][:-1])
-plt.figure(figsize=(10, 6))
-plt.plot(df['4.有色金属矿采选业'].index[:-1],df['4.有色金属矿采选业'][:-1])
+# date_rng = pd.date_range(start=df['4.有色金属矿采选业'].index[0], end=df['4.有色金属矿采选业'].index[-1], freq='D')
+# mpl.rcParams['font.sans-serif']=['kaiti']
+# print(df['4.有色金属矿采选业'][:-1])
+# plt.figure(figsize=(10, 6))
+# plt.plot(df['4.有色金属矿采选业'].index[:-1],df['4.有色金属矿采选业'][:-1])
+#
+# plt.title(f'4.有色金属矿采选业')
+# plt.gcf().autofmt_xdate()
+# plt.gca().xaxis.set_major_locator(mdates.DayLocator(interval=120))
+#
+# plt.xticks(rotation=45)
+# plt.xlabel('时间')
+# plt.ylabel('数值')
+# plt.show()
+
+
+excel_file = pd.ExcelFile(r'C:\Users\鸽子\Desktop\浙江电量20231202.xlsx')
+df_city_real = pd.read_excel(excel_file,sheet_name=0)
+df_city_real = df_city_real[df_city_real['county_name'].isnull()]
+df_city_real['city_name'] = df_city_real['city_name'].str[4:6]
+# print(df_city_real)
+
+file_dir = r'C:\Users\鸽子\Desktop\发行&预测\区域行业分压预测v1129'
+print(os.listdir(file_dir))
+
+# 区域明细及偏差率统计
+
+city_area_file = pd.ExcelFile(os.path.join(file_dir,os.listdir(file_dir)[2]))
+# for city in df_city_real['city_name'].drop_duplicates():
+#     df_city_pred = pd.read_excel(city_area_file,sheet_name=city).dropna().set_index('日期')
+#     df_city_pred.index = pd.to_datetime(df_city_pred.index)
+#     df_real = df_city_real[df_city_real['city_name']==city].set_index('pt_date')['power_sal']
+#     df_real.index = pd.to_datetime(df_real.index)
+#     df_city_pred.loc['2023-11-27'] = df_real.loc['2023-11-27']
+#
+#     result = pd.DataFrame(df_real).join(df_city_pred)
+#     result.columns = ['实际值','预测值']
+#     result['偏差率'] = (result['实际值'] - result['预测值'])/result['实际值']
+#     result['指标'] = (df_real.values.sum()-df_city_pred.values.sum())/df_real.values.sum()
+#     result['偏差率'][:27] = 0
+#     print(result)
+#     with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\区域电量同期预测对比.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as writer:
+#         result.to_excel(writer,sheet_name=f'{city}')
+
+
+
+    # pd.read_excel(city_area_file,sheet_name='舟山').dropna().set_index('日期')
+    # df_city_real[df_city_real['city_name']=='舟山'].set_index('pt_date')['power_sal']
+
+city_volt_file = os.path.join(file_dir,os.listdir(file_dir)[2])
+excel_file1 = pd.ExcelFile(city_volt_file)

-plt.title(f'4.有色金属矿采选业')
-plt.gcf().autofmt_xdate()
-plt.gca().xaxis.set_major_locator(mdates.DayLocator(interval=120))
+# for sheet_name in excel_file1.sheet_names[1:]:
+#     print(sheet_name)
+#     pred_volt_df = pd.read_excel(excel_file1,sheet_name=sheet_name).dropna()
+#
+#     pred_volt_df.set_index(pred_volt_df.columns[0],inplace=True)
+#     real_volt_df = pd.read_excel(excel_file,sheet_name=1).set_index('pt_date')
+#
+#     real_volt_df = real_volt_df[(real_volt_df['county_name'].isnull())&(real_volt_df['city_name'].str[4:6]==sheet_name)].drop(columns=['county_name','500kv(含330kv)以上'])
+#
+#     result = pd.DataFrame({'实际值':list(real_volt_df.sum()[1:]),
+#                            '预测值':list(pred_volt_df.sum()[1:]),
+#         '偏差':list(real_volt_df.sum()[1:] - pred_volt_df.sum()[1:])},index=real_volt_df.sum()[1:].index)
+#     result['指标'] = result['偏差']/real_volt_df.sum()[1:]
+#
+#
+#     with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\市分压电量同期预测对比.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as wirter:
+#         result.to_excel(wirter,sheet_name=f'{sheet_name}')
+
+industry_file = pd.ExcelFile(os.path.join(file_dir,os.listdir(file_dir)[4]))
+# for sheet_name in industry_file.sheet_names[1:]:
+#
+#     pred_industry_df = pd.concat([pd.read_excel(industry_file,sheet_name=sheet_name).iloc[:27],pd.read_excel(industry_file,sheet_name=sheet_name).iloc[-3:]],ignore_index=True)
+#     pred_industry_df[pred_industry_df.columns[0]] = pd.date_range(start=f'2023-11-01', end=f'2023-11-30', freq='D').strftime('%Y-%m-%d')
+#     pred_industry_df.set_index(pred_industry_df.columns[0],inplace=True)
+#
+#     real_industry_df = pd.read_excel(excel_file,sheet_name=2).set_index('stat_date')
+#     real_industry_df['city_name'] = real_industry_df['city_name'].str[4:6]
+#     real_industry_df = real_industry_df[real_industry_df['city_name']==sheet_name[:2]].drop(columns=['city_name']).iloc[:30]
+#     print(sheet_name[:2])
+#     print(pd.DataFrame(real_industry_df.sum(),columns=['真实值']))
+#
+#
+#     result = pd.DataFrame(real_industry_df.sum(),columns=['真实值']).join(pd.DataFrame(pred_industry_df.sum(),columns=['预测值']))
+#     print(result)
+#     result['偏差'] = result['真实值'] - result['预测值']
+#     result['指标'] = result['偏差']/result['真实值']
+#
+#     with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\行业电量同期预测对比.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as wirter:
+#         result.to_excel(wirter,sheet_name=f'{sheet_name[:2]}')

-plt.xticks(rotation=45)
-plt.xlabel('时间')
-plt.ylabel('数值')
-plt.show()
+e1 = r'C:\Users\鸽子\Desktop\行业电量同期预测对比.xlsx'
+df1 = pd.read_excel(e1,sheet_name=1)
+df1.set_index(df1.columns[0],inplace=True)
+for sheet_name in industry_file.sheet_names[2:]:
+    df2 = pd.read_excel(e1,sheet_name=sheet_name)
+    df2 = df2.set_index(df2.columns[0])
+    df1 += df2
+df1['偏差'] = df1['真实值']-df1['预测值']
+df1['偏差率'] = df1['偏差']/df1['真实值']
+df1.to_excel(r'C:\Users\鸽子\Desktop\1.xlsx')
+# writer = pd.ExcelWriter(e1,engine='openpyxl')
+# df1.to_excel(writer,sheet_name=0)
+print(df1)
--- a/浙江电压等级电量/prophet_分压电量_10kv.py
+++ b/浙江电压等级电量/prophet_分压电量_10kv.py
@ -0,0 +1,61 @@
+from prophet import Prophet
+import pandas as pd
+import os
+import numpy as np
+
+
+def normal(data):
+    """Return a boolean mask that is True where a value lies inside the inclusive 1.5*IQR fences."""
+    q1, q3 = data.describe()[['25%', '75%']]  # summarise once instead of four describe() calls
+    return (data <= q3 + 1.5 * (q3 - q1)) & (data >= q1 - 1.5 * (q3 - q1))
+
+
+file_dir = r'C:\python-project\p1031\浙江电压等级电量\浙江各地市分电压日电量数据'
+# Raw string above: '\p' is an invalid escape (SyntaxWarning on Python 3.12+); the path value is unchanged.
+for city in os.listdir(file_dir):
+    df_city = pd.read_excel(os.path.join(file_dir, city))
+    # df_city['stat_date'] = df_city['stat_date'].map(lambda x: str(x).strip()[:10])
+    df_city['stat_date'] = pd.to_datetime(df_city['stat_date'])
+    list_goal = []
+    list_industry = []
+    result_dict = {}
+    for level in df_city.columns[2:]:
+        s1 = df_city[['stat_date', level]]
+        # Observed history used for fitting: 2022-09-30 through 2023-11-27 inclusive.
+        in_window = (s1['stat_date'] >= '2022-09-30') & (s1['stat_date'] <= '2023-11-27')
+        # rename() returns a new frame, so nothing derived from df_city is mutated in place.
+        ds_train = s1[in_window].sort_values(by='stat_date').rename(columns={'stat_date': 'ds', level: 'y'})
+        # Fit on a copy whose IQR outliers are forward-filled; ds_train keeps the raw values for the output.
+        df_train = ds_train.copy()
+        # .ffill() replaces fillna(method='ffill'), which pandas has deprecated.
+        df_train['y'] = df_train['y'].where(normal(df_train['y']), other=np.nan).ffill()
+        # NOTE(review): daily_seasonality=True on what looks like one row per day - confirm it is intended.
+
+        model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True)
+        model.add_country_holidays(country_name="CN")
+        model.fit(df_train)
+        future = model.make_future_dataframe(periods=3, freq='D')
+        # predict() returns the fitted history plus the 3 appended future days.
+        predict = model.predict(future)
+        print(city[1:3],level)
+        # NOTE(review): city[1:3] is also the sheet name below; the industry script uses city[:2] - confirm file naming.
+        predict = predict[['ds', 'yhat']].set_index('ds').loc['2023-11'].rename(columns={'yhat':'售电量'})
+        ds_train = ds_train.rename(columns={'y':'售电量'})
+        # First 27 observed November days followed by the last 3 predicted days.
+        result = pd.concat((ds_train.set_index('ds').loc['2023-11'][:27],predict[-3:]))
+        result_dict[level] = list(result['售电量'])
+
+    with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\分压电量预测v1213.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as writer:
+        pd.DataFrame(result_dict,index=pd.date_range(start=f'2023-11-01', end=f'2023-11-30', freq='D').strftime('%Y-%m-%d')).to_excel(writer,sheet_name=city[1:3])
+
+        # df = predict.join(s1.set_index('ds')).loc['2023-8']
+        # df['偏差率'] = (df['y'] - df['yhat']) / df['y']
+        # df['goal'] = (df['y'] - df['yhat'])[-3:].sum() / df['y'].sum()
+        # list_goal.append((df['y'] - df['yhat'])[-3:].sum() / df['y'].sum())
+        # list_industry.append(industry)
+
+    # df = pd.DataFrame({'industry': list_industry, 'goal': list_goal})
+    # df.to_csv(fr'C:\Users\鸽子\Desktop\行业8月偏差\{city[:2]}_goal.csv', index=False, encoding='gbk')
+    #
+    # with open(r'C:\Users\鸽子\Desktop\goal_8.txt','a') as f:
+    #     f.write(f'{city[:2]}\n')
+    #     df['goal'].value_counts(bins=[-np.inf,-0.05, -0.01, -0.005, 0, 0.005, 0.01, 0.02, 0.05,np.inf], sort=False).to_csv(f,header=False,sep='\t')
--- a/浙江电压等级电量/prophet_分压电量_220v.py
+++ b/浙江电压等级电量/prophet_分压电量_220v.py
@ -0,0 +1,61 @@
+from prophet import Prophet
+import pandas as pd
+import os
+import numpy as np
+
+
+def normal(data):
+    """Return a boolean mask that is True where a value lies inside the inclusive 1.5*IQR fences."""
+    q1, q3 = data.describe()[['25%', '75%']]  # summarise once instead of four describe() calls
+    return (data <= q3 + 1.5 * (q3 - q1)) & (data >= q1 - 1.5 * (q3 - q1))
+
+
+file_dir = r'C:\python-project\p1031\浙江电压等级电量\浙江各地市分电压日电量数据'
+# Raw string above: '\p' is an invalid escape (SyntaxWarning on Python 3.12+); the path value is unchanged.
+for city in os.listdir(file_dir):
+    df_city = pd.read_excel(os.path.join(file_dir, city))
+    # df_city['stat_date'] = df_city['stat_date'].map(lambda x: str(x).strip()[:10])
+    df_city['stat_date'] = pd.to_datetime(df_city['stat_date'])
+    list_goal = []
+    list_industry = []
+    result_dict = {}
+    for level in df_city.columns[2:]:
+        s1 = df_city[['stat_date', level]]
+        # Observed history used for fitting: 2022-09-30 through 2023-11-27 inclusive.
+        in_window = (s1['stat_date'] >= '2022-09-30') & (s1['stat_date'] <= '2023-11-27')
+        # rename() returns a new frame, so nothing derived from df_city is mutated in place.
+        ds_train = s1[in_window].sort_values(by='stat_date').rename(columns={'stat_date': 'ds', level: 'y'})
+        # Fit on a copy whose IQR outliers are forward-filled; ds_train keeps the raw values for the output.
+        df_train = ds_train.copy()
+        # .ffill() replaces fillna(method='ffill'), which pandas has deprecated.
+        df_train['y'] = df_train['y'].where(normal(df_train['y']), other=np.nan).ffill()
+        # NOTE(review): daily_seasonality=True on what looks like one row per day - confirm it is intended.
+
+        model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True)
+        model.add_country_holidays(country_name="CN")
+        model.fit(df_train)
+        future = model.make_future_dataframe(periods=3, freq='D')
+        # predict() returns the fitted history plus the 3 appended future days.
+        predict = model.predict(future)
+        print(city[1:3],level)
+        # NOTE(review): city[1:3] is also the sheet name below; the industry script uses city[:2] - confirm file naming.
+        predict = predict[['ds', 'yhat']].set_index('ds').loc['2023-11'].rename(columns={'yhat':'售电量'})
+        ds_train = ds_train.rename(columns={'y':'售电量'})
+        # First 27 observed November days followed by the last 3 predicted days.
+        result = pd.concat((ds_train.set_index('ds').loc['2023-11'][:27],predict[-3:]))
+        result_dict[level] = list(result['售电量'])
+
+    with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\分压电量预测v1213.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as writer:
+        pd.DataFrame(result_dict,index=pd.date_range(start=f'2023-11-01', end=f'2023-11-30', freq='D').strftime('%Y-%m-%d')).to_excel(writer,sheet_name=city[1:3])
+
+        # df = predict.join(s1.set_index('ds')).loc['2023-8']
+        # df['偏差率'] = (df['y'] - df['yhat']) / df['y']
+        # df['goal'] = (df['y'] - df['yhat'])[-3:].sum() / df['y'].sum()
+        # list_goal.append((df['y'] - df['yhat'])[-3:].sum() / df['y'].sum())
+        # list_industry.append(industry)
+
+    # df = pd.DataFrame({'industry': list_industry, 'goal': list_goal})
+    # df.to_csv(fr'C:\Users\鸽子\Desktop\行业8月偏差\{city[:2]}_goal.csv', index=False, encoding='gbk')
+    #
+    # with open(r'C:\Users\鸽子\Desktop\goal_8.txt','a') as f:
+    #     f.write(f'{city[:2]}\n')
+    #     df['goal'].value_counts(bins=[-np.inf,-0.05, -0.01, -0.005, 0, 0.005, 0.01, 0.02, 0.05,np.inf], sort=False).to_csv(f,header=False,sep='\t')
--- a/浙江电压等级电量/分压_移动平均.py
+++ b/浙江电压等级电量/分压_移动平均.py
@ -0,0 +1,47 @@
+import pandas as pd
+df = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江电量20231202.xlsx', sheet_name=1)
+df['pt_date'] = pd.to_datetime(df['pt_date'])
+# NOTE(review): df is only consumed by the commented-out per-city step below.
+# Moving average
+
+dict_big = {}
+dict_ok = {}
+
+# for city in df['city_name'].drop_duplicates():
+#
+#     df_city1 = df[(df['city_name'] == city) & (df['county_name'].isnull())].set_index('pt_date').loc['2023-11']
+#     resut_df = pd.DataFrame({})
+#     index_level = []
+#     tq_list = []
+#     pred_list = []
+#     loss_list = []
+#     rate_list = []
+#     for level in df_city1.columns[2:]:
+#
+#         index_level.append(level)
+#
+#         df_moving_avg = pd.DataFrame(df_city1[:-3][level], index=df_city1[:-3].index)
+#         future = pd.date_range(start=df_city1.index[-3], periods=3, freq='D')
+#
+#         for date in future:
+#             df_moving_avg.loc[date, level] = df_moving_avg[-3:].mean().values
+#         loss = (df_city1[level].tail(-3).sum() - df_moving_avg.tail(-3).sum()) / df_city1[level].sum()
+#         tq_list.append(df_city1[level].sum())
+#         pred_list.append(df_moving_avg[level].sum())
+#         loss_list.append(df_city1[level].sum()-df_moving_avg[level].sum())
+#         rate_list.append((df_city1[level].sum()-df_moving_avg[level].sum())/df_city1[level].sum())
+#     resut_df = pd.DataFrame({'同期电量':tq_list,'预测电量':pred_list,'偏差':loss_list,'偏差率':rate_list},index=index_level)
+#     with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\11月移动平均分压.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as writer:
+#         resut_df.to_excel(writer,sheet_name=f'{city[4:6]}')
+# Sum sheets 1..end of the workbook (the file the commented-out step writes; sheet 0 is skipped) into one table.
+with pd.ExcelFile(r'C:\Users\鸽子\Desktop\11月移动平均分压.xlsx') as excel_file:  # handle is closed on exit
+    df1 = pd.read_excel(excel_file,sheet_name=1)
+    df1.set_index(df1.columns[0],inplace=True)
+    for sheet in excel_file.sheet_names[2:]:
+        sheet_df = pd.read_excel(excel_file,sheet_name=sheet)
+        # Label-aligned element-wise add; sheet_df no longer rebinds the module-level df.
+        df1 += sheet_df.set_index(sheet_df.columns[0])
+df1['偏差'] = df1['同期电量']-df1['预测电量']
+df1['偏差率'] = df1['偏差']/df1['同期电量']  # recomputed from the totals; the summed per-sheet ratios are discarded
+df1.to_excel('移动平均_11月分压汇总.xlsx')
+print(df1)
--- a/浙江电压等级电量/电压等级_输入10_输出3.py
+++ b/浙江电压等级电量/电压等级_输入10_输出3.py
@ -87,7 +87,7 @@ def data_preprocessing(data):

 # # 训练
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-#
+
 # # 标准化到0~1
 # max_value = np.max(dataset_x)
 # min_value = np.min(dataset_x)
--- a/浙江行业电量/prophet_行业电量.py
+++ b/浙江行业电量/prophet_行业电量.py
@ -12,6 +12,7 @@ def normal(data):


 file_dir = './浙江各地市行业电量数据'
+# df = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江电量20231202.xlsx')
 for city in os.listdir(file_dir):
    df_city = pd.read_excel(os.path.join(file_dir, city))
    df_city['stat_date'] = df_city['stat_date'].map(lambda x: str(x).strip()[:10])
@ -19,13 +20,16 @@ for city in os.listdir(file_dir):
    list_goal = []
    list_industry = []
    result_dict = {}
-    for industry in df_city.columns[2:]:
+    for industry in df_city.columns[3:]:
        s1 = df_city[['stat_date', industry]]
-        df_train = s1[(s1['stat_date'] >= '2022-09-30') & (s1['stat_date'] <= '2023-10-31')].sort_values(by='stat_date')
-        df_train.rename(columns={'stat_date': 'ds', industry: 'y'}, inplace=True)

+        ds_train = s1[(s1['stat_date'] >= '2022-09-30') & (s1['stat_date'] <= '2023-11-27')].sort_values(by='stat_date')
+
+        ds_train.rename(columns={'stat_date': 'ds', industry: 'y'}, inplace=True)
+
+        df_train = ds_train.copy()
        df_train['y'] = df_train['y'].where(normal(df_train['y']), other=np.nan).fillna(method='ffill')
-        # df_test = s1[(s1['ds'] >= '2022-08-31') & (s1['ds'] <= '2023-10-31')].sort_values(by='ds')
+

        model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True)
        model.add_country_holidays(country_name="CN")
@ -33,16 +37,16 @@ for city in os.listdir(file_dir):
        future = model.make_future_dataframe(periods=3, freq='D')

        predict = model.predict(future)
-        print(city,industry)
+        print(city[:2],industry)

-        predict = predict[['ds', 'yhat']].set_index('ds').loc['2023-10'].rename(columns={'yhat':'售电量'})
-        df_train.rename(columns={'y':'售电量'},inplace=True)
-        result = pd.concat((df_train.set_index('ds').loc['2023-10'][:28],predict[-3:]))
-        result_dict[industry] = list(result['售电量'])
+        predict = predict[['ds', 'yhat']].set_index('ds').loc['2023-11'].rename(columns={'yhat':'售电量'})
+        ds_train.rename(columns={'y':'售电量'},inplace=True)

+        result = pd.concat((ds_train.set_index('ds').loc['2023-11'][:27],predict[-3:]))
+        result_dict[industry] = list(result['售电量'])

    with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\行业电量v1130.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as writer:
-        pd.DataFrame(result_dict,index=pd.date_range(start=f'2023-10-01', end=f'2023-10-31', freq='D').strftime('%Y-%m-%d')).to_excel(writer,sheet_name=city)
+        pd.DataFrame(result_dict,index=pd.date_range(start=f'2023-11-01', end=f'2023-11-30', freq='D').strftime('%Y-%m-%d')).to_excel(writer,sheet_name=city[:2])

        # df = predict.join(s1.set_index('ds')).loc['2023-8']
        # df['偏差率'] = (df['y'] - df['yhat']) / df['y']
--- a/浙江行业电量/浙江各地市行业电量数据/丽水.xlsx
+++ b/浙江行业电量/浙江各地市行业电量数据/丽水.xlsx
--- a/浙江行业电量/浙江各地市行业电量数据/台州.xlsx
+++ b/浙江行业电量/浙江各地市行业电量数据/台州.xlsx
--- a/浙江行业电量/浙江各地市行业电量数据/嘉兴.xlsx
+++ b/浙江行业电量/浙江各地市行业电量数据/嘉兴.xlsx
--- a/浙江行业电量/浙江各地市行业电量数据/宁波.xlsx
+++ b/浙江行业电量/浙江各地市行业电量数据/宁波.xlsx
--- a/浙江行业电量/浙江各地市行业电量数据/杭州.xlsx
+++ b/浙江行业电量/浙江各地市行业电量数据/杭州.xlsx
--- a/浙江行业电量/浙江各地市行业电量数据/温州.xlsx
+++ b/浙江行业电量/浙江各地市行业电量数据/温州.xlsx
--- a/浙江行业电量/浙江各地市行业电量数据/湖州.xlsx
+++ b/浙江行业电量/浙江各地市行业电量数据/湖州.xlsx
--- a/浙江行业电量/浙江各地市行业电量数据/绍兴.xlsx
+++ b/浙江行业电量/浙江各地市行业电量数据/绍兴.xlsx
--- a/浙江行业电量/浙江各地市行业电量数据/舟山.xlsx
+++ b/浙江行业电量/浙江各地市行业电量数据/舟山.xlsx
--- a/浙江行业电量/浙江各地市行业电量数据/衢州.xlsx
+++ b/浙江行业电量/浙江各地市行业电量数据/衢州.xlsx
--- a/浙江行业电量/浙江各地市行业电量数据/金华.xlsx
+++ b/浙江行业电量/浙江各地市行业电量数据/金华.xlsx
--- a/浙江行业电量/移动平均.py
+++ b/浙江行业电量/移动平均.py
@ -0,0 +1,42 @@
+import pandas as pd
+
+df = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江电量20231202.xlsx', sheet_name=2)
+df['stat_date'] = pd.to_datetime(df['stat_date'])
+
+# Only the city_name found in the first data row is processed by this script.
+# Moving average
+city = df['city_name'].iloc[0]
+print(city)
+df_city1 = df[df['city_name'] == city].set_index('stat_date').loc['2023-11']
+
+dict_big = {}
+dict_ok = {}
+resut_df = pd.DataFrame({})
+index_industry = []
+tq_list = []
+pred_list = []
+loss_list = []
+rate_list = []
+for industry in df_city1.columns[1:]:
+    index_industry.append(industry)
+    # History = every November row except the last three; those three dates are forecast below.
+    df_moving_avg = pd.DataFrame(df_city1[:-3][industry], index=df_city1[:-3].index)
+    future = pd.date_range(start=df_city1.index[-3], periods=3, freq='D')
+    # Recursive forecast: each appended day is the mean of the three most recent rows, earlier forecasts included.
+    for date in future:
+        df_moving_avg.loc[date, industry] = df_moving_avg[-3:].mean().values
+    loss = (df_city1[industry].tail(-3).sum() - df_moving_avg.tail(-3).sum()) / df_city1[industry].sum()  # NOTE(review): tail(-3) drops the FIRST three rows; loss is only read by the commented-out code at the end
+    tq_list.append(df_city1[industry].sum())
+    pred_list.append(df_moving_avg[industry].sum())
+    loss_list.append(df_city1[industry].sum()-df_moving_avg[industry].sum())
+    rate_list.append((df_city1[industry].sum()-df_moving_avg[industry].sum())/df_city1[industry].sum())
+resut_df = pd.DataFrame({'同期电量':tq_list,'预测电量':pred_list,'偏差':loss_list,'偏差率':rate_list},index=index_industry)
+print(resut_df)
+resut_df.to_excel(r'C:\Users\鸽子\Desktop\移动平均_丽水_行业.xlsx')  # NOTE(review): file name is fixed while city comes from the first row - confirm they match
+
+#     if loss.values >= 0.005:
+#         dict_big[industry] = loss.values[0]
+#     else:
+#         dict_ok[industry] = loss.values[0]
+# print(len(dict_ok))
+# print(len(dict_big))