diff --git a/各地级市日电量模型/hangzhou.bin b/各地级市日电量模型/hangzhou.bin index c7a269b..8a59454 100644 Binary files a/各地级市日电量模型/hangzhou.bin and b/各地级市日电量模型/hangzhou.bin differ diff --git a/各地级市日电量模型/huzhou.bin b/各地级市日电量模型/huzhou.bin index 40d0b22..a64fe00 100644 Binary files a/各地级市日电量模型/huzhou.bin and b/各地级市日电量模型/huzhou.bin differ diff --git a/各地级市日电量模型/jiaxing.bin b/各地级市日电量模型/jiaxing.bin index c8d2cce..7714ae6 100644 Binary files a/各地级市日电量模型/jiaxing.bin and b/各地级市日电量模型/jiaxing.bin differ diff --git a/各地级市日电量模型/quzhou.bin b/各地级市日电量模型/quzhou.bin index 43d3c86..e0fa054 100644 Binary files a/各地级市日电量模型/quzhou.bin and b/各地级市日电量模型/quzhou.bin differ diff --git a/各地级市日电量模型/taizhou.bin b/各地级市日电量模型/taizhou.bin index 7836859..682d1ee 100644 Binary files a/各地级市日电量模型/taizhou.bin and b/各地级市日电量模型/taizhou.bin differ diff --git a/各地级市日电量模型/wenzhou.bin b/各地级市日电量模型/wenzhou.bin index ee46301..7375758 100644 Binary files a/各地级市日电量模型/wenzhou.bin and b/各地级市日电量模型/wenzhou.bin differ diff --git a/各地级市日电量模型/zhoushan.bin b/各地级市日电量模型/zhoushan.bin index fafc41d..d752769 100644 Binary files a/各地级市日电量模型/zhoushan.bin and b/各地级市日电量模型/zhoushan.bin differ diff --git a/各地级市日电量模型/丽水.py b/各地级市日电量模型/丽水.py index 4010981..e5cb964 100644 --- a/各地级市日电量模型/丽水.py +++ b/各地级市日电量模型/丽水.py @@ -26,8 +26,11 @@ def normal(nd): parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir)) -data = pd.read_excel(os.path.join(parent_dir, '入模数据/丽水.xlsx'), index_col='dtdate') -data.index = pd.to_datetime(data.index, format='%Y-%m-%d') +data = pd.read_excel(os.path.join(parent_dir, '入模数据/丽水.xlsx')) +data['dtdate'] = pd.to_datetime(data['dtdate'],format='%Y-%m-%d') +data['year'] = data['dtdate'].dt.year +# data.index = pd.to_datetime(data.index, format='%Y-%m-%d') +data.set_index('dtdate',inplace=True) data = data.loc[normal(data['售电量']).index] # list2 = [] @@ -51,10 +54,12 @@ df_eval = data.loc['2023-11'] # df_train = data.loc['2021-1':'2023-8'] df_train = data[450:-1] # df_train = data.loc['2022-4':'2023-9'][:-3] -df_train = df_train[['tem_max', 'tem_min', 'holiday', '24ST', '售电量', 'season']] +df_train = df_train[['tem_max', 'tem_min', 'holiday', '24ST', '售电量', 'season','year']] -X = df_train[['tem_max', 'tem_min', '24ST', 'holiday', 'season']] -X_eval = df_eval[['tem_max', 'tem_min', '24ST', 'holiday', 'season']] +print(df_train.corr()['售电量']) + +X = df_train[['tem_max', 'tem_min', '24ST', 'holiday', 'season','year']] +X_eval = df_eval[['tem_max', 'tem_min', '24ST', 'holiday', 'season','year']] y = df_train['售电量'] # best_goal = 1 # best_i = {} @@ -85,9 +90,9 @@ print(goal2) import numpy as np X_eval = np.array([ - [17.2, 5.7, 10, 0, 0], - [21.2, 4.3, 10, 0, 0], - [11.5, 6.6, 10, 0, 0] + [17.2, 5.7, 10, 0, 0,2023], + [21.2, 4.3, 10, 0, 0,2023], + [11.5, 6.6, 10, 0, 0,2023] ]) print(model.predict(X_eval)) result = model.predict(X_eval) diff --git a/各地级市日电量模型/台州.py b/各地级市日电量模型/台州.py index ff2296f..679a1ce 100644 --- a/各地级市日电量模型/台州.py +++ b/各地级市日电量模型/台州.py @@ -22,8 +22,11 @@ def normal(nd): return nd[(ndlow)] parent_dir = os.path.abspath(os.path.join(os.getcwd(),os.pardir)) -data = pd.read_excel(os.path.join(parent_dir,'入模数据/台州.xlsx'),index_col='dtdate') -data.index = pd.to_datetime(data.index,format='%Y-%m-%d') +data = pd.read_excel(os.path.join(parent_dir,'入模数据/台州.xlsx')) +data['dtdate'] = pd.to_datetime(data['dtdate'],format='%Y-%m-%d') +data['year'] = data['dtdate'].dt.year +# data.index = pd.to_datetime(data.index, format='%Y-%m-%d') +data.set_index('dtdate',inplace=True) data = data.loc[normal(data['售电量']).index] # list2 = [] @@ -47,10 +50,10 @@ df_train = data[500:-1] # df_train = data[500:][:-3] print(df_train) -df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']] +df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season','year']] -X = df_train[['tem_max','tem_min','24ST','holiday','season']] -X_eval = df_eval[['tem_max','tem_min','24ST','holiday','season']] +X = df_train[['tem_max','tem_min','24ST','holiday','season','year']] +X_eval = df_eval[['tem_max','tem_min','24ST','holiday','season','year']] y = df_train['售电量'] # best_goal = 1 # best_i = {} @@ -85,9 +88,9 @@ model.save_model('taizhou.bin') import numpy as np X_eval = np.array([ - [18.8, 6.2, 10, 0, 0], - [21.7, 6.5, 10, 0, 0], - [14.3, 8.4, 10, 0, 0] + [18.8, 6.2, 10, 0, 0,2023], + [21.7, 6.5, 10, 0, 0,2023], + [14.3, 8.4, 10, 0, 0,2023] ]) print(model.predict(X_eval)) result = model.predict(X_eval) diff --git a/各地级市日电量模型/嘉兴.py b/各地级市日电量模型/嘉兴.py index c2bcc78..2205735 100644 --- a/各地级市日电量模型/嘉兴.py +++ b/各地级市日电量模型/嘉兴.py @@ -26,8 +26,11 @@ def normal(nd): parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir)) -data = pd.read_excel(os.path.join(parent_dir, '入模数据/嘉兴.xlsx'), index_col='dtdate') -data.index = pd.to_datetime(data.index, format='%Y-%m-%d') +data = pd.read_excel(os.path.join(parent_dir, '入模数据/嘉兴.xlsx')) +data['dtdate'] = pd.to_datetime(data['dtdate'],format='%Y-%m-%d') +data['year'] = data['dtdate'].dt.year +# data.index = pd.to_datetime(data.index, format='%Y-%m-%d') +data.set_index('dtdate',inplace=True) data = data.loc[normal(data['售电量']).index] # list2 = [] @@ -47,12 +50,13 @@ data['season'] = data.index.map(season) df_eval = data.loc['2023-11'] df_train = data.iloc[450:-1] # df_train = data[450:][:-3] + print(df_train) -df_train = df_train[['tem_max', 'tem_min', 'holiday', '24ST', '售电量', 'season']] +df_train = df_train[['tem_max', 'tem_min', 'holiday', '24ST', '售电量', 'season','year']] -X = df_train[['tem_max', 'tem_min', '24ST', 'holiday', 'season']] -X_eval = df_eval[['tem_max', 'tem_min', '24ST', 'holiday', 'season']] +X = df_train[['tem_max', 'tem_min', '24ST', 'holiday', 'season','year']] +X_eval = df_eval[['tem_max', 'tem_min', '24ST', 'holiday', 'season','year']] y = df_train['售电量'] # best_goal = 1 @@ -89,9 +93,9 @@ loaded_model.load_model('jiaxing.bin') import numpy as np X_eval = np.array([ - [14.5, 7.7, 10, 0, 1], - [18.2, 7.8, 10, 0, 1], - [11.9, 6.6, 10, 0, 1] + [14.5, 7.7, 10, 0, 1,2023], + [21.6, 10, 10, 0, 1,2023], + [11.9, 6.6, 10, 0, 1,2023] ]) print(model.predict(X_eval)) result = model.predict(X_eval) diff --git a/各地级市日电量模型/宁波.py b/各地级市日电量模型/宁波.py index a2e25f1..19d4306 100644 --- a/各地级市日电量模型/宁波.py +++ b/各地级市日电量模型/宁波.py @@ -26,8 +26,11 @@ def normal(nd): parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir)) -data = pd.read_excel(os.path.join(parent_dir, '入模数据/宁波.xlsx'), index_col='dtdate') -data.index = pd.to_datetime(data.index, format='%Y-%m-%d') +data = pd.read_excel(os.path.join(parent_dir, '入模数据/宁波.xlsx')) +data['dtdate'] = pd.to_datetime(data['dtdate'],format='%Y-%m-%d') +data['year'] = data['dtdate'].dt.year +# data.index = pd.to_datetime(data.index, format='%Y-%m-%d') +data.set_index('dtdate',inplace=True) data = data.loc[normal(data['售电量']).index] # list2 = [] @@ -50,10 +53,10 @@ df_eval = data.loc['2023-11'] df_train = data.loc['2022-01':'2023-11'] -df_train = df_train[['tem_max', 'tem_min', 'holiday', '24ST', '售电量', 'season']] +df_train = df_train[['tem_max', 'tem_min', 'holiday', '24ST', '售电量', 'season','year']] -X = df_train[['tem_max', 'tem_min', '24ST', 'holiday', 'season']] -X_eval = df_eval[['tem_max', 'tem_min', '24ST', 'holiday', 'season']] +X = df_train[['tem_max', 'tem_min', '24ST', 'holiday', 'season','year']] +X_eval = df_eval[['tem_max', 'tem_min', '24ST', 'holiday', 'season','year']] y = df_train['售电量'] # best_goal = 1 @@ -86,9 +89,9 @@ import numpy as np X_eval = np.array([ - [16.5, 6.8, 10, 0, 1], - [21.7, 6.8, 10, 0, 1], - [13, 8.8, 10, 0, 1] + [16.5, 6.8, 10, 0, 1,2023], + [21.7, 6.8, 10, 0, 1,2023], + [13, 8.8, 10, 0, 1,2023] ]) print(model.predict(X_eval)) result = model.predict(X_eval) diff --git a/各地级市日电量模型/杭州.py b/各地级市日电量模型/杭州.py index 2a90ee5..e740cbf 100644 --- a/各地级市日电量模型/杭州.py +++ b/各地级市日电量模型/杭州.py @@ -34,8 +34,11 @@ def normal(nd): parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir)) -data = pd.read_excel(os.path.join(parent_dir, '入模数据/杭州.xlsx'), index_col='dtdate') -data.index = pd.to_datetime(data.index, format='%Y-%m-%d') +data = pd.read_excel(os.path.join(parent_dir, '入模数据/杭州.xlsx')) +data['dtdate'] = pd.to_datetime(data['dtdate'],format='%Y-%m-%d') +data['year'] = data['dtdate'].dt.year +# data.index = pd.to_datetime(data.index, format='%Y-%m-%d') +data.set_index('dtdate',inplace=True) data = data.loc[normal(data['售电量']).index] # list2 = [] @@ -61,8 +64,8 @@ df_train = data[500:-1] df_eval = data.loc['2023-11'] print(df_train) -X = df_train[['tem_max', 'tem_min', '24ST', 'holiday', 'season']] -X_eval = df_eval[['tem_max', 'tem_min', '24ST', 'holiday', 'season']] +X = df_train[['tem_max', 'tem_min', '24ST', 'holiday', 'season','year']] +X_eval = df_eval[['tem_max', 'tem_min', '24ST', 'holiday', 'season','year']] y = df_train['售电量'] # best_goal = 1 @@ -115,9 +118,9 @@ model.save_model('hangzhou.bin') loaded_model = xgb.XGBRegressor() loaded_model.load_model('hangzhou.bin') X_eval = np.array([ - [17.2, 5.7, 10, 0, 0], - [21.2, 4.3, 10, 0, 0], - [11.5, 6.6, 10, 0, 0] + [17.2, 5.7, 10, 0, 0,2023], + [21.2, 4.3, 10, 0, 0,2023], + [11.5, 6.6, 10, 0, 0,2023] ]) print(model.predict(X_eval)) result = model.predict(X_eval) diff --git a/各地级市日电量模型/温州.py b/各地级市日电量模型/温州.py index dd25434..d089df9 100644 --- a/各地级市日电量模型/温州.py +++ b/各地级市日电量模型/温州.py @@ -26,8 +26,11 @@ def normal(nd): parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir)) -data = pd.read_excel(os.path.join(parent_dir, '入模数据/温州.xlsx'), index_col='dtdate') -data.index = pd.to_datetime(data.index, format='%Y-%m-%d') +data = pd.read_excel(os.path.join(parent_dir, '入模数据/温州.xlsx')) +data['dtdate'] = pd.to_datetime(data['dtdate'],format='%Y-%m-%d') +data['year'] = data['dtdate'].dt.year +# data.index = pd.to_datetime(data.index, format='%Y-%m-%d') +data.set_index('dtdate',inplace=True) data = data.loc[normal(data['售电量']).index] # list2 = [] @@ -52,10 +55,10 @@ df_train = data[450:] # df_train = data[450:][:-3] print(df_train) -df_train = df_train[['tem_max', 'tem_min', 'holiday', '24ST', '售电量', 'season']] +df_train = df_train[['tem_max', 'tem_min', 'holiday', '24ST', '售电量', 'season','year']] -X = df_train[['tem_max', 'tem_min', '24ST', 'holiday', 'season']] -X_eval = df_eval[['tem_max', 'tem_min', '24ST', 'holiday', 'season']] +X = df_train[['tem_max', 'tem_min', '24ST', 'holiday', 'season','year']] +X_eval = df_eval[['tem_max', 'tem_min', '24ST', 'holiday', 'season','year']] y = df_train['售电量'] # best_goal = 1 @@ -95,9 +98,9 @@ loaded_model.load_model('wenzhou.bin') import numpy as np X_eval = np.array([ - [19.8, 6.6, 10, 0, 1], - [22, 6.1, 10, 0, 1], - [18.5, 10.1, 10, 0, 1] + [19.8, 6.6, 10, 0, 1,2023], + [22, 6.1, 10, 0, 1,2023], + [18.5, 10.1, 10, 0, 1,2023] ]) print(model.predict(X_eval)) result = model.predict(X_eval) diff --git a/各地级市日电量模型/湖州.py b/各地级市日电量模型/湖州.py index 6575bee..8f685a6 100644 --- a/各地级市日电量模型/湖州.py +++ b/各地级市日电量模型/湖州.py @@ -27,8 +27,11 @@ def normal(nd): parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir)) -data = pd.read_excel(os.path.join(parent_dir, '入模数据/湖州.xlsx'), index_col='dtdate') -data.index = pd.to_datetime(data.index, format='%Y-%m-%d') +data = pd.read_excel(os.path.join(parent_dir, '入模数据/湖州.xlsx')) +data['dtdate'] = pd.to_datetime(data['dtdate'],format='%Y-%m-%d') +data['year'] = data['dtdate'].dt.year +# data.index = pd.to_datetime(data.index, format='%Y-%m-%d') +data.set_index('dtdate',inplace=True) data = data.loc[normal(data['售电量']).index] # list2 = [] @@ -52,10 +55,10 @@ df_eval = data.loc['2023-11'] df_train = data[450:-1] # df_train = data[450:][:-3] print(df_train) -df_train = df_train[['tem_max', 'tem_min', 'holiday', '24ST', '售电量', 'season']] +df_train = df_train[['tem_max', 'tem_min', 'holiday', '24ST', '售电量', 'season','year']] -X = df_train[['tem_max', 'tem_min', '24ST', 'holiday', 'season']] -X_eval = df_eval[['tem_max', 'tem_min', '24ST', 'holiday', 'season']] +X = df_train[['tem_max', 'tem_min', '24ST', 'holiday', 'season','year']] +X_eval = df_eval[['tem_max', 'tem_min', '24ST', 'holiday', 'season','year']] y = df_train['售电量'] x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=158) @@ -84,9 +87,9 @@ import numpy as np X_eval = np.array([ - [14.9, 7.1, 10, 0, 1], - [17.7, 6.6, 10, 0, 1], - [10.3, 5.8, 10, 0, 1] + [14.9, 7.1, 10, 0, 1,2023], + [17.7, 6.6, 10, 0, 1,2023], + [10.3, 5.8, 10, 0, 1,2023] ]) print(model.predict(X_eval)) result = model.predict(X_eval) diff --git a/各地级市日电量模型/绍兴.py b/各地级市日电量模型/绍兴.py index e5fa10e..915af09 100644 --- a/各地级市日电量模型/绍兴.py +++ b/各地级市日电量模型/绍兴.py @@ -26,8 +26,11 @@ def normal(nd): parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir)) -data = pd.read_excel(os.path.join(parent_dir, '入模数据/绍兴.xlsx'), index_col='dtdate') -data.index = pd.to_datetime(data.index, format='%Y-%m-%d') +data = pd.read_excel(os.path.join(parent_dir, '入模数据/绍兴.xlsx')) +data['dtdate'] = pd.to_datetime(data['dtdate'],format='%Y-%m-%d') +data['year'] = data['dtdate'].dt.year +# data.index = pd.to_datetime(data.index, format='%Y-%m-%d') +data.set_index('dtdate',inplace=True) data = data.loc[normal(data['售电量']).index] # list2 = [] @@ -51,10 +54,10 @@ df_train = data[450:] # df_train = data[450:][:-3] print(df_train) -df_train = df_train[['tem_max', 'tem_min', 'holiday', '24ST', '售电量', 'season']] +df_train = df_train[['tem_max', 'tem_min', 'holiday', '24ST', '售电量', 'season','year']] -X = df_train[['tem_max', 'tem_min', '24ST', 'holiday', 'season']] -X_eval = df_eval[['tem_max', 'tem_min', '24ST', 'holiday', 'season']] +X = df_train[['tem_max', 'tem_min', '24ST', 'holiday', 'season','year']] +X_eval = df_eval[['tem_max', 'tem_min', '24ST', 'holiday', 'season','year']] y = df_train['售电量'] x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) @@ -84,9 +87,9 @@ print(result_eval) import numpy as np X_eval = np.array([ - [17.4, 6.6, 10, 0, 0], - [21.2, 7, 10, 0, 0], - [12.1, 7.3, 10, 0, 0] + [17.4, 6.6, 10, 0, 0,2023], + [21.2, 7, 10, 0, 0,2023], + [12.1, 7.3, 10, 0, 0,2023] ]) print(model.predict(X_eval)) result = model.predict(X_eval) diff --git a/各地级市日电量模型/舟山.py b/各地级市日电量模型/舟山.py index 86f2f6d..520fc49 100644 --- a/各地级市日电量模型/舟山.py +++ b/各地级市日电量模型/舟山.py @@ -25,10 +25,11 @@ def normal(nd): parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir)) -data = pd.read_excel(os.path.join(parent_dir, '入模数据/舟山.xlsx'), index_col='dtdate') -data.index = pd.to_datetime(data.index, format='%Y-%m-%d') +data = pd.read_excel(os.path.join(parent_dir, '入模数据/舟山.xlsx')) +data['dtdate'] = pd.to_datetime(data['dtdate'],format='%Y-%m-%d') +data['year'] = data['dtdate'].dt.year +data.set_index('dtdate',inplace=True) data = data.loc[normal(data['售电量']).index] - # list2 = [] # list0 = [] # list1 = [] @@ -49,9 +50,9 @@ df_eval = data.loc['2023-11'] df_train = data.iloc[450:] # df_train = data.iloc[450:][:-3] -df_train = df_train[['tem_max', 'tem_min', 'holiday', '24ST', '售电量', 'season']] -X = df_train[['tem_max', 'tem_min', '24ST', 'holiday', 'season']] -X_eval = df_eval[['tem_max', 'tem_min', '24ST', 'holiday', 'season']] +df_train = df_train[['tem_max', 'tem_min', 'holiday', '24ST', '售电量', 'season','year']] +X = df_train[['tem_max', 'tem_min', '24ST', 'holiday', 'season','year']] +X_eval = df_eval[['tem_max', 'tem_min', '24ST', 'holiday', 'season','year']] y = df_train['售电量'] x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) @@ -80,9 +81,9 @@ loaded_model.load_model('zhoushan.bin') import numpy as np X_eval = np.array([ - [14.7, 11.4, 10, 0, 1], - [19.4, 11.8, 10, 0, 1], - [14.9, 9.4, 10, 0, 1] + [14.7, 11.4, 10, 0, 1,2023], + [19.4, 11.8, 10, 0, 1,2023], + [14.9, 9.4, 10, 0, 1,2023] ]) print(model.predict(X_eval)) result = model.predict(X_eval) diff --git a/各地级市日电量模型/衢州.py b/各地级市日电量模型/衢州.py index 1b8b13b..414ef87 100644 --- a/各地级市日电量模型/衢州.py +++ b/各地级市日电量模型/衢州.py @@ -21,8 +21,11 @@ def normal(nd): return nd[(ndlow)] parent_dir = os.path.abspath(os.path.join(os.getcwd(),os.pardir)) -data = pd.read_excel(os.path.join(parent_dir,'入模数据/衢州.xlsx'),index_col='dtdate') -data.index = pd.to_datetime(data.index,format='%Y-%m-%d') +data = pd.read_excel(os.path.join(parent_dir,'入模数据/衢州.xlsx')) +data['dtdate'] = pd.to_datetime(data['dtdate'],format='%Y-%m-%d') +data['year'] = data['dtdate'].dt.year + +data.set_index('dtdate',inplace=True) data = data.loc[normal(data['售电量']).index] # list2 = [] @@ -46,10 +49,10 @@ df_train = data.iloc[450:] # df_train = data.iloc[450:-3] -df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season']] +df_train = df_train[['tem_max','tem_min','holiday','24ST','售电量','season','year']] -X = df_train[['tem_max','tem_min','24ST','holiday','season']] -X_eval = df_eval[['tem_max','tem_min','24ST','holiday','season']] +X = df_train[['tem_max','tem_min','24ST','holiday','season','year']] +X_eval = df_eval[['tem_max','tem_min','24ST','holiday','season','year']] y = df_train['售电量'] x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42) @@ -74,9 +77,9 @@ loaded_model = xgb.XGBRegressor() loaded_model.load_model('quzhou.bin') import numpy as np X_eval = np.array([ - [18.7, 7, 10, 0, 1], - [20.2, 6.5, 10, 0, 1], - [11.2, 8, 10, 0, 1] + [18.7, 7, 10, 0, 1,2023], + [20.2, 6.5, 10, 0, 1,2023], + [11.2, 8, 10, 0, 1,2023] ]) print(model.predict(X_eval)) result = model.predict(X_eval) diff --git a/文档处理.py b/文档处理.py index 24df157..6395243 100644 --- a/文档处理.py +++ b/文档处理.py @@ -89,22 +89,113 @@ file_dir = r'C:\Users\鸽子\Desktop\11月区县分压预测' # if_sheet_exists='replace') as writer: # df_result.to_excel(writer, sheet_name=f'{sheet}') -df = pd.read_excel('C:\python-project\p1031\浙江行业电量\浙江各地市行业电量数据\台州.xlsx').set_index('stat_date') -print(df.columns) +# df = pd.read_excel('C:\python-project\p1031\浙江行业电量\浙江各地市行业电量数据\台州.xlsx').set_index('stat_date') +# print(df.columns) import matplotlib.pyplot as plt import matplotlib as mpl import matplotlib.dates as mdates -date_rng = pd.date_range(start=df['4.有色金属矿采选业'].index[0], end=df['4.有色金属矿采选业'].index[-1], freq='D') -mpl.rcParams['font.sans-serif']=['kaiti'] -print(df['4.有色金属矿采选业'][:-1]) -plt.figure(figsize=(10, 6)) -plt.plot(df['4.有色金属矿采选业'].index[:-1],df['4.有色金属矿采选业'][:-1]) +# date_rng = pd.date_range(start=df['4.有色金属矿采选业'].index[0], end=df['4.有色金属矿采选业'].index[-1], freq='D') +# mpl.rcParams['font.sans-serif']=['kaiti'] +# print(df['4.有色金属矿采选业'][:-1]) +# plt.figure(figsize=(10, 6)) +# plt.plot(df['4.有色金属矿采选业'].index[:-1],df['4.有色金属矿采选业'][:-1]) +# +# plt.title(f'4.有色金属矿采选业') +# plt.gcf().autofmt_xdate() +# plt.gca().xaxis.set_major_locator(mdates.DayLocator(interval=120)) +# +# plt.xticks(rotation=45) +# plt.xlabel('时间') +# plt.ylabel('数值') +# plt.show() + + +excel_file = pd.ExcelFile(r'C:\Users\鸽子\Desktop\浙江电量20231202.xlsx') +df_city_real = pd.read_excel(excel_file,sheet_name=0) +df_city_real = df_city_real[df_city_real['county_name'].isnull()] +df_city_real['city_name'] = df_city_real['city_name'].str[4:6] +# print(df_city_real) + +file_dir = r'C:\Users\鸽子\Desktop\发行&预测\区域行业分压预测v1129' +print(os.listdir(file_dir)) + +# 区域明细及偏差率统计 + +city_area_file = pd.ExcelFile(os.path.join(file_dir,os.listdir(file_dir)[2])) +# for city in df_city_real['city_name'].drop_duplicates(): +# df_city_pred = pd.read_excel(city_area_file,sheet_name=city).dropna().set_index('日期') +# df_city_pred.index = pd.to_datetime(df_city_pred.index) +# df_real = df_city_real[df_city_real['city_name']==city].set_index('pt_date')['power_sal'] +# df_real.index = pd.to_datetime(df_real.index) +# df_city_pred.loc['2023-11-27'] = df_real.loc['2023-11-27'] +# +# result = pd.DataFrame(df_real).join(df_city_pred) +# result.columns = ['实际值','预测值'] +# result['偏差率'] = (result['实际值'] - result['预测值'])/result['实际值'] +# result['指标'] = (df_real.values.sum()-df_city_pred.values.sum())/df_real.values.sum() +# result['偏差率'][:27] = 0 +# print(result) +# with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\区域电量同期预测对比.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as writer: +# result.to_excel(writer,sheet_name=f'{city}') + + + + # pd.read_excel(city_area_file,sheet_name='舟山').dropna().set_index('日期') + # df_city_real[df_city_real['city_name']=='舟山'].set_index('pt_date')['power_sal'] + +city_volt_file = os.path.join(file_dir,os.listdir(file_dir)[2]) +excel_file1 = pd.ExcelFile(city_volt_file) -plt.title(f'4.有色金属矿采选业') -plt.gcf().autofmt_xdate() -plt.gca().xaxis.set_major_locator(mdates.DayLocator(interval=120)) +# for sheet_name in excel_file1.sheet_names[1:]: +# print(sheet_name) +# pred_volt_df = pd.read_excel(excel_file1,sheet_name=sheet_name).dropna() +# +# pred_volt_df.set_index(pred_volt_df.columns[0],inplace=True) +# real_volt_df = pd.read_excel(excel_file,sheet_name=1).set_index('pt_date') +# +# real_volt_df = real_volt_df[(real_volt_df['county_name'].isnull())&(real_volt_df['city_name'].str[4:6]==sheet_name)].drop(columns=['county_name','500kv(含330kv)以上']) +# +# result = pd.DataFrame({'实际值':list(real_volt_df.sum()[1:]), +# '预测值':list(pred_volt_df.sum()[1:]), +# '偏差':list(real_volt_df.sum()[1:] - pred_volt_df.sum()[1:])},index=real_volt_df.sum()[1:].index) +# result['指标'] = result['偏差']/real_volt_df.sum()[1:] +# +# +# with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\市分压电量同期预测对比.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as wirter: +# result.to_excel(wirter,sheet_name=f'{sheet_name}') + +industry_file = pd.ExcelFile(os.path.join(file_dir,os.listdir(file_dir)[4])) +# for sheet_name in industry_file.sheet_names[1:]: +# +# pred_industry_df = pd.concat([pd.read_excel(industry_file,sheet_name=sheet_name).iloc[:27],pd.read_excel(industry_file,sheet_name=sheet_name).iloc[-3:]],ignore_index=True) +# pred_industry_df[pred_industry_df.columns[0]] = pd.date_range(start=f'2023-11-01', end=f'2023-11-30', freq='D').strftime('%Y-%m-%d') +# pred_industry_df.set_index(pred_industry_df.columns[0],inplace=True) +# +# real_industry_df = pd.read_excel(excel_file,sheet_name=2).set_index('stat_date') +# real_industry_df['city_name'] = real_industry_df['city_name'].str[4:6] +# real_industry_df = real_industry_df[real_industry_df['city_name']==sheet_name[:2]].drop(columns=['city_name']).iloc[:30] +# print(sheet_name[:2]) +# print(pd.DataFrame(real_industry_df.sum(),columns=['真实值'])) +# +# +# result = pd.DataFrame(real_industry_df.sum(),columns=['真实值']).join(pd.DataFrame(pred_industry_df.sum(),columns=['预测值'])) +# print(result) +# result['偏差'] = result['真实值'] - result['预测值'] +# result['指标'] = result['偏差']/result['真实值'] +# +# with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\行业电量同期预测对比.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as wirter: +# result.to_excel(wirter,sheet_name=f'{sheet_name[:2]}') -plt.xticks(rotation=45) -plt.xlabel('时间') -plt.ylabel('数值') -plt.show() \ No newline at end of file +e1 = r'C:\Users\鸽子\Desktop\行业电量同期预测对比.xlsx' +df1 = pd.read_excel(e1,sheet_name=1) +df1.set_index(df1.columns[0],inplace=True) +for sheet_name in industry_file.sheet_names[2:]: + df2 = pd.read_excel(e1,sheet_name=sheet_name) + df2 = df2.set_index(df2.columns[0]) + df1 += df2 +df1['偏差'] = df1['真实值']-df1['预测值'] +df1['偏差率'] = df1['偏差']/df1['真实值'] +df1.to_excel(r'C:\Users\鸽子\Desktop\1.xlsx') +# writer = pd.ExcelWriter(e1,engine='openpyxl') +# df1.to_excel(writer,sheet_name=0) +print(df1) \ No newline at end of file diff --git a/浙江电压等级电量/prophet_分压电量_10kv.py b/浙江电压等级电量/prophet_分压电量_10kv.py new file mode 100644 index 0000000..7106f35 --- /dev/null +++ b/浙江电压等级电量/prophet_分压电量_10kv.py @@ -0,0 +1,61 @@ +from prophet import Prophet +import pandas as pd +import os +import numpy as np + + +def normal(data): + high = data.describe()['75%'] + 1.5 * (data.describe()['75%'] - data.describe()['25%']) + low = data.describe()['25%'] - 1.5 * (data.describe()['75%'] - data.describe()['25%']) + return (data<=high)&(data>=low) + + +file_dir = 'C:\python-project\p1031\浙江电压等级电量\浙江各地市分电压日电量数据' + +for city in os.listdir(file_dir): + df_city = pd.read_excel(os.path.join(file_dir, city)) + # df_city['stat_date'] = df_city['stat_date'].map(lambda x: str(x).strip()[:10]) + df_city['stat_date'] = pd.to_datetime(df_city['stat_date']) + list_goal = [] + list_industry = [] + result_dict = {} + for level in df_city.columns[2:]: + s1 = df_city[['stat_date', level]] + + ds_train = s1[(s1['stat_date'] >= '2022-09-30') & (s1['stat_date'] <= '2023-11-27')].sort_values(by='stat_date') + + ds_train.rename(columns={'stat_date': 'ds', level: 'y'}, inplace=True) + + df_train = ds_train.copy() + df_train['y'] = df_train['y'].where(normal(df_train['y']), other=np.nan).fillna(method='ffill') + + + model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True) + model.add_country_holidays(country_name="CN") + model.fit(df_train) + future = model.make_future_dataframe(periods=3, freq='D') + + predict = model.predict(future) + print(city[1:3],level) + + predict = predict[['ds', 'yhat']].set_index('ds').loc['2023-11'].rename(columns={'yhat':'售电量'}) + ds_train.rename(columns={'y':'售电量'},inplace=True) + + result = pd.concat((ds_train.set_index('ds').loc['2023-11'][:27],predict[-3:])) + result_dict[level] = list(result['售电量']) + + with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\分压电量预测v1213.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as writer: + pd.DataFrame(result_dict,index=pd.date_range(start=f'2023-11-01', end=f'2023-11-30', freq='D').strftime('%Y-%m-%d')).to_excel(writer,sheet_name=city[1:3]) + + # df = predict.join(s1.set_index('ds')).loc['2023-8'] + # df['偏差率'] = (df['y'] - df['yhat']) / df['y'] + # df['goal'] = (df['y'] - df['yhat'])[-3:].sum() / df['y'].sum() + # list_goal.append((df['y'] - df['yhat'])[-3:].sum() / df['y'].sum()) + # list_industry.append(industry) + + # df = pd.DataFrame({'industry': list_industry, 'goal': list_goal}) + # df.to_csv(fr'C:\Users\鸽子\Desktop\行业8月偏差\{city[:2]}_goal.csv', index=False, encoding='gbk') + # + # with open(r'C:\Users\鸽子\Desktop\goal_8.txt','a') as f: + # f.write(f'{city[:2]}\n') + # df['goal'].value_counts(bins=[-np.inf,-0.05, -0.01, -0.005, 0, 0.005, 0.01, 0.02, 0.05,np.inf], sort=False).to_csv(f,header=False,sep='\t') diff --git a/浙江电压等级电量/prophet_分压电量_220v.py b/浙江电压等级电量/prophet_分压电量_220v.py new file mode 100644 index 0000000..7106f35 --- /dev/null +++ b/浙江电压等级电量/prophet_分压电量_220v.py @@ -0,0 +1,61 @@ +from prophet import Prophet +import pandas as pd +import os +import numpy as np + + +def normal(data): + high = data.describe()['75%'] + 1.5 * (data.describe()['75%'] - data.describe()['25%']) + low = data.describe()['25%'] - 1.5 * (data.describe()['75%'] - data.describe()['25%']) + return (data<=high)&(data>=low) + + +file_dir = 'C:\python-project\p1031\浙江电压等级电量\浙江各地市分电压日电量数据' + +for city in os.listdir(file_dir): + df_city = pd.read_excel(os.path.join(file_dir, city)) + # df_city['stat_date'] = df_city['stat_date'].map(lambda x: str(x).strip()[:10]) + df_city['stat_date'] = pd.to_datetime(df_city['stat_date']) + list_goal = [] + list_industry = [] + result_dict = {} + for level in df_city.columns[2:]: + s1 = df_city[['stat_date', level]] + + ds_train = s1[(s1['stat_date'] >= '2022-09-30') & (s1['stat_date'] <= '2023-11-27')].sort_values(by='stat_date') + + ds_train.rename(columns={'stat_date': 'ds', level: 'y'}, inplace=True) + + df_train = ds_train.copy() + df_train['y'] = df_train['y'].where(normal(df_train['y']), other=np.nan).fillna(method='ffill') + + + model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True) + model.add_country_holidays(country_name="CN") + model.fit(df_train) + future = model.make_future_dataframe(periods=3, freq='D') + + predict = model.predict(future) + print(city[1:3],level) + + predict = predict[['ds', 'yhat']].set_index('ds').loc['2023-11'].rename(columns={'yhat':'售电量'}) + ds_train.rename(columns={'y':'售电量'},inplace=True) + + result = pd.concat((ds_train.set_index('ds').loc['2023-11'][:27],predict[-3:])) + result_dict[level] = list(result['售电量']) + + with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\分压电量预测v1213.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as writer: + pd.DataFrame(result_dict,index=pd.date_range(start=f'2023-11-01', end=f'2023-11-30', freq='D').strftime('%Y-%m-%d')).to_excel(writer,sheet_name=city[1:3]) + + # df = predict.join(s1.set_index('ds')).loc['2023-8'] + # df['偏差率'] = (df['y'] - df['yhat']) / df['y'] + # df['goal'] = (df['y'] - df['yhat'])[-3:].sum() / df['y'].sum() + # list_goal.append((df['y'] - df['yhat'])[-3:].sum() / df['y'].sum()) + # list_industry.append(industry) + + # df = pd.DataFrame({'industry': list_industry, 'goal': list_goal}) + # df.to_csv(fr'C:\Users\鸽子\Desktop\行业8月偏差\{city[:2]}_goal.csv', index=False, encoding='gbk') + # + # with open(r'C:\Users\鸽子\Desktop\goal_8.txt','a') as f: + # f.write(f'{city[:2]}\n') + # df['goal'].value_counts(bins=[-np.inf,-0.05, -0.01, -0.005, 0, 0.005, 0.01, 0.02, 0.05,np.inf], sort=False).to_csv(f,header=False,sep='\t') diff --git a/浙江电压等级电量/分压_移动平均.py b/浙江电压等级电量/分压_移动平均.py new file mode 100644 index 0000000..b788cb1 --- /dev/null +++ b/浙江电压等级电量/分压_移动平均.py @@ -0,0 +1,47 @@ +import pandas as pd +df = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江电量20231202.xlsx', sheet_name=1) +df['pt_date'] = pd.to_datetime(df['pt_date']) + +# 移动平均 + +dict_big = {} +dict_ok = {} + +# for city in df['city_name'].drop_duplicates(): +# +# df_city1 = df[(df['city_name'] == city) & (df['county_name'].isnull())].set_index('pt_date').loc['2023-11'] +# resut_df = pd.DataFrame({}) +# index_level = [] +# tq_list = [] +# pred_list = [] +# loss_list = [] +# rate_list = [] +# for level in df_city1.columns[2:]: +# +# index_level.append(level) +# +# df_moving_avg = pd.DataFrame(df_city1[:-3][level], index=df_city1[:-3].index) +# future = pd.date_range(start=df_city1.index[-3], periods=3, freq='D') +# +# for date in future: +# df_moving_avg.loc[date, level] = df_moving_avg[-3:].mean().values +# loss = (df_city1[level].tail(-3).sum() - df_moving_avg.tail(-3).sum()) / df_city1[level].sum() +# tq_list.append(df_city1[level].sum()) +# pred_list.append(df_moving_avg[level].sum()) +# loss_list.append(df_city1[level].sum()-df_moving_avg[level].sum()) +# rate_list.append((df_city1[level].sum()-df_moving_avg[level].sum())/df_city1[level].sum()) +# resut_df = pd.DataFrame({'同期电量':tq_list,'预测电量':pred_list,'偏差':loss_list,'偏差率':rate_list},index=index_level) +# with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\11月移动平均分压.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as writer: +# resut_df.to_excel(writer,sheet_name=f'{city[4:6]}') + +excel_file = pd.ExcelFile(r'C:\Users\鸽子\Desktop\11月移动平均分压.xlsx') +df1 = pd.read_excel(excel_file,sheet_name=1) +df1.set_index(df1.columns[0],inplace=True) +for sheet in excel_file.sheet_names[2:]: + df = pd.read_excel(excel_file,sheet_name=sheet) + df.set_index(df.columns[0],inplace=True) + df1 += df +df1['偏差'] = df1['同期电量']-df1['预测电量'] +df1['偏差率'] = df1['偏差']/df1['同期电量'] +df1.to_excel('移动平均_11月分压汇总.xlsx') +print(df1) \ No newline at end of file diff --git a/浙江电压等级电量/电压等级_输入10_输出3.py b/浙江电压等级电量/电压等级_输入10_输出3.py index ac07c67..ec9d937 100644 --- a/浙江电压等级电量/电压等级_输入10_输出3.py +++ b/浙江电压等级电量/电压等级_输入10_输出3.py @@ -87,7 +87,7 @@ def data_preprocessing(data): # # 训练 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') -# + # # 标准化到0~1 # max_value = np.max(dataset_x) # min_value = np.min(dataset_x) diff --git a/浙江行业电量/prophet_行业电量.py b/浙江行业电量/prophet_行业电量.py index 6a35327..965a9d2 100644 --- a/浙江行业电量/prophet_行业电量.py +++ b/浙江行业电量/prophet_行业电量.py @@ -12,6 +12,7 @@ def normal(data): file_dir = './浙江各地市行业电量数据' +# df = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江电量20231202.xlsx') for city in os.listdir(file_dir): df_city = pd.read_excel(os.path.join(file_dir, city)) df_city['stat_date'] = df_city['stat_date'].map(lambda x: str(x).strip()[:10]) @@ -19,13 +20,16 @@ for city in os.listdir(file_dir): list_goal = [] list_industry = [] result_dict = {} - for industry in df_city.columns[2:]: + for industry in df_city.columns[3:]: s1 = df_city[['stat_date', industry]] - df_train = s1[(s1['stat_date'] >= '2022-09-30') & (s1['stat_date'] <= '2023-10-31')].sort_values(by='stat_date') - df_train.rename(columns={'stat_date': 'ds', industry: 'y'}, inplace=True) + ds_train = s1[(s1['stat_date'] >= '2022-09-30') & (s1['stat_date'] <= '2023-11-27')].sort_values(by='stat_date') + + ds_train.rename(columns={'stat_date': 'ds', industry: 'y'}, inplace=True) + + df_train = ds_train.copy() df_train['y'] = df_train['y'].where(normal(df_train['y']), other=np.nan).fillna(method='ffill') - # df_test = s1[(s1['ds'] >= '2022-08-31') & (s1['ds'] <= '2023-10-31')].sort_values(by='ds') + model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True) model.add_country_holidays(country_name="CN") @@ -33,16 +37,16 @@ for city in os.listdir(file_dir): future = model.make_future_dataframe(periods=3, freq='D') predict = model.predict(future) - print(city,industry) + print(city[:2],industry) - predict = predict[['ds', 'yhat']].set_index('ds').loc['2023-10'].rename(columns={'yhat':'售电量'}) - df_train.rename(columns={'y':'售电量'},inplace=True) - result = pd.concat((df_train.set_index('ds').loc['2023-10'][:28],predict[-3:])) - result_dict[industry] = list(result['售电量']) + predict = predict[['ds', 'yhat']].set_index('ds').loc['2023-11'].rename(columns={'yhat':'售电量'}) + ds_train.rename(columns={'y':'售电量'},inplace=True) + result = pd.concat((ds_train.set_index('ds').loc['2023-11'][:27],predict[-3:])) + result_dict[industry] = list(result['售电量']) with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\行业电量v1130.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as writer: - pd.DataFrame(result_dict,index=pd.date_range(start=f'2023-10-01', end=f'2023-10-31', freq='D').strftime('%Y-%m-%d')).to_excel(writer,sheet_name=city) + pd.DataFrame(result_dict,index=pd.date_range(start=f'2023-11-01', end=f'2023-11-30', freq='D').strftime('%Y-%m-%d')).to_excel(writer,sheet_name=city[:2]) # df = predict.join(s1.set_index('ds')).loc['2023-8'] # df['偏差率'] = (df['y'] - df['yhat']) / df['y'] diff --git a/浙江行业电量/浙江各地市行业电量数据/丽水.xlsx b/浙江行业电量/浙江各地市行业电量数据/丽水.xlsx index 8fdb402..da2a915 100644 Binary files a/浙江行业电量/浙江各地市行业电量数据/丽水.xlsx and b/浙江行业电量/浙江各地市行业电量数据/丽水.xlsx differ diff --git a/浙江行业电量/浙江各地市行业电量数据/台州.xlsx b/浙江行业电量/浙江各地市行业电量数据/台州.xlsx index 754086c..9299df9 100644 Binary files a/浙江行业电量/浙江各地市行业电量数据/台州.xlsx and b/浙江行业电量/浙江各地市行业电量数据/台州.xlsx differ diff --git a/浙江行业电量/浙江各地市行业电量数据/嘉兴.xlsx b/浙江行业电量/浙江各地市行业电量数据/嘉兴.xlsx index b52bc0c..3994e0b 100644 Binary files a/浙江行业电量/浙江各地市行业电量数据/嘉兴.xlsx and b/浙江行业电量/浙江各地市行业电量数据/嘉兴.xlsx differ diff --git a/浙江行业电量/浙江各地市行业电量数据/宁波.xlsx b/浙江行业电量/浙江各地市行业电量数据/宁波.xlsx index e7b5e4e..6d5325d 100644 Binary files a/浙江行业电量/浙江各地市行业电量数据/宁波.xlsx and b/浙江行业电量/浙江各地市行业电量数据/宁波.xlsx differ diff --git a/浙江行业电量/浙江各地市行业电量数据/杭州.xlsx b/浙江行业电量/浙江各地市行业电量数据/杭州.xlsx index 583f974..4fec5ae 100644 Binary files a/浙江行业电量/浙江各地市行业电量数据/杭州.xlsx and b/浙江行业电量/浙江各地市行业电量数据/杭州.xlsx differ diff --git a/浙江行业电量/浙江各地市行业电量数据/温州.xlsx b/浙江行业电量/浙江各地市行业电量数据/温州.xlsx index 4ba3820..293baea 100644 Binary files a/浙江行业电量/浙江各地市行业电量数据/温州.xlsx and b/浙江行业电量/浙江各地市行业电量数据/温州.xlsx differ diff --git a/浙江行业电量/浙江各地市行业电量数据/湖州.xlsx b/浙江行业电量/浙江各地市行业电量数据/湖州.xlsx index de4d93d..7e52b24 100644 Binary files a/浙江行业电量/浙江各地市行业电量数据/湖州.xlsx and b/浙江行业电量/浙江各地市行业电量数据/湖州.xlsx differ diff --git a/浙江行业电量/浙江各地市行业电量数据/绍兴.xlsx b/浙江行业电量/浙江各地市行业电量数据/绍兴.xlsx index a79ba45..582830e 100644 Binary files a/浙江行业电量/浙江各地市行业电量数据/绍兴.xlsx and b/浙江行业电量/浙江各地市行业电量数据/绍兴.xlsx differ diff --git a/浙江行业电量/浙江各地市行业电量数据/舟山.xlsx b/浙江行业电量/浙江各地市行业电量数据/舟山.xlsx index cdee0d7..db4a1e9 100644 Binary files a/浙江行业电量/浙江各地市行业电量数据/舟山.xlsx and b/浙江行业电量/浙江各地市行业电量数据/舟山.xlsx differ diff --git a/浙江行业电量/浙江各地市行业电量数据/衢州.xlsx b/浙江行业电量/浙江各地市行业电量数据/衢州.xlsx index 965c8e5..d807cc2 100644 Binary files a/浙江行业电量/浙江各地市行业电量数据/衢州.xlsx and b/浙江行业电量/浙江各地市行业电量数据/衢州.xlsx differ diff --git a/浙江行业电量/浙江各地市行业电量数据/金华.xlsx b/浙江行业电量/浙江各地市行业电量数据/金华.xlsx index 6f081ae..1a4cbc3 100644 Binary files a/浙江行业电量/浙江各地市行业电量数据/金华.xlsx and b/浙江行业电量/浙江各地市行业电量数据/金华.xlsx differ diff --git a/浙江行业电量/移动平均.py b/浙江行业电量/移动平均.py new file mode 100644 index 0000000..3594da5 --- /dev/null +++ b/浙江行业电量/移动平均.py @@ -0,0 +1,42 @@ +import pandas as pd + +df = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江电量20231202.xlsx', sheet_name=2) +df['stat_date'] = pd.to_datetime(df['stat_date']) + + +# 移动平均 +city = df['city_name'].iloc[0] +print(city) +df_city1 = df[df['city_name'] == city].set_index('stat_date').loc['2023-11'] + +dict_big = {} +dict_ok = {} +resut_df = pd.DataFrame({}) +index_industry = [] +tq_list = [] +pred_list = [] +loss_list = [] +rate_list = [] +for industry in df_city1.columns[1:]: + index_industry.append(industry) + + df_moving_avg = pd.DataFrame(df_city1[:-3][industry], index=df_city1[:-3].index) + future = pd.date_range(start=df_city1.index[-3], periods=3, freq='D') + + for date in future: + df_moving_avg.loc[date, industry] = df_moving_avg[-3:].mean().values + loss = (df_city1[industry].tail(-3).sum() - df_moving_avg.tail(-3).sum()) / df_city1[industry].sum() + tq_list.append(df_city1[industry].sum()) + pred_list.append(df_moving_avg[industry].sum()) + loss_list.append(df_city1[industry].sum()-df_moving_avg[industry].sum()) + rate_list.append((df_city1[industry].sum()-df_moving_avg[industry].sum())/df_city1[industry].sum()) +resut_df = pd.DataFrame({'同期电量':tq_list,'预测电量':pred_list,'偏差':loss_list,'偏差率':rate_list},index=index_industry) +print(resut_df) +resut_df.to_excel(r'C:\Users\鸽子\Desktop\移动平均_丽水_行业.xlsx') + +# if loss.values >= 0.005: +# dict_big[industry] = loss.values[0] +# else: +# dict_ok[industry] = loss.values[0] +# print(len(dict_ok)) +# print(len(dict_big)) \ No newline at end of file