From 198bd291741854427c1ef5b49bba44fd03ef6bd8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=B8=BD=E5=AD=90?= <2316994765@qq.com>
Date: Wed, 24 Jan 2024 17:31:38 +0800
Subject: [PATCH] =?UTF-8?q?=E8=BE=93=E5=87=BA=E9=A2=84=E6=B5=8B=E7=BB=93?=
 =?UTF-8?q?=E6=9E=9C?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 北京安徽/北京_分压_移动平均.py    |  59 ++++++++
 北京安徽/北京_行业_移动平均.py    |  59 ++++++++
 北京安徽/北京区域电量.py            |  93 ++++++++++++
 .../北京行业电量_时间序列.py        |  74 ++++++++++
 北京安徽/安徽_分压_移动平均.py    |  62 ++++++++
 北京安徽/安徽_行业_移动平均.py    |  59 ++++++++
 北京安徽/安徽区域电量.py            |  90 ++++++++++++
 .../安徽行业电量_时间序列.py        |  74 ++++++++++
 区域电量19年至今数据.py              |   6 +-
 各地级市日电量模型/test01.py         |   0
 .../追加10月数据.py                      |   4 +-
 文档处理.py                               | 137 ++++++++++++++----
 ...电量_10kv.py => prophet_分压电量.py} |   0
 浙江行业电量/prophet_行业电量.py    |  42 +++---
 浙江行业电量/test1.py                   |   1 +
 .../行业电量_输出为3_步长为10.py    |   5 +-
 16 files changed, 710 insertions(+), 55 deletions(-)
 create mode 100644 北京安徽/北京_分压_移动平均.py
 create mode 100644 北京安徽/北京_行业_移动平均.py
 create mode 100644 北京安徽/北京区域电量.py
 create mode 100644 北京安徽/北京行业电量_时间序列.py
 create mode 100644 北京安徽/安徽_分压_移动平均.py
 create mode 100644 北京安徽/安徽_行业_移动平均.py
 create mode 100644 北京安徽/安徽区域电量.py
 create mode 100644 北京安徽/安徽行业电量_时间序列.py
 delete mode 100644 各地级市日电量模型/test01.py
 rename 浙江电压等级电量/{prophet_分压电量_10kv.py => prophet_分压电量.py} (100%)

diff --git a/北京安徽/北京_分压_移动平均.py b/北京安徽/北京_分压_移动平均.py
new file mode 100644
index 0000000..c25d9ae
--- /dev/null
+++ b/北京安徽/北京_分压_移动平均.py
@@ -0,0 +1,59 @@
+import pandas as pd
+pd.set_option('display.width',None)
+df = pd.read_excel(r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽分压区域.xlsx', sheet_name=0)
+df['pt_date'] = pd.to_datetime(df['pt_date'])

+# Moving-average forecast; one result sheet is written per city.
+for city in df['city_name'].drop_duplicates().dropna():

+    df_city = df[(df['city_name'] == city)&(df['county_name'].isnull())].set_index('pt_date').loc['2023-12'].sort_index()

+    # Only city-level rows (county_name is null) for December 2023 are used.
+    # The last 3 rows are held out; each of 2023-12-29..31 is then forecast as
+    # the mean of the 3 most recent non-zero values, so earlier forecasts feed
+    # into later ones.
+    # NOTE(review): the forecast dates are hard-coded and assume the data runs
+    # through 2023-12-31 -- confirm against the workbook.
+    # Per-day detail (actuals followed by forecasts), one column per series.
+    resut_df = pd.DataFrame({})

+    for industry in df_city.columns[2:]:

+        df_moving_avg = pd.DataFrame(df_city.iloc[:-3][industry], index=df_city.iloc[:-3].index)

+        future = pd.date_range(start='2023-12-29', periods=3, freq='D')

+        for date in future:
+            # .values[0] stores a scalar in the cell rather than a 1-element array.
+            df_moving_avg.loc[date, industry] = df_moving_avg[df_moving_avg.values!=0][-3:].mean().values[0]

+        resut_df = pd.concat([resut_df, df_moving_avg], axis=1)
+        # resut_df accumulates the per-day detail data.
+    print(city[-6:])

+    final_df = resut_df.sum()
+    final_df = pd.DataFrame(final_df,columns=['预测值'])
+    final_df['真实值'] = df_city[df_city.columns[2:]].sum()
+    final_df['偏差'] = final_df['真实值'] - final_df['预测值']
+    final_df['偏差率'] = final_df['偏差'] / final_df['真实值']
+    final_df['偏差率'] = final_df['偏差率'].apply(lambda x:"{:.5%}".format(x))
+    print(final_df)
+        # loss = (df_city1[industry].tail(-3).sum() - df_moving_avg.tail(-3).sum()) / df_city1[industry].sum()
+        # tq_list.append(df_city1[industry].sum())
+        # pred_list.append(df_moving_avg[industry].sum())
+        # loss_list.append(df_city1[industry].sum()-df_moving_avg[industry].sum())
+        # rate_list.append((df_city1[industry].sum()-df_moving_avg[industry].sum())/df_city1[industry].sum())
+    with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\移动平均_北京分压_12月.xlsx', mode='a', if_sheet_exists='replace',
+                        engine='openpyxl') as writer:  # append mode: the workbook must already exist
+        final_df.to_excel(writer, sheet_name=f'{city[-6:]}')
+
+# resut_df = pd.DataFrame({'同期电量':tq_list,'预测电量':pred_list,'偏差':loss_list,'偏差率':rate_list},index=index_industry)
+# print(resut_df)
+# resut_df.to_excel(r'C:\Users\鸽子\Desktop\移动平均_丽水_行业.xlsx')
+
+#     if loss.values >= 0.005:
+#         dict_big[industry] = loss.values[0]
+#     else:
+#         dict_ok[industry] = loss.values[0]
+# print(len(dict_ok))
+# print(len(dict_big))
diff --git a/北京安徽/北京_行业_移动平均.py b/北京安徽/北京_行业_移动平均.py
new file mode 100644
index 0000000..ae33433
--- /dev/null
+++ b/北京安徽/北京_行业_移动平均.py
@@ -0,0 +1,59 @@
+import pandas as pd
+pd.set_option('display.width',None)
+df = pd.read_excel(r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽行业.xlsx', sheet_name=0)
+df['stat_date'] = pd.to_datetime(df['stat_date'])

+# Moving-average forecast; one result sheet is written per city.
+for city in df['city_name'].drop_duplicates().dropna():

+    df_city = df[(df['city_name'] == city)&(df['county_name'].isnull())].set_index('stat_date').loc['2023-12'].sort_index()

+    # Only city-level rows (county_name is null) for December 2023 are used.
+    # The last 3 rows are held out; each of 2023-12-29..31 is then forecast as
+    # the mean of the 3 most recent non-zero values, so earlier forecasts feed
+    # into later ones.
+    # NOTE(review): the forecast dates are hard-coded and assume the data runs
+    # through 2023-12-31 -- confirm against the workbook.
+    # Per-day detail (actuals followed by forecasts), one column per industry.
+    resut_df = pd.DataFrame({})

+    for industry in df_city.columns[2:]:

+        df_moving_avg = pd.DataFrame(df_city.iloc[:-3][industry], index=df_city.iloc[:-3].index)

+        future = pd.date_range(start='2023-12-29', periods=3, freq='D')

+        for date in future:
+            # .values[0] stores a scalar in the cell rather than a 1-element array.
+            df_moving_avg.loc[date, industry] = df_moving_avg[df_moving_avg.values!=0][-3:].mean().values[0]

+        resut_df = pd.concat([resut_df, df_moving_avg], axis=1)
+        # resut_df accumulates the per-day detail data.
+    print(city[-6:])

+    final_df = resut_df.sum()
+    final_df = pd.DataFrame(final_df,columns=['预测值'])
+    final_df['真实值'] = df_city[df_city.columns[2:]].sum()
+    final_df['偏差'] = final_df['真实值'] - final_df['预测值']
+    final_df['偏差率'] = final_df['偏差'] / final_df['真实值']
+    final_df['偏差率'] = final_df['偏差率'].apply(lambda x:"{:.5%}".format(x))
+    print(final_df)
+        # loss = (df_city1[industry].tail(-3).sum() - df_moving_avg.tail(-3).sum()) / df_city1[industry].sum()
+        # tq_list.append(df_city1[industry].sum())
+        # pred_list.append(df_moving_avg[industry].sum())
+        # loss_list.append(df_city1[industry].sum()-df_moving_avg[industry].sum())
+        # rate_list.append((df_city1[industry].sum()-df_moving_avg[industry].sum())/df_city1[industry].sum())
+    with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\移动平均_北京行业_12月.xlsx', mode='a', if_sheet_exists='replace',
+                        engine='openpyxl') as writer:  # append mode: the workbook must already exist
+        final_df.to_excel(writer, sheet_name=f'{city[-6:]}')
+
+# resut_df = pd.DataFrame({'同期电量':tq_list,'预测电量':pred_list,'偏差':loss_list,'偏差率':rate_list},index=index_industry)
+# print(resut_df)
+# resut_df.to_excel(r'C:\Users\鸽子\Desktop\移动平均_丽水_行业.xlsx')
+
+#     if loss.values >= 0.005:
+#         dict_big[industry] = loss.values[0]
+#     else:
+#         dict_ok[industry] = loss.values[0]
+# print(len(dict_ok))
+# print(len(dict_big))
diff --git a/北京安徽/北京区域电量.py b/北京安徽/北京区域电量.py
new file mode 100644
index 0000000..beb5e28
--- /dev/null
+++ b/北京安徽/北京区域电量.py
@@ -0,0 +1,93 @@
+import pandas as pd
+import datetime
+import math
+import chinese_calendar as cc
+import xgboost as xgb
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import r2_score
+
+
+def holiday_work(x):
+    # Encode a date as 0 (working day) or 1 (holiday); is_holiday() is the complement of is_workday().
+    if cc.is_workday(x):
+        return 0
+    return 1
+
+def normal(nd):
+    # Keep values strictly inside (Q1 - 1.5*IQR, Q3 + 1.5*IQR); quartiles are computed once.
+    q1, q3 = nd.quantile(0.25), nd.quantile(0.75)
+    return nd[(nd < q3 + 1.5 * (q3 - q1)) & (nd > q1 - 1.5 * (q3 - q1))]
+
+def jq(y, x):
+    day_offset = int(365.242 * (y - 1900) + 6.2 + 15.22 * x - 1.9 * math.sin(0.262 * x))  # days since 1899-12-31 for term index x of year y
+    return datetime.date(1899, 12, 31) + datetime.timedelta(days=day_offset)
+
+jq_list = ['小寒', '大寒', '立春', '雨水', '惊蛰', '春分', '清明', '谷雨', '立夏', '小满', '芒种', '夏至', '小暑',
+               '大暑', '立秋', '处暑', '白露', '秋分', '寒露', '霜降', '立冬', '小雪', '大雪', '冬至']
+jq_dict = {}  # 'YYYY-MM-DD' start date -> solar-term name, for 2023 and 2024
+for j in range(2023, 2025):
+    for i, term in enumerate(jq_list):
+        jq_dict[jq(j, i).strftime('%Y-%m-%d')] = term

+ys_df = pd.read_excel(r'C:\python-project\p1031\入模数据\杭州.xlsx')
+ys_df['dtdate'] = pd.to_datetime(ys_df['dtdate'])
+ys_dict = dict(zip(ys_df['dtdate'], ys_df['holiday']))  # date -> holiday code from the prepared table

+pd.set_option('display.width', None)
+df_qy_ah = pd.read_excel(r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽分压区域.xlsx', sheet_name=3)
+df_qy_ah = df_qy_ah[df_qy_ah['county_name'].isnull()].copy()  # city-level rows only; copy so the assignment below is safe
+df_qy_ah['pt_date'] = pd.to_datetime(df_qy_ah['pt_date'])
+df_qy_ah = df_qy_ah[df_qy_ah['pt_date'] <= '2023-12-31']
+
+# Integer codes for the solar-term names; loop-invariant, so defined once here.
+label_dict = {'冬至': 0, '处暑': 1, '夏至': 2, '大寒': 3, '大暑': 4, '大雪': 5, '寒露': 6, '小寒': 7, '小暑': 8,
+              '小满': 9, '小雪': 10, '惊蛰': 11, '春分': 12, '清明': 13, '白露': 14, '秋分': 15, '立冬': 16,
+              '立夏': 17,
+              '立春': 18, '立秋': 19, '芒种': 20, '谷雨': 21, '雨水': 22, '霜降': 23}

+for city in df_qy_ah['city_name'].drop_duplicates():
+    # Work on an independent copy so the column assignments below are well-defined.
+    df_ah_city = df_qy_ah[df_qy_ah['city_name'] == city].drop_duplicates().copy()
+    # Tag solar-term start dates (index-aligned), carry values forward, default leading gaps to '冬至'.
+    df_ah_city['24ST'] = df_qy_ah['pt_date'].astype('string').map(jq_dict)
+    df_ah_city = df_ah_city.ffill()
+    df_ah_city['24ST'] = df_ah_city['24ST'].fillna('冬至').map(label_dict)

+    df_ah_city['holiday'] = df_qy_ah['pt_date'].map(ys_dict)
+    # Dates absent from the holiday table fall back to holiday_work().
+    holiday_null_s = df_ah_city[df_ah_city['holiday'].isnull()]['pt_date']
+    holiday_null_s = holiday_null_s.map(holiday_work)
+    holiday_null_s.iloc[-2:] = 3  # NOTE(review): last two fallback dates are forced to code 3 -- meaning not visible here

+    df_ah_city['holiday'] = df_ah_city['holiday'].fillna(holiday_null_s.to_dict())
+    df_ah_city = df_ah_city.set_index('pt_date')
+    df_ = df_ah_city.loc['2023-12']['power_sal']  # December 2023 actuals, taken before outlier removal

+    # Remove IQR outliers. NOTE(review): if one of the last 3 days is dropped here, eval_x below no
+    # longer lines up with the last 3 rows of df_ -- confirm this cannot happen.
+    df_ah_city = df_ah_city.loc[normal(df_ah_city['power_sal']).index]
+    print(city)
+    print(df_ah_city)

+    X, y = df_ah_city.drop(columns=['city_name', 'county_name', 'power_sal']).iloc[-180:-3], \
+        df_ah_city['power_sal'].iloc[-180:-3]
+    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+    # Features of the 3 held-out rows that are forecast below.
+    eval_x = df_ah_city.drop(columns=['city_name', 'county_name', 'power_sal']).iloc[-3:]
+    model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
+    model.fit(x_train, y_train)
+    y_pred = model.predict(x_test)
+    result_test = pd.DataFrame({'test': y_test, 'pred': y_pred}, index=y_test.index)

+    print((abs(result_test['pred'] - result_test['test']) / result_test['test']).mean())
+    print(r2_score(y_test, y_pred))

+    final_df = pd.DataFrame({'真实值': list(df_.values), '预测值': list(df_.values)[:-3] + list(model.predict(eval_x))},
+                            index=df_.index)
+    final_df['偏差率'] = (final_df['真实值'] - final_df['预测值']).sum() / final_df['真实值'].sum()
+    final_df['偏差率'] = final_df['偏差率'].apply(lambda x: "{:.5%}".format(x))
+    print(final_df)

+    with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\北京区域电量_12月.xlsx', if_sheet_exists='replace', mode='a',
+                        engine='openpyxl') as writer:
+        final_df.to_excel(writer, sheet_name=f'{city}')
diff --git a/北京安徽/北京行业电量_时间序列.py b/北京安徽/北京行业电量_时间序列.py
new file mode 100644
index 0000000..45a603c
--- /dev/null
+++ b/北京安徽/北京行业电量_时间序列.py
@@ -0,0 +1,74 @@
+from prophet import Prophet
+import pandas as pd
+import os
+import numpy as np
+
+
+def normal(data):
+    # Boolean mask: True where a value lies within [Q1 - 1.5*IQR, Q3 + 1.5*IQR]; quartiles are computed once.
+    q1, q3 = data.quantile(0.25), data.quantile(0.75)
+    return (data <= q3 + 1.5 * (q3 - q1)) & (data >= q1 - 1.5 * (q3 - q1))
+
+
+excel_file = r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽行业.xlsx'
+df = pd.read_excel(excel_file, sheet_name=0)

+for city in df['city_name'].drop_duplicates().dropna():
+    df_city = df[df['city_name'] == city].copy()  # copy: the column assignment below must not target a slice of df
+    df_city['stat_date'] = pd.to_datetime(df_city['stat_date'])
+    # NOTE(review): unlike the moving-average scripts, rows are not restricted to county_name.isnull() -- confirm.
+    list_real = []
+    list_pred = []
+    list_industry = []

+    for industry in df_city.columns[3:]:
+        s1 = df_city[['stat_date', industry]]

+        ds_train = s1[(s1['stat_date'] >= '2023-01-01') & (s1['stat_date'] <= '2023-12-31')].sort_values(by='stat_date')
+        ds_train = ds_train.rename(columns={'stat_date': 'ds', industry: 'y'})

+        # Hold out the last 3 days; IQR outliers in the training series take the previous valid value.
+        df_train = ds_train.iloc[:-3].copy()
+        df_train['y'] = df_train['y'].where(normal(df_train['y']), other=np.nan).ffill()

+        model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True)
+        model.add_country_holidays(country_name="CN")
+        model.fit(df_train)
+        future = model.make_future_dataframe(periods=3, freq='D')

+        predict = model.predict(future)
+        print(city[-6:], industry)

+        predict = predict[['ds', 'yhat']].set_index('ds').loc['2023-12'].rename(columns={'yhat': '售电量'})
+        ds_train = ds_train.rename(columns={'y': '售电量'})

+        # December result: actuals up to the hold-out, followed by the 3 forecast days.
+        result = pd.concat((ds_train.set_index('ds').loc['2023-12'][:-3], predict[-3:]))
+        result['真实值'] = ds_train.set_index('ds').loc['2023-12']['售电量']
+        result = result[['真实值','售电量']]
+        result.columns = ['真实值','预测值']

+        list_industry.append(industry)
+        list_real.append(result['真实值'].sum())
+        list_pred.append(result['预测值'].sum())

+    final_df = pd.DataFrame({'真实值':list_real,'预测值':list_pred},index=list_industry)
+    final_df['偏差'] = final_df['真实值']-final_df['预测值']
+    final_df['偏差率'] = final_df['偏差']/final_df['真实值']
+    final_df['偏差率'] = final_df['偏差率'].apply(lambda x: "{:.5%}".format(x))

+    with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\时间序列算法_北京行业_12月.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as writer:
+        final_df.to_excel(writer,sheet_name=f'{city[-6:]}')
+
+    # df = predict.join(s1.set_index('ds')).loc['2023-8']
+    # df['偏差率'] = (df['y'] - df['yhat']) / df['y']
+    # df['goal'] = (df['y'] - df['yhat'])[-3:].sum() / df['y'].sum()
+    # list_goal.append((df['y'] - df['yhat'])[-3:].sum() / df['y'].sum())
+    # list_industry.append(industry)
+
+    # df = pd.DataFrame({'industry': list_industry, 'goal': list_goal})
+    # df.to_csv(fr'C:\Users\鸽子\Desktop\行业8月偏差\{city[:2]}_goal.csv', index=False, encoding='gbk')
+
+    # with open(r'C:\Users\鸽子\Desktop\goal_8.txt','a') as f:
+    #     f.write(f'{city[:2]}\n')
+    #     df['goal'].value_counts(bins=[-np.inf,-0.05, -0.01, -0.005, 0, 0.005, 0.01, 0.02, 0.05,np.inf], sort=False).to_csv(f,header=False,sep='\t')
diff --git a/北京安徽/安徽_分压_移动平均.py b/北京安徽/安徽_分压_移动平均.py
new file mode 100644
index 0000000..bf47872
--- /dev/null
+++ b/北京安徽/安徽_分压_移动平均.py
@@ -0,0 +1,62 @@
+import pandas as pd
+pd.set_option('display.width',None)
+df = pd.read_excel(r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽分压区域.xlsx', sheet_name=1)
+df['pt_date'] = pd.to_datetime(df['pt_date'])

+# Moving-average forecast; one result sheet is written per city.
+for city in df['city_name'].drop_duplicates().dropna():

+    df_city = df[(df['city_name'] == city)&(df['county_name'].isnull())].set_index('pt_date').loc['2023-12'].sort_index()

+    # Only city-level rows (county_name is null) for December 2023 are used.
+    # The last 3 rows are held out; each of 2023-12-29..31 is then forecast as
+    # the mean of the 3 most recent non-zero values, so earlier forecasts feed
+    # into later ones.
+    # NOTE(review): the forecast dates are hard-coded and assume the data runs
+    # through 2023-12-31 -- confirm against the workbook.
+    # Per-day detail (actuals followed by forecasts), one column per series.
+    resut_df = pd.DataFrame({})


+    for industry in df_city.columns[2:]:

+        df_moving_avg = pd.DataFrame(df_city.iloc[:-3][industry], index=df_city.iloc[:-3].index)

+        future = pd.date_range(start='2023-12-29', periods=3, freq='D')

+        for date in future:
+            df_moving_avg.loc[date, industry] = df_moving_avg[df_moving_avg.values!=0][-3:].mean().values[0]


+        resut_df = pd.concat([resut_df, df_moving_avg], axis=1)
+        # resut_df accumulates the per-day detail data.

+    print(city[-6:])

+    final_df = resut_df.sum()
+    final_df = pd.DataFrame(final_df,columns=['预测值'])
+    final_df['真实值'] = df_city[df_city.columns[2:]].sum()
+    final_df['偏差'] = final_df['真实值'] - final_df['预测值']
+    final_df['偏差率'] = final_df['偏差'] / final_df['真实值']
+    final_df['偏差率'] = final_df['偏差率'].apply(lambda x:"{:.5%}".format(x))
+    print(final_df)
+        # loss = (df_city1[industry].tail(-3).sum() - df_moving_avg.tail(-3).sum()) / df_city1[industry].sum()
+        # tq_list.append(df_city1[industry].sum())
+        # pred_list.append(df_moving_avg[industry].sum())
+        # loss_list.append(df_city1[industry].sum()-df_moving_avg[industry].sum())
+        # rate_list.append((df_city1[industry].sum()-df_moving_avg[industry].sum())/df_city1[industry].sum())

+    with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\移动平均_安徽分压_12月.xlsx', mode='a', if_sheet_exists='replace',
+                        engine='openpyxl') as writer:  # append mode: the workbook must already exist
+        final_df.to_excel(writer, sheet_name=f'{city[-6:]}')
+
+# resut_df = pd.DataFrame({'同期电量':tq_list,'预测电量':pred_list,'偏差':loss_list,'偏差率':rate_list},index=index_industry)
+# print(resut_df)
+# resut_df.to_excel(r'C:\Users\鸽子\Desktop\移动平均_丽水_行业.xlsx')
+
+#     if loss.values >= 0.005:
+#         dict_big[industry] = loss.values[0]
+#     else:
+#         dict_ok[industry] = loss.values[0]
+# print(len(dict_ok))
+# print(len(dict_big))
diff --git a/北京安徽/安徽_行业_移动平均.py b/北京安徽/安徽_行业_移动平均.py
new file mode 100644
index 0000000..25c4c3b
--- /dev/null
+++ b/北京安徽/安徽_行业_移动平均.py
@@ -0,0 +1,59 @@
+import pandas as pd
+pd.set_option('display.width',None)
+df = pd.read_excel(r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽行业.xlsx', sheet_name=1)
+df['stat_date'] = pd.to_datetime(df['stat_date'])

+# Moving-average forecast; one result sheet is written per city.
+for city in df['city_name'].drop_duplicates().dropna():

+    df_city = df[(df['city_name'] == city)&(df['county_name'].isnull())].set_index('stat_date').loc['2023-12'].sort_index()

+    # Only city-level rows (county_name is null) for December 2023 are used.
+    # The last 3 rows are held out; each of 2023-12-29..31 is then forecast as
+    # the mean of the 3 most recent non-zero values, so earlier forecasts feed
+    # into later ones.
+    # NOTE(review): the forecast dates are hard-coded and assume the data runs
+    # through 2023-12-31 -- confirm against the workbook.
+    # Per-day detail (actuals followed by forecasts), one column per industry.
+    resut_df = pd.DataFrame({})

+    for industry in df_city.columns[2:]:

+        df_moving_avg = pd.DataFrame(df_city.iloc[:-3][industry], index=df_city.iloc[:-3].index)

+        future = pd.date_range(start='2023-12-29', periods=3, freq='D')

+        for date in future:
+            # .values[0] stores a scalar in the cell rather than a 1-element array.
+            df_moving_avg.loc[date, industry] = df_moving_avg[df_moving_avg.values!=0][-3:].mean().values[0]

+        resut_df = pd.concat([resut_df, df_moving_avg], axis=1)
+        # resut_df accumulates the per-day detail data.
+    print(city[-6:])

+    final_df = resut_df.sum()
+    final_df = pd.DataFrame(final_df,columns=['预测值'])
+    final_df['真实值'] = df_city[df_city.columns[2:]].sum()
+    final_df['偏差'] = final_df['真实值'] - final_df['预测值']
+    final_df['偏差率'] = final_df['偏差'] / final_df['真实值']
+    final_df['偏差率'] = final_df['偏差率'].apply(lambda x:"{:.5%}".format(x))
+    print(final_df)
+        # loss = (df_city1[industry].tail(-3).sum() - df_moving_avg.tail(-3).sum()) / df_city1[industry].sum()
+        # tq_list.append(df_city1[industry].sum())
+        # pred_list.append(df_moving_avg[industry].sum())
+        # loss_list.append(df_city1[industry].sum()-df_moving_avg[industry].sum())
+        # rate_list.append((df_city1[industry].sum()-df_moving_avg[industry].sum())/df_city1[industry].sum())
+    with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\移动平均_安徽行业_12月.xlsx', mode='a', if_sheet_exists='replace',
+                        engine='openpyxl') as writer:  # append mode: the workbook must already exist
+        final_df.to_excel(writer, sheet_name=f'{city[-6:]}')
+
+# resut_df = pd.DataFrame({'同期电量':tq_list,'预测电量':pred_list,'偏差':loss_list,'偏差率':rate_list},index=index_industry)
+# print(resut_df)
+# resut_df.to_excel(r'C:\Users\鸽子\Desktop\移动平均_丽水_行业.xlsx')
+
+#     if loss.values >= 0.005:
+#         dict_big[industry] = loss.values[0]
+#     else:
+#         dict_ok[industry] = loss.values[0]
+# print(len(dict_ok))
+# print(len(dict_big))
diff --git a/北京安徽/安徽区域电量.py b/北京安徽/安徽区域电量.py
new file mode 100644
index 0000000..cf969e1
--- /dev/null
+++ b/北京安徽/安徽区域电量.py
@@ -0,0 +1,90 @@
+import pandas as pd
+import datetime
+import math
+import chinese_calendar as cc
+import xgboost as xgb
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import r2_score
+
+
+def holiday_work(x):
+    # Encode a date as 0 (working day) or 1 (holiday); is_holiday() is the complement of is_workday().
+    if cc.is_workday(x):
+        return 0
+    return 1
+def jq(y, x):
+    day_offset = int(365.242 * (y - 1900) + 6.2 + 15.22 * x - 1.9 * math.sin(0.262 * x))  # days since 1899-12-31 for term index x of year y
+    return datetime.date(1899, 12, 31) + datetime.timedelta(days=day_offset)
+
+def normal(nd):
+    # Keep values strictly inside (Q1 - 1.5*IQR, Q3 + 1.5*IQR); quartiles are computed once.
+    q1, q3 = nd.quantile(0.25), nd.quantile(0.75)
+    return nd[(nd < q3 + 1.5 * (q3 - q1)) & (nd > q1 - 1.5 * (q3 - q1))]
+
+jq_list = ['小寒', '大寒', '立春', '雨水', '惊蛰', '春分', '清明', '谷雨', '立夏', '小满', '芒种', '夏至', '小暑',
+               '大暑', '立秋', '处暑', '白露', '秋分', '寒露', '霜降', '立冬', '小雪', '大雪', '冬至']
+label_dict = {'冬至': 0, '处暑': 1, '夏至': 2, '大寒': 3, '大暑': 4, '大雪': 5, '寒露': 6, '小寒': 7, '小暑': 8,
+              '小满': 9, '小雪': 10, '惊蛰': 11, '春分': 12, '清明': 13, '白露': 14, '秋分': 15, '立冬': 16, '立夏': 17,
+              '立春': 18, '立秋': 19, '芒种': 20, '谷雨': 21, '雨水': 22, '霜降': 23}
+jq_dict = {}  # 'YYYY-MM-DD' start date -> solar-term name, for 2023 and 2024
+for j in range(2023, 2025):
+    for i, term in enumerate(jq_list):
+        jq_dict[jq(j, i).strftime('%Y-%m-%d')] = term

+pd.set_option('display.width', None)
+df_qy_ah = pd.read_excel(r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽分压区域.xlsx', sheet_name=2)
+df_qy_ah = df_qy_ah[df_qy_ah['county_name'].isnull()].copy()  # city-level rows only; copy so the assignment below is safe
+df_qy_ah['pt_date'] = pd.to_datetime(df_qy_ah['pt_date'])
+df_qy_ah = df_qy_ah[df_qy_ah['pt_date'] <= '2023-12-31']
+# Holiday lookup table (date -> holiday code); loop-invariant, so it is loaded once up front.
+ys_df = pd.read_excel(r'C:\python-project\p1031\入模数据\杭州.xlsx')
+ys_df['dtdate'] = pd.to_datetime(ys_df['dtdate'])
+ys_dict = dict(zip(ys_df['dtdate'], ys_df['holiday']))

+for city in df_qy_ah['city_name'].drop_duplicates():
+    # Work on an independent copy so the column assignments below are well-defined.
+    df_ah_city = df_qy_ah[df_qy_ah['city_name'] == city].drop_duplicates().copy()

+    # Tag solar-term start dates (index-aligned), carry values forward, default leading gaps to '冬至'.
+    df_ah_city['24ST'] = df_qy_ah['pt_date'].astype('string').map(jq_dict)
+    df_ah_city = df_ah_city.ffill()
+    df_ah_city['24ST'] = df_ah_city['24ST'].fillna('冬至').map(label_dict)

+    df_ah_city['holiday'] = df_qy_ah['pt_date'].map(ys_dict)

+    # Dates absent from the holiday table fall back to holiday_work().
+    holiday_null_s = df_ah_city[df_ah_city['holiday'].isnull()]['pt_date']
+    holiday_null_s = holiday_null_s.map(holiday_work)
+    holiday_null_s.iloc[-2:] = 3  # NOTE(review): last two fallback dates are forced to code 3 -- meaning not visible here

+    df_ah_city['holiday'] = df_ah_city['holiday'].fillna(holiday_null_s.to_dict())
+    df_ah_city = df_ah_city.set_index('pt_date')
+    df_ = df_ah_city.loc['2023-12']['power_sal']  # December 2023 actuals, taken before outlier removal

+    # Remove IQR outliers. NOTE(review): if one of the last 3 days is dropped here, eval_x below no
+    # longer lines up with the last 3 rows of df_ -- confirm this cannot happen.
+    df_ah_city = df_ah_city.loc[normal(df_ah_city['power_sal']).index]
+    print(city)
+    print(df_ah_city)

+    X, y = df_ah_city.drop(columns=['city_name', 'county_name', 'power_sal']).iloc[-180:-3], \
+        df_ah_city['power_sal'].iloc[-180:-3]
+    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+    # Features of the 3 held-out rows that are forecast below.
+    eval_x = df_ah_city.drop(columns=['city_name', 'county_name', 'power_sal']).iloc[-3:]
+    model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
+    model.fit(x_train, y_train)
+    y_pred = model.predict(x_test)
+    result_test = pd.DataFrame({'test': y_test, 'pred': y_pred}, index=y_test.index)

+    print((abs(result_test['pred'] - result_test['test']) / result_test['test']).mean())
+    print(r2_score(y_test, y_pred))

+    final_df = pd.DataFrame({'真实值':list(df_.values),'预测值':list(df_.values)[:-3]+list(model.predict(eval_x))},index=df_.index)
+    final_df['偏差率'] = (final_df['真实值']-final_df['预测值']).sum()/final_df['真实值'].sum()
+    final_df['偏差率'] = final_df['偏差率'].apply(lambda x: "{:.5%}".format(x))
+    print(final_df)

+    with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\安徽区域电量_12月.xlsx', if_sheet_exists='replace', mode='a',
+                        engine='openpyxl') as writer:
+        final_df.to_excel(writer,sheet_name=f'{city}')
diff --git a/北京安徽/安徽行业电量_时间序列.py b/北京安徽/安徽行业电量_时间序列.py
new file mode 100644
index 0000000..cf47659
--- /dev/null
+++ b/北京安徽/安徽行业电量_时间序列.py
@@ -0,0 +1,74 @@
+from prophet import Prophet
+import pandas as pd
+import os
+import numpy as np
+
+
+def normal(data):
+    # Boolean mask: True where a value lies within [Q1 - 1.5*IQR, Q3 + 1.5*IQR]; quartiles are computed once.
+    q1, q3 = data.quantile(0.25), data.quantile(0.75)
+    return (data <= q3 + 1.5 * (q3 - q1)) & (data >= q1 - 1.5 * (q3 - q1))
+
+
+excel_file = r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽行业.xlsx'
+df = pd.read_excel(excel_file, sheet_name=1)

+for city in df['city_name'].drop_duplicates().dropna():
+    df_city = df[df['city_name'] == city].copy()  # copy: the column assignment below must not target a slice of df
+    df_city['stat_date'] = pd.to_datetime(df_city['stat_date'])
+    # NOTE(review): unlike the moving-average scripts, rows are not restricted to county_name.isnull() -- confirm.
+    list_real = []
+    list_pred = []
+    list_industry = []

+    for industry in df_city.columns[3:]:
+        s1 = df_city[['stat_date', industry]]

+        ds_train = s1[(s1['stat_date'] >= '2023-01-01') & (s1['stat_date'] <= '2023-12-31')].sort_values(by='stat_date')
+        ds_train = ds_train.rename(columns={'stat_date': 'ds', industry: 'y'})

+        # Hold out the last 3 days; IQR outliers in the training series take the previous valid value.
+        df_train = ds_train.iloc[:-3].copy()
+        df_train['y'] = df_train['y'].where(normal(df_train['y']), other=np.nan).ffill()

+        model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True)
+        model.add_country_holidays(country_name="CN")
+        model.fit(df_train)
+        future = model.make_future_dataframe(periods=3, freq='D')

+        predict = model.predict(future)
+        print(city[-6:], industry)

+        predict = predict[['ds', 'yhat']].set_index('ds').loc['2023-12'].rename(columns={'yhat': '售电量'})
+        ds_train = ds_train.rename(columns={'y': '售电量'})

+        # December result: actuals up to the hold-out, followed by the 3 forecast days.
+        result = pd.concat((ds_train.set_index('ds').loc['2023-12'][:-3], predict[-3:]))
+        result['真实值'] = ds_train.set_index('ds').loc['2023-12']['售电量']
+        result = result[['真实值','售电量']]
+        result.columns = ['真实值','预测值']

+        list_industry.append(industry)
+        list_real.append(result['真实值'].sum())
+        list_pred.append(result['预测值'].sum())

+    final_df = pd.DataFrame({'真实值':list_real,'预测值':list_pred},index=list_industry)
+    final_df['偏差'] = final_df['真实值']-final_df['预测值']
+    final_df['偏差率'] = final_df['偏差']/final_df['真实值']
+    final_df['偏差率'] = final_df['偏差率'].apply(lambda x: "{:.5%}".format(x))

+    with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\时间序列算法_安徽行业_12月.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as writer:
+        final_df.to_excel(writer,sheet_name=f'{city[-6:]}')
+
+    # df = predict.join(s1.set_index('ds')).loc['2023-8']
+    # df['偏差率'] = (df['y'] - df['yhat']) / df['y']
+    # df['goal'] = (df['y'] - df['yhat'])[-3:].sum() / df['y'].sum()
+    # list_goal.append((df['y'] - df['yhat'])[-3:].sum() / df['y'].sum())
+    # list_industry.append(industry)
+
+    # df = pd.DataFrame({'industry': list_industry, 'goal': list_goal})
+    # df.to_csv(fr'C:\Users\鸽子\Desktop\行业8月偏差\{city[:2]}_goal.csv', index=False, encoding='gbk')
+
+    # with open(r'C:\Users\鸽子\Desktop\goal_8.txt','a') as f:
+    #     f.write(f'{city[:2]}\n')
+    #     df['goal'].value_counts(bins=[-np.inf,-0.05, -0.01, -0.005, 0, 0.005, 0.01, 0.02, 0.05,np.inf], sort=False).to_csv(f,header=False,sep='\t')
diff --git a/区域电量19年至今数据.py b/区域电量19年至今数据.py
index bed20b7..7c5b4b3 100644
--- a/区域电量19年至今数据.py
+++ b/区域电量19年至今数据.py
@@ -32,12 +32,12 @@ def normal(nd):
 # df = df[['dtdate','tem_max','tem_min']]
 # # print(df.head())
 # # print(df_elec.head())
-#
+
 # merge_df = pd.merge(df_elec,df,left_on='pt_date',right_on='dtdate')[['pt_date','tem_max','tem_min','售电量']]
 # merge_df.set_index('pt_date',inplace=True)
 # merge_df.index = pd.to_datetime(merge_df.index,format='%Y%m%d')
-#
-#
+
+
 # merge_df['month'] = merge_df.index.strftime('%Y-%m-%d').str[5:7]
 # merge_df['month'] = merge_df['month'].astype('int')
 # merge_df.to_csv('杭州入模数据.csv',encoding='gbk')
diff --git a/各地级市日电量模型/test01.py b/各地级市日电量模型/test01.py
deleted file mode 100644
index e69de29..0000000
diff --git a/各地级市日电量模型/追加10月数据.py b/各地级市日电量模型/追加10月数据.py
index 9b5ee2a..10ff61d 100644
--- a/各地级市日电量模型/追加10月数据.py
+++ b/各地级市日电量模型/追加10月数据.py
@@ -9,15 +9,17 @@ df['dtdate'] = pd.to_datetime(df['dtdate'],format='%Y%m%d').astype('str')
 df['city_name'] = df['city_name'].map(lambda x:x.strip())
 df['city_name'] = df['city_name'].str[:-1]
 df['dtdate'] = df['dtdate'].map(lambda x:x.strip())
+# Classify a date: 0 = working day, 1 = holiday
 def holiday_work(x):
     if cc.is_workday(x):
         return 0
     if cc.is_holiday(x):
         return 1
+# Compute the date of the x-th solar term of year y
 def jq(y,x):
     a=365.242 * (y - 1900) + 6.2 + 15.22 * x - 1.9 * math.sin(0.262 * x)
     return datetime.date(1899,12,31)+datetime.timedelta(days=int(a))
-# print(jq(2023,1))
+print(jq(2023,1))
 jq_list=['小寒', '大寒', '立春', '雨水', '惊蛰', '春分', '清明', '谷雨', '立夏', '小满', '芒种', '夏至', '小暑', '大暑', '立秋', '处暑', '白露', '秋分', '寒露', '霜降', '立冬', '小雪', '大雪','冬至']
 jq_dict={}
 for j in range(2023,2024):
diff --git a/文档处理.py b/文档处理.py
index 6395243..befe42e 100644
--- a/文档处理.py
+++ b/文档处理.py
@@ -63,6 +63,7 @@
 import os
 from openpyxl import Workbook
 import pandas as pd
+
 # df = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江省11月分行业售电量预测v2.xlsx',sheet_name=1)
 # print(df.head())
 # print(df[df.columns[2:]].groupby(df['city_name']).sum().T)
@@ -94,6 +95,7 @@ file_dir = r'C:\Users\鸽子\Desktop\11月区县分压预测'
 import matplotlib.pyplot as plt
 import matplotlib as mpl
 import matplotlib.dates as mdates
+
 # date_rng = pd.date_range(start=df['4.有色金属矿采选业'].index[0], end=df['4.有色金属矿采选业'].index[-1], freq='D')
 # mpl.rcParams['font.sans-serif']=['kaiti']
 # print(df['4.有色金属矿采选业'][:-1])
@@ -110,18 +112,18 @@ import matplotlib.dates as mdates
 # plt.show()
 
 
-excel_file = pd.ExcelFile(r'C:\Users\鸽子\Desktop\浙江电量20231202.xlsx')
-df_city_real = pd.read_excel(excel_file,sheet_name=0)
-df_city_real = df_city_real[df_city_real['county_name'].isnull()]
-df_city_real['city_name'] = df_city_real['city_name'].str[4:6]
-# print(df_city_real)
-
-file_dir = r'C:\Users\鸽子\Desktop\发行&预测\区域行业分压预测v1129'
-print(os.listdir(file_dir))
+# excel_file = pd.ExcelFile(r'C:\Users\鸽子\Desktop\浙江电量20231202.xlsx')
+# df_city_real = pd.read_excel(excel_file,sheet_name=0)
+# df_city_real = df_city_real[df_city_real['county_name'].isnull()]
+# df_city_real['city_name'] = df_city_real['city_name'].str[4:6]
+# # print(df_city_real)
+#
+# file_dir = r'C:\Users\鸽子\Desktop\发行&预测\区域行业分压预测v1129'
+# print(os.listdir(file_dir))
 
 # 区域明细及偏差率统计
 
-city_area_file = pd.ExcelFile(os.path.join(file_dir,os.listdir(file_dir)[2]))
+# city_area_file = pd.ExcelFile(os.path.join(file_dir, os.listdir(file_dir)[2]))
 # for city in df_city_real['city_name'].drop_duplicates():
 #     df_city_pred = pd.read_excel(city_area_file,sheet_name=city).dropna().set_index('日期')
 #     df_city_pred.index = pd.to_datetime(df_city_pred.index)
@@ -139,12 +141,11 @@ city_area_file = pd.ExcelFile(os.path.join(file_dir,os.listdir(file_dir)[2]))
 #         result.to_excel(writer,sheet_name=f'{city}')
 
 
+# pd.read_excel(city_area_file,sheet_name='舟山').dropna().set_index('日期')
+# df_city_real[df_city_real['city_name']=='舟山'].set_index('pt_date')['power_sal']
 
-    # pd.read_excel(city_area_file,sheet_name='舟山').dropna().set_index('日期')
-    # df_city_real[df_city_real['city_name']=='舟山'].set_index('pt_date')['power_sal']
-
-city_volt_file = os.path.join(file_dir,os.listdir(file_dir)[2])
-excel_file1 = pd.ExcelFile(city_volt_file)
+# city_volt_file = os.path.join(file_dir,os.listdir(file_dir)[2])
+# excel_file1 = pd.ExcelFile(city_volt_file)
 
 # for sheet_name in excel_file1.sheet_names[1:]:
 #     print(sheet_name)
@@ -164,7 +165,7 @@ excel_file1 = pd.ExcelFile(city_volt_file)
 #     with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\市分压电量同期预测对比.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as wirter:
 #         result.to_excel(wirter,sheet_name=f'{sheet_name}')
 
-industry_file = pd.ExcelFile(os.path.join(file_dir,os.listdir(file_dir)[4]))
+# industry_file = pd.ExcelFile(os.path.join(file_dir,os.listdir(file_dir)[4]))
 # for sheet_name in industry_file.sheet_names[1:]:
 #
 #     pred_industry_df = pd.concat([pd.read_excel(industry_file,sheet_name=sheet_name).iloc[:27],pd.read_excel(industry_file,sheet_name=sheet_name).iloc[-3:]],ignore_index=True)
@@ -186,16 +187,96 @@ industry_file = pd.ExcelFile(os.path.join(file_dir,os.listdir(file_dir)[4]))
 #     with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\行业电量同期预测对比.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as wirter:
 #         result.to_excel(wirter,sheet_name=f'{sheet_name[:2]}')
 
-e1 = r'C:\Users\鸽子\Desktop\行业电量同期预测对比.xlsx'
-df1 = pd.read_excel(e1,sheet_name=1)
-df1.set_index(df1.columns[0],inplace=True)
-for sheet_name in industry_file.sheet_names[2:]:
-    df2 = pd.read_excel(e1,sheet_name=sheet_name)
-    df2 = df2.set_index(df2.columns[0])
-    df1 += df2
-df1['偏差'] = df1['真实值']-df1['预测值']
-df1['偏差率'] = df1['偏差']/df1['真实值']
-df1.to_excel(r'C:\Users\鸽子\Desktop\1.xlsx')
-# writer = pd.ExcelWriter(e1,engine='openpyxl')
-# df1.to_excel(writer,sheet_name=0)
-print(df1)
\ No newline at end of file
+# e1 = r'C:\Users\鸽子\Desktop\行业电量同期预测对比.xlsx'
+# df1 = pd.read_excel(e1,sheet_name=1)
+# df1.set_index(df1.columns[0],inplace=True)
+# for sheet_name in industry_file.sheet_names[2:]:
+#     df2 = pd.read_excel(e1,sheet_name=sheet_name)
+#     df2 = df2.set_index(df2.columns[0])
+#     df1 += df2
+# df1['偏差'] = df1['真实值']-df1['预测值']
+# df1['偏差率'] = df1['偏差']/df1['真实值']
+# df1.to_excel(r'C:\Users\鸽子\Desktop\1.xlsx')
+# # writer = pd.ExcelWriter(e1,engine='openpyxl')
+# # df1.to_excel(writer,sheet_name=0)
+# print(df1)
+import numpy as np
+
+pd.set_option('display.width', None)
+
+# Statistics on the difference between published (df_fx) and same-period (df_tq) energy
+df_fx = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江发行202311-202312v2.xlsx')
+df_tq = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江分区202311-202312.xlsx')
+
+# City level: rows without a county; copy so the column assignment below targets its own frame
+df_tq_city = df_tq[df_tq['county_name'].isnull()].copy()
+
+df_tq_city['pt_date'] = pd.to_datetime(df_tq_city['pt_date'])
+df_tq_city = df_tq_city[df_tq_city['pt_date'].astype('string').str[:7]=='2023-11']
+
+# print(df_tq_city[df_tq_city['city_name']==df_tq_city['city_name'].iloc[0]].set_index('pt_date')['power_sal'].resample('M').sum())
+
+# Same-period monthly total per city (summed power_sal scaled by 10000)
+df_tq_city = pd.DataFrame(df_tq_city['power_sal'].groupby(df_tq_city['city_name']).sum() * 10000)
+
+df_fx_city = df_fx[(df_fx['date_pub'] == df_fx['date_pub'].iloc[0]) & (df_fx['coountry_name'].isnull())
+                   & (df_fx['city_name'].notnull())].drop(columns='coountry_name').set_index('city_name')
+
+
+df_city = df_fx_city.join(df_tq_city)
+
+df_city = df_city.drop(columns='province_name')
+df_city['bias'] = (df_city['power_pub'] - df_city['power_sal']) / df_city['power_pub']
+
+df_city = df_city.iloc[np.argsort(abs(df_city['bias']))]
+# df_city.to_excel('市区域发行同期偏差.xlsx')
+
+print('------------------------------------------------------------------------')
+
+# County-level bias
+df_fx_county = df_fx[(df_fx['date_pub'] == df_fx['date_pub'].iloc[0]) & (df_fx['coountry_name'].notnull())
+                     & (df_fx['city_name'].notnull())].drop(columns=['province_name']).set_index('coountry_name')
+# print(df_fx_county.reset_index().sort_values('coountry_name').drop_duplicates())
+
+df_tq_county = df_tq[(df_tq['county_name'].notnull())&(df_tq['pt_date'].astype('string').str[:7]=='2023-11')]
+
+df_tq_county = pd.DataFrame(df_tq_county['power_sal'].groupby(df_tq_county['county_name']).sum()* 10000)
+
+print(df_tq_county.sort_index())
+df_county = df_fx_county.join(df_tq_county).sort_index()
+
+# print(df_county.reset_index().drop_duplicates())
+
+df_county['bias'] = (df_county['power_pub'] - df_county['power_sal'])/df_county['power_pub']
+# df_county = df_county.iloc[np.argsort(abs(df_county['bias']))]
+
+# print(df_county.reset_index().drop_duplicates())
+df_county.reset_index(inplace=True)
+df_county = df_county[['date_pub','city_name','coountry_name','power_pub','power_sal','bias']]
+
+
+# Add region_power from the 变损 workbook to power_sal, then recompute bias
+zjbs_ = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江变损202311-202312.xlsx')
+zjbs = zjbs_[(zjbs_['ds']=='2023-11')&(zjbs_['county_name'].notnull())][['county_name','region_power']]
+
+
+
+df_county = pd.merge(df_county,zjbs,left_on='coountry_name',right_on='county_name',how='left')
+df_county.fillna(0,inplace=True)
+
+
+df_county['power_sal'] += df_county['region_power']
+df_county['bias'] = (df_county['power_pub'] - df_county['power_sal'])/df_county['power_pub']
+df_county['_'] = abs(df_county['bias'])
+
+df_county.sort_values(['city_name','_']).drop(columns=['region_power','county_name','_']).to_excel('区县发行同期偏差.xlsx',index=False)
+
+zjbs_qx = zjbs_[(zjbs_['ds']=='2023-11')&(zjbs_['county_name'].isnull())][['city_name','region_power']].set_index('city_name')
+print(zjbs_qx)
+print(df_city)
+df_city = df_city.join(zjbs_qx)
+df_city['power_sal'] += df_city['region_power'].fillna(0)  # no matching row -> add 0, as the county branch does
+df_city['bias'] = (df_city['power_pub'] - df_city['power_sal'])/df_city['power_pub']
+
+print(df_city.drop(columns='region_power'))
+df_city.drop(columns='region_power').to_excel('市区域发行同期偏差.xlsx')
\ No newline at end of file
diff --git a/浙江电压等级电量/prophet_分压电量_10kv.py b/浙江电压等级电量/prophet_分压电量.py
similarity index 100%
rename from 浙江电压等级电量/prophet_分压电量_10kv.py
rename to 浙江电压等级电量/prophet_分压电量.py
diff --git a/浙江行业电量/prophet_行业电量.py b/浙江行业电量/prophet_行业电量.py
index 965a9d2..31fa80d 100644
--- a/浙江行业电量/prophet_行业电量.py
+++ b/浙江行业电量/prophet_行业电量.py
@@ -1,58 +1,60 @@
 from prophet import Prophet
 import pandas as pd
 import os
-import datetime
 import numpy as np
 
 
 def normal(data):
     high = data.describe()['75%'] + 1.5 * (data.describe()['75%'] - data.describe()['25%'])
     low = data.describe()['25%'] - 1.5 * (data.describe()['75%'] - data.describe()['25%'])
-    return (data<=high)&(data>=low)
+    return (data <= high) & (data >= low)
 
 
-file_dir = './浙江各地市行业电量数据'
-# df = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江电量20231202.xlsx')
-for city in os.listdir(file_dir):
-    df_city = pd.read_excel(os.path.join(file_dir, city))
-    df_city['stat_date'] = df_city['stat_date'].map(lambda x: str(x).strip()[:10])
+excel_file = r'C:\Users\鸽子\Desktop\北京安徽行业.xlsx'
+df = pd.read_excel(excel_file, sheet_name=1)
+
+print(df.columns)
+for city in df['city_name'].drop_duplicates().dropna():
+    df_city = df[df['city_name'] == city].copy()  # own copy: stat_date is reassigned on the next line
     df_city['stat_date'] = pd.to_datetime(df_city['stat_date'])
     list_goal = []
     list_industry = []
     result_dict = {}
+
     for industry in df_city.columns[3:]:
         s1 = df_city[['stat_date', industry]]
 
-        ds_train = s1[(s1['stat_date'] >= '2022-09-30') & (s1['stat_date'] <= '2023-11-27')].sort_values(by='stat_date')
+        ds_train = s1[(s1['stat_date'] >= '2023-01-01') & (s1['stat_date'] <= '2023-12-28')].sort_values(by='stat_date')
+        print(ds_train)
 
         ds_train.rename(columns={'stat_date': 'ds', industry: 'y'}, inplace=True)
 
         df_train = ds_train.copy()
         df_train['y'] = df_train['y'].where(normal(df_train['y']), other=np.nan).fillna(method='ffill')
 
-
         model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True)
         model.add_country_holidays(country_name="CN")
         model.fit(df_train)
         future = model.make_future_dataframe(periods=3, freq='D')
 
         predict = model.predict(future)
-        print(city[:2],industry)
+        print(city[-6:], industry)
 
-        predict = predict[['ds', 'yhat']].set_index('ds').loc['2023-11'].rename(columns={'yhat':'售电量'})
-        ds_train.rename(columns={'y':'售电量'},inplace=True)
+        predict = predict[['ds', 'yhat']].set_index('ds').loc['2023-12'].rename(columns={'yhat': '售电量'})
+        ds_train.rename(columns={'y': '售电量'}, inplace=True)
 
-        result = pd.concat((ds_train.set_index('ds').loc['2023-11'][:27],predict[-3:]))
+        result = pd.concat((ds_train.set_index('ds').loc['2023-12'][:28], predict[-3:]))
         result_dict[industry] = list(result['售电量'])
+        print(result)
 
-    with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\行业电量v1130.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as writer:
-        pd.DataFrame(result_dict,index=pd.date_range(start=f'2023-11-01', end=f'2023-11-30', freq='D').strftime('%Y-%m-%d')).to_excel(writer,sheet_name=city[:2])
+    # with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\行业电量v1130.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as writer:
+    #     pd.DataFrame(result_dict,index=pd.date_range(start=f'2023-11-01', end=f'2023-11-30', freq='D').strftime('%Y-%m-%d')).to_excel(writer,sheet_name=city[:2])
 
-        # df = predict.join(s1.set_index('ds')).loc['2023-8']
-        # df['偏差率'] = (df['y'] - df['yhat']) / df['y']
-        # df['goal'] = (df['y'] - df['yhat'])[-3:].sum() / df['y'].sum()
-        # list_goal.append((df['y'] - df['yhat'])[-3:].sum() / df['y'].sum())
-        # list_industry.append(industry)
+    # df = predict.join(s1.set_index('ds')).loc['2023-8']
+    # df['偏差率'] = (df['y'] - df['yhat']) / df['y']
+    # df['goal'] = (df['y'] - df['yhat'])[-3:].sum() / df['y'].sum()
+    # list_goal.append((df['y'] - df['yhat'])[-3:].sum() / df['y'].sum())
+    # list_industry.append(industry)
 
     # df = pd.DataFrame({'industry': list_industry, 'goal': list_goal})
     # df.to_csv(fr'C:\Users\鸽子\Desktop\行业8月偏差\{city[:2]}_goal.csv', index=False, encoding='gbk')
diff --git a/浙江行业电量/test1.py b/浙江行业电量/test1.py
index 96023a3..04b0307 100644
--- a/浙江行业电量/test1.py
+++ b/浙江行业电量/test1.py
@@ -112,3 +112,4 @@ print(df)
 
 print("\n根据条件替换后的数据:")
 print(df_new)
+
diff --git a/浙江行业电量/行业电量_输出为3_步长为10.py b/浙江行业电量/行业电量_输出为3_步长为10.py
index 53ec20b..eed256e 100644
--- a/浙江行业电量/行业电量_输出为3_步长为10.py
+++ b/浙江行业电量/行业电量_输出为3_步长为10.py
@@ -26,7 +26,6 @@ class LSTM_Regression(nn.Module):
         x = x.view(s, b, -1)  # 把形状改回来
         return x
 
-
 def create_dataset(data, days_for_train=5) -> (np.array, np.array):
     dataset_x, dataset_y = [], []
     for i in range(len(data) - days_for_train-3):
@@ -190,8 +189,8 @@ for city in df['city_name'].drop_duplicates():
     print(df1)
 
 
-    with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\行业电量预测v1129.xlsx',mode='a',engine='openpyxl',if_sheet_exists='replace') as writer:
-        df1.to_excel(writer,sheet_name=f'{city[4:6]}')
+    # with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\行业电量预测v1129.xlsx',mode='a',engine='openpyxl',if_sheet_exists='replace') as writer:
+    #     df1.to_excel(writer,sheet_name=f'{city[4:6]}')
 print(time.time()-t1)
 print(result_dict)