From e5ecbf7bb8cbaef9c211eb80a9d43a75d068b7f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=B8=BD=E5=AD=90?= <2316994765@qq.com> Date: Thu, 30 Nov 2023 18:06:48 +0800 Subject: [PATCH] =?UTF-8?q?=E8=BE=93=E5=87=BA=E9=A2=84=E6=B5=8B=E7=BB=93?= =?UTF-8?q?=E6=9E=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 文档处理.py | 52 +++++++++++++------ 浙江行业电量/prophet_行业电量.py | 23 +++++--- .../行业电量_输出为3_步长为10.py | 2 + 3 files changed, 53 insertions(+), 24 deletions(-) diff --git a/文档处理.py b/文档处理.py index 462b18f..24df157 100644 --- a/文档处理.py +++ b/文档处理.py @@ -70,21 +70,41 @@ import pandas as pd # df2.to_excel(r'C:\Users\鸽子\Desktop\1.xlsx') file_dir = r'C:\Users\鸽子\Desktop\11月区县分压预测' -for file in os.listdir(file_dir): - city = file[:-5] - wb = Workbook() - wb.save(fr'C:\Users\鸽子\Desktop\11月区县分压汇总\{city}.xlsx') +# for file in os.listdir(file_dir): +# city = file[:-5] +# wb = Workbook() +# wb.save(fr'C:\Users\鸽子\Desktop\11月区县分压汇总\{city}.xlsx') +# +# for file in os.listdir(file_dir): +# city = file[:-5] +# excel_file = pd.ExcelFile(os.path.join(file_dir,file)) +# sheet_names = excel_file.sheet_names[1:] +# for sheet in sheet_names: +# df = excel_file.parse(sheet) +# df_result = df[df.columns[1:]].sum() +# df_result = pd.DataFrame(df_result) +# df_result.columns = ['售电量'] +# +# with pd.ExcelWriter(fr'C:\Users\鸽子\Desktop\11月区县分压汇总\{city}.xlsx', mode='a', engine='openpyxl', +# if_sheet_exists='replace') as writer: +# df_result.to_excel(writer, sheet_name=f'{sheet}') + +df = pd.read_excel('C:\python-project\p1031\浙江行业电量\浙江各地市行业电量数据\台州.xlsx').set_index('stat_date') +print(df.columns) +import matplotlib.pyplot as plt +import matplotlib as mpl +import matplotlib.dates as mdates +date_rng = pd.date_range(start=df['4.有色金属矿采选业'].index[0], end=df['4.有色金属矿采选业'].index[-1], freq='D') +mpl.rcParams['font.sans-serif']=['kaiti'] +print(df['4.有色金属矿采选业'][:-1]) +plt.figure(figsize=(10, 6)) +plt.plot(df['4.有色金属矿采选业'].index[:-1],df['4.有色金属矿采选业'][:-1]) -for file in os.listdir(file_dir): - city = file[:-5] - excel_file = pd.ExcelFile(os.path.join(file_dir,file)) - sheet_names = excel_file.sheet_names[1:] - for sheet in sheet_names: - df = excel_file.parse(sheet) - df_result = df[df.columns[1:]].sum() - df_result = pd.DataFrame(df_result) - df_result.columns = ['售电量'] +plt.title(f'4.有色金属矿采选业') +plt.gcf().autofmt_xdate() +plt.gca().xaxis.set_major_locator(mdates.DayLocator(interval=120)) - with pd.ExcelWriter(fr'C:\Users\鸽子\Desktop\11月区县分压汇总\{city}.xlsx', mode='a', engine='openpyxl', - if_sheet_exists='replace') as writer: - df_result.to_excel(writer, sheet_name=f'{sheet}') +plt.xticks(rotation=45) +plt.xlabel('时间') +plt.ylabel('数值') +plt.show() \ No newline at end of file diff --git a/浙江行业电量/prophet_行业电量.py b/浙江行业电量/prophet_行业电量.py index 1ff4ac2..6a35327 100644 --- a/浙江行业电量/prophet_行业电量.py +++ b/浙江行业电量/prophet_行业电量.py @@ -8,7 +8,7 @@ import numpy as np def normal(data): high = data.describe()['75%'] + 1.5 * (data.describe()['75%'] - data.describe()['25%']) low = data.describe()['25%'] - 1.5 * (data.describe()['75%'] - data.describe()['25%']) - return data[(data<=high)&(data>=low)] + return (data<=high)&(data>=low) file_dir = './浙江各地市行业电量数据' @@ -18,14 +18,14 @@ for city in os.listdir(file_dir): df_city['stat_date'] = pd.to_datetime(df_city['stat_date']) list_goal = [] list_industry = [] + result_dict = {} for industry in df_city.columns[2:]: s1 = df_city[['stat_date', industry]] - s1 = s1[(s1['stat_date'] >= '2022-09-30') & (s1['stat_date'] <= '2023-10-31')] - s1 = s1.loc[normal(s1[industry]).index] - s1.rename(columns={'stat_date': 'ds', industry: 'y'}, inplace=True) + df_train = s1[(s1['stat_date'] >= '2022-09-30') & (s1['stat_date'] <= '2023-10-31')].sort_values(by='stat_date') + df_train.rename(columns={'stat_date': 'ds', industry: 'y'}, inplace=True) - df_train = s1[(s1['ds'] >= '2022-08-31') & (s1['ds'] <= '2023-10-31')].sort_values(by='ds') - df_test = s1[(s1['ds'] >= '2022-08-31') & (s1['ds'] <= '2023-10-31')].sort_values(by='ds') + df_train['y'] = df_train['y'].where(normal(df_train['y']), other=np.nan).fillna(method='ffill') + # df_test = s1[(s1['ds'] >= '2022-08-31') & (s1['ds'] <= '2023-10-31')].sort_values(by='ds') model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True) model.add_country_holidays(country_name="CN") @@ -33,9 +33,16 @@ for city in os.listdir(file_dir): future = model.make_future_dataframe(periods=3, freq='D') predict = model.predict(future) - predict = predict[['ds', 'yhat']].set_index('ds') print(city,industry) - print(predict.loc['2023-10']) + + predict = predict[['ds', 'yhat']].set_index('ds').loc['2023-10'].rename(columns={'yhat':'售电量'}) + df_train.rename(columns={'y':'售电量'},inplace=True) + result = pd.concat((df_train.set_index('ds').loc['2023-10'][:28],predict[-3:])) + result_dict[industry] = list(result['售电量']) + + + with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\行业电量v1130.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as writer: + pd.DataFrame(result_dict,index=pd.date_range(start=f'2023-10-01', end=f'2023-10-31', freq='D').strftime('%Y-%m-%d')).to_excel(writer,sheet_name=city) # df = predict.join(s1.set_index('ds')).loc['2023-8'] # df['偏差率'] = (df['y'] - df['yhat']) / df['y'] diff --git a/浙江行业电量/行业电量_输出为3_步长为10.py b/浙江行业电量/行业电量_输出为3_步长为10.py index 0e0420c..53ec20b 100644 --- a/浙江行业电量/行业电量_输出为3_步长为10.py +++ b/浙江行业电量/行业电量_输出为3_步长为10.py @@ -188,6 +188,8 @@ for city in df['city_name'].drop_duplicates(): df1 = pd.concat((df_city.iloc[:27], df1)) print(df_city) print(df1) + + with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\行业电量预测v1129.xlsx',mode='a',engine='openpyxl',if_sheet_exists='replace') as writer: df1.to_excel(writer,sheet_name=f'{city[4:6]}') print(time.time()-t1)