输出预测结果

main
鸽子 1 year ago
parent b6beaf3554
commit e5ecbf7bb8

@ -70,21 +70,41 @@ import pandas as pd
# df2.to_excel(r'C:\Users\鸽子\Desktop\1.xlsx')
file_dir = r'C:\Users\鸽子\Desktop\11月区县分压预测'

# --- Disabled: per-city workbook creation and per-sheet column totals -------
# The two loops below are kept commented out for reference. The first created
# one empty workbook per input file; the second summed every column after the
# first of each sheet (skipping the first sheet) and wrote the totals into the
# matching workbook under the same sheet name.
# NOTE(review): the nesting of the commented-out statements is reconstructed;
# confirm against the original file before re-enabling.
# for file in os.listdir(file_dir):
#     city = file[:-5]
#     wb = Workbook()
#     wb.save(fr'C:\Users\鸽子\Desktop\11月区县分压汇总\{city}.xlsx')
#
# for file in os.listdir(file_dir):
#     city = file[:-5]
#     excel_file = pd.ExcelFile(os.path.join(file_dir,file))
#     sheet_names = excel_file.sheet_names[1:]
#     for sheet in sheet_names:
#         df = excel_file.parse(sheet)
#         df_result = df[df.columns[1:]].sum()
#         df_result = pd.DataFrame(df_result)
#         df_result.columns = ['售电量']
#
#         with pd.ExcelWriter(fr'C:\Users\鸽子\Desktop\11月区县分压汇总\{city}.xlsx', mode='a', engine='openpyxl',
#                             if_sheet_exists='replace') as writer:
#             df_result.to_excel(writer, sheet_name=f'{sheet}')

# --- Quick visual check of one industry series ------------------------------
import matplotlib as mpl
import matplotlib.dates as mdates
import matplotlib.pyplot as plt

# Raw string: the path contains backslash sequences such as "\p" that are not
# valid escapes; a raw literal yields the same path without relying on that.
df = pd.read_excel(
    r'C:\python-project\p1031\浙江行业电量\浙江各地市行业电量数据\台州.xlsx'
).set_index('stat_date')
print(df.columns)

# Look the column up once instead of re-indexing the frame on every use.
series = df['4.有色金属矿采选业']
date_rng = pd.date_range(start=series.index[0], end=series.index[-1], freq='D')

# Use a font that can render the Chinese title and axis labels.
mpl.rcParams['font.sans-serif'] = ['kaiti']

# The last row is left out of both the printout and the plot.
print(series[:-1])
plt.figure(figsize=(10, 6))
plt.plot(series.index[:-1], series[:-1])
plt.title('4.有色金属矿采选业')
plt.gcf().autofmt_xdate()
# One major tick every 120 days keeps the date axis readable.
plt.gca().xaxis.set_major_locator(mdates.DayLocator(interval=120))
plt.xticks(rotation=45)
plt.xlabel('时间')
plt.ylabel('数值')
plt.show()

@ -8,7 +8,7 @@ import numpy as np
def normal(data):
    """Return a boolean mask of the values in ``data`` that are not IQR outliers.

    The bounds are ``Q1 - 1.5 * IQR`` and ``Q3 + 1.5 * IQR``, where the
    quartiles are taken from ``data.describe()``.

    Args:
        data: A numeric pandas Series.

    Returns:
        A boolean Series with the same index as ``data``; ``True`` where the
        value lies within the inclusive bounds, ``False`` otherwise.
    """
    # describe() is computed once; every bound derives from the same quartiles.
    stats = data.describe()
    q1, q3 = stats['25%'], stats['75%']
    high = q3 + 1.5 * (q3 - q1)
    low = q1 - 1.5 * (q3 - q1)
    return (data <= high) & (data >= low)
file_dir = './浙江各地市行业电量数据' file_dir = './浙江各地市行业电量数据'
@ -18,14 +18,14 @@ for city in os.listdir(file_dir):
df_city['stat_date'] = pd.to_datetime(df_city['stat_date']) df_city['stat_date'] = pd.to_datetime(df_city['stat_date'])
list_goal = [] list_goal = []
list_industry = [] list_industry = []
result_dict = {}
for industry in df_city.columns[2:]: for industry in df_city.columns[2:]:
s1 = df_city[['stat_date', industry]] s1 = df_city[['stat_date', industry]]
s1 = s1[(s1['stat_date'] >= '2022-09-30') & (s1['stat_date'] <= '2023-10-31')] df_train = s1[(s1['stat_date'] >= '2022-09-30') & (s1['stat_date'] <= '2023-10-31')].sort_values(by='stat_date')
s1 = s1.loc[normal(s1[industry]).index] df_train.rename(columns={'stat_date': 'ds', industry: 'y'}, inplace=True)
s1.rename(columns={'stat_date': 'ds', industry: 'y'}, inplace=True)
df_train = s1[(s1['ds'] >= '2022-08-31') & (s1['ds'] <= '2023-10-31')].sort_values(by='ds') df_train['y'] = df_train['y'].where(normal(df_train['y']), other=np.nan).fillna(method='ffill')
df_test = s1[(s1['ds'] >= '2022-08-31') & (s1['ds'] <= '2023-10-31')].sort_values(by='ds') # df_test = s1[(s1['ds'] >= '2022-08-31') & (s1['ds'] <= '2023-10-31')].sort_values(by='ds')
model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True) model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True)
model.add_country_holidays(country_name="CN") model.add_country_holidays(country_name="CN")
@ -33,9 +33,16 @@ for city in os.listdir(file_dir):
future = model.make_future_dataframe(periods=3, freq='D') future = model.make_future_dataframe(periods=3, freq='D')
predict = model.predict(future) predict = model.predict(future)
predict = predict[['ds', 'yhat']].set_index('ds')
print(city,industry) print(city,industry)
print(predict.loc['2023-10'])
predict = predict[['ds', 'yhat']].set_index('ds').loc['2023-10'].rename(columns={'yhat':'售电量'})
df_train.rename(columns={'y':'售电量'},inplace=True)
result = pd.concat((df_train.set_index('ds').loc['2023-10'][:28],predict[-3:]))
result_dict[industry] = list(result['售电量'])
with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\行业电量v1130.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as writer:
pd.DataFrame(result_dict,index=pd.date_range(start=f'2023-10-01', end=f'2023-10-31', freq='D').strftime('%Y-%m-%d')).to_excel(writer,sheet_name=city)
# df = predict.join(s1.set_index('ds')).loc['2023-8'] # df = predict.join(s1.set_index('ds')).loc['2023-8']
# df['偏差率'] = (df['y'] - df['yhat']) / df['y'] # df['偏差率'] = (df['y'] - df['yhat']) / df['y']

@ -188,6 +188,8 @@ for city in df['city_name'].drop_duplicates():
df1 = pd.concat((df_city.iloc[:27], df1)) df1 = pd.concat((df_city.iloc[:27], df1))
print(df_city) print(df_city)
print(df1) print(df1)
with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\行业电量预测v1129.xlsx',mode='a',engine='openpyxl',if_sheet_exists='replace') as writer: with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\行业电量预测v1129.xlsx',mode='a',engine='openpyxl',if_sheet_exists='replace') as writer:
df1.to_excel(writer,sheet_name=f'{city[4:6]}') df1.to_excel(writer,sheet_name=f'{city[4:6]}')
print(time.time()-t1) print(time.time()-t1)

Loading…
Cancel
Save