You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
pytorch/北京安徽/北京_行业_移动平均.py

60 lines
2.5 KiB
Python

import pandas as pd
pd.set_option('display.width',None)
df = pd.read_excel(r'C:\python-project\p1031\北京安徽\北京安徽电量数据\北京安徽行业.xlsx', sheet_name=0)
df['stat_date'] = pd.to_datetime(df['stat_date'])
# 移动平均
for city in df['city_name'].drop_duplicates():
df_city = df[(df['city_name'] == city)&(df['county_name'].isnull())].set_index('stat_date').loc['2023-12'].sort_index()
dict_big = {}
dict_ok = {}
resut_df = pd.DataFrame({})
index_industry = []
tq_list = []
pred_list = []
loss_list = []
rate_list = []
for industry in df_city.columns[2:]:
df_moving_avg = pd.DataFrame(df_city.iloc[:-3][industry], index=df_city.iloc[:-3].index)
future = pd.date_range(start='2023-12-29', periods=3, freq='D')
for date in future:
df_moving_avg.loc[date, industry] = df_moving_avg[df_moving_avg.values!=0][-3:].mean().values
resut_df = pd.concat([resut_df, df_moving_avg], axis=1)
"result_df为明细数据"
print(city[-6:])
final_df = resut_df.sum()
final_df = pd.DataFrame(final_df,columns=['预测值'])
final_df['真实值'] = df_city[df_city.columns[2:]].sum()
final_df['偏差'] = final_df['真实值'] - final_df['预测值']
final_df['偏差率'] = final_df['偏差'] / final_df['真实值']
final_df['偏差率'] = final_df['偏差率'].apply(lambda x:"{:.5%}".format(x))
print(final_df)
# loss = (df_city1[industry].tail(-3).sum() - df_moving_avg.tail(-3).sum()) / df_city1[industry].sum()
# tq_list.append(df_city1[industry].sum())
# pred_list.append(df_moving_avg[industry].sum())
# loss_list.append(df_city1[industry].sum()-df_moving_avg[industry].sum())
# rate_list.append((df_city1[industry].sum()-df_moving_avg[industry].sum())/df_city1[industry].sum())
with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\移动平均_北京行业_12月.xlsx', mode='a', if_sheet_exists='replace',
engine='openpyxl') as writer:
final_df.to_excel(writer, sheet_name=f'{city[-6:]}')
# resut_df = pd.DataFrame({'同期电量':tq_list,'预测电量':pred_list,'偏差':loss_list,'偏差率':rate_list},index=index_industry)
# print(resut_df)
# resut_df.to_excel(r'C:\Users\鸽子\Desktop\移动平均_丽水_行业.xlsx')
# if loss.values >= 0.005:
# dict_big[industry] = loss.values[0]
# else:
# dict_ok[industry] = loss.values[0]
# print(len(dict_ok))
# print(len(dict_big))