# pytorch/浙江行业电量/prophet_行业电量.py

from prophet import Prophet
import pandas as pd
import os
import datetime
import numpy as np


def normal(data):
    """Return the entries of *data* that lie inside the 1.5 * IQR fences.

    The fences are ``Q1 - 1.5 * IQR`` and ``Q3 + 1.5 * IQR`` with
    ``IQR = Q3 - Q1``, where Q1 and Q3 are the 25th and 75th percentiles of
    *data*. Both fences are inclusive.

    Args:
        data: A numeric ``pandas.Series``.

    Returns:
        The sub-series of *data* within the fences; the original index labels
        are preserved. NaN entries are dropped because they fail both
        comparisons.
    """
    # Read the two quartiles directly instead of rebuilding the whole
    # describe() summary six times.
    q1 = data.quantile(0.25)
    q3 = data.quantile(0.75)
    spread = 1.5 * (q3 - q1)
    high = q3 + spread
    low = q1 - spread
    return data[(data <= high) & (data >= low)]


# Folder with the input workbooks; every regular file in it is loaded with
# pandas.read_excel and treated as one "city".
file_dir = './浙江各地市行业电量数据'

# Inclusive date window of the history handed to Prophet. 2022-09-30 is the
# effective lower bound: a second, looser filter starting at 2022-08-31 used
# to follow it but could never select any additional rows.
TRAIN_START = '2022-09-30'
TRAIN_END = '2023-10-31'

# sorted() gives a reproducible processing/printing order; os.listdir alone
# returns entries in arbitrary order.
for city in sorted(os.listdir(file_dir)):
    city_path = os.path.join(file_dir, city)
    # Skip sub-directories, hidden files and Office lock files ("~$..."),
    # none of which read_excel can open.
    if not os.path.isfile(city_path) or city.startswith(('~$', '.')):
        continue

    df_city = pd.read_excel(city_path)
    # Keep the first 10 characters of the stringified date (the length of
    # "YYYY-MM-DD") before parsing it.
    df_city['stat_date'] = pd.to_datetime(
        df_city['stat_date'].map(lambda x: str(x).strip()[:10])
    )
    # Only consumed by the disabled evaluation code further down.
    list_goal = []
    list_industry = []
    # Every column from the third one on is modelled as a separate series.
    # NOTE(review): presumably the first two columns are non-industry fields
    # (one of them 'stat_date') - confirm against the workbooks.
    for industry in df_city.columns[2:]:
        s1 = df_city[['stat_date', industry]]
        s1 = s1[(s1['stat_date'] >= TRAIN_START) & (s1['stat_date'] <= TRAIN_END)]
        # Drop rows whose value falls outside the IQR fences computed on the
        # windowed data (this also drops NaN values).
        s1 = s1.loc[normal(s1[industry]).index]
        # Prophet requires the columns to be named 'ds' (date) and 'y' (value).
        s1 = s1.rename(columns={'stat_date': 'ds', industry: 'y'})

        df_train = s1.sort_values(by='ds')
        # Prophet.fit raises ValueError on fewer than 2 non-NaN rows; skip the
        # series instead of aborting the whole run.
        if len(df_train) < 2:
            print(city, industry, 'skipped: fewer than 2 usable rows')
            continue

        # NOTE(review): the forecast below is made at freq='D'; Prophet's
        # daily seasonality is meant for sub-daily data - confirm it is wanted.
        model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True)
        model.add_country_holidays(country_name="CN")
        model.fit(df_train)
        # History dates plus 3 additional days after the last training date.
        future = model.make_future_dataframe(periods=3, freq='D')

        predict = model.predict(future)
        predict = predict[['ds', 'yhat']].set_index('ds')
        print(city, industry)
        # Partial-string indexing: show only the rows dated October 2023.
        print(predict.loc['2023-10'])

        # Disabled evaluation code (deviation of forecast vs. actuals), kept
        # verbatim for reference:
        # df = predict.join(s1.set_index('ds')).loc['2023-8']
        # df['偏差率'] = (df['y'] - df['yhat']) / df['y']
        # df['goal'] = (df['y'] - df['yhat'])[-3:].sum() / df['y'].sum()
        # list_goal.append((df['y'] - df['yhat'])[-3:].sum() / df['y'].sum())
        # list_industry.append(industry)

    # Disabled per-city export of the evaluation results, kept verbatim:
    # df = pd.DataFrame({'industry': list_industry, 'goal': list_goal})
    # df.to_csv(fr'C:\Users\鸽子\Desktop\行业8月偏差\{city[:2]}_goal.csv', index=False, encoding='gbk')
    #
    # with open(r'C:\Users\鸽子\Desktop\goal_8.txt','a') as f:
    #     f.write(f'{city[:2]}\n')
    #     df['goal'].value_counts(bins=[-np.inf,-0.05, -0.01, -0.005, 0, 0.005, 0.01, 0.02, 0.05,np.inf], sort=False).to_csv(f,header=False,sep='\t')