You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

282 lines
12 KiB
Python

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# import pandas as pd
# import os
# import re
# file_dir1 = r'C:\Users\鸽子\Desktop\一版结果\电压等级电量预测结果\偏差率'
# file_dir2 = r'C:\Users\鸽子\Desktop\一版结果\电压等级电量预测结果\月底3天预测结果'
# file_dir3 = r'C:\Users\鸽子\Desktop\一版结果\行业电量预测结果\偏差'
# import numpy as np
# np.set_printoptions(threshold=np.inf)
#
# # print(os.listdir(file_dir3))
# # str1 = '丽水电压等级10kv以下月底偏差率:0.00229'
# #
# # print(re.split('电压等级|月底偏差率:',str1))
# # with open(os.path.join(file_dir3,'9月底偏差率.txt'),'r',encoding='utf-8') as f:
# # lines = f.readlines()
# # list_city = []
# # list_industry = []
# # list_loss = []
# # for i in lines:
# # i = re.split(':||其中', i)
# # print(i)
# # list_city.append(i[0][:2])
# # list_industry.append(i[-2].replace(i[0][:2],''))
# # list_loss.append(i[-1][:-2])
# # df_level = pd.DataFrame({'城市':list_city,'行业':list_industry,'偏差':list_loss})
# # # df_level.to_csv(os.path.join(file_dir3,'9月底偏差率.csv'),encoding='gbk')
# # print(df_level)
# file_dir = r'C:\python-project\pytorch3\浙江行业电量\浙江所有地市133行业数据'
# # print(os.listdir(file_dir))
# dict1 = {}
#
# for file in os.listdir(file_dir):
#
# df = pd.read_excel(os.path.join(file_dir,file),index_col=' stat_date ')
#
# col_list = df.drop(columns=[i for i in df.columns if (df[i] == 0).sum() / len(df) >= 0.5]).columns
# dict1[file[:2]] = col_list
# print(dict1)
#
# # print(len(df.drop(columns=[i for i in df.columns if (df[i] == 0).sum() / len(df) >= 0.5]).columns))
#
# read_path = r'C:\Users\鸽子\Desktop\一版结果\行业电量预测结果\月底预测结果'
# list1 = []
# for i in os.listdir(read_path):
# print(i)
# data = pd.read_csv(os.path.join(read_path, i), sep='\t',header=None)
# data = data[data.columns[1:]]
#
#
# for j,step in enumerate(range(0, len(data), 4)):
# df = data.iloc[step+1:step + 4, :]
# df.columns = ['预测值', '实际值', '偏差率']
# try:
# df['行业'] = dict1[i[2:4]][j]
# except:
# pass
# df['城市'] = i[2:4]
# list1.append(df)
# print(df)
# df = pd.concat(list1,ignore_index=True)
# df.to_csv('各市行业电量预测结果.csv',encoding='gbk')
# print(df)
import os
from openpyxl import Workbook
import pandas as pd
# df = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江省11月分行业售电量预测v2.xlsx',sheet_name=1)
# print(df.head())
# print(df[df.columns[2:]].groupby(df['city_name']).sum().T)
# df2 = df[df.columns[2:]].groupby(df['city_name']).sum().T
# df2.to_excel(r'C:\Users\鸽子\Desktop\1.xlsx')
file_dir = r'C:\Users\鸽子\Desktop\11月区县分压预测'
# for file in os.listdir(file_dir):
# city = file[:-5]
# wb = Workbook()
# wb.save(fr'C:\Users\鸽子\Desktop\11月区县分压汇总\{city}.xlsx')
#
# for file in os.listdir(file_dir):
# city = file[:-5]
# excel_file = pd.ExcelFile(os.path.join(file_dir,file))
# sheet_names = excel_file.sheet_names[1:]
# for sheet in sheet_names:
# df = excel_file.parse(sheet)
# df_result = df[df.columns[1:]].sum()
# df_result = pd.DataFrame(df_result)
# df_result.columns = ['售电量']
#
# with pd.ExcelWriter(fr'C:\Users\鸽子\Desktop\11月区县分压汇总\{city}.xlsx', mode='a', engine='openpyxl',
# if_sheet_exists='replace') as writer:
# df_result.to_excel(writer, sheet_name=f'{sheet}')
# df = pd.read_excel('C:\python-project\p1031\浙江行业电量\浙江各地市行业电量数据\台州.xlsx').set_index('stat_date')
# print(df.columns)
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.dates as mdates
# date_rng = pd.date_range(start=df['4.有色金属矿采选业'].index[0], end=df['4.有色金属矿采选业'].index[-1], freq='D')
# mpl.rcParams['font.sans-serif']=['kaiti']
# print(df['4.有色金属矿采选业'][:-1])
# plt.figure(figsize=(10, 6))
# plt.plot(df['4.有色金属矿采选业'].index[:-1],df['4.有色金属矿采选业'][:-1])
#
# plt.title(f'4.有色金属矿采选业')
# plt.gcf().autofmt_xdate()
# plt.gca().xaxis.set_major_locator(mdates.DayLocator(interval=120))
#
# plt.xticks(rotation=45)
# plt.xlabel('时间')
# plt.ylabel('数值')
# plt.show()
# excel_file = pd.ExcelFile(r'C:\Users\鸽子\Desktop\浙江电量20231202.xlsx')
# df_city_real = pd.read_excel(excel_file,sheet_name=0)
# df_city_real = df_city_real[df_city_real['county_name'].isnull()]
# df_city_real['city_name'] = df_city_real['city_name'].str[4:6]
# # print(df_city_real)
#
# file_dir = r'C:\Users\鸽子\Desktop\发行&预测\区域行业分压预测v1129'
# print(os.listdir(file_dir))
# 区域明细及偏差率统计
# city_area_file = pd.ExcelFile(os.path.join(file_dir, os.listdir(file_dir)[2]))
# for city in df_city_real['city_name'].drop_duplicates():
# df_city_pred = pd.read_excel(city_area_file,sheet_name=city).dropna().set_index('日期')
# df_city_pred.index = pd.to_datetime(df_city_pred.index)
# df_real = df_city_real[df_city_real['city_name']==city].set_index('pt_date')['power_sal']
# df_real.index = pd.to_datetime(df_real.index)
# df_city_pred.loc['2023-11-27'] = df_real.loc['2023-11-27']
#
# result = pd.DataFrame(df_real).join(df_city_pred)
# result.columns = ['实际值','预测值']
# result['偏差率'] = (result['实际值'] - result['预测值'])/result['实际值']
# result['指标'] = (df_real.values.sum()-df_city_pred.values.sum())/df_real.values.sum()
# result['偏差率'][:27] = 0
# print(result)
# with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\区域电量同期预测对比.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as writer:
# result.to_excel(writer,sheet_name=f'{city}')
# pd.read_excel(city_area_file,sheet_name='舟山').dropna().set_index('日期')
# df_city_real[df_city_real['city_name']=='舟山'].set_index('pt_date')['power_sal']
# city_volt_file = os.path.join(file_dir,os.listdir(file_dir)[2])
# excel_file1 = pd.ExcelFile(city_volt_file)
# for sheet_name in excel_file1.sheet_names[1:]:
# print(sheet_name)
# pred_volt_df = pd.read_excel(excel_file1,sheet_name=sheet_name).dropna()
#
# pred_volt_df.set_index(pred_volt_df.columns[0],inplace=True)
# real_volt_df = pd.read_excel(excel_file,sheet_name=1).set_index('pt_date')
#
# real_volt_df = real_volt_df[(real_volt_df['county_name'].isnull())&(real_volt_df['city_name'].str[4:6]==sheet_name)].drop(columns=['county_name','500kv(含330kv)以上'])
#
# result = pd.DataFrame({'实际值':list(real_volt_df.sum()[1:]),
# '预测值':list(pred_volt_df.sum()[1:]),
# '偏差':list(real_volt_df.sum()[1:] - pred_volt_df.sum()[1:])},index=real_volt_df.sum()[1:].index)
# result['指标'] = result['偏差']/real_volt_df.sum()[1:]
#
#
# with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\市分压电量同期预测对比.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as wirter:
# result.to_excel(wirter,sheet_name=f'{sheet_name}')
# industry_file = pd.ExcelFile(os.path.join(file_dir,os.listdir(file_dir)[4]))
# for sheet_name in industry_file.sheet_names[1:]:
#
# pred_industry_df = pd.concat([pd.read_excel(industry_file,sheet_name=sheet_name).iloc[:27],pd.read_excel(industry_file,sheet_name=sheet_name).iloc[-3:]],ignore_index=True)
# pred_industry_df[pred_industry_df.columns[0]] = pd.date_range(start=f'2023-11-01', end=f'2023-11-30', freq='D').strftime('%Y-%m-%d')
# pred_industry_df.set_index(pred_industry_df.columns[0],inplace=True)
#
# real_industry_df = pd.read_excel(excel_file,sheet_name=2).set_index('stat_date')
# real_industry_df['city_name'] = real_industry_df['city_name'].str[4:6]
# real_industry_df = real_industry_df[real_industry_df['city_name']==sheet_name[:2]].drop(columns=['city_name']).iloc[:30]
# print(sheet_name[:2])
# print(pd.DataFrame(real_industry_df.sum(),columns=['真实值']))
#
#
# result = pd.DataFrame(real_industry_df.sum(),columns=['真实值']).join(pd.DataFrame(pred_industry_df.sum(),columns=['预测值']))
# print(result)
# result['偏差'] = result['真实值'] - result['预测值']
# result['指标'] = result['偏差']/result['真实值']
#
# with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\行业电量同期预测对比.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as wirter:
# result.to_excel(wirter,sheet_name=f'{sheet_name[:2]}')
# e1 = r'C:\Users\鸽子\Desktop\行业电量同期预测对比.xlsx'
# df1 = pd.read_excel(e1,sheet_name=1)
# df1.set_index(df1.columns[0],inplace=True)
# for sheet_name in industry_file.sheet_names[2:]:
# df2 = pd.read_excel(e1,sheet_name=sheet_name)
# df2 = df2.set_index(df2.columns[0])
# df1 += df2
# df1['偏差'] = df1['真实值']-df1['预测值']
# df1['偏差率'] = df1['偏差']/df1['真实值']
# df1.to_excel(r'C:\Users\鸽子\Desktop\1.xlsx')
# # writer = pd.ExcelWriter(e1,engine='openpyxl')
# # df1.to_excel(writer,sheet_name=0)
# print(df1)
import numpy as np
pd.set_option('display.width', None)
# 同期发行电量差别统计
df_fx = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江发行202311-202312v2.xlsx')
df_tq = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江分区202311-202312.xlsx')
# 市级别
df_tq_city = df_tq[df_tq['county_name'].isnull()]
df_tq_city['pt_date'] = pd.to_datetime(df_tq_city['pt_date'])
df_tq_city = df_tq_city[df_tq_city['pt_date'].astype('string').str[:7]=='2023-11']
# print(df_tq_city[df_tq_city['city_name']==df_tq_city['city_name'].iloc[0]].set_index('pt_date')['power_sal'].resample('M').sum())
# 同期按月汇总
df_tq_city = pd.DataFrame(df_tq_city['power_sal'].groupby(df_tq_city['city_name']).sum() * 10000)
df_fx_city = df_fx[(df_fx['date_pub'] == df_fx['date_pub'].iloc[0]) & (df_fx['coountry_name'].isnull())
& (df_fx['city_name'].notnull())].drop(columns='coountry_name').set_index('city_name')
df_city = df_fx_city.join(df_tq_city)
df_city = df_city.drop(columns='province_name')
df_city['bias'] = (df_city['power_pub'] - df_city['power_sal']) / df_city['power_pub']
df_city = df_city.iloc[np.argsort(abs(df_city['bias']))]
# df_city.to_excel('市区域发行同期偏差.xlsx')
print('------------------------------------------------------------------------')
# 区县偏差
df_fx_county = df_fx[(df_fx['date_pub'] == df_fx['date_pub'].iloc[0]) & (df_fx['coountry_name'].notnull())
& (df_fx['city_name'].notnull())].drop(columns=['province_name']).set_index('coountry_name')
# print(df_fx_county.reset_index().sort_values('coountry_name').drop_duplicates())
df_tq_county = df_tq[(df_tq['county_name'].notnull())&(df_tq['pt_date'].astype('string').str[:7]=='2023-11')]
df_tq_county = pd.DataFrame(df_tq_county['power_sal'].groupby(df_tq_county['county_name']).sum()* 10000)
print(df_tq_county.sort_index())
df_county = df_fx_county.join(df_tq_county).sort_index()
# print(df_county.reset_index().drop_duplicates())
df_county['bias'] = (df_county['power_pub'] - df_county['power_sal'])/df_county['power_pub']
# df_county = df_county.iloc[np.argsort(abs(df_county['bias']))]
# print(df_county.reset_index().drop_duplicates())
df_county.reset_index(inplace=True)
df_county = df_county[['date_pub','city_name','coountry_name','power_pub','power_sal','bias']]
zjbs_ = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江变损202311-202312.xlsx')
zjbs = zjbs_[(zjbs_['ds']=='2023-11')&(zjbs_['county_name'].notnull())][['county_name','region_power']]
df_county = pd.merge(df_county,zjbs,left_on='coountry_name',right_on='county_name',how='left')
df_county.fillna(0,inplace=True)
df_county['power_sal'] += df_county['region_power']
df_county['bias'] = (df_county['power_pub'] - df_county['power_sal'])/df_county['power_pub']
df_county['_'] = abs(df_county['bias'])
df_county.sort_values(['city_name','_']).drop(columns=['region_power','county_name','_']).to_excel('区县发行同期偏差.xlsx',index=False)
zjbs_qx = zjbs_[(zjbs_['ds']=='2023-11')&(zjbs_['county_name'].isnull())][['city_name','region_power']].set_index('city_name')
print(zjbs_qx)
print(df_city)
df_city = df_city.join(zjbs_qx)
df_city['power_sal'] += df_city['region_power']
df_city['bias'] = (df_city['power_pub'] - df_city['power_sal'])/df_city['power_pub']
print(df_city.drop(columns='region_power'))
df_city.drop(columns='region_power').to_excel('市区域发行同期偏差.xlsx')