You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

282 lines
12 KiB
Python

11 months ago
# import pandas as pd
# import os
# import re
# file_dir1 = r'C:\Users\鸽子\Desktop\一版结果\电压等级电量预测结果\偏差率'
# file_dir2 = r'C:\Users\鸽子\Desktop\一版结果\电压等级电量预测结果\月底3天预测结果'
# file_dir3 = r'C:\Users\鸽子\Desktop\一版结果\行业电量预测结果\偏差'
# import numpy as np
# np.set_printoptions(threshold=np.inf)
#
# # print(os.listdir(file_dir3))
# # str1 = '丽水电压等级10kv以下月底偏差率:0.00229'
# #
# # print(re.split('电压等级|月底偏差率:',str1))
# # with open(os.path.join(file_dir3,'9月底偏差率.txt'),'r',encoding='utf-8') as f:
# # lines = f.readlines()
# # list_city = []
# # list_industry = []
# # list_loss = []
# # for i in lines:
# # i = re.split(':||其中', i)
# # print(i)
# # list_city.append(i[0][:2])
# # list_industry.append(i[-2].replace(i[0][:2],''))
# # list_loss.append(i[-1][:-2])
# # df_level = pd.DataFrame({'城市':list_city,'行业':list_industry,'偏差':list_loss})
# # # df_level.to_csv(os.path.join(file_dir3,'9月底偏差率.csv'),encoding='gbk')
# # print(df_level)
# file_dir = r'C:\python-project\pytorch3\浙江行业电量\浙江所有地市133行业数据'
# # print(os.listdir(file_dir))
# dict1 = {}
#
# for file in os.listdir(file_dir):
#
# df = pd.read_excel(os.path.join(file_dir,file),index_col=' stat_date ')
#
# col_list = df.drop(columns=[i for i in df.columns if (df[i] == 0).sum() / len(df) >= 0.5]).columns
# dict1[file[:2]] = col_list
# print(dict1)
#
# # print(len(df.drop(columns=[i for i in df.columns if (df[i] == 0).sum() / len(df) >= 0.5]).columns))
#
# read_path = r'C:\Users\鸽子\Desktop\一版结果\行业电量预测结果\月底预测结果'
# list1 = []
# for i in os.listdir(read_path):
# print(i)
# data = pd.read_csv(os.path.join(read_path, i), sep='\t',header=None)
# data = data[data.columns[1:]]
#
#
# for j,step in enumerate(range(0, len(data), 4)):
# df = data.iloc[step+1:step + 4, :]
# df.columns = ['预测值', '实际值', '偏差率']
# try:
# df['行业'] = dict1[i[2:4]][j]
# except:
# pass
# df['城市'] = i[2:4]
# list1.append(df)
# print(df)
# df = pd.concat(list1,ignore_index=True)
# df.to_csv('各市行业电量预测结果.csv',encoding='gbk')
# print(df)
10 months ago
import os
from openpyxl import Workbook
10 months ago
import pandas as pd
10 months ago
# df = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江省11月分行业售电量预测v2.xlsx',sheet_name=1)
# print(df.head())
# print(df[df.columns[2:]].groupby(df['city_name']).sum().T)
# df2 = df[df.columns[2:]].groupby(df['city_name']).sum().T
# df2.to_excel(r'C:\Users\鸽子\Desktop\1.xlsx')
10 months ago
10 months ago
file_dir = r'C:\Users\鸽子\Desktop\11月区县分压预测'
10 months ago
# for file in os.listdir(file_dir):
# city = file[:-5]
# wb = Workbook()
# wb.save(fr'C:\Users\鸽子\Desktop\11月区县分压汇总\{city}.xlsx')
#
# for file in os.listdir(file_dir):
# city = file[:-5]
# excel_file = pd.ExcelFile(os.path.join(file_dir,file))
# sheet_names = excel_file.sheet_names[1:]
# for sheet in sheet_names:
# df = excel_file.parse(sheet)
# df_result = df[df.columns[1:]].sum()
# df_result = pd.DataFrame(df_result)
# df_result.columns = ['售电量']
#
# with pd.ExcelWriter(fr'C:\Users\鸽子\Desktop\11月区县分压汇总\{city}.xlsx', mode='a', engine='openpyxl',
# if_sheet_exists='replace') as writer:
# df_result.to_excel(writer, sheet_name=f'{sheet}')
# df = pd.read_excel('C:\python-project\p1031\浙江行业电量\浙江各地市行业电量数据\台州.xlsx').set_index('stat_date')
# print(df.columns)
10 months ago
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.dates as mdates
# date_rng = pd.date_range(start=df['4.有色金属矿采选业'].index[0], end=df['4.有色金属矿采选业'].index[-1], freq='D')
# mpl.rcParams['font.sans-serif']=['kaiti']
# print(df['4.有色金属矿采选业'][:-1])
# plt.figure(figsize=(10, 6))
# plt.plot(df['4.有色金属矿采选业'].index[:-1],df['4.有色金属矿采选业'][:-1])
#
# plt.title(f'4.有色金属矿采选业')
# plt.gcf().autofmt_xdate()
# plt.gca().xaxis.set_major_locator(mdates.DayLocator(interval=120))
#
# plt.xticks(rotation=45)
# plt.xlabel('时间')
# plt.ylabel('数值')
# plt.show()
# excel_file = pd.ExcelFile(r'C:\Users\鸽子\Desktop\浙江电量20231202.xlsx')
# df_city_real = pd.read_excel(excel_file,sheet_name=0)
# df_city_real = df_city_real[df_city_real['county_name'].isnull()]
# df_city_real['city_name'] = df_city_real['city_name'].str[4:6]
# # print(df_city_real)
#
# file_dir = r'C:\Users\鸽子\Desktop\发行&预测\区域行业分压预测v1129'
# print(os.listdir(file_dir))
# 区域明细及偏差率统计
# city_area_file = pd.ExcelFile(os.path.join(file_dir, os.listdir(file_dir)[2]))
# for city in df_city_real['city_name'].drop_duplicates():
# df_city_pred = pd.read_excel(city_area_file,sheet_name=city).dropna().set_index('日期')
# df_city_pred.index = pd.to_datetime(df_city_pred.index)
# df_real = df_city_real[df_city_real['city_name']==city].set_index('pt_date')['power_sal']
# df_real.index = pd.to_datetime(df_real.index)
# df_city_pred.loc['2023-11-27'] = df_real.loc['2023-11-27']
#
# result = pd.DataFrame(df_real).join(df_city_pred)
# result.columns = ['实际值','预测值']
# result['偏差率'] = (result['实际值'] - result['预测值'])/result['实际值']
# result['指标'] = (df_real.values.sum()-df_city_pred.values.sum())/df_real.values.sum()
# result['偏差率'][:27] = 0
# print(result)
# with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\区域电量同期预测对比.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as writer:
# result.to_excel(writer,sheet_name=f'{city}')
# pd.read_excel(city_area_file,sheet_name='舟山').dropna().set_index('日期')
# df_city_real[df_city_real['city_name']=='舟山'].set_index('pt_date')['power_sal']
# city_volt_file = os.path.join(file_dir,os.listdir(file_dir)[2])
# excel_file1 = pd.ExcelFile(city_volt_file)
10 months ago
# for sheet_name in excel_file1.sheet_names[1:]:
# print(sheet_name)
# pred_volt_df = pd.read_excel(excel_file1,sheet_name=sheet_name).dropna()
#
# pred_volt_df.set_index(pred_volt_df.columns[0],inplace=True)
# real_volt_df = pd.read_excel(excel_file,sheet_name=1).set_index('pt_date')
#
# real_volt_df = real_volt_df[(real_volt_df['county_name'].isnull())&(real_volt_df['city_name'].str[4:6]==sheet_name)].drop(columns=['county_name','500kv(含330kv)以上'])
#
# result = pd.DataFrame({'实际值':list(real_volt_df.sum()[1:]),
# '预测值':list(pred_volt_df.sum()[1:]),
# '偏差':list(real_volt_df.sum()[1:] - pred_volt_df.sum()[1:])},index=real_volt_df.sum()[1:].index)
# result['指标'] = result['偏差']/real_volt_df.sum()[1:]
#
#
# with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\市分压电量同期预测对比.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as wirter:
# result.to_excel(wirter,sheet_name=f'{sheet_name}')
# industry_file = pd.ExcelFile(os.path.join(file_dir,os.listdir(file_dir)[4]))
# for sheet_name in industry_file.sheet_names[1:]:
#
# pred_industry_df = pd.concat([pd.read_excel(industry_file,sheet_name=sheet_name).iloc[:27],pd.read_excel(industry_file,sheet_name=sheet_name).iloc[-3:]],ignore_index=True)
# pred_industry_df[pred_industry_df.columns[0]] = pd.date_range(start=f'2023-11-01', end=f'2023-11-30', freq='D').strftime('%Y-%m-%d')
# pred_industry_df.set_index(pred_industry_df.columns[0],inplace=True)
#
# real_industry_df = pd.read_excel(excel_file,sheet_name=2).set_index('stat_date')
# real_industry_df['city_name'] = real_industry_df['city_name'].str[4:6]
# real_industry_df = real_industry_df[real_industry_df['city_name']==sheet_name[:2]].drop(columns=['city_name']).iloc[:30]
# print(sheet_name[:2])
# print(pd.DataFrame(real_industry_df.sum(),columns=['真实值']))
#
#
# result = pd.DataFrame(real_industry_df.sum(),columns=['真实值']).join(pd.DataFrame(pred_industry_df.sum(),columns=['预测值']))
# print(result)
# result['偏差'] = result['真实值'] - result['预测值']
# result['指标'] = result['偏差']/result['真实值']
#
# with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\行业电量同期预测对比.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as wirter:
# result.to_excel(wirter,sheet_name=f'{sheet_name[:2]}')
10 months ago
# e1 = r'C:\Users\鸽子\Desktop\行业电量同期预测对比.xlsx'
# df1 = pd.read_excel(e1,sheet_name=1)
# df1.set_index(df1.columns[0],inplace=True)
# for sheet_name in industry_file.sheet_names[2:]:
# df2 = pd.read_excel(e1,sheet_name=sheet_name)
# df2 = df2.set_index(df2.columns[0])
# df1 += df2
# df1['偏差'] = df1['真实值']-df1['预测值']
# df1['偏差率'] = df1['偏差']/df1['真实值']
# df1.to_excel(r'C:\Users\鸽子\Desktop\1.xlsx')
# # writer = pd.ExcelWriter(e1,engine='openpyxl')
# # df1.to_excel(writer,sheet_name=0)
# print(df1)
import numpy as np
pd.set_option('display.width', None)
# 同期发行电量差别统计
df_fx = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江发行202311-202312v2.xlsx')
df_tq = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江分区202311-202312.xlsx')
# 市级别
df_tq_city = df_tq[df_tq['county_name'].isnull()]
df_tq_city['pt_date'] = pd.to_datetime(df_tq_city['pt_date'])
df_tq_city = df_tq_city[df_tq_city['pt_date'].astype('string').str[:7]=='2023-11']
# print(df_tq_city[df_tq_city['city_name']==df_tq_city['city_name'].iloc[0]].set_index('pt_date')['power_sal'].resample('M').sum())
# 同期按月汇总
df_tq_city = pd.DataFrame(df_tq_city['power_sal'].groupby(df_tq_city['city_name']).sum() * 10000)
df_fx_city = df_fx[(df_fx['date_pub'] == df_fx['date_pub'].iloc[0]) & (df_fx['coountry_name'].isnull())
& (df_fx['city_name'].notnull())].drop(columns='coountry_name').set_index('city_name')
df_city = df_fx_city.join(df_tq_city)
df_city = df_city.drop(columns='province_name')
df_city['bias'] = (df_city['power_pub'] - df_city['power_sal']) / df_city['power_pub']
df_city = df_city.iloc[np.argsort(abs(df_city['bias']))]
# df_city.to_excel('市区域发行同期偏差.xlsx')
print('------------------------------------------------------------------------')
# 区县偏差
df_fx_county = df_fx[(df_fx['date_pub'] == df_fx['date_pub'].iloc[0]) & (df_fx['coountry_name'].notnull())
& (df_fx['city_name'].notnull())].drop(columns=['province_name']).set_index('coountry_name')
# print(df_fx_county.reset_index().sort_values('coountry_name').drop_duplicates())
df_tq_county = df_tq[(df_tq['county_name'].notnull())&(df_tq['pt_date'].astype('string').str[:7]=='2023-11')]
df_tq_county = pd.DataFrame(df_tq_county['power_sal'].groupby(df_tq_county['county_name']).sum()* 10000)
print(df_tq_county.sort_index())
df_county = df_fx_county.join(df_tq_county).sort_index()
# print(df_county.reset_index().drop_duplicates())
df_county['bias'] = (df_county['power_pub'] - df_county['power_sal'])/df_county['power_pub']
# df_county = df_county.iloc[np.argsort(abs(df_county['bias']))]
# print(df_county.reset_index().drop_duplicates())
df_county.reset_index(inplace=True)
df_county = df_county[['date_pub','city_name','coountry_name','power_pub','power_sal','bias']]
zjbs_ = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江变损202311-202312.xlsx')
zjbs = zjbs_[(zjbs_['ds']=='2023-11')&(zjbs_['county_name'].notnull())][['county_name','region_power']]
df_county = pd.merge(df_county,zjbs,left_on='coountry_name',right_on='county_name',how='left')
df_county.fillna(0,inplace=True)
df_county['power_sal'] += df_county['region_power']
df_county['bias'] = (df_county['power_pub'] - df_county['power_sal'])/df_county['power_pub']
df_county['_'] = abs(df_county['bias'])
df_county.sort_values(['city_name','_']).drop(columns=['region_power','county_name','_']).to_excel('区县发行同期偏差.xlsx',index=False)
zjbs_qx = zjbs_[(zjbs_['ds']=='2023-11')&(zjbs_['county_name'].isnull())][['city_name','region_power']].set_index('city_name')
print(zjbs_qx)
print(df_city)
df_city = df_city.join(zjbs_qx)
df_city['power_sal'] += df_city['region_power']
df_city['bias'] = (df_city['power_pub'] - df_city['power_sal'])/df_city['power_pub']
print(df_city.drop(columns='region_power'))
df_city.drop(columns='region_power').to_excel('市区域发行同期偏差.xlsx')