# import pandas as pd # import os # import re # file_dir1 = r'C:\Users\鸽子\Desktop\一版结果\电压等级电量预测结果\偏差率' # file_dir2 = r'C:\Users\鸽子\Desktop\一版结果\电压等级电量预测结果\月底3天预测结果' # file_dir3 = r'C:\Users\鸽子\Desktop\一版结果\行业电量预测结果\偏差' # import numpy as np # np.set_printoptions(threshold=np.inf) # # # print(os.listdir(file_dir3)) # # str1 = '丽水电压等级10kv以下月底偏差率:0.00229' # # # # print(re.split('电压等级|月底偏差率:',str1)) # # with open(os.path.join(file_dir3,'9月底偏差率.txt'),'r',encoding='utf-8') as f: # # lines = f.readlines() # # list_city = [] # # list_industry = [] # # list_loss = [] # # for i in lines: # # i = re.split(':|:|其中', i) # # print(i) # # list_city.append(i[0][:2]) # # list_industry.append(i[-2].replace(i[0][:2],'')) # # list_loss.append(i[-1][:-2]) # # df_level = pd.DataFrame({'城市':list_city,'行业':list_industry,'偏差':list_loss}) # # # df_level.to_csv(os.path.join(file_dir3,'9月底偏差率.csv'),encoding='gbk') # # print(df_level) # file_dir = r'C:\python-project\pytorch3\浙江行业电量\浙江所有地市133行业数据' # # print(os.listdir(file_dir)) # dict1 = {} # # for file in os.listdir(file_dir): # # df = pd.read_excel(os.path.join(file_dir,file),index_col=' stat_date ') # # col_list = df.drop(columns=[i for i in df.columns if (df[i] == 0).sum() / len(df) >= 0.5]).columns # dict1[file[:2]] = col_list # print(dict1) # # # print(len(df.drop(columns=[i for i in df.columns if (df[i] == 0).sum() / len(df) >= 0.5]).columns)) # # read_path = r'C:\Users\鸽子\Desktop\一版结果\行业电量预测结果\月底预测结果' # list1 = [] # for i in os.listdir(read_path): # print(i) # data = pd.read_csv(os.path.join(read_path, i), sep='\t',header=None) # data = data[data.columns[1:]] # # # for j,step in enumerate(range(0, len(data), 4)): # df = data.iloc[step+1:step + 4, :] # df.columns = ['预测值', '实际值', '偏差率'] # try: # df['行业'] = dict1[i[2:4]][j] # except: # pass # df['城市'] = i[2:4] # list1.append(df) # print(df) # df = pd.concat(list1,ignore_index=True) # df.to_csv('各市行业电量预测结果.csv',encoding='gbk') # print(df) import os from openpyxl import Workbook import pandas as pd # df = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江省11月分行业售电量预测v2.xlsx',sheet_name=1) # print(df.head()) # print(df[df.columns[2:]].groupby(df['city_name']).sum().T) # df2 = df[df.columns[2:]].groupby(df['city_name']).sum().T # df2.to_excel(r'C:\Users\鸽子\Desktop\1.xlsx') file_dir = r'C:\Users\鸽子\Desktop\11月区县分压预测' # for file in os.listdir(file_dir): # city = file[:-5] # wb = Workbook() # wb.save(fr'C:\Users\鸽子\Desktop\11月区县分压汇总\{city}.xlsx') # # for file in os.listdir(file_dir): # city = file[:-5] # excel_file = pd.ExcelFile(os.path.join(file_dir,file)) # sheet_names = excel_file.sheet_names[1:] # for sheet in sheet_names: # df = excel_file.parse(sheet) # df_result = df[df.columns[1:]].sum() # df_result = pd.DataFrame(df_result) # df_result.columns = ['售电量'] # # with pd.ExcelWriter(fr'C:\Users\鸽子\Desktop\11月区县分压汇总\{city}.xlsx', mode='a', engine='openpyxl', # if_sheet_exists='replace') as writer: # df_result.to_excel(writer, sheet_name=f'{sheet}') # df = pd.read_excel('C:\python-project\p1031\浙江行业电量\浙江各地市行业电量数据\台州.xlsx').set_index('stat_date') # print(df.columns) import matplotlib.pyplot as plt import matplotlib as mpl import matplotlib.dates as mdates # date_rng = pd.date_range(start=df['4.有色金属矿采选业'].index[0], end=df['4.有色金属矿采选业'].index[-1], freq='D') # mpl.rcParams['font.sans-serif']=['kaiti'] # print(df['4.有色金属矿采选业'][:-1]) # plt.figure(figsize=(10, 6)) # plt.plot(df['4.有色金属矿采选业'].index[:-1],df['4.有色金属矿采选业'][:-1]) # # plt.title(f'4.有色金属矿采选业') # plt.gcf().autofmt_xdate() # plt.gca().xaxis.set_major_locator(mdates.DayLocator(interval=120)) # # plt.xticks(rotation=45) # plt.xlabel('时间') # plt.ylabel('数值') # plt.show() excel_file = pd.ExcelFile(r'C:\Users\鸽子\Desktop\浙江电量20231202.xlsx') df_city_real = pd.read_excel(excel_file,sheet_name=0) df_city_real = df_city_real[df_city_real['county_name'].isnull()] df_city_real['city_name'] = df_city_real['city_name'].str[4:6] # print(df_city_real) file_dir = r'C:\Users\鸽子\Desktop\发行&预测\区域行业分压预测v1129' print(os.listdir(file_dir)) # 区域明细及偏差率统计 city_area_file = pd.ExcelFile(os.path.join(file_dir,os.listdir(file_dir)[2])) # for city in df_city_real['city_name'].drop_duplicates(): # df_city_pred = pd.read_excel(city_area_file,sheet_name=city).dropna().set_index('日期') # df_city_pred.index = pd.to_datetime(df_city_pred.index) # df_real = df_city_real[df_city_real['city_name']==city].set_index('pt_date')['power_sal'] # df_real.index = pd.to_datetime(df_real.index) # df_city_pred.loc['2023-11-27'] = df_real.loc['2023-11-27'] # # result = pd.DataFrame(df_real).join(df_city_pred) # result.columns = ['实际值','预测值'] # result['偏差率'] = (result['实际值'] - result['预测值'])/result['实际值'] # result['指标'] = (df_real.values.sum()-df_city_pred.values.sum())/df_real.values.sum() # result['偏差率'][:27] = 0 # print(result) # with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\区域电量同期预测对比.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as writer: # result.to_excel(writer,sheet_name=f'{city}') # pd.read_excel(city_area_file,sheet_name='舟山').dropna().set_index('日期') # df_city_real[df_city_real['city_name']=='舟山'].set_index('pt_date')['power_sal'] city_volt_file = os.path.join(file_dir,os.listdir(file_dir)[2]) excel_file1 = pd.ExcelFile(city_volt_file) # for sheet_name in excel_file1.sheet_names[1:]: # print(sheet_name) # pred_volt_df = pd.read_excel(excel_file1,sheet_name=sheet_name).dropna() # # pred_volt_df.set_index(pred_volt_df.columns[0],inplace=True) # real_volt_df = pd.read_excel(excel_file,sheet_name=1).set_index('pt_date') # # real_volt_df = real_volt_df[(real_volt_df['county_name'].isnull())&(real_volt_df['city_name'].str[4:6]==sheet_name)].drop(columns=['county_name','500kv(含330kv)以上']) # # result = pd.DataFrame({'实际值':list(real_volt_df.sum()[1:]), # '预测值':list(pred_volt_df.sum()[1:]), # '偏差':list(real_volt_df.sum()[1:] - pred_volt_df.sum()[1:])},index=real_volt_df.sum()[1:].index) # result['指标'] = result['偏差']/real_volt_df.sum()[1:] # # # with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\市分压电量同期预测对比.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as wirter: # result.to_excel(wirter,sheet_name=f'{sheet_name}') industry_file = pd.ExcelFile(os.path.join(file_dir,os.listdir(file_dir)[4])) # for sheet_name in industry_file.sheet_names[1:]: # # pred_industry_df = pd.concat([pd.read_excel(industry_file,sheet_name=sheet_name).iloc[:27],pd.read_excel(industry_file,sheet_name=sheet_name).iloc[-3:]],ignore_index=True) # pred_industry_df[pred_industry_df.columns[0]] = pd.date_range(start=f'2023-11-01', end=f'2023-11-30', freq='D').strftime('%Y-%m-%d') # pred_industry_df.set_index(pred_industry_df.columns[0],inplace=True) # # real_industry_df = pd.read_excel(excel_file,sheet_name=2).set_index('stat_date') # real_industry_df['city_name'] = real_industry_df['city_name'].str[4:6] # real_industry_df = real_industry_df[real_industry_df['city_name']==sheet_name[:2]].drop(columns=['city_name']).iloc[:30] # print(sheet_name[:2]) # print(pd.DataFrame(real_industry_df.sum(),columns=['真实值'])) # # # result = pd.DataFrame(real_industry_df.sum(),columns=['真实值']).join(pd.DataFrame(pred_industry_df.sum(),columns=['预测值'])) # print(result) # result['偏差'] = result['真实值'] - result['预测值'] # result['指标'] = result['偏差']/result['真实值'] # # with pd.ExcelWriter(r'C:\Users\鸽子\Desktop\行业电量同期预测对比.xlsx',mode='a',if_sheet_exists='replace',engine='openpyxl') as wirter: # result.to_excel(wirter,sheet_name=f'{sheet_name[:2]}') e1 = r'C:\Users\鸽子\Desktop\行业电量同期预测对比.xlsx' df1 = pd.read_excel(e1,sheet_name=1) df1.set_index(df1.columns[0],inplace=True) for sheet_name in industry_file.sheet_names[2:]: df2 = pd.read_excel(e1,sheet_name=sheet_name) df2 = df2.set_index(df2.columns[0]) df1 += df2 df1['偏差'] = df1['真实值']-df1['预测值'] df1['偏差率'] = df1['偏差']/df1['真实值'] df1.to_excel(r'C:\Users\鸽子\Desktop\1.xlsx') # writer = pd.ExcelWriter(e1,engine='openpyxl') # df1.to_excel(writer,sheet_name=0) print(df1)