You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

49 lines
2.6 KiB
Python

import pandas as pd
import math
import datetime
import chinese_calendar as cc
# Load the per-city weather workbook and normalize its join keys.
weather_path = r'C:\Users\鸽子\Desktop\浙江各地市气象数据 .xlsx'
df = pd.read_excel(weather_path)

# Column headers may carry stray whitespace; trim them before any lookup by name.
df.columns = df.columns.map(lambda label: label.strip())

# dtdate is parsed with the YYYYMMDD format and then kept as text
# (the datetime-to-string conversion yields 'YYYY-MM-DD' for date-only values).
date_text = pd.to_datetime(df['dtdate'], format='%Y%m%d').astype('str')
df['dtdate'] = date_text.map(lambda text: text.strip())

# Trim each city name, then drop its final character.
# NOTE(review): presumably that character is a suffix absent from the
# electricity sheet's city names -- confirm against the data.
df['city_name'] = df['city_name'].map(lambda name: name.strip()).str[:-1]
def holiday_work(x):
    """Flag whether ``x`` is a day off according to ``chinese_calendar``.

    Args:
        x: A date or datetime understood by ``chinese_calendar``; the script
            passes the elements of a pandas datetime column.

    Returns:
        int: 0 when ``cc.is_workday(x)`` is true, otherwise 1.
    """
    # chinese_calendar defines is_holiday() as the negation of is_workday(),
    # so a single lookup settles both cases and no path falls through to None.
    return 0 if cc.is_workday(x) else 1
def jq(y, x):
    """Approximate the calendar date of solar term number ``x`` in year ``y``.

    The script calls this with ``x`` in 0..23 and pairs the result with
    ``jq_list[x]``, so index 0 corresponds to the first entry of that list.

    Args:
        y: Calendar year.
        x: Zero-based solar-term index within the year.

    Returns:
        datetime.date: 1899-12-31 plus the whole number of days given by the
        empirical formula below (fractional days are truncated).
    """
    years_since_1900 = y - 1900
    periodic_correction = 1.9 * math.sin(0.262 * x)
    # Terms are summed in the same left-to-right order as the one-line
    # formula so floating-point rounding is unchanged.
    day_count = 365.242 * years_since_1900 + 6.2 + 15.22 * x - periodic_correction
    base_date = datetime.date(1899, 12, 31)
    return base_date + datetime.timedelta(days=int(day_count))
# print(jq(2023,1))
# The 24 solar-term names in the order jq() indexes them (index 0 is 小寒).
jq_list = ['小寒', '大寒', '立春', '雨水', '惊蛰', '春分', '清明', '谷雨', '立夏', '小满', '芒种', '夏至', '小暑', '大暑', '立秋', '处暑', '白露', '秋分', '寒露', '霜降', '立冬', '小雪', '大雪', '冬至']

# Lookup from a 'YYYY-MM-DD' string to the solar term that jq() places on
# that date. Only 2023 is generated here.
jq_dict = {}
for year in range(2023, 2024):
    for term_index, term_name in enumerate(jq_list):
        jq_dict[jq(year, term_index).strftime('%Y-%m-%d')] = term_name

# Tag the rows whose date is exactly a computed term date; every other row
# is NaN at this point.
df['24ST'] = df['dtdate'].map(jq_dict)

# Carry the last seen value down to the following rows. DataFrame.ffill is
# the supported spelling of the deprecated fillna(method='ffill').
# NOTE(review): this fills every column, not only 24ST, and follows row
# order, so it assumes rows are sorted by date within each city -- confirm.
df.ffill(inplace=True)

# Rows ahead of the first tagged date are still NaN; label them 秋分.
# The result is assigned back to the column because an inplace fillna on a
# column selection is a chained assignment that does not update df under
# pandas Copy-on-Write.
# NOTE(review): presumably the data begins during 秋分 -- confirm.
df['24ST'] = df['24ST'].fillna('秋分')

# 0 = workday, 1 = day off, as decided by holiday_work().
df['holiday'] = pd.to_datetime(df['dtdate']).map(holiday_work)
print(df)
# Integer code for each solar-term name. Rebinding jq_dict discards the
# earlier date -> name lookup, which is not used past this point.
jq_dict = {
    '冬至': 0, '处暑': 1, '夏至': 2, '大寒': 3, '大暑': 4, '大雪': 5,
    '寒露': 6, '小寒': 7, '小暑': 8, '小满': 9, '小雪': 10, '惊蛰': 11,
    '春分': 12, '清明': 13, '白露': 14, '秋分': 15, '立冬': 16, '立夏': 17,
    '立春': 18, '立秋': 19, '芒种': 20, '谷雨': 21, '雨水': 22, '霜降': 23,
}

# Load the first sheet of the daily electricity workbook and trim its headers.
df_elec = pd.read_excel(
    r'C:\Users\鸽子\Desktop\浙江省各地市日电量及分压数据21-23年.xlsx',
    sheet_name=0,
)
df_elec.columns = df_elec.columns.map(lambda label: label.strip())
print(df_elec)

# Normalize both join keys to trimmed text so they can match the weather
# frame's dtdate / city_name columns.
df_elec['stat_date'] = (
    pd.to_datetime(df_elec['stat_date'])
    .astype(str)
    .map(lambda text: text.strip())
)
df_elec['地市'] = df_elec['地市'].map(lambda name: name.strip())

# Left join keeps every weather row; the right-hand key columns are then
# dropped and the sales column gets its output name.
df_final = (
    pd.merge(
        df,
        df_elec,
        left_on=['city_name', 'dtdate'],
        right_on=['地市', 'stat_date'],
        how='left',
    )
    .drop(columns=['stat_date', '地市'])
    .rename(columns={'power_sal': '售电量'})
)

# Replace solar-term names with their integer codes and divide sales by 10000.
df_final['24ST'] = df_final['24ST'].map(jq_dict)
df_final['售电量'] = df_final['售电量'] / 10000
print(df_final)

# Keep only the output columns, then write one workbook per city.
df_final = df_final[['dtdate', 'city_name', 'tem_max', 'tem_min', 'holiday', '24ST', '售电量']]
for city in df_final['city_name'].unique():
    is_this_city = df_final['city_name'] == city
    df_city = df_final[is_this_city]
    df_city.to_excel(fr'C:\Users\鸽子\Desktop\追加\{city} .xlsx', index=False)