"""Build the model-input table from daily weather and daily electricity data.

The script reads two Excel workbooks, derives a holiday flag and a
label-encoded "24 solar terms" feature for every weather date, writes the
enriched weather table to ``气象数据.csv`` and the weather/electricity join
to ``入模数据.csv`` (both in the current working directory).
"""

import bisect
import datetime
import math
import os

import chinese_calendar as cc
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Smoke check of the calendar package.
# NOTE(review): despite its name this variable holds the *workday* flag of
# 2023-10-07; the name is kept only so nothing that refers to it breaks.
is_holiday = cc.is_workday(datetime.date(2023, 10, 7))
print(is_holiday)


def holiday_work(x):
    """Return 0 when *x* is a working day and 1 when it is a day off.

    chinese_calendar documents ``is_holiday`` as the complement of
    ``is_workday``, so a single lookup is enough.
    """
    return 0 if cc.is_workday(x) else 1


# ---------------------------------------------------------------------------
# Weather data
# ---------------------------------------------------------------------------
WEATHER_COLUMNS = [
    'city_name', 'dtdate', 'tem_max', 'tem_min', 'rh', 'rh_max', 'rh_min',
    'pre', 'prs', 'prs_max', 'prs_min', 'win_s_max', 'win_s_min',
]

tq_df = pd.read_excel(r'C:\Users\鸽子\Desktop\杭州\杭州气象数据.xlsx', sheet_name=0)
pd.set_option('display.width', None)

# Header cells may carry surrounding whitespace; normalise before selecting.
tq_df.columns = tq_df.columns.map(str.strip)

# Method chain instead of in-place edits on a column selection, which avoids
# pandas' chained-assignment pitfalls.
tq_df = (
    tq_df[WEATHER_COLUMNS]
    .drop_duplicates(subset='dtdate')
    .sort_values(by='dtdate', ascending=True)
    .reset_index(drop=True)
)
tq_df['dtdate'] = pd.to_datetime(tq_df['dtdate'], format='%Y%m%d')
tq_df['holiday'] = tq_df['dtdate'].apply(holiday_work)

print(tq_df.columns)
print(tq_df.head())
tq_df.info()  # info() prints by itself; wrapping it in print() adds "None"


# ---------------------------------------------------------------------------
# 24 solar terms feature
# ---------------------------------------------------------------------------
def jq(y, x):
    """Return the approximate start date of solar term *x* in year *y*.

    Args:
        y: Calendar year.
        x: Index into ``jq_list`` (0 = 小寒 ... 23 = 冬至).

    Returns:
        A ``datetime.date`` computed from a closed-form day offset counted
        from 1899-12-31.  NOTE(review): this is an approximation formula;
        confirm its accuracy for the years of interest.
    """
    a = 365.242 * (y - 1900) + 6.2 + 15.22 * x - 1.9 * math.sin(0.262 * x)
    return datetime.date(1899, 12, 31) + datetime.timedelta(days=int(a))


jq_list = ['小寒', '大寒', '立春', '雨水', '惊蛰', '春分', '清明', '谷雨',
           '立夏', '小满', '芒种', '夏至', '小暑', '大暑', '立秋', '处暑',
           '白露', '秋分', '寒露', '霜降', '立冬', '小雪', '大雪', '冬至']

# Cover every year present in the data plus the preceding one, so that dates
# in early January resolve to the previous year's 冬至.  Fall back to the
# historical fixed range when there is no usable date at all.
_data_years = tq_df['dtdate'].dt.year.dropna()
if _data_years.empty:
    _first_year, _last_year = 2019, 2023
else:
    _first_year = int(_data_years.min()) - 1
    _last_year = int(_data_years.max())

# ISO date string of each term's first day -> term name.
jq_dict = {
    jq(year, idx).strftime('%Y-%m-%d'): name
    for year in range(_first_year, _last_year + 1)
    for idx, name in enumerate(jq_list)
}

# ISO dates sort chronologically as plain strings, so they can be bisected.
jq_starts = sorted(jq_dict)


def solar_term_on(day_key):
    """Return the solar term in effect on *day_key* (``'YYYY-MM-DD'``).

    The term is the one whose start date is the latest start on or before
    *day_key*.  Unlike an exact-date lookup followed by a forward fill, this
    stays correct when the data has missing days.  Dates earlier than every
    known start fall back to '冬至'.
    """
    pos = bisect.bisect_right(jq_starts, day_key) - 1
    if pos < 0:
        return '冬至'
    return jq_dict[jq_starts[pos]]


# Explicit strftime so the keys match jq_dict regardless of how pandas
# renders Timestamps when cast to string.
tq_df['24ST'] = tq_df['dtdate'].dt.strftime('%Y-%m-%d').map(solar_term_on)

# Encode the solar-term names as integer class labels.
le = LabelEncoder()
tq_df['24ST'] = le.fit_transform(tq_df['24ST'])
print(tq_df)

file2 = os.path.join(os.getcwd(), '气象数据.csv')
try:
    tq_df.to_csv(file2, encoding='gbk')
except UnicodeEncodeError:
    # Some character is not representable in gbk; rewrite the file as utf-8.
    tq_df.to_csv(file2, encoding='utf-8')


# ---------------------------------------------------------------------------
# Electricity data and final join
# ---------------------------------------------------------------------------
elec_df = pd.read_excel(r'C:\Users\鸽子\Desktop\杭州\杭州日电量.xlsx')

# Scale the sold-electricity column down by 10,000 and keep two decimals.
elec_df['售电量'] = (elec_df['售电量'] / 10000).map(lambda x: round(x, 2))
print(elec_df.columns)

# The date column name really does contain the surrounding spaces.
elec_df.sort_values(by=' 天 ', ascending=True, inplace=True)
elec_df[' 天 '] = pd.to_datetime(elec_df[' 天 '], format='%Y%m%d')
elec_df.info()
print(elec_df)

# Inner join on the date; the right-hand key is redundant afterwards.
df = pd.merge(tq_df, elec_df, left_on='dtdate', right_on=' 天 ')
df.drop(columns=' 天 ', inplace=True)
df.set_index('dtdate', inplace=True, drop=True)
print(df)

# os.path.join instead of a '\'-prefixed literal: no invalid escape sequence
# and the file lands inside the working directory on every platform.
df.to_csv(os.path.join(os.getcwd(), '入模数据.csv'), encoding='gbk')