# Provenance: this script was added by git commit
# e723fa0d417bbd36ec058ed05916e523977ab6d8 (Wed, 18 Oct 2023 14:49:17 +0800,
# subject "上传文件至 '杭州日电量'") as 杭州日电量/杭州日电量数据预处理.py.
"""Build the model-input table from daily weather and electricity workbooks.

Running this file as a script:

1. reads the weather workbook, keeps a fixed set of columns, removes duplicate
   dates, and adds a ``holiday`` flag and an integer-encoded ``24ST``
   (24 solar terms) column;
2. writes the weather table to ``气象数据.csv`` in the current directory;
3. reads the electricity workbook, rescales ``售电量`` by 1/10000, and
   inner-joins it to the weather table on the date;
4. writes the joined table to ``入模数据.csv`` in the current directory.
"""
import bisect
import datetime
import math
import os

import pandas as pd

# Input workbooks (absolute paths on the author's machine).
WEATHER_XLSX = r'C:\Users\鸽子\Desktop\杭州\杭州气象数据.xlsx'
ELEC_XLSX = r'C:\Users\鸽子\Desktop\杭州\杭州日电量.xlsx'

# Weather columns carried through to the output.
WEATHER_COLUMNS = ['city_name', 'dtdate', 'tem_max', 'tem_min', 'rh', 'rh_max',
                   'rh_min', 'pre', 'prs', 'prs_max', 'prs_min', 'win_s_max',
                   'win_s_min']

# Solar-term names; position i is the term produced by ``jq(year, i)``.
jq_list = ['小寒', '大寒', '立春', '雨水', '惊蛰', '春分', '清明', '谷雨', '立夏', '小满', '芒种', '夏至', '小暑', '大暑', '立秋', '处暑', '白露', '秋分', '寒露', '霜降', '立冬', '小雪', '大雪', '冬至']


def holiday_work(x):
    """Return 0 when *x* is a workday and 1 otherwise.

    The decision is delegated to ``chinese_calendar.is_workday``.
    """
    # Imported here so the pure helpers in this module stay importable
    # when chinese_calendar is not installed.
    import chinese_calendar as cc

    return 0 if cc.is_workday(x) else 1


def jq(y, x):
    """Return the approximate start date of solar term number *x* in year *y*.

    *x* is an index into ``jq_list`` (0 = 小寒 ... 23 = 冬至).  The formula
    yields a day count that is added to 1899-12-31; the fractional part is
    truncated.
    """
    a = 365.242 * (y - 1900) + 6.2 + 15.22 * x - 1.9 * math.sin(0.262 * x)
    return datetime.date(1899, 12, 31) + datetime.timedelta(days=int(a))


def solar_term_labels(dates):
    """Return, for every timestamp in *dates*, the solar term in effect.

    *dates* is a datetime64 Series.  A date is labelled with the most recent
    term whose start date (per ``jq``) is on or before it.  Term boundaries
    are generated from the year before the earliest date through the year of
    the latest date, so every date has a preceding boundary and no year range
    is hard-coded.  Missing dates (NaT) are labelled ``None``.
    """
    present = dates.dropna()
    if present.empty:
        return pd.Series([None] * len(dates), index=dates.index, dtype='object')

    first_year = int(present.dt.year.min()) - 1
    last_year = int(present.dt.year.max())
    starts = []
    names = []
    for year in range(first_year, last_year + 1):
        for idx, name in enumerate(jq_list):
            starts.append(jq(year, idx))
            names.append(name)

    def label(ts):
        if pd.isna(ts):
            return None
        # Rightmost boundary that is <= the date.
        return names[bisect.bisect_right(starts, ts.date()) - 1]

    return dates.map(label)


def find_column(frame, name):
    """Return the column label of *frame* that equals *name* once stripped.

    Raises ``KeyError`` when no column matches.
    """
    for col in frame.columns:
        if str(col).strip() == name:
            return col
    raise KeyError(name)


def main():
    """Run the whole preprocessing pipeline and write both CSV files."""
    import chinese_calendar as cc
    from sklearn.preprocessing import LabelEncoder

    # Sanity check of the calendar library: prints whether 2023-10-07
    # counts as a workday.
    print(cc.is_workday(datetime.date(2023, 10, 7)))

    # ---- weather table ----
    tq_df = pd.read_excel(WEATHER_XLSX, sheet_name=0)
    pd.set_option('display.width', None)
    tq_df.columns = tq_df.columns.map(lambda x: x.strip())
    tq_df = tq_df[WEATHER_COLUMNS]
    # Reassign rather than mutate in place: tq_df is a column selection here.
    tq_df = tq_df.drop_duplicates(subset='dtdate')
    tq_df = tq_df.sort_values(by='dtdate', ascending=True)
    tq_df = tq_df.reset_index(drop=True)

    tq_df['dtdate'] = pd.to_datetime(tq_df['dtdate'], format='%Y%m%d')
    tq_df['holiday'] = tq_df['dtdate'].apply(holiday_work)
    print(tq_df.columns)
    print(tq_df.head())
    tq_df.info()  # info() prints its report itself and returns None

    # Label each day with its solar term; a row without a date inherits the
    # label of the row above it.
    tq_df['24ST'] = solar_term_labels(tq_df['dtdate']).ffill()

    # Replace the term names with integer codes.
    le = LabelEncoder()
    tq_df['24ST'] = le.fit_transform(tq_df['24ST'])
    print(tq_df)

    file2 = os.path.join(os.getcwd(), '气象数据.csv')
    try:
        tq_df.to_csv(file2, encoding='gbk')
    except UnicodeEncodeError:
        # Some value is not representable in gbk; rewrite the file as utf-8.
        tq_df.to_csv(file2, encoding='utf-8')

    # ---- electricity table ----
    elec_df = pd.read_excel(ELEC_XLSX)
    elec_df['售电量'] = (elec_df['售电量'] / 10000).map(lambda x: round(x, 2))

    print(elec_df.columns)
    # The day column's header is padded with whitespace in the workbook, so
    # look it up by its stripped name instead of relying on exact padding.
    day_col = find_column(elec_df, '天')
    elec_df = elec_df.sort_values(by=day_col, ascending=True)
    elec_df[day_col] = pd.to_datetime(elec_df[day_col], format='%Y%m%d')
    elec_df.info()
    print(elec_df)

    # ---- join and export ----
    df = pd.merge(tq_df, elec_df, left_on='dtdate', right_on=day_col)
    df = df.drop(columns=day_col)
    df = df.set_index('dtdate', drop=True)
    print(df)
    df.to_csv(os.path.join(os.getcwd(), '入模数据.csv'), encoding='gbk')


if __name__ == '__main__':
    main()