You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
80 lines
2.5 KiB
Python
80 lines
2.5 KiB
Python
1 year ago
|
import pandas as pd
|
||
|
import datetime
|
||
|
from sklearn.preprocessing import LabelEncoder
|
||
|
import os
|
||
|
import math
|
||
|
import chinese_calendar as cc
|
||
|
# Quick sanity check of the chinese_calendar lookup for a single fixed date.
# NOTE(review): despite its name, `is_holiday` stores the result of
# cc.is_workday(), i.e. it is True when the date is a working day.
is_holiday = cc.is_workday(datetime.date(year=2023, month=10, day=7))
print(is_holiday)
|
||
|
|
||
|
def holiday_work(x):
    """Return a 0/1 day-off flag for a date using the chinese_calendar package.

    Args:
        x: A date or datetime value that ``cc.is_workday`` accepts.

    Returns:
        0 when ``x`` is a working day, 1 otherwise (holiday or rest day).
    """
    # chinese_calendar defines is_holiday() as the negation of is_workday(),
    # so a single lookup decides the flag and the function can never fall
    # through and return None.
    return 0 if cc.is_workday(x) else 1
|
||
|
|
||
|
# Read the first sheet of the weather workbook (absolute, machine-specific path).
tq_df = pd.read_excel(r'C:\Users\鸽子\Desktop\杭州\杭州气象数据.xlsx', sheet_name=0)
pd.set_option('display.width', None)

# Strip surrounding whitespace from the column labels so the names used
# below match exactly.
tq_df.columns = [label.strip() for label in tq_df.columns]

# Keep only the columns of interest, one row per date (first occurrence wins),
# ordered by date with a fresh 0..n-1 index.
tq_df = (
    tq_df[['city_name', 'dtdate', 'tem_max', 'tem_min', 'rh', 'rh_max', 'rh_min',
           'pre', 'prs', 'prs_max', 'prs_min', 'win_s_max', 'win_s_min']]
    .drop_duplicates(subset='dtdate')
    .sort_values(by='dtdate', ascending=True)
    .reset_index(drop=True)
)

# Parse the YYYYMMDD date values and derive the workday/holiday flag
# via holiday_work.
tq_df['dtdate'] = pd.to_datetime(tq_df['dtdate'], format='%Y%m%d')
tq_df['holiday'] = tq_df['dtdate'].apply(holiday_work)

print(tq_df.columns)
print(tq_df.head())

# DataFrame.info() writes its report itself and returns None, so this call
# also prints a trailing "None".
print(tq_df.info())
|
||
|
def jq(y,x):
    """Approximate the calendar date of a solar term.

    Args:
        y: Gregorian year.
        x: Index of the solar term within the year; the rest of this script
            calls it with 0..23, matching the order of ``jq_list``
            (0 = 小寒, 23 = 冬至).

    Returns:
        A ``datetime.date`` obtained by adding the truncated day offset to
        1899-12-31.
    """
    # Empirical day-offset formula; the constants are kept exactly as given.
    day_offset = (
        365.242 * (y - 1900)
        + 6.2
        + 15.22 * x
        - 1.9 * math.sin(0.262 * x)
    )
    base_day = datetime.date(1899, 12, 31)
    # int() truncates the fractional part of the offset.
    return base_day + datetime.timedelta(days=int(day_offset))
|
||
|
# print(jq(2020,0))
|
||
|
# Names of the 24 solar terms in calendar order, starting with the first
# term of the Gregorian year.
jq_list = [
    '小寒', '大寒', '立春', '雨水', '惊蛰', '春分',
    '清明', '谷雨', '立夏', '小满', '芒种', '夏至',
    '小暑', '大暑', '立秋', '处暑', '白露', '秋分',
    '寒露', '霜降', '立冬', '小雪', '大雪', '冬至',
]

# Lookup table: 'YYYY-MM-DD' start date of each term -> term name,
# covering the years 2019 through 2023.
jq_dict = {
    jq(year, term_no).strftime('%Y-%m-%d'): jq_list[term_no]
    for year in range(2019, 2024)
    for term_no in range(24)
}
print(jq_dict)
|
||
|
|
||
|
# Build the solar-term feature '24ST'.
# Render each date as 'YYYY-MM-DD' text and look it up in jq_dict; only the
# days on which a term starts get a name, every other day becomes missing.
tq_df['24ST'] = tq_df['dtdate'].astype('string').map(jq_dict)

# Carry each term forward until the next one starts, then label the leading
# rows that precede the first mapped term as '冬至'.
# The result is assigned back instead of calling
# fillna(method='ffill', inplace=True) on the column selection: `method=` is
# deprecated in pandas 2.x, and an in-place fill on a selected column does not
# update the frame under copy-on-write.
tq_df['24ST'] = tq_df['24ST'].ffill().fillna('冬至')

# Replace the term names with integer class labels.
# NOTE(review): LabelEncoder numbers classes in sorted order of the strings,
# not in calendar order — confirm that is acceptable for the model.
le = LabelEncoder()
tq_df['24ST'] = le.fit_transform(tq_df['24ST'])
print(tq_df)
|
||
|
|
||
|
# Write the prepared weather table to the current working directory.
file2=os.getcwd()+'/气象数据.csv'
try:
    tq_df.to_csv(file2,encoding='gbk')
except UnicodeEncodeError:
    # Some character cannot be represented in GBK; rewrite the file as UTF-8.
    # Only encoding failures trigger the fallback, so unrelated errors
    # (permissions, missing directory, Ctrl-C) are no longer swallowed.
    tq_df.to_csv(file2,encoding='utf-8')
|
||
|
|
||
|
# Read the daily electricity workbook (absolute, machine-specific path).
elec_df = pd.read_excel(r'C:\Users\鸽子\Desktop\杭州\杭州日电量.xlsx')

# Scale the sales column down by 10,000 and round each value to 2 decimals.
elec_df['售电量'] = elec_df['售电量'].div(10000).map(lambda value: round(value, 2))

print(elec_df.columns)

# The day column label includes its surrounding spaces (' 天 ').
# Rows are ordered by the raw day values first, then the column is parsed
# from YYYYMMDD into datetimes.
elec_df = elec_df.sort_values(by=' 天 ', ascending=True)
elec_df[' 天 '] = pd.to_datetime(elec_df[' 天 '], format='%Y%m%d')

# DataFrame.info() writes its report itself and returns None, so this call
# also prints a trailing "None".
print(elec_df.info())
print(elec_df)
|
||
|
|
||
|
# Join weather and electricity rows on the date (default inner join, so only
# dates present in both tables survive).
df = pd.merge(tq_df, elec_df, left_on='dtdate', right_on=' 天 ')

# The right-hand key duplicates 'dtdate' after the merge; drop it and index
# the result by date.
df.drop(columns=' 天 ', inplace=True)
df.set_index('dtdate', inplace=True, drop=True)
print(df)

# Build the output path with os.path.join rather than concatenating
# '\入模数据.csv': '\入' is an invalid escape sequence in a normal string
# literal, and a hard-coded backslash only works as a separator on Windows.
df.to_csv(os.path.join(os.getcwd(), '入模数据.csv'), encoding='gbk')
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|