输出预测结果
parent
6049161ed9
commit
765dba1ed1
@ -1,4 +1,4 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<project version="4">
|
<project version="4">
|
||||||
<component name="ProjectRootManager" version="2" project-jdk-name="pytorch_gpu" project-jdk-type="Python SDK" />
|
<component name="ProjectRootManager" version="2" project-jdk-name="C:\anaconda\envs\pytorch" project-jdk-type="Python SDK" />
|
||||||
</project>
|
</project>
|
@ -0,0 +1,70 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import xgboost as xgb
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn.metrics import r2_score
|
||||||
|
def normal(x):
    """Return the values of *x* that pass the 1.5 x IQR outlier rule.

    The 25% and 75% quartiles are read from ``x.describe()``.  A value is
    kept when it lies inside
    ``[q25 - 1.5 * (q75 - q25), q75 + 1.5 * (q75 - q25)]`` (both bounds
    inclusive).

    Args:
        x: A numeric pandas Series.

    Returns:
        The filtered Series; surviving values keep their original index
        labels, so the result's index can be used to select matching rows
        of a parent frame.
    """
    # describe() computes every summary statistic, so call it once instead
    # of once per quartile lookup.
    stats = x.describe()
    q25 = stats['25%']
    q75 = stats['75%']
    high = q75 + 1.5*(q75-q25)
    low = q25 - 1.5*(q75-q25)
    return x[(x <= high) & (x >= low)]
|
||||||
|
|
||||||
|
def season(x):
    """Map a date-like value to a month-group code (0, 1 or 2).

    The month is taken as characters 5-6 of ``str(x)``, i.e. the ``MM`` part
    of a ``YYYY-MM-DD...`` rendering.

    Args:
        x: Any object whose string form carries a two-digit month at
            positions 5-6 (for example a timestamp or an ISO date string).

    Returns:
        0 for April and May, 1 for January-March, June and
        September-December, and 2 for everything else (July, August, or a
        string that has no recognisable month at that position).
    """
    # NOTE(review): the grouping looks like the outcome of the monthly
    # quartile analysis kept in the commented-out block at the end of this
    # script -- confirm before changing it.
    month_code = {'04': 0, '05': 0}
    for mid_month in ('01', '02', '03', '06', '09', '10', '11', '12'):
        month_code[mid_month] = 1
    # Unlisted months fall through to 2, exactly like the original else-branch.
    return month_code.get(str(x)[5:7], 2)
|
||||||
|
|
||||||
|
# ---- Daily electricity series for Hangzhou ---------------------------------
df = pd.read_excel('./浙江各地市分电压日电量数据/杭州 .xlsx')
# .copy() gives an independent frame, so the column assignments below do not
# write into a slice of the workbook frame (avoids SettingWithCopyWarning).
df = df[['stat_date', '0.4kv及以下']].copy()
# Rescale the '0.4kv及以下' (0.4 kV and below) column by a factor of 10,000.
df['0.4kv及以下'] = df['0.4kv及以下'] / 10000
# The date strings are stripped of surrounding whitespace before parsing.
df['stat_date'] = df['stat_date'].map(lambda x: x.strip())
df['stat_date'] = pd.to_datetime(df['stat_date'])

# ---- Temperature / holiday features for the same city ----------------------
df_qw = pd.read_excel(r'C:\python-project\p1031\入模数据\杭州.xlsx')
# Column headers are stripped so the fixed names below match.
df_qw.columns = df_qw.columns.map(lambda x: x.strip())
df_qw = df_qw[['dtdate', 'tem_max', 'tem_min', 'holiday', '24ST']].copy()
df_qw['dtdate'] = pd.to_datetime(df_qw['dtdate'])

# Left join keeps every electricity row even when no feature row matches.
df = pd.merge(df, df_qw, left_on='stat_date', right_on='dtdate', how='left')
df.drop(columns='dtdate', inplace=True)
df.set_index('stat_date', inplace=True)
df['season'] = df.index.map(season)
# Keep only the rows whose target value passes the outlier filter.
df = df.loc[normal(df['0.4kv及以下']).index]

print(df.head())

# ---- Train on 2022-07 .. 2023-07, evaluate on 2023-08 ----------------------
train_window = df.loc['2022-7':'2023-7']
x_train = train_window.drop(columns='0.4kv及以下')
y_train = train_window['0.4kv及以下']
eval_window = df.loc['2023-8']
x_eval = eval_window.drop(columns='0.4kv及以下')
y_eval = eval_window['0.4kv及以下']

# Random 80/20 split of the training window; the held-out 20% is only used
# for the R^2 score printed below.
x_train, x_test, y_train, y_test = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
print(r2_score(y_test, y_pred))

# ---- Predict the evaluation month and compare with the actual values -------
predict = model.predict(x_eval)
result = pd.DataFrame({'eval': y_eval, 'pred': predict}, index=y_eval.index)
print(result)
# Difference between actual and predicted totals over the last three rows,
# divided by the actual total over all evaluation rows.
# NOTE(review): numerator and denominator cover different row ranges --
# confirm this is the intended metric.
last3_gap = result['eval'].iloc[-3:].sum() - result['pred'].iloc[-3:].sum()
print(last3_gap / result['eval'].sum())
|
||||||
|
|
||||||
|
# list2 = []
|
||||||
|
# list0 = []
|
||||||
|
# list1 = []
|
||||||
|
# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'):
|
||||||
|
# month_index = df.index.strftime('%Y-%m-%d').str[5:7] == f'{i}'
|
||||||
|
# print(df.loc[month_index]['0.4kv及以下'].max(),df['0.4kv及以下'].describe()['75%'])
|
||||||
|
# if df.loc[month_index]['0.4kv及以下'].mean() >= df['0.4kv及以下'].describe()['75%']:
|
||||||
|
# list2.append(i)
|
||||||
|
# elif df.loc[month_index]['0.4kv及以下'].mean() <= df['0.4kv及以下'].describe()['25%']:
|
||||||
|
# list0.append(i)
|
||||||
|
# else:
|
||||||
|
# list1.append(i)
|
||||||
|
# print(list0,list1,list2)
|
||||||
|
|
@ -0,0 +1,65 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import os
|
||||||
|
|
||||||
|
def normal(x):
    """Filter a numeric Series with the 1.5 x IQR outlier rule.

    Quartiles come from ``x.describe()``; values outside
    ``[q25 - 1.5 * IQR, q75 + 1.5 * IQR]`` (where ``IQR = q75 - q25``) are
    dropped.  Both bounds are inclusive.

    Args:
        x: A numeric pandas Series.

    Returns:
        The Series restricted to the in-range values, with their original
        index labels preserved.
    """
    # One describe() call is enough; the original recomputed the full
    # summary for each of its four quartile lookups.
    summary = x.describe()
    lower_q, upper_q = summary['25%'], summary['75%']
    high = upper_q + 1.5*(upper_q-lower_q)
    low = lower_q - 1.5*(upper_q-lower_q)
    return x[(x <= high) & (x >= low)]
|
||||||
|
|
||||||
|
|
||||||
|
# Folder with one daily-electricity workbook per city.
fir_dir = './浙江各地市分电压日电量数据'
# Folder with the per-city workbooks providing temperature / holiday columns.
# Raw string: same value as the original literal, but it no longer relies on
# '\p' being an unrecognised escape (a SyntaxWarning on recent Pythons).
qw_dir = r'C:\python-project\p1031\入模数据'

# Integer code per city, looked up by the first two characters of the
# electricity workbook's file name.  Built once instead of on every iteration.
dict1 = {'杭州':0,'湖州':1,'嘉兴':2,'金华':3,'丽水':4,'宁波':5,'衢州':6,'绍兴':7,'台州':8,'温州':9,'舟山':10}

result = pd.DataFrame({})
city_frames = []

# os.listdir() returns names in arbitrary order, so zipping two raw listings
# can pair one city's electricity file with another city's feature file.
# Sorting both makes the pairing deterministic.
# NOTE(review): this still assumes both folders contain the same cities under
# names that sort alike -- confirm against the actual directories.
for excel, qw_excel in zip(sorted(os.listdir(fir_dir)), sorted(os.listdir(qw_dir))):

    # ---- Electricity series for this city ----
    df_city = pd.read_excel(os.path.join(fir_dir, excel))
    # .copy() so the assignments below write to an independent frame rather
    # than a slice of the workbook frame (avoids SettingWithCopyWarning).
    df_city = df_city[['stat_date', '0.4kv及以下']].copy()
    # Rescale the '0.4kv及以下' (0.4 kV and below) column by 10,000.
    df_city['0.4kv及以下'] = df_city['0.4kv及以下'] / 10000
    df_city['stat_date'] = df_city['stat_date'].map(lambda x: x.strip())
    df_city['stat_date'] = pd.to_datetime(df_city['stat_date'])

    # ---- Temperature / holiday features for this city ----
    df_qw = pd.read_excel(os.path.join(qw_dir, qw_excel))
    df_qw.columns = df_qw.columns.map(lambda x: x.strip())
    df_qw = df_qw[['dtdate', 'tem_max', 'tem_min', 'holiday', '24ST']].copy()
    df_qw['dtdate'] = pd.to_datetime(df_qw['dtdate'])

    # Left join keeps every electricity row even without a feature match.
    df = pd.merge(df_city, df_qw, left_on='stat_date', right_on='dtdate', how='left')
    df.drop(columns='dtdate', inplace=True)
    df.set_index('stat_date', inplace=True)

    # ---- Classify each calendar month against the city's own quartiles ----
    # The quartiles and the per-row month labels do not change inside the
    # month loop, so compute them once per city.
    quartiles = df['0.4kv及以下'].describe()
    q25, q75 = quartiles['25%'], quartiles['75%']
    row_month = df.index.strftime('%Y-%m-%d').str[5:7]

    list2 = []  # months whose mean is >= the 75% quartile
    list0 = []  # months whose mean is <= the 25% quartile
    list1 = []  # every other month (including months with no rows)
    for i in ('01','02','03','04','05','06','07','08','09','10','11','12'):
        month_index = row_month == f'{i}'
        month_mean = df.loc[month_index]['0.4kv及以下'].mean()
        if month_mean >= q75:
            list2.append(i)
        elif month_mean <= q25:
            list0.append(i)
        else:
            list1.append(i)

    def season(x):
        """Return 0, 1 or 2 for the month group of *x* in the current city.

        The month is characters 5-6 of ``str(x)``; anything not found in
        ``list0`` or ``list1`` maps to 2.
        """
        if str(x)[5:7] in list0:
            return 0
        elif str(x)[5:7] in list1:
            return 1
        else:
            return 2

    df['season'] = df.index.map(season)
    df['city'] = dict1[excel[:2]]
    df.reset_index(inplace=True)
    city_frames.append(df)

# pd.concat() needs an iterable of frames as its first argument; the original
# call pd.concat(result, df) raised TypeError.  Concatenating once after the
# loop also avoids re-copying the growing table on every iteration.
# ignore_index=True gives the combined table a fresh 0..n-1 index instead of
# repeating each city's own row numbers.
if city_frames:
    result = pd.concat(city_frames, ignore_index=True)

# Prints the frame of the last processed city; the combined table of all
# cities is in `result`.
print(df)
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue