输出预测结果
parent
967f1e5edf
commit
94b7b0f01f
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1,75 +0,0 @@
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import xgboost as xgb
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.metrics import r2_score
|
||||
def normal(x):
    """Return the values of *x* that lie inside the 1.5 * IQR fences.

    The fences are computed from the 25% and 75% entries of ``x.describe()``:
    ``low = q1 - 1.5 * (q3 - q1)`` and ``high = q3 + 1.5 * (q3 - q1)``.
    Both bounds are inclusive, and the original index labels of the
    retained values are preserved.

    Args:
        x: A pandas Series whose ``describe()`` output contains the
            '25%' and '75%' entries.

    Returns:
        The sub-Series of *x* whose values satisfy ``low <= value <= high``.
    """
    # describe() is evaluated once; both quartiles are read from that summary.
    summary = x.describe()
    q1 = summary['25%']
    q3 = summary['75%']
    iqr = q3 - q1
    high = q3 + 1.5 * iqr
    low = q1 - 1.5 * iqr
    return x[(x <= high) & (x >= low)]
|
||||
|
||||
def season(x):
    """Map a date-like value to a category code (0, 1 or 2) by its month.

    The month is taken as characters 5-6 of ``str(x)``, so *x* is expected
    to render as ``YYYY-MM-...``.

    Args:
        x: Any object whose string form carries the two-digit month at
            positions 5 and 6.

    Returns:
        0 for months 04, 05, 06 and 11; 1 for months 01, 02, 03, 09, 10
        and 12; 2 for anything else (months 07 and 08, or a string that
        does not match the expected layout).
    """
    # Extract the month digits once instead of re-slicing per branch.
    month = str(x)[5:7]
    if month in ('04', '05', '06', '11'):
        return 0
    if month in ('01', '02', '03', '09', '10', '12'):
        return 1
    return 2
|
||||
|
||||
# Load the daily electricity table and keep only the date and the
# '0.4kv及以下' column.
df = pd.read_excel('./浙江各地市分电压日电量数据/衢州 .xlsx')
# .copy() gives an independent frame, so the column assignments below do not
# write into a selection of the original (avoids SettingWithCopyWarning).
df = df[['stat_date', '0.4kv及以下']].copy()
df['0.4kv及以下'] = df['0.4kv及以下'] / 10000  # rescale the target by 1e4
# Dates arrive as strings that may carry surrounding whitespace.
df['stat_date'] = df['stat_date'].map(lambda x: x.strip())
df['stat_date'] = pd.to_datetime(df['stat_date'])

# Load the feature table.
# NOTE(review): absolute, machine-specific path — confirm it exists where
# this script runs.
df_qw = pd.read_excel(r'C:\python-project\p1031\入模数据\衢州.xlsx')
# Column headers may carry surrounding whitespace as well.
df_qw.columns = df_qw.columns.map(lambda x: x.strip())
df_qw = df_qw[['dtdate', 'tem_max', 'tem_min', 'holiday', '24ST']].copy()
df_qw['dtdate'] = pd.to_datetime(df_qw['dtdate'])

# Left-join the features onto the target by date, then index by date.
df = pd.merge(df, df_qw, left_on='stat_date', right_on='dtdate', how='left')
df.drop(columns='dtdate', inplace=True)
df.set_index('stat_date', inplace=True)

# Exploratory code (disabled): bucket each calendar month by comparing its
# mean target value with the overall 25% / 75% quartiles.
# NOTE(review): presumably the origin of the month groups used in season() —
# confirm.
# list2 = []
# list0 = []
# list1 = []
# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'):
#     month_index = df.index.strftime('%Y-%m-%d').str[5:7] == f'{i}'
#     # print(df.loc[month_index]['0.4kv及以下'].max(),df['0.4kv及以下'].describe()['75%'])
#     if df.loc[month_index]['0.4kv及以下'].mean() >= df['0.4kv及以下'].describe()['75%']:
#         list2.append(i)
#     elif df.loc[month_index]['0.4kv及以下'].mean() <= df['0.4kv及以下'].describe()['25%']:
#         list0.append(i)
#     else:
#         list1.append(i)
# print(list0,list1,list2)

# Add the month-based category feature, then keep only the rows whose target
# value passes the IQR filter in normal().
df['season'] = df.index.map(season)
df = df.loc[normal(df['0.4kv及以下']).index]

# Training window: 2021-07 through 2023-09 with the last three rows held back.
train_window = df.loc['2021-7':'2023-9'][:-3]
x_train = train_window.drop(columns='0.4kv及以下')
y_train = train_window['0.4kv及以下']

# Evaluation set: every remaining row of 2023-09.
# NOTE(review): assuming a chronologically ordered index, all but the last
# three of these rows are also inside the training window.
eval_month = df.loc['2023-9']
x_eval = eval_month.drop(columns='0.4kv及以下')
y_eval = eval_month['0.4kv及以下']

# Random 80/20 split of the training window; the held-out part is used only
# for the R^2 printout below.
x_train, x_test, y_train, y_test = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42
)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
print(r2_score(y_test, y_pred))

# Actual vs. predicted values for the evaluation month.
predict = model.predict(x_eval)
result = pd.DataFrame({'eval': y_eval, 'pred': predict}, index=y_eval.index)
print(result)
# Signed error over the last three rows, relative to the month's actual total.
print((result['eval'][-3:].sum() - result['pred'][-3:].sum()) / result['eval'].sum())
|
||||
|
||||
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,29 @@
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import xgboost as xgb
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.metrics import r2_score
|
||||
|
||||
|
||||
# Load the prepared modelling table '丽水.xlsx' (Lishui), indexed by date.
df = pd.read_excel('../400v入模数据/丽水.xlsx', index_col='stat_date')
df.index = pd.to_datetime(df.index)

# Training window: 2021-01 through 2023-07 with the last three rows held back.
history = df.loc['2021-1':'2023-7'][:-3]
x_train = history.drop(columns='0.4kv及以下')
y_train = history['0.4kv及以下']

# Evaluation set: every row of 2023-07.
# NOTE(review): assuming a chronologically ordered index, all but the last
# three of these rows are also inside the training window.
eval_month = df.loc['2023-7']
x_eval = eval_month.drop(columns='0.4kv及以下')
y_eval = eval_month['0.4kv及以下']

# Random 80/20 split of the training window; the held-out part is used only
# for the R^2 printout below.
x_train, x_test, y_train, y_test = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42
)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
print(r2_score(y_test, y_pred))

# Actual vs. predicted values for the evaluation month.
predict = model.predict(x_eval)
result = pd.DataFrame({'eval': y_eval, 'pred': predict}, index=y_eval.index)
print(result)

# Signed error over the last three rows, relative to the month's actual total.
actual_tail = result['eval'][-3:].sum()
pred_tail = result['pred'][-3:].sum()
print((actual_tail - pred_tail) / result['eval'].sum())
|
||||
|
||||
|
||||
|
@ -0,0 +1,30 @@
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import xgboost as xgb
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.metrics import r2_score
|
||||
|
||||
|
||||
|
||||
# Load the prepared modelling table '台州.xlsx' (Taizhou), indexed by date.
df = pd.read_excel('../400v入模数据/台州.xlsx', index_col='stat_date')
df.index = pd.to_datetime(df.index)

# Training window: 2022-07 through 2023-07 with the last three rows held back.
history = df.loc['2022-7':'2023-7'][:-3]
x_train = history.drop(columns='0.4kv及以下')
y_train = history['0.4kv及以下']

# Evaluation set: every row of 2023-07.
# NOTE(review): assuming a chronologically ordered index, all but the last
# three of these rows are also inside the training window.
eval_month = df.loc['2023-7']
x_eval = eval_month.drop(columns='0.4kv及以下')
y_eval = eval_month['0.4kv及以下']

# Random 80/20 split of the training window; the held-out part is used only
# for the R^2 printout below.
x_train, x_test, y_train, y_test = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42
)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
print(r2_score(y_test, y_pred))

# Actual vs. predicted values for the evaluation month.
predict = model.predict(x_eval)
result = pd.DataFrame({'eval': y_eval, 'pred': predict}, index=y_eval.index)
print(result)

# Signed error over the last three rows, relative to the month's actual total.
actual_tail = result['eval'][-3:].sum()
pred_tail = result['pred'][-3:].sum()
print((actual_tail - pred_tail) / result['eval'].sum())
|
||||
|
||||
|
||||
|
@ -0,0 +1,28 @@
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import xgboost as xgb
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.metrics import r2_score
|
||||
|
||||
# Load the prepared modelling table '嘉兴.xlsx' (Jiaxing), indexed by date.
df = pd.read_excel('../400v入模数据/嘉兴.xlsx', index_col='stat_date')
df.index = pd.to_datetime(df.index)

# Training window: 2022-07 through 2023-07 with the last three rows held back.
history = df.loc['2022-7':'2023-7'][:-3]
x_train = history.drop(columns='0.4kv及以下')
y_train = history['0.4kv及以下']

# Evaluation set: every row of 2023-07.
# NOTE(review): assuming a chronologically ordered index, all but the last
# three of these rows are also inside the training window.
eval_month = df.loc['2023-7']
x_eval = eval_month.drop(columns='0.4kv及以下')
y_eval = eval_month['0.4kv及以下']

# Random 80/20 split of the training window; the held-out part is used only
# for the R^2 printout below.
x_train, x_test, y_train, y_test = train_test_split(
    x_train, y_train, test_size=0.2, random_state=158
)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
print(r2_score(y_test, y_pred))

# Actual vs. predicted values for the evaluation month.
predict = model.predict(x_eval)
result = pd.DataFrame({'eval': y_eval, 'pred': predict}, index=y_eval.index)
print(result)

# Signed error over the last three rows, relative to the month's actual total.
actual_tail = result['eval'][-3:].sum()
pred_tail = result['pred'][-3:].sum()
print((actual_tail - pred_tail) / result['eval'].sum())
|
||||
|
||||
|
||||
|
@ -0,0 +1,28 @@
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import xgboost as xgb
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.metrics import r2_score
|
||||
|
||||
# Load the prepared modelling table '宁波.xlsx' (Ningbo), indexed by date.
df = pd.read_excel('../400v入模数据/宁波.xlsx', index_col='stat_date')
df.index = pd.to_datetime(df.index)

# Training window: 2022-07 through 2023-10 with the last three rows held back.
history = df.loc['2022-7':'2023-10'][:-3]
x_train = history.drop(columns='0.4kv及以下')
y_train = history['0.4kv及以下']

# Evaluation set: every row of 2023-10.
# NOTE(review): assuming a chronologically ordered index, all but the last
# three of these rows are also inside the training window.
eval_month = df.loc['2023-10']
x_eval = eval_month.drop(columns='0.4kv及以下')
y_eval = eval_month['0.4kv及以下']

# Random 80/20 split of the training window; the held-out part is used only
# for the R^2 printout below.
x_train, x_test, y_train, y_test = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42
)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
print(r2_score(y_test, y_pred))

# Actual vs. predicted values for the evaluation month.
predict = model.predict(x_eval)
result = pd.DataFrame({'eval': y_eval, 'pred': predict}, index=y_eval.index)
print(result)

# Signed error over the last three rows, relative to the month's actual total.
actual_tail = result['eval'][-3:].sum()
pred_tail = result['pred'][-3:].sum()
print((actual_tail - pred_tail) / result['eval'].sum())
|
||||
|
||||
|
||||
|
@ -0,0 +1,28 @@
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import xgboost as xgb
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.metrics import r2_score
|
||||
|
||||
# Load the prepared modelling table '温州.xlsx' (Wenzhou), indexed by date.
df = pd.read_excel('../400v入模数据/温州.xlsx', index_col='stat_date')
df.index = pd.to_datetime(df.index)

# Training window: 2022-07 through 2023-07 with the last three rows held back.
history = df.loc['2022-7':'2023-7'][:-3]
x_train = history.drop(columns='0.4kv及以下')
y_train = history['0.4kv及以下']

# Evaluation set: every row of 2023-07.
# NOTE(review): assuming a chronologically ordered index, all but the last
# three of these rows are also inside the training window.
eval_month = df.loc['2023-7']
x_eval = eval_month.drop(columns='0.4kv及以下')
y_eval = eval_month['0.4kv及以下']

# Random 80/20 split of the training window; the held-out part is used only
# for the R^2 printout below.
x_train, x_test, y_train, y_test = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42
)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
print(r2_score(y_test, y_pred))

# Actual vs. predicted values for the evaluation month.
predict = model.predict(x_eval)
result = pd.DataFrame({'eval': y_eval, 'pred': predict}, index=y_eval.index)
print(result)

# Signed error over the last three rows, relative to the month's actual total.
actual_tail = result['eval'][-3:].sum()
pred_tail = result['pred'][-3:].sum()
print((actual_tail - pred_tail) / result['eval'].sum())
|
||||
|
||||
|
||||
|
@ -0,0 +1,28 @@
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import xgboost as xgb
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.metrics import r2_score
|
||||
|
||||
# Load the prepared modelling table '湖州.xlsx' (Huzhou), indexed by date.
df = pd.read_excel('../400v入模数据/湖州.xlsx', index_col='stat_date')
df.index = pd.to_datetime(df.index)

# Training window: 2021-01 through 2023-10 with the last three rows held back.
history = df.loc['2021-1':'2023-10'][:-3]
x_train = history.drop(columns='0.4kv及以下')
y_train = history['0.4kv及以下']

# Evaluation set: every row of 2023-10.
# NOTE(review): assuming a chronologically ordered index, all but the last
# three of these rows are also inside the training window.
eval_month = df.loc['2023-10']
x_eval = eval_month.drop(columns='0.4kv及以下')
y_eval = eval_month['0.4kv及以下']

# Random 80/20 split of the training window; the held-out part is used only
# for the R^2 printout below.
x_train, x_test, y_train, y_test = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42
)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
print(r2_score(y_test, y_pred))

# Actual vs. predicted values for the evaluation month.
predict = model.predict(x_eval)
result = pd.DataFrame({'eval': y_eval, 'pred': predict}, index=y_eval.index)
print(result)

# Signed error over the last three rows, relative to the month's actual total.
actual_tail = result['eval'][-3:].sum()
pred_tail = result['pred'][-3:].sum()
print((actual_tail - pred_tail) / result['eval'].sum())
|
||||
|
||||
|
||||
|
@ -0,0 +1,28 @@
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import xgboost as xgb
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.metrics import r2_score
|
||||
|
||||
# Load the prepared modelling table '绍兴.xlsx' (Shaoxing), indexed by date.
df = pd.read_excel('../400v入模数据/绍兴.xlsx', index_col='stat_date')
df.index = pd.to_datetime(df.index)

# Training window: 2021-01 through 2023-10 with the last three rows held back.
history = df.loc['2021-1':'2023-10'][:-3]
x_train = history.drop(columns='0.4kv及以下')
y_train = history['0.4kv及以下']

# Evaluation set: every row of 2023-10.
# NOTE(review): assuming a chronologically ordered index, all but the last
# three of these rows are also inside the training window.
eval_month = df.loc['2023-10']
x_eval = eval_month.drop(columns='0.4kv及以下')
y_eval = eval_month['0.4kv及以下']

# Random 80/20 split of the training window; the held-out part is used only
# for the R^2 printout below.
x_train, x_test, y_train, y_test = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42
)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
print(r2_score(y_test, y_pred))

# Actual vs. predicted values for the evaluation month.
predict = model.predict(x_eval)
result = pd.DataFrame({'eval': y_eval, 'pred': predict}, index=y_eval.index)
print(result)

# Signed error over the last three rows, relative to the month's actual total.
actual_tail = result['eval'][-3:].sum()
pred_tail = result['pred'][-3:].sum()
print((actual_tail - pred_tail) / result['eval'].sum())
|
||||
|
||||
|
||||
|
@ -0,0 +1,28 @@
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import xgboost as xgb
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.metrics import r2_score
|
||||
|
||||
# Load the prepared modelling table '舟山.xlsx' (Zhoushan), indexed by date.
df = pd.read_excel('../400v入模数据/舟山.xlsx', index_col='stat_date')
df.index = pd.to_datetime(df.index)

# Training window: 2022-01 through 2023-09 with the last three rows held back.
history = df.loc['2022-1':'2023-9'][:-3]
x_train = history.drop(columns='0.4kv及以下')
y_train = history['0.4kv及以下']

# Evaluation set: every row of 2023-09.
# NOTE(review): assuming a chronologically ordered index, all but the last
# three of these rows are also inside the training window.
eval_month = df.loc['2023-9']
x_eval = eval_month.drop(columns='0.4kv及以下')
y_eval = eval_month['0.4kv及以下']

# Random 80/20 split of the training window; the held-out part is used only
# for the R^2 printout below.
x_train, x_test, y_train, y_test = train_test_split(
    x_train, y_train, test_size=0.2, random_state=158
)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
print(r2_score(y_test, y_pred))

# Actual vs. predicted values for the evaluation month.
predict = model.predict(x_eval)
result = pd.DataFrame({'eval': y_eval, 'pred': predict}, index=y_eval.index)
print(result)

# Signed error over the last three rows, relative to the month's actual total.
actual_tail = result['eval'][-3:].sum()
pred_tail = result['pred'][-3:].sum()
print((actual_tail - pred_tail) / result['eval'].sum())
|
||||
|
||||
|
||||
|
@ -0,0 +1,40 @@
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import xgboost as xgb
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.metrics import r2_score
|
||||
def normal(x):
    """Return the values of *x* that lie inside the 1.5 * IQR fences.

    The fences are computed from the 25% and 75% entries of ``x.describe()``:
    ``low = q1 - 1.5 * (q3 - q1)`` and ``high = q3 + 1.5 * (q3 - q1)``.
    Both bounds are inclusive, and the original index labels of the
    retained values are preserved.

    Args:
        x: A pandas Series whose ``describe()`` output contains the
            '25%' and '75%' entries.

    Returns:
        The sub-Series of *x* whose values satisfy ``low <= value <= high``.
    """
    # describe() is evaluated once; both quartiles are read from that summary.
    summary = x.describe()
    q1 = summary['25%']
    q3 = summary['75%']
    iqr = q3 - q1
    high = q3 + 1.5 * iqr
    low = q1 - 1.5 * iqr
    return x[(x <= high) & (x >= low)]
|
||||
|
||||
def season(x):
    """Map a date-like value to a category code (0, 1 or 2) by its month.

    The month is taken as characters 5-6 of ``str(x)``, so *x* is expected
    to render as ``YYYY-MM-...``.

    Args:
        x: Any object whose string form carries the two-digit month at
            positions 5 and 6.

    Returns:
        0 for months 04, 05, 06 and 11; 1 for months 01, 02, 03, 09, 10
        and 12; 2 for anything else (months 07 and 08, or a string that
        does not match the expected layout).
    """
    # Extract the month digits once instead of re-slicing per branch.
    month = str(x)[5:7]
    if month in ('04', '05', '06', '11'):
        return 0
    if month in ('01', '02', '03', '09', '10', '12'):
        return 1
    return 2
|
||||
|
||||
# Load the prepared modelling table '衢州.xlsx' (Quzhou), indexed by date.
df = pd.read_excel('../400v入模数据/衢州.xlsx', index_col='stat_date')
df.index = pd.to_datetime(df.index)

# Training window: 2021-07 through 2023-07 with the last three rows held back.
history = df.loc['2021-7':'2023-7'][:-3]
x_train = history.drop(columns='0.4kv及以下')
y_train = history['0.4kv及以下']

# Evaluation set: every row of 2023-07.
# NOTE(review): assuming a chronologically ordered index, all but the last
# three of these rows are also inside the training window.
eval_month = df.loc['2023-7']
x_eval = eval_month.drop(columns='0.4kv及以下')
y_eval = eval_month['0.4kv及以下']

# Random 80/20 split of the training window; the held-out part is used only
# for the R^2 printout below.
x_train, x_test, y_train, y_test = train_test_split(
    x_train, y_train, test_size=0.2, random_state=142
)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
print(r2_score(y_test, y_pred))

# Actual vs. predicted values for the evaluation month.
predict = model.predict(x_eval)
result = pd.DataFrame({'eval': y_eval, 'pred': predict}, index=y_eval.index)
print(result)

# Signed error over the last three rows, relative to the month's actual total.
actual_tail = result['eval'][-3:].sum()
pred_tail = result['pred'][-3:].sum()
print((actual_tail - pred_tail) / result['eval'].sum())
|
||||
|
||||
|
||||
|
@ -0,0 +1,28 @@
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import xgboost as xgb
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.metrics import r2_score
|
||||
|
||||
# Load the prepared modelling table '金华.xlsx' (Jinhua), indexed by date.
df = pd.read_excel('../400v入模数据/金华.xlsx', index_col='stat_date')
df.index = pd.to_datetime(df.index)

# Training window: 2022-01 through 2023-10 with the last three rows held back.
history = df.loc['2022-1':'2023-10'][:-3]
x_train = history.drop(columns='0.4kv及以下')
y_train = history['0.4kv及以下']

# Evaluation set: every row of 2023-10.
# NOTE(review): assuming a chronologically ordered index, all but the last
# three of these rows are also inside the training window.
eval_month = df.loc['2023-10']
x_eval = eval_month.drop(columns='0.4kv及以下')
y_eval = eval_month['0.4kv及以下']

# Random 80/20 split of the training window; the held-out part is used only
# for the R^2 printout below.
x_train, x_test, y_train, y_test = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42
)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
print(r2_score(y_test, y_pred))

# Actual vs. predicted values for the evaluation month.
predict = model.predict(x_eval)
result = pd.DataFrame({'eval': y_eval, 'pred': predict}, index=y_eval.index)
print(result)

# Signed error over the last three rows, relative to the month's actual total.
actual_tail = result['eval'][-3:].sum()
pred_tail = result['pred'][-3:].sum()
print((actual_tail - pred_tail) / result['eval'].sum())
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue