输出预测结果

main
鸽子 1 year ago
parent 967f1e5edf
commit 94b7b0f01f

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

@ -1,75 +0,0 @@
import pandas as pd
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
def normal(x):
    """Return the entries of ``x`` that lie within the 1.5 * IQR fences.

    The lower fence is ``Q1 - 1.5 * (Q3 - Q1)`` and the upper fence is
    ``Q3 + 1.5 * (Q3 - Q1)``; both fences are inclusive.

    Args:
        x: Numeric pandas Series.

    Returns:
        The subset of ``x`` (original index labels kept) whose values fall
        between the two fences.
    """
    # One quantile() call yields both quartiles (same linear interpolation
    # that describe() reports as '25%' / '75%').
    q1, q3 = x.quantile([0.25, 0.75])
    spread = 1.5 * (q3 - q1)
    high = q3 + spread
    low = q1 - spread
    return x[(x <= high) & (x >= low)]
def season(x):
    """Map a date-like value to one of three month groups.

    The month is read from characters 5-6 of ``str(x)``, so ``x`` must
    render as ``YYYY-MM-...`` (for example a pandas Timestamp or an ISO
    date string).

    Args:
        x: Date-like value.

    Returns:
        0 for months 04, 05, 06, 11; 1 for months 01, 02, 03, 09, 10, 12;
        2 for everything else (07, 08, or any value whose month slice is
        not recognised).
    """
    # NOTE(review): the groups look like low / medium / high load months
    # taken from a per-month quartile analysis -- confirm with the author.
    month = str(x)[5:7]
    if month in ('04', '05', '06', '11'):
        return 0
    if month in ('01', '02', '03', '09', '10', '12'):
        return 1
    return 2
# Load the Quzhou per-voltage energy sheet and keep only the date column and
# the '0.4kv及以下' column.
target_col = '0.4kv及以下'
df = pd.read_excel('./浙江各地市分电压日电量数据/衢州 .xlsx')
df = df[['stat_date', target_col]]
# Scale the readings by 1/10000 (presumably a unit conversion -- confirm).
df[target_col] = df[target_col].div(10000)
# Date strings carry stray whitespace; strip before parsing.
df['stat_date'] = pd.to_datetime(df['stat_date'].map(lambda raw: raw.strip()))

# Auxiliary feature sheet: column headers are stripped of whitespace, then
# only the date key and the four feature columns are kept.
df_qw = pd.read_excel(r'C:\python-project\p1031\入模数据\衢州.xlsx')
df_qw.columns = df_qw.columns.map(lambda name: name.strip())
feature_cols = ['dtdate', 'tem_max', 'tem_min', 'holiday', '24ST']
df_qw = df_qw[feature_cols]
df_qw['dtdate'] = pd.to_datetime(df_qw['dtdate'])

# Left join keeps every energy row even when no feature row matches its date.
df = df.merge(df_qw, how='left', left_on='stat_date', right_on='dtdate')
df = df.drop(columns='dtdate').set_index('stat_date')
# list2 = []
# list0 = []
# list1 = []
# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'):
# month_index = df.index.strftime('%Y-%m-%d').str[5:7] == f'{i}'
# # print(df.loc[month_index]['0.4kv及以下'].max(),df['0.4kv及以下'].describe()['75%'])
# if df.loc[month_index]['0.4kv及以下'].mean() >= df['0.4kv及以下'].describe()['75%']:
# list2.append(i)
# elif df.loc[month_index]['0.4kv及以下'].mean() <= df['0.4kv及以下'].describe()['25%']:
# list0.append(i)
# else:
# list1.append(i)
# print(list0,list1,list2)
target_col = '0.4kv及以下'

# Add the month-group feature, then keep only rows whose target value passes
# the IQR filter implemented by normal().
df['season'] = df.index.map(season)
kept_index = normal(df[target_col]).index
df = df.loc[kept_index]

# Training window: 2021-07 through 2023-09 with the final three rows held back.
train_window = df.loc['2021-7':'2023-9'][:-3]
x_train = train_window.drop(columns=target_col)
y_train = train_window[target_col]

# Evaluation frame: every remaining row of 2023-09. Assuming the index is
# date-sorted, only its last three rows are absent from the training window.
eval_month = df.loc['2023-9']
x_eval = eval_month.drop(columns=target_col)
y_eval = eval_month[target_col]

# Random 80/20 split; the 20% part is used only for the R^2 printout.
x_train, x_test, y_train, y_test = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42
)

model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train, y_train)

y_pred = model.predict(x_test)
print(r2_score(y_test, y_pred))

predict = model.predict(x_eval)
result = pd.DataFrame({'eval': y_eval, 'pred': predict}, index=y_eval.index)
print(result)

# Signed error over the last three rows, as a share of the actual total over
# all evaluation rows.
last3_actual = result['eval'][-3:].sum()
last3_pred = result['pred'][-3:].sum()
print((last3_actual - last3_pred) / result['eval'].sum())

@ -0,0 +1,29 @@
import pandas as pd
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
# Load the prepared model-input sheet for Lishui, indexed by date.
df = pd.read_excel('../400v入模数据/丽水.xlsx', index_col='stat_date')
df.index = pd.to_datetime(df.index)

target_col = '0.4kv及以下'

# Training window: 2021-01 through 2023-07 with the final three rows held back.
train_window = df.loc['2021-1':'2023-7'][:-3]
x_train = train_window.drop(columns=target_col)
y_train = train_window[target_col]

# Evaluation frame: every row of 2023-07. Assuming the index is date-sorted,
# only its last three rows are absent from the training window.
eval_month = df.loc['2023-7']
x_eval = eval_month.drop(columns=target_col)
y_eval = eval_month[target_col]

# Random 80/20 split; the 20% part is used only for the R^2 printout.
x_train, x_test, y_train, y_test = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42
)

model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train, y_train)

y_pred = model.predict(x_test)
print(r2_score(y_test, y_pred))

predict = model.predict(x_eval)
result = pd.DataFrame({'eval': y_eval, 'pred': predict}, index=y_eval.index)
print(result)

# Signed error over the last three rows, as a share of the actual total over
# all evaluation rows.
last3_actual = result['eval'][-3:].sum()
last3_pred = result['pred'][-3:].sum()
print((last3_actual - last3_pred) / result['eval'].sum())

@ -0,0 +1,30 @@
import pandas as pd
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
# Load the prepared model-input sheet for Taizhou, indexed by date.
df = pd.read_excel('../400v入模数据/台州.xlsx', index_col='stat_date')
df.index = pd.to_datetime(df.index)

target_col = '0.4kv及以下'

# Training window: 2022-07 through 2023-07 with the final three rows held back.
train_window = df.loc['2022-7':'2023-7'][:-3]
x_train = train_window.drop(columns=target_col)
y_train = train_window[target_col]

# Evaluation frame: every row of 2023-07. Assuming the index is date-sorted,
# only its last three rows are absent from the training window.
eval_month = df.loc['2023-7']
x_eval = eval_month.drop(columns=target_col)
y_eval = eval_month[target_col]

# Random 80/20 split; the 20% part is used only for the R^2 printout.
x_train, x_test, y_train, y_test = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42
)

model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train, y_train)

y_pred = model.predict(x_test)
print(r2_score(y_test, y_pred))

predict = model.predict(x_eval)
result = pd.DataFrame({'eval': y_eval, 'pred': predict}, index=y_eval.index)
print(result)

# Signed error over the last three rows, as a share of the actual total over
# all evaluation rows.
last3_actual = result['eval'][-3:].sum()
last3_pred = result['pred'][-3:].sum()
print((last3_actual - last3_pred) / result['eval'].sum())

@ -0,0 +1,28 @@
import pandas as pd
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
# Load the prepared model-input sheet for Jiaxing, indexed by date.
df = pd.read_excel('../400v入模数据/嘉兴.xlsx', index_col='stat_date')
df.index = pd.to_datetime(df.index)

target_col = '0.4kv及以下'

# Training window: 2022-07 through 2023-07 with the final three rows held back.
train_window = df.loc['2022-7':'2023-7'][:-3]
x_train = train_window.drop(columns=target_col)
y_train = train_window[target_col]

# Evaluation frame: every row of 2023-07. Assuming the index is date-sorted,
# only its last three rows are absent from the training window.
eval_month = df.loc['2023-7']
x_eval = eval_month.drop(columns=target_col)
y_eval = eval_month[target_col]

# Random 80/20 split (seed 158 for this city); the 20% part is used only for
# the R^2 printout.
x_train, x_test, y_train, y_test = train_test_split(
    x_train, y_train, test_size=0.2, random_state=158
)

model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train, y_train)

y_pred = model.predict(x_test)
print(r2_score(y_test, y_pred))

predict = model.predict(x_eval)
result = pd.DataFrame({'eval': y_eval, 'pred': predict}, index=y_eval.index)
print(result)

# Signed error over the last three rows, as a share of the actual total over
# all evaluation rows.
last3_actual = result['eval'][-3:].sum()
last3_pred = result['pred'][-3:].sum()
print((last3_actual - last3_pred) / result['eval'].sum())

@ -0,0 +1,28 @@
import pandas as pd
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
# Load the prepared model-input sheet for Ningbo, indexed by date.
df = pd.read_excel('../400v入模数据/宁波.xlsx', index_col='stat_date')
df.index = pd.to_datetime(df.index)

target_col = '0.4kv及以下'

# Training window: 2022-07 through 2023-10 with the final three rows held back.
train_window = df.loc['2022-7':'2023-10'][:-3]
x_train = train_window.drop(columns=target_col)
y_train = train_window[target_col]

# Evaluation frame: every row of 2023-10. Assuming the index is date-sorted,
# only its last three rows are absent from the training window.
eval_month = df.loc['2023-10']
x_eval = eval_month.drop(columns=target_col)
y_eval = eval_month[target_col]

# Random 80/20 split; the 20% part is used only for the R^2 printout.
x_train, x_test, y_train, y_test = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42
)

model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train, y_train)

y_pred = model.predict(x_test)
print(r2_score(y_test, y_pred))

predict = model.predict(x_eval)
result = pd.DataFrame({'eval': y_eval, 'pred': predict}, index=y_eval.index)
print(result)

# Signed error over the last three rows, as a share of the actual total over
# all evaluation rows.
last3_actual = result['eval'][-3:].sum()
last3_pred = result['pred'][-3:].sum()
print((last3_actual - last3_pred) / result['eval'].sum())

@ -8,40 +8,17 @@ def normal(x):
low = x.describe()['25%'] - 1.5*(x.describe()['75%']-x.describe()['25%']) low = x.describe()['25%'] - 1.5*(x.describe()['75%']-x.describe()['25%'])
return x[(x<=high)&(x>=low)] return x[(x<=high)&(x>=low)]
def season(x):
    """Map a date-like value to one of three month groups.

    The month is read from characters 5-6 of ``str(x)``, so ``x`` must
    render as ``YYYY-MM-...`` (for example a pandas Timestamp or an ISO
    date string).

    Args:
        x: Date-like value.

    Returns:
        0 for months 04, 05; 1 for months 01, 02, 03, 06, 09, 10, 11, 12;
        2 for everything else (07, 08, or any value whose month slice is
        not recognised).
    """
    month = str(x)[5:7]
    if month in ('04', '05'):
        return 0
    if month in ('01', '02', '03', '06', '09', '10', '11', '12'):
        return 1
    return 2
df = pd.read_excel('./浙江各地市分电压日电量数据/杭州 .xlsx')
df = df[['stat_date','0.4kv及以下']]
df['0.4kv及以下'] = df['0.4kv及以下']/10000
df['stat_date'] = df['stat_date'].map(lambda x:x.strip())
df['stat_date'] = pd.to_datetime(df['stat_date'])
df = pd.read_excel('../400v入模数据/杭州.xlsx',index_col='stat_date')
df_qw = pd.read_excel(r'C:\python-project\p1031\入模数据\杭州.xlsx') df.index = pd.to_datetime(df.index)
df_qw.columns = df_qw.columns.map(lambda x:x.strip())
df_qw = df_qw[['dtdate','tem_max','tem_min','holiday','24ST']]
df_qw['dtdate'] = pd.to_datetime(df_qw['dtdate'])
x_train = df.loc['2022-1':'2023-8'][:-3].drop(columns='0.4kv及以下')
df = pd.merge(df,df_qw,left_on='stat_date',right_on='dtdate',how='left') y_train = df.loc['2022-1':'2023-8'][:-3]['0.4kv及以下']
df.drop(columns='dtdate',inplace=True) x_eval = df.loc['2023-8'].drop(columns='0.4kv及以下')
df.set_index('stat_date',inplace=True) y_eval = df.loc['2023-8']['0.4kv及以下']
df['season'] = df.index.map(season)
df = df.loc[normal(df['0.4kv及以下']).index]
print(df.head())
x_train = df.loc['2022-7':'2023-8'].drop(columns='0.4kv及以下')
y_train = df.loc['2022-7':'2023-8']['0.4kv及以下']
x_eval = df.loc['2023-9'].drop(columns='0.4kv及以下')
y_eval = df.loc['2023-9']['0.4kv及以下']
x_train,x_test,y_train,y_test = train_test_split(x_train,y_train,test_size=0.2,random_state=42) x_train,x_test,y_train,y_test = train_test_split(x_train,y_train,test_size=0.2,random_state=42)
model = xgb.XGBRegressor(max_depth=6,learning_rate=0.05,n_estimators=150) model = xgb.XGBRegressor(max_depth=6,learning_rate=0.05,n_estimators=150)

@ -0,0 +1,28 @@
import pandas as pd
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
# Load the prepared model-input sheet for Wenzhou, indexed by date.
df = pd.read_excel('../400v入模数据/温州.xlsx', index_col='stat_date')
df.index = pd.to_datetime(df.index)

target_col = '0.4kv及以下'

# Training window: 2022-07 through 2023-07 with the final three rows held back.
train_window = df.loc['2022-7':'2023-7'][:-3]
x_train = train_window.drop(columns=target_col)
y_train = train_window[target_col]

# Evaluation frame: every row of 2023-07. Assuming the index is date-sorted,
# only its last three rows are absent from the training window.
eval_month = df.loc['2023-7']
x_eval = eval_month.drop(columns=target_col)
y_eval = eval_month[target_col]

# Random 80/20 split; the 20% part is used only for the R^2 printout.
x_train, x_test, y_train, y_test = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42
)

model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train, y_train)

y_pred = model.predict(x_test)
print(r2_score(y_test, y_pred))

predict = model.predict(x_eval)
result = pd.DataFrame({'eval': y_eval, 'pred': predict}, index=y_eval.index)
print(result)

# Signed error over the last three rows, as a share of the actual total over
# all evaluation rows.
last3_actual = result['eval'][-3:].sum()
last3_pred = result['pred'][-3:].sum()
print((last3_actual - last3_pred) / result['eval'].sum())

@ -0,0 +1,28 @@
import pandas as pd
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
# Load the prepared model-input sheet for Huzhou, indexed by date.
df = pd.read_excel('../400v入模数据/湖州.xlsx', index_col='stat_date')
df.index = pd.to_datetime(df.index)

target_col = '0.4kv及以下'

# Training window: 2021-01 through 2023-10 with the final three rows held back.
train_window = df.loc['2021-1':'2023-10'][:-3]
x_train = train_window.drop(columns=target_col)
y_train = train_window[target_col]

# Evaluation frame: every row of 2023-10. Assuming the index is date-sorted,
# only its last three rows are absent from the training window.
eval_month = df.loc['2023-10']
x_eval = eval_month.drop(columns=target_col)
y_eval = eval_month[target_col]

# Random 80/20 split; the 20% part is used only for the R^2 printout.
x_train, x_test, y_train, y_test = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42
)

model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train, y_train)

y_pred = model.predict(x_test)
print(r2_score(y_test, y_pred))

predict = model.predict(x_eval)
result = pd.DataFrame({'eval': y_eval, 'pred': predict}, index=y_eval.index)
print(result)

# Signed error over the last three rows, as a share of the actual total over
# all evaluation rows.
last3_actual = result['eval'][-3:].sum()
last3_pred = result['pred'][-3:].sum()
print((last3_actual - last3_pred) / result['eval'].sum())

@ -0,0 +1,28 @@
import pandas as pd
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
# Load the prepared model-input sheet for Shaoxing, indexed by date.
df = pd.read_excel('../400v入模数据/绍兴.xlsx', index_col='stat_date')
df.index = pd.to_datetime(df.index)

target_col = '0.4kv及以下'

# Training window: 2021-01 through 2023-10 with the final three rows held back.
train_window = df.loc['2021-1':'2023-10'][:-3]
x_train = train_window.drop(columns=target_col)
y_train = train_window[target_col]

# Evaluation frame: every row of 2023-10. Assuming the index is date-sorted,
# only its last three rows are absent from the training window.
eval_month = df.loc['2023-10']
x_eval = eval_month.drop(columns=target_col)
y_eval = eval_month[target_col]

# Random 80/20 split; the 20% part is used only for the R^2 printout.
x_train, x_test, y_train, y_test = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42
)

model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train, y_train)

y_pred = model.predict(x_test)
print(r2_score(y_test, y_pred))

predict = model.predict(x_eval)
result = pd.DataFrame({'eval': y_eval, 'pred': predict}, index=y_eval.index)
print(result)

# Signed error over the last three rows, as a share of the actual total over
# all evaluation rows.
last3_actual = result['eval'][-3:].sum()
last3_pred = result['pred'][-3:].sum()
print((last3_actual - last3_pred) / result['eval'].sum())

@ -0,0 +1,28 @@
import pandas as pd
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
# Load the prepared model-input sheet for Zhoushan, indexed by date.
df = pd.read_excel('../400v入模数据/舟山.xlsx', index_col='stat_date')
df.index = pd.to_datetime(df.index)

target_col = '0.4kv及以下'

# Training window: 2022-01 through 2023-09 with the final three rows held back.
train_window = df.loc['2022-1':'2023-9'][:-3]
x_train = train_window.drop(columns=target_col)
y_train = train_window[target_col]

# Evaluation frame: every row of 2023-09. Assuming the index is date-sorted,
# only its last three rows are absent from the training window.
eval_month = df.loc['2023-9']
x_eval = eval_month.drop(columns=target_col)
y_eval = eval_month[target_col]

# Random 80/20 split (seed 158 for this city); the 20% part is used only for
# the R^2 printout.
x_train, x_test, y_train, y_test = train_test_split(
    x_train, y_train, test_size=0.2, random_state=158
)

model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train, y_train)

y_pred = model.predict(x_test)
print(r2_score(y_test, y_pred))

predict = model.predict(x_eval)
result = pd.DataFrame({'eval': y_eval, 'pred': predict}, index=y_eval.index)
print(result)

# Signed error over the last three rows, as a share of the actual total over
# all evaluation rows.
last3_actual = result['eval'][-3:].sum()
last3_pred = result['pred'][-3:].sum()
print((last3_actual - last3_pred) / result['eval'].sum())

@ -0,0 +1,40 @@
import pandas as pd
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
def normal(x):
    """Return the entries of ``x`` that lie within the 1.5 * IQR fences.

    The lower fence is ``Q1 - 1.5 * (Q3 - Q1)`` and the upper fence is
    ``Q3 + 1.5 * (Q3 - Q1)``; both fences are inclusive.

    Args:
        x: Numeric pandas Series.

    Returns:
        The subset of ``x`` (original index labels kept) whose values fall
        between the two fences.
    """
    # One quantile() call yields both quartiles (same linear interpolation
    # that describe() reports as '25%' / '75%').
    q1, q3 = x.quantile([0.25, 0.75])
    spread = 1.5 * (q3 - q1)
    high = q3 + spread
    low = q1 - spread
    return x[(x <= high) & (x >= low)]
def season(x):
    """Map a date-like value to one of three month groups.

    The month is read from characters 5-6 of ``str(x)``, so ``x`` must
    render as ``YYYY-MM-...`` (for example a pandas Timestamp or an ISO
    date string).

    Args:
        x: Date-like value.

    Returns:
        0 for months 04, 05, 06, 11; 1 for months 01, 02, 03, 09, 10, 12;
        2 for everything else (07, 08, or any value whose month slice is
        not recognised).
    """
    month = str(x)[5:7]
    if month in ('04', '05', '06', '11'):
        return 0
    if month in ('01', '02', '03', '09', '10', '12'):
        return 1
    return 2
# Load the prepared model-input sheet for Quzhou, indexed by date.
df = pd.read_excel('../400v入模数据/衢州.xlsx', index_col='stat_date')
df.index = pd.to_datetime(df.index)

target_col = '0.4kv及以下'

# Training window: 2021-07 through 2023-07 with the final three rows held back.
train_window = df.loc['2021-7':'2023-7'][:-3]
x_train = train_window.drop(columns=target_col)
y_train = train_window[target_col]

# Evaluation frame: every row of 2023-07. Assuming the index is date-sorted,
# only its last three rows are absent from the training window.
eval_month = df.loc['2023-7']
x_eval = eval_month.drop(columns=target_col)
y_eval = eval_month[target_col]

# Random 80/20 split (seed 142 for this city); the 20% part is used only for
# the R^2 printout.
x_train, x_test, y_train, y_test = train_test_split(
    x_train, y_train, test_size=0.2, random_state=142
)

model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train, y_train)

y_pred = model.predict(x_test)
print(r2_score(y_test, y_pred))

predict = model.predict(x_eval)
result = pd.DataFrame({'eval': y_eval, 'pred': predict}, index=y_eval.index)
print(result)

# Signed error over the last three rows, as a share of the actual total over
# all evaluation rows.
last3_actual = result['eval'][-3:].sum()
last3_pred = result['pred'][-3:].sum()
print((last3_actual - last3_pred) / result['eval'].sum())

@ -0,0 +1,28 @@
import pandas as pd
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
# Load the prepared model-input sheet for Jinhua, indexed by date.
df = pd.read_excel('../400v入模数据/金华.xlsx', index_col='stat_date')
df.index = pd.to_datetime(df.index)

target_col = '0.4kv及以下'

# Training window: 2022-01 through 2023-10 with the final three rows held back.
train_window = df.loc['2022-1':'2023-10'][:-3]
x_train = train_window.drop(columns=target_col)
y_train = train_window[target_col]

# Evaluation frame: every row of 2023-10. Assuming the index is date-sorted,
# only its last three rows are absent from the training window.
eval_month = df.loc['2023-10']
x_eval = eval_month.drop(columns=target_col)
y_eval = eval_month[target_col]

# Random 80/20 split; the 20% part is used only for the R^2 printout.
x_train, x_test, y_train, y_test = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42
)

model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train, y_train)

y_pred = model.predict(x_test)
print(r2_score(y_test, y_pred))

predict = model.predict(x_eval)
result = pd.DataFrame({'eval': y_eval, 'pred': predict}, index=y_eval.index)
print(result)

# Signed error over the last three rows, as a share of the actual total over
# all evaluation rows.
last3_actual = result['eval'][-3:].sum()
last3_pred = result['pred'][-3:].sum()
print((last3_actual - last3_pred) / result['eval'].sum())

@ -32,8 +32,6 @@ for excel,qw_excel in zip(os.listdir(fir_dir),os.listdir(qw_dir)):
df.drop(columns='dtdate',inplace=True) df.drop(columns='dtdate',inplace=True)
df.set_index('stat_date',inplace=True) df.set_index('stat_date',inplace=True)
list2 = [] list2 = []
list0 = [] list0 = []
list1 = [] list1 = []
@ -56,7 +54,7 @@ for excel,qw_excel in zip(os.listdir(fir_dir),os.listdir(qw_dir)):
print(f'{excel[:2]}',list0) print(f'{excel[:2]}',list0)
df['season'] = df.index.map(season) df['season'] = df.index.map(season)
df.dropna(how='any',inplace=True)
df.to_excel(f'./400v入模数据/{excel[:2]}.xlsx') df.to_excel(f'./400v入模数据/{excel[:2]}.xlsx')
# dict1 = {'杭州':0,'湖州':1,'嘉兴':2,'金华':3,'丽水':4,'宁波':5,'衢州':6,'绍兴':7,'台州':8,'温州':9,'舟山':10} # dict1 = {'杭州':0,'湖州':1,'嘉兴':2,'金华':3,'丽水':4,'宁波':5,'衢州':6,'绍兴':7,'台州':8,'温州':9,'舟山':10}
# df['city'] = dict1[excel[:2]] # df['city'] = dict1[excel[:2]]

Loading…
Cancel
Save