更新入模数据

main
鸽子 1 year ago
parent cb599702a1
commit 9d0daf370e

@ -1,4 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<project version="4"> <project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="pytorch_gpu" project-jdk-type="Python SDK" /> <component name="ProjectRootManager" version="2" project-jdk-name="C:\anaconda\envs\pytorch" project-jdk-type="Python SDK" />
</project> </project>

@ -2,7 +2,7 @@
<module type="PYTHON_MODULE" version="4"> <module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager"> <component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" /> <content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="pytorch_gpu" jdkType="Python SDK" /> <orderEntry type="jdk" jdkName="C:\anaconda\envs\pytorch" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" /> <orderEntry type="sourceFolder" forTests="false" />
</component> </component>
</module> </module>

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

@ -5,6 +5,7 @@ from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
import matplotlib as mpl import matplotlib as mpl
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
mpl.rcParams['font.sans-serif']=['kaiti']
def season(x): def season(x):
if str(x)[5:7] in ('06','07','08','12','01','02'): if str(x)[5:7] in ('06','07','08','12','01','02'):
@ -13,23 +14,18 @@ def season(x):
return 0 return 0
mpl.rcParams['font.sans-serif']=['kaiti']
pd.set_option('display.width',None) pd.set_option('display.width',None)
parent_dir = os.path.abspath(os.path.join(os.getcwd(),os.pardir))
data = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\丽水数据.xlsx',index_col='dtdate') data = pd.read_excel(os.path.join(parent_dir,'入模数据/丽水.xlsx'),index_col='dtdate')
data.index = pd.to_datetime(data.index,format='%Y-%m-%d') data.index = pd.to_datetime(data.index,format='%Y-%m-%d')
data['season'] = data.index.map(season) data['season'] = data.index.map(season)
# plt.plot(range(len(data)),data['售电量'])
# plt.show()
print(data.head()) print(data.head())
df_eval = data.loc['2023-9'] df_eval = data.loc['2023-9']
df_train = data.loc['2021-1':'2023-8'] df_train = data.loc['2021-1':'2023-8']
# df_train = df[500:850] # df_train = df[500:850]
print(len(df_eval),len(df_train),len(data)) print(len(df_eval),len(df_train),len(data))
df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']] df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']]
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%'] # IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
@ -38,6 +34,19 @@ df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量'
# print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] <= low)])) # print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] <= low)]))
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)] # df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
# list2 = []
# list0 = []
# list1 = []
# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'):
# month_index = df.index.strftime('%Y-%m-%d').str[5:7] == f'{i}'
# if df.loc[month_index]['售电量'].mean() >= df['售电量'].describe()['75%']:
# list2.append(i)
# elif df.loc[month_index]['售电量'].mean() <= df['售电量'].describe()['25%']:
# list0.append(i)
# else:
# list1.append(i)
# print(list0,list1,list2)
X = df_train[['tem_max','tem_min','holiday','24ST','season']] X = df_train[['tem_max','tem_min','holiday','24ST','season']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']] X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
@ -46,6 +55,7 @@ y = df_train['售电量']
# best_i = {} # best_i = {}
# for i in range(200): # for i in range(200):
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.15,random_state=42) x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.15,random_state=42)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150) model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train,y_train) model.fit(x_train,y_train)

@ -121,6 +121,19 @@ df_eval['city_name'] = df_eval['city_name'].map(lambda x:x.strip())
df_hangzhou = df_eval[df_eval['city_name']=='金华市'].sort_values(by='dtdate') df_hangzhou = df_eval[df_eval['city_name']=='金华市'].sort_values(by='dtdate')
# list2 = []
# list0 = []
# list1 = []
# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'):
# month_index = df.index.strftime('%Y-%m-%d').str[5:7] == f'{i}'
# if df.loc[month_index]['售电量'].mean() >= df['售电量'].describe()['75%']:
# list2.append(i)
# elif df.loc[month_index]['售电量'].mean() <= df['售电量'].describe()['25%']:
# list0.append(i)
# else:
# list1.append(i)
# print(list0,list1,list2)
print(df_hangzhou) print(df_hangzhou)
loaded_model = xgb.XGBRegressor() loaded_model = xgb.XGBRegressor()

Loading…
Cancel
Save