更新入模数据

main
鸽子 11 months ago
parent cb599702a1
commit 9d0daf370e

@ -1,4 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="pytorch_gpu" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="C:\anaconda\envs\pytorch" project-jdk-type="Python SDK" />
</project>

@ -2,7 +2,7 @@
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="pytorch_gpu" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="C:\anaconda\envs\pytorch" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

@ -5,6 +5,7 @@ from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rcParams['font.sans-serif']=['kaiti']
def season(x):
if str(x)[5:7] in ('06','07','08','12','01','02'):
@ -13,23 +14,18 @@ def season(x):
return 0
mpl.rcParams['font.sans-serif']=['kaiti']
pd.set_option('display.width',None)
data = pd.read_excel(r'C:\Users\user\PycharmProjects\pytorch2\入模数据\丽水数据.xlsx',index_col='dtdate')
parent_dir = os.path.abspath(os.path.join(os.getcwd(),os.pardir))
data = pd.read_excel(os.path.join(parent_dir,'入模数据/丽水.xlsx'),index_col='dtdate')
data.index = pd.to_datetime(data.index,format='%Y-%m-%d')
data['season'] = data.index.map(season)
# plt.plot(range(len(data)),data['售电量'])
# plt.show()
print(data.head())
df_eval = data.loc['2023-9']
df_train = data.loc['2021-1':'2023-8']
# df_train = df[500:850]
print(len(df_eval),len(df_train),len(data))
df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量','season']]
# IQR = df['售电量'].describe()['75%'] - df['售电量'].describe()['25%']
@ -38,6 +34,19 @@ df_train = df_train[['tem_max','tem_min','holiday','24ST','rh','prs','售电量'
# print('异常值数量:',len(df[(df['售电量'] >= high) | (df['售电量'] <= low)]))
# df_train = df_train[(df['售电量'] <= high) & (df['售电量'] >= low)]
# list2 = []
# list0 = []
# list1 = []
# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'):
# month_index = df.index.strftime('%Y-%m-%d').str[5:7] == f'{i}'
# if df.loc[month_index]['售电量'].mean() >= df['售电量'].describe()['75%']:
# list2.append(i)
# elif df.loc[month_index]['售电量'].mean() <= df['售电量'].describe()['25%']:
# list0.append(i)
# else:
# list1.append(i)
# print(list0,list1,list2)
X = df_train[['tem_max','tem_min','holiday','24ST','season']]
X_eval = df_eval[['tem_max','tem_min','holiday','24ST','season']]
@ -46,6 +55,7 @@ y = df_train['售电量']
# best_i = {}
# for i in range(200):
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.15,random_state=42)
model = xgb.XGBRegressor(max_depth=6, learning_rate=0.05, n_estimators=150)
model.fit(x_train,y_train)

@ -121,6 +121,19 @@ df_eval['city_name'] = df_eval['city_name'].map(lambda x:x.strip())
df_hangzhou = df_eval[df_eval['city_name']=='金华市'].sort_values(by='dtdate')
# list2 = []
# list0 = []
# list1 = []
# for i in ('01','02','03','04','05','06','07','08','09','10','11','12'):
# month_index = df.index.strftime('%Y-%m-%d').str[5:7] == f'{i}'
# if df.loc[month_index]['售电量'].mean() >= df['售电量'].describe()['75%']:
# list2.append(i)
# elif df.loc[month_index]['售电量'].mean() <= df['售电量'].describe()['25%']:
# list0.append(i)
# else:
# list1.append(i)
# print(list0,list1,list2)
print(df_hangzhou)
loaded_model = xgb.XGBRegressor()

Loading…
Cancel
Save