You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
83 lines
3.4 KiB
Python
83 lines
3.4 KiB
Python
import os
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
# Scratch checks of NumPy array construction, reshaping and reduction.

# A 1x3 row of ones, built directly as a two-dimensional array.
n1 = np.array([1, 1, 1], ndmin=2)

# The same row obtained by adding a leading axis to a flat vector.
n2 = np.array([1, 1, 1])[np.newaxis, :]
print(n2)

# Reshaping an empty float array to three rows yields shape (3, 0).
n2 = np.empty((3, 0))

# Maximum over all elements of a nested list.
print(np.array([[1, 2, 3], [4, 5, 6]]).max())
|
|
|
|
# Load one workbook from the voltage-level data directory, index it by
# date and keep the analysis window.
file_dir = r'../浙江电压等级电量/浙江各地市分电压日电量数据'

# os.listdir() returns entries in arbitrary order; sort the names so the
# same workbook is selected on every run and on every platform.
excel = sorted(os.listdir(file_dir))[0]

df = pd.read_excel(
    os.path.join(file_dir, excel),
    sheet_name=0,
    index_col='stat_date',
)

# Strip surrounding whitespace from every column label.
df.columns = df.columns.map(lambda x: x.strip())

# Use a sorted DatetimeIndex so the partial-string date slice below works.
df.index = pd.to_datetime(df.index)
df = df.sort_index()

# Keep 2021-01 through 2023-08, then discard the last three rows of that
# window.
df = df.loc['2021-01':'2023-08'].iloc[:-3]
print(df.tail())
|
|
# for city in df['地市'].drop_duplicates():
|
|
# df_city = df[df['地市']== city]
|
|
# df_city['stat_date'] = df_city['stat_date'].map(lambda x:x.strip())
|
|
# df_city['stat_date'] = pd.to_datetime(df_city['stat_date'],format='%Y-%m-%d')
|
|
# df_city = df_city[df_city.columns[:-1]]
|
|
# df_city.sort_values(by='stat_date',ascending=True,inplace=True)
|
|
# df_city['stat_date'] = df_city['stat_date'].astype('str')
|
|
# df_city.to_excel(fr'C:\Users\user\Desktop\浙江各地市分电压日电量数据\{city}.xlsx',index=False)
|
|
# file_Dir = r'C:\Users\鸽子\Desktop\浙江各地市行业电量数据'
|
|
# for excel in os.listdir(file_Dir):
|
|
# df1 = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江各地市日电量数据-27-28).xlsx',sheet_name=1)
|
|
# df1.columns = df1.columns.map(lambda x:x.strip())
|
|
# df2 = pd.read_excel(os.path.join(file_Dir,excel))
|
|
# df2['地市'] = df2['地市'].map(lambda x:x.strip())
|
|
# city = df2['地市'].iloc[0]
|
|
# col_list = df2.columns
|
|
# df1 = df1[col_list]
|
|
# df1 = df1[(df1['stat_date']==20231028)&(df1['地市']==city)]
|
|
# df1['stat_date'] = pd.to_datetime(df1['stat_date'],format='%Y%m%d')
|
|
# df2 = pd.concat((df2,df1),ignore_index=True)
|
|
# df2.to_excel(fr'C:\Users\鸽子\Desktop\浙江各地市行业电量数据\{city}.xlsx')
|
|
# Clear the fixed display width used when printing frames (None lets pandas
# detect the terminal width itself).
pd.options.display.width = None
|
|
def normal(df):
    """Clean a DataFrame in place: drop mostly-zero columns, smooth outliers.

    Steps:

    1. Drop every column in which at least half of the rows equal 0.
    2. For each remaining numeric column, values strictly outside the Tukey
       fences ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]`` are blanked and then
       filled from the previous valid value (falling back to the next valid
       value for leading gaps).

    Non-numeric columns (strings, booleans, dates) are left untouched.

    Args:
        df: DataFrame to clean. It is modified in place.

    Returns:
        The same ``df`` object, after cleaning.
    """
    # Nothing to clean, and the zero ratio below is undefined for no rows.
    if len(df) == 0:
        return df

    # Fraction of rows equal to 0 per column; NaN never counts as zero.
    mostly_zero = [col for col in df.columns if (df[col] == 0).mean() >= 0.5]
    df.drop(columns=mostly_zero, inplace=True)

    for col in df.columns:
        series = df[col]
        # Quartiles only make sense for numeric data; skip everything else
        # explicitly instead of swallowing arbitrary exceptions.
        if (not pd.api.types.is_numeric_dtype(series)
                or pd.api.types.is_bool_dtype(series)):
            continue

        q1 = series.quantile(0.25)
        q3 = series.quantile(0.75)
        spread = 1.5 * (q3 - q1)

        # Strict comparisons: with a zero IQR (e.g. a constant column) the
        # fences coincide with the data, and inclusive tests would wipe out
        # every value in the column.
        outliers = (series > q3 + spread) | (series < q1 - spread)

        # ffill()/bfill() replace the deprecated fillna(method=...) calls.
        df[col] = series.mask(outliers).ffill().bfill()

    return df
|
|
|
|
# Load one per-city workbook from the industry data directory, clean it
# with normal(), rescale the value columns and parse the date column.
file_dir = './浙江各地市行业电量数据'

# os.listdir() returns entries in arbitrary order; sort the names so the
# same workbook is selected on every run and on every platform.
city1 = sorted(os.listdir(file_dir))[0]

df_city = pd.read_excel(os.path.join(file_dir, city1))
df_city = normal(df_city)
df_city = df_city.drop(columns='地市')

# Divide every column after the first by 10000.
# NOTE(review): assumes the first remaining column is stat_date - confirm.
value_cols = df_city.columns[1:]
df_city[value_cols] = df_city[value_cols] / 10000

# Keep the first 10 characters of each date's text form before parsing
# (presumably the YYYY-MM-DD part - verify against the workbook).
df_city['stat_date'] = df_city['stat_date'].map(lambda x: str(x).strip()[:10])
df_city['stat_date'] = pd.to_datetime(df_city['stat_date'])
print(df_city.describe())
|
|
|
|
# Bucket every value column of df_city by the magnitude of its mean:
#   list_1000: mean >= 1000
#   list_100:  100 <= mean < 1000
#   list_10:   10 <= mean < 100
#   list_1:    everything else (including columns whose mean is NaN)
list_1000 = []
list_100 = []
list_10 = []
list_1 = []
for i in df_city.columns[1:]:
    col_mean = df_city[i].mean()
    # One elif chain so each column lands in exactly one bucket. With
    # independent `if` statements the final `else` also caught every
    # column whose mean was >= 100 and duplicated it into list_1.
    if col_mean >= 1000:
        list_1000.append(i)
    elif col_mean >= 100:
        list_100.append(i)
    elif col_mean >= 10:
        list_10.append(i)
    else:
        list_1.append(i)

print('list_1:', list_1)
print('list_10:', list_10)
print('list_100:', list_100)
print('list_1000:', list_1000)
|