|
|
@ -53,30 +53,62 @@ def normal(df):
|
|
|
|
pass
|
|
|
|
pass
|
|
|
|
return df
|
|
|
|
return df
|
|
|
|
|
|
|
|
|
|
|
|
# Load one workbook from the data directory and normalise it for analysis.
# NOTE(review): os.listdir returns entries in arbitrary order, so "[0]" is
# whichever entry the OS lists first - confirm that is the intended file.
file_dir = './浙江各地市行业电量数据'
city1 = os.listdir(file_dir)[0]

# Read the workbook, pass it through normal(), then drop the '地市' column.
df_city = normal(pd.read_excel(os.path.join(file_dir, city1)))
df_city = df_city.drop(columns='地市')

# Scale every column except the first by 1/10000
# (presumably a unit conversion - TODO confirm the intended unit).
df_city[df_city.columns[1:]] /= 10000

# Keep the first 10 characters of the stringified stat_date
# (presumably the YYYY-MM-DD part - confirm) and parse it as a datetime.
df_city['stat_date'] = pd.to_datetime(
    df_city['stat_date'].map(lambda value: str(value).strip()[:10])
)

print(df_city.describe())
|
|
|
|
|
|
|
|
#
|
|
|
|
|
|
|
|
# list_1000 = []
|
|
|
|
|
|
|
|
# list_100 = []
|
|
|
|
|
|
|
|
# list_10 = []
|
|
|
|
|
|
|
|
# list_1 = []
|
|
|
|
|
|
|
|
# for i in df_city.columns[1:]:
|
|
|
|
|
|
|
|
# if df_city[i].describe()['mean']>=1000:
|
|
|
|
|
|
|
|
# list_1000.append(i)
|
|
|
|
|
|
|
|
# if df_city[i].describe()['mean'] < 1000 and df_city[i].describe()['mean'] >= 100:
|
|
|
|
|
|
|
|
# list_100.append(i)
|
|
|
|
|
|
|
|
# if df_city[i].describe()['mean'] < 100 and df_city[i].describe()['mean'] >= 10:
|
|
|
|
|
|
|
|
# list_10.append(i)
|
|
|
|
|
|
|
|
# else:
|
|
|
|
|
|
|
|
# list_1.append(i)
|
|
|
|
|
|
|
|
# print('list_1:',list_1)
|
|
|
|
|
|
|
|
# print('list_10:',list_10)
|
|
|
|
|
|
|
|
# print('list_100:',list_100)
|
|
|
|
|
|
|
|
# print('list_1000:',list_1000)
|
|
|
|
|
|
|
|
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Build a small demo DataFrame; column 'A' holds a value far from the rest.
data = pd.DataFrame({'A': [1, 2, 3000, 4, 500], 'B': [10, 20, 30, 40, 50]})

# Quartiles of column 'A' and the inter-quartile range between them.
Q1, Q3 = (data['A'].quantile(q) for q in (0.25, 0.75))
IQR = Q3 - Q1

# Values more than 1.5 * IQR outside the quartiles count as outliers.
lower_threshold, upper_threshold = Q1 - 1.5 * IQR, Q3 + 1.5 * IQR
outliers = data['A'].lt(lower_threshold) | data['A'].gt(upper_threshold)
print(outliers)

# Column 'A' shifted down by one row, i.e. each row's previous value.
print(data['A'].shift(periods=1))

# NOTE(review): the original comment here announced replacing outliers with a
# neighbouring value, but no replacement is performed; the shifted column
# above is only printed.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Small demo frame for conditional replacement.
data = dict(A=[1, 2, 3, 4, 5], B=[10, 20, 30, 40, 50])
df = pd.DataFrame(data)

print("原始数据:")
print(df)

# DataFrame.where keeps the rows where the condition is True and writes -1
# into every cell of the rows where it is False; df itself is not modified.
condition = df['A'].gt(3)
df_new = df.where(condition, -1)
|
|
|
|
|
|
|
|
|
|
|
|
# Tail of the DataFrame.where demo: show the frame after the replacement.
print("\n根据条件替换后的数据:")
print(df_new)

# Sort every column of df_city except the first into exactly one bucket
# according to the magnitude of its mean:
#   list_1000: mean >= 1000
#   list_100 : 100 <= mean < 1000
#   list_10  : 10 <= mean < 100
#   list_1   : everything else (mean < 10, or a NaN mean)
list_1000 = []
list_100 = []
list_10 = []
list_1 = []
for i in df_city.columns[1:]:
    # Compute the mean once per column instead of calling describe()
    # repeatedly for each comparison.
    column_mean = df_city[i].mean()
    # The branches must be one if/elif/else chain: with independent `if`
    # statements the final `else` pairs only with the last `if`, so every
    # column with mean >= 100 was also appended to list_1.
    if column_mean >= 1000:
        list_1000.append(i)
    elif column_mean >= 100:
        list_100.append(i)
    elif column_mean >= 10:
        list_10.append(i)
    else:
        list_1.append(i)

print('list_1:', list_1)
print('list_10:', list_10)
print('list_100:', list_100)
print('list_1000:', list_1000)
|
|
|
|
|
|
|
|