pytorch/浙江行业电量/test1.py

import os

import numpy as np
import pandas as pd
# Scratch checks of NumPy reshaping: two ways to build a 1x3 row vector,
# then reshaping an empty array into three rows (which yields shape (3, 0)).
n1 = np.atleast_2d([1, 1, 1])
n2 = np.reshape(np.array([1, 1, 1]), (1, -1))
print(n2)
n2 = np.reshape(np.array([]), (3, -1))

# Maximum over every element of a nested list.
print(np.array([[1, 2, 3], [4, 5, 6]]).max())

# Load the first workbook listed in the data directory, index it by the
# 'stat_date' column, and keep the 2021-01 .. 2023-08 window minus its
# last three rows.
# NOTE(review): os.listdir() returns entries in arbitrary order, so the
# "first" file is not guaranteed to be stable across machines - confirm.
file_dir = r'../浙江电压等级电量/浙江各地市分电压日电量数据'
excel = os.listdir(file_dir)[0]
workbook_path = os.path.join(file_dir, excel)
df = pd.read_excel(workbook_path, sheet_name=0, index_col='stat_date')
# Header cells may carry surrounding whitespace; trim every column label.
df.columns = df.columns.map(lambda label: label.strip())
df.index = pd.to_datetime(df.index)
df = df.sort_index()
df = df.loc['2021-01':'2023-08'].iloc[:-3]
print(df.tail())
# for city in df['地市'].drop_duplicates():
#     df_city = df[df['地市']== city]
#     df_city['stat_date'] = df_city['stat_date'].map(lambda x:x.strip())
#     df_city['stat_date'] = pd.to_datetime(df_city['stat_date'],format='%Y-%m-%d')
#     df_city = df_city[df_city.columns[:-1]]
#     df_city.sort_values(by='stat_date',ascending=True,inplace=True)
#     df_city['stat_date'] = df_city['stat_date'].astype('str')
#     df_city.to_excel(fr'C:\Users\user\Desktop\浙江各地市分电压日电量数据\{city}.xlsx',index=False)
# file_Dir = r'C:\Users\鸽子\Desktop\浙江各地市行业电量数据'
# for excel in os.listdir(file_Dir):
#     df1 = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江各地市日电量数据-27-28).xlsx',sheet_name=1)
#     df1.columns = df1.columns.map(lambda x:x.strip())
#     df2 = pd.read_excel(os.path.join(file_Dir,excel))
#     df2['地市'] = df2['地市'].map(lambda x:x.strip())
#     city = df2['地市'].iloc[0]
#     col_list = df2.columns
#     df1 = df1[col_list]
#     df1 = df1[(df1['stat_date']==20231028)&(df1['地市']==city)]
#     df1['stat_date'] = pd.to_datetime(df1['stat_date'],format='%Y%m%d')
#     df2 = pd.concat((df2,df1),ignore_index=True)
#     df2.to_excel(fr'C:\Users\鸽子\Desktop\浙江各地市行业电量数据\{city}.xlsx')
# Setting display.width to None makes pandas auto-detect the terminal width
# instead of wrapping printed frames at a fixed column count.
pd.options.display.width = None
def normal(df):
    """Clean ``df`` in place: drop mostly-zero columns and smooth outliers.

    Processing steps:

    1. Every column in which at least half of the rows are equal to 0 is
       dropped.
    2. For each remaining numeric column, values lying strictly outside the
       Tukey fences ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]`` are blanked out and
       then filled from the previous valid row (forward fill); gaps left at
       the top of the column are filled from the next valid row (back fill).
       Missing values that were already present are filled the same way.

    Non-numeric and boolean columns are left untouched.

    Args:
        df: DataFrame to clean. It is modified in place.

    Returns:
        The same ``df`` object that was passed in.
    """
    # Share of rows equal to 0 per column. Using the mean of the boolean mask
    # avoids dividing by len(df), so an empty frame no longer raises.
    zero_heavy = [col for col in df.columns if (df[col] == 0).mean() >= 0.5]
    df.drop(columns=zero_heavy, inplace=True)

    for col in df.columns:
        values = df[col]
        # Quartiles are only meaningful for numeric data; skipping other
        # dtypes explicitly replaces the former blanket ``except: pass``.
        if pd.api.types.is_bool_dtype(values) or not pd.api.types.is_numeric_dtype(values):
            continue

        q1 = values.quantile(0.25)
        q3 = values.quantile(0.75)
        whisker = 1.5 * (q3 - q1)

        # Strict comparison: a value sitting exactly on a fence is kept.
        # With an inclusive test a constant column (IQR == 0) would be
        # flagged in full and end up entirely NaN.
        outliers = (values < q1 - whisker) | (values > q3 + whisker)
        df[col] = values.mask(outliers).ffill().bfill()
    return df

# file_dir = './浙江各地市行业电量数据'
# city1 = os.listdir(file_dir)[0]
# df_city = pd.read_excel(os.path.join(file_dir, city1))
# df_city = normal(df_city)
# df_city = df_city.drop(columns='地市')
# df_city[df_city.columns[1:]] /= 10000
# df_city['stat_date'] = df_city['stat_date'].map(lambda x: str(x).strip()[:10])
# df_city.stat_date = pd.to_datetime(df_city.stat_date)
# print(df_city.describe())
#
# list_1000 = []
# list_100 = []
# list_10 = []
# list_1 = []
# for i in df_city.columns[1:]:
#     if df_city[i].describe()['mean']>=1000:
#         list_1000.append(i)
#     if df_city[i].describe()['mean'] < 1000 and df_city[i].describe()['mean']  >= 100:
#         list_100.append(i)
#     if df_city[i].describe()['mean'] < 100 and df_city[i].describe()['mean']  >= 10:
#         list_10.append(i)
#     else:
#         list_1.append(i)
# print('list_1:',list_1)
# print('list_10:',list_10)
# print('list_100:',list_100)
# print('list_1000:',list_1000)
import pandas as pd

# Build a small DataFrame whose column A contains obvious spikes.
data = pd.DataFrame({
    'A': [1, 2, 3000, 4, 500],
    'B': [10, 20, 30, 40, 50],
})

# Quartiles and the 1.5 * IQR fences for column A.
Q1, Q3 = (data['A'].quantile(q) for q in (0.25, 0.75))
IQR = Q3 - Q1
lower_threshold, upper_threshold = Q1 - 1.5 * IQR, Q3 + 1.5 * IQR

# Boolean mask of the rows of A that fall strictly outside the fences.
outliers = data['A'].lt(lower_threshold) | data['A'].gt(upper_threshold)
print(outliers)

# Column A shifted down by one row, i.e. each row sees its predecessor's
# value - the candidate replacement for an outlier.
print(data['A'].shift(periods=1))

# Second experiment: conditional replacement with DataFrame.where.
data = {
    'A': [1, 2, 3, 4, 5],
    'B': [10, 20, 30, 40, 50],
}
df = pd.DataFrame(data)

# where() keeps the rows for which the condition is True and writes -1
# into every cell of the rows for which it is False.
condition = df['A'].gt(3)
df_new = df.where(condition, -1)

print("原始数据:")
print(df)

print("\n根据条件替换后的数据:")
print(df_new)
输出预测结果 1 year ago			`import os`
更新电压lstm模型 1 year ago
电压等级输出为5lstm 1 year ago			`import numpy as np`
更新入模数据 1 year ago			`import pandas as pd`
电压等级输出为5lstm 1 year ago			`n1 = np.array([[1,1,1]])`
			`n2 = np.array([1,1,1]).reshape(1,-1)`
			`print(n2)`
			`n2 = np.array([]).reshape(3,-1)`

更新入模数据 1 year ago			`print(np.max([[1,2,3],[4,5,6]]))`

删除重复数据集 1 year ago			`file_dir = r'../浙江电压等级电量/浙江各地市分电压日电量数据'`
更新电压lstm模型 1 year ago			`excel = os.listdir(file_dir)[0]`
			`df = pd.read_excel(os.path.join(file_dir, excel), sheet_name=0, index_col='stat_date')`
			`df.columns = df.columns.map(lambda x:x.strip())`
			`df.index = pd.to_datetime(df.index)`
			`df.sort_index(inplace=True)`
			`df = df.loc['2021-01':'2023-08'][:-3]`
			`print(df.tail())`
输出预测结果 1 year ago			`# for city in df['地市'].drop_duplicates():`
			`# df_city = df[df['地市']== city]`
			`# df_city['stat_date'] = df_city['stat_date'].map(lambda x:x.strip())`
			`# df_city['stat_date'] = pd.to_datetime(df_city['stat_date'],format='%Y-%m-%d')`
			`# df_city = df_city[df_city.columns[:-1]]`
			`# df_city.sort_values(by='stat_date',ascending=True,inplace=True)`
			`# df_city['stat_date'] = df_city['stat_date'].astype('str')`
			`# df_city.to_excel(fr'C:\Users\user\Desktop\浙江各地市分电压日电量数据\{city}.xlsx',index=False)`
更新电压lstm模型 1 year ago			`# file_Dir = r'C:\Users\鸽子\Desktop\浙江各地市行业电量数据'`
			`# for excel in os.listdir(file_Dir):`
			`# df1 = pd.read_excel(r'C:\Users\鸽子\Desktop\浙江各地市日电量数据-27-28).xlsx',sheet_name=1)`
			`# df1.columns = df1.columns.map(lambda x:x.strip())`
			`# df2 = pd.read_excel(os.path.join(file_Dir,excel))`
			`# df2['地市'] = df2['地市'].map(lambda x:x.strip())`
			`# city = df2['地市'].iloc[0]`
			`# col_list = df2.columns`
			`# df1 = df1[col_list]`
			`# df1 = df1[(df1['stat_date']==20231028)&(df1['地市']==city)]`
			`# df1['stat_date'] = pd.to_datetime(df1['stat_date'],format='%Y%m%d')`
			`# df2 = pd.concat((df2,df1),ignore_index=True)`
			`# df2.to_excel(fr'C:\Users\鸽子\Desktop\浙江各地市行业电量数据\{city}.xlsx')`
删除重复数据集 1 year ago			`pd.set_option('display.width',None)`
			`def normal(df):`
			`drop_col = [x for x in df.columns if len(df[df[x]==0])/len(df) >= 0.5]`
			`df.drop(columns=drop_col,inplace=True)`
			`for col in df.columns:`
			`try:`
			`high = df[col].describe()['75%'] + 1.5 * (df[col].describe()['75%'] - df[col].describe()['25%'])`
			`low = df[col].describe()['25%'] - 1.5 * (df[col].describe()['75%'] - df[col].describe()['25%'])`
			`df[col] = df[col].map(lambda x: np.nan if (x >= high) \| (x <= low) else x)`
			`df[col] = df[col].fillna(method='ffill')`
			`df[col] = df[col].fillna(method='bfill')`
			`except:`
			`pass`
			`return df`
输出预测结果 1 year ago
输出预测结果 1 year ago			`# file_dir = './浙江各地市行业电量数据'`
			`# city1 = os.listdir(file_dir)[0]`
			`# df_city = pd.read_excel(os.path.join(file_dir, city1))`
			`# df_city = normal(df_city)`
			`# df_city = df_city.drop(columns='地市')`
			`# df_city[df_city.columns[1:]] /= 10000`
			`# df_city['stat_date'] = df_city['stat_date'].map(lambda x: str(x).strip()[:10])`
			`# df_city.stat_date = pd.to_datetime(df_city.stat_date)`
			`# print(df_city.describe())`
			`#`
			`# list_1000 = []`
			`# list_100 = []`
			`# list_10 = []`
			`# list_1 = []`
			`# for i in df_city.columns[1:]:`
			`# if df_city[i].describe()['mean']>=1000:`
			`# list_1000.append(i)`
			`# if df_city[i].describe()['mean'] < 1000 and df_city[i].describe()['mean'] >= 100:`
			`# list_100.append(i)`
			`# if df_city[i].describe()['mean'] < 100 and df_city[i].describe()['mean'] >= 10:`
			`# list_10.append(i)`
			`# else:`
			`# list_1.append(i)`
			`# print('list_1:',list_1)`
			`# print('list_10:',list_10)`
			`# print('list_100:',list_100)`
			`# print('list_1000:',list_1000)`
			`import pandas as pd`

			`# 创建一个简单的DataFrame`
			`data = pd.DataFrame({'A': [1, 2, 3000, 4, 500],`
			`'B': [10, 20, 30, 40, 50]})`


			`Q1 = data['A'].quantile(0.25)`
			`Q3 = data['A'].quantile(0.75)`
			`IQR = Q3 - Q1`

			`lower_threshold = Q1 - 1.5 * IQR`
			`upper_threshold = Q3 + 1.5 * IQR`
			`# 向下移动一行`
			`outliers = (data['A'] < lower_threshold) \| (data['A'] > upper_threshold)`
			`print(outliers)`
			`print( data['A'].shift(1))`
			`# 替换异常值为临近一个值`

			`data = {'A': [1, 2, 3, 4, 5],`
			`'B': [10, 20, 30, 40, 50]}`
			`df = pd.DataFrame(data)`

			`# 将满足条件的元素替换为新值`
			`condition = df['A'] > 3`
			`df_new = df.where(condition, other=-1)`

			`print("原始数据:")`
			`print(df)`
删除重复数据集 1 year ago
输出预测结果 1 year ago			`print("\n根据条件替换后的数据:")`
			`print(df_new)`