init repository
commit cfac5fd675
5  .gitignore  vendored  Normal file
@@ -0,0 +1,5 @@
dataset
rawdata
labels
logs
5  CHANGELOG.md  Normal file
@@ -0,0 +1,5 @@
# Change Log

## v0.1.0.20240910_alpha

- Got the full pipeline running end to end
9  README.md  Normal file
@@ -0,0 +1,9 @@
The source code lives in the `src` folder.

```
cd src
python main_train.py
```


pip install hydra-core "ray[tune]" torchinfo h5py tensorboard matplotlib openpyxl
21  configs/hydra/default.yaml  Normal file
@@ -0,0 +1,21 @@
# https://hydra.cc/docs/configure_hydra/intro/

# enable color logging
# defaults:
#   - override hydra_logging: colorlog
#   - override job_logging: colorlog

# output directory, generated dynamically on each run
run:
  dir: ../logs/${task_name}/${now:%Y-%m-%d}_${now:%H-%M-%S}
# sweep:
#   dir: ${paths.log_dir}/${task_name}/multiruns/${now:%Y-%m-%d}_${now:%H-%M-%S}
#   subdir: ${hydra.job.num}

job_logging:
  handlers:
    file:
      # Incorporates fix from https://github.com/facebookresearch/hydra/pull/2242
      filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log

verbose: [__main__, hydra]
20  configs/main_train.yaml  Normal file
@@ -0,0 +1,20 @@
defaults:
  - _self_
  - hydra: default
  - ray_tune: default

task_name: main

raw_spectral_data_dir: /code/admin/20240806-NanEr-5-8-data/rawdata
raw_labels_dir: /code/admin/20240806-NanEr-5-8-data/labels/NanEr
dataset_dir: /code/admin/20240806-NanEr-5-8-data/dataset
labels_name: ["TSC_T","TSC_C","TSC_C_lab","TSC_P_lab"]
#是

dataset:
  train_ratio: 0.8
  validate_ratio: 0.1
  num_worker: 128
train:
  max_epoch: 10000
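For reference, the merged configuration can be inspected without launching a training run. The snippet below is a minimal sketch (not part of the commit) that loads `main_train.yaml` through Hydra's compose API; it assumes Hydra ≥ 1.2 and that it is executed from the `src` directory so that `../configs` resolves the same way it does in `main_train.py`.

```
# Minimal sketch: inspect the composed config from configs/main_train.yaml.
import hydra
from omegaconf import OmegaConf

with hydra.initialize(version_base=None, config_path="../configs"):
    cfg = hydra.compose(config_name="main_train")
    print(OmegaConf.to_yaml(cfg))      # the full merged configuration
    print(cfg.dataset.train_ratio)     # 0.8
    print(cfg.train.max_epoch)         # 10000
```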
51  configs/ray_tune/default.yaml  Normal file
@@ -0,0 +1,51 @@
run:
  num_samples: 1
  resources_per_trial:
    cpu: 128
    gpu: 1
scheduler:
  _target_: ray.tune.schedulers.ASHAScheduler
  metric: val_loss
  mode: min
  max_t: 20000
  grace_period: 1
  reduction_factor: 2
config:
  choose_frame_spatial:
    _target_: ray.tune.grid_search
    values:
      - _target_: data.choose_frame_spatial.mean.ChooseFrameSpatial
        interval: [-3,0]


  features_scaling:
    _target_: ray.tune.grid_search
    values:
      - _target_: data.features_scaling.max_min.FeatureScaling


  labels_scaling:
    _target_: ray.tune.grid_search
    values:
      - _target_: data.labels_scaling.max_min.LabelScaling

  model:
    _target_: ray.tune.grid_search
    values:
      # - _target_: model.FCNN.FCNN
      # - _target_: model.CNN_LSTM_FCNN.CNN_LSTM_FCNN
      - _target_: model.CNN1D_FCNN.CNN1D_FCNN
  criterion:
    _target_: ray.tune.grid_search
    values:
      - _target_: torch.nn.MSELoss
  optimizer:
    _target_: ray.tune.grid_search
    values:
      - _target_: torch.optim.Adam
        _partial_: true
        lr: 0.0001
  batch_size:
    _target_: ray.tune.grid_search
    values:
      - 128
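The `config:` block above is what `main_train.py` (further down in this commit) turns into a Ray Tune search space: each entry whose `_target_` is `ray.tune.grid_search` is instantiated into `ray.tune.grid_search(values)`. The sketch below simply restates that step for clarity; `hydra_cfg` is assumed to be the DictConfig produced by `@hydra.main`.

```
# Minimal sketch of how the grid_search entries become a Ray Tune search space.
import hydra
import ray.tune

tune_cfg = {}
for key in hydra_cfg.ray_tune.config.keys():
    # _recursive_=False keeps the inner _target_ dicts (model, optimizer, ...)
    # un-instantiated, so each trial can build its own objects inside trainable().
    tune_cfg[key] = hydra.utils.instantiate(hydra_cfg.ray_tune.config[key], _recursive_=False)

# e.g. tune_cfg["batch_size"] is now ray.tune.grid_search([128])
```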
BIN  src/data/__pycache__/pre_process.cpython-312.pyc  Normal file
Binary file not shown.
BIN  src/data/__pycache__/spectral_dataset.cpython-312.pyc  Normal file
Binary file not shown.
BIN  src/data/choose_frame_spatial/__pycache__/mean.cpython-312.pyc  Normal file
Binary file not shown.
100  src/data/choose_frame_spatial/mean.py  Normal file
@@ -0,0 +1,100 @@
'''
@File    : mean.py
@Time    : 2024/08/12 13:53:36
@Author  : Zhanpeng Yang
@Version : 0.0.1
@Contact : zhpyang@outlook.com
@Desc    : First-stage preprocessing: select suitable spectra from the continuous spectral stream as input features
'''


import datetime
import numpy as np

class ChooseFrameSpatial:
    def __init__(self, interval=[-3, 0]):
        """Note: all hyperparameters required by the algorithm are passed in here and stored as class members; other methods must not take hyperparameters.

        Args:
            interval (list, optional): time range of spectra this method relies on, relative to the TSC start time; e.g. [-30,30] would take the spectra from 30 s before the TSC start up to the TSC end as the algorithm's input. Defaults to [-3,0].
        """
        self.description = f"{str(self.__class__).split('.')[2]}_{interval[0]}_{interval[1]}"
        # print(self.description)
        self.interval = interval

    def get_data_interval(self, start_time, end_time):
        return start_time + datetime.timedelta(seconds=self.interval[0]), start_time + datetime.timedelta(seconds=self.interval[1])

    def run(self, timestamps, rawdata):
        ############# Spatial compression ######################
        space_intensity_mean = np.mean(np.sum(rawdata, axis=1), axis=0)

        space_num = 10  # number of spatial points with the smallest variance to keep

        light_indices = np.where((space_intensity_mean > (0.25 * np.max(space_intensity_mean) + 0.75 * np.min(space_intensity_mean)))
                                 & (space_intensity_mean < (0.8 * np.max(space_intensity_mean) + 0.2 * np.min(space_intensity_mean)))
                                 )

        if light_indices[0].shape[0] < space_num:
            print('Not enough usable spatial points found!')
            return None

        intensity_data = rawdata[:, :, light_indices[0]]

        spatial_variance = np.var(np.sum(intensity_data, axis=1), axis=0)
        top_10_indices = np.argsort(spatial_variance)[:space_num]
        space_selected_data = intensity_data[:, :, top_10_indices]

        ############# Temporal compression ######################
        # framerate = 24
        # timewindow = 2

        # time_data_intensity = np.sum(np.mean(space_selected_data,axis=2),axis=1)

        # min_var = float('inf')
        # min_index = 0

        # for i in range(len(time_data_intensity)-framerate*timewindow-1):
        #     window_var = np.var(time_data_intensity[i:i+framerate*timewindow])
        #     if window_var < min_var:
        #         min_var = window_var
        #         min_index = i

        # selected_data = space_selected_data[min_index:min_index+framerate*timewindow,:,:]

        selected_data = np.mean(np.mean(space_selected_data, axis=0), axis=1)
        # print("timewindow_begin=",min_index)

        # if (result_dir is not None) and (filenum is not None):
        #     for i in range(selected_data.shape[2]):
        #         Z=selected_data[:,:,i]
        #         x=np.linspace(400,1000,Z.shape[1])
        #         y=selected_data[:,1,i]
        #         x, y = np.meshgrid(x, y)
        #         fig, ax = plt.subplots(subplot_kw=dict(projection='3d'))
        #         surf = ax.plot_surface(x, y, Z, cmap=cm.Blues,
        #                                linewidth=0, antialiased=False)
        #         ax.view_init(elev=30, azim=-60)  # change the viewing angle
        #         ax.set_zlim(0, 4095)
        #         ax.set_zlabel("Light Intensity")
        #         ax.set_xlabel("Spectral Band(nm)")
        #         ax.set_ylabel("Time (Serial Number)")
        #         plt.savefig(f"{result_dir}{filenum} file {i}-th spatial_point_line.png")
        #         plt.close()

        return selected_data

if __name__ == "__main__":
    timestamp = np.zeros((600,))
    input_data = np.random.random((600, 224, 512)) * 4095
    tmp = ChooseFrameSpatial()
    output = tmp.run(timestamp, input_data)
    print(f"Input data shape: {input_data.shape}\nOutput data shape: {output.shape}")
132  src/data/choose_frame_spatial/mean_var_weight_reduction.py  Normal file
@@ -0,0 +1,132 @@
'''
@File    : mean_var_weight_reduction.py
@Time    : 2024/08/12 13:53:36
@Author  : Zhanpeng Yang
@Version : 0.0.1
@Contact : zhpyang@outlook.com
@Desc    : First-stage preprocessing: select suitable spectra from the continuous spectral stream as input features
'''


import datetime
import numpy as np

class ChooseFrameSpatial:
    def __init__(self, interval=[-30, 30], overexposion_threshold=1, weight_mean_var=0.5, space_num=10, time_num=9, framerate=4, timewindow_time=1, Type=6):
        """Note: all hyperparameters required by the algorithm are passed in here and stored as class members; other methods must not take hyperparameters.

        Args:
            interval (list, optional): time range of spectra this method relies on. Defaults to [-30,30], i.e. the spectra from 30 s before the TSC start time to 30 s after the TSC end time are used as the algorithm's input.
        """
        # print(str(self.__class__).split("."))
        self.description = f"{str(self.__class__).split('.')[-2]}_{interval[0]}_{interval[1]}"

        # print(self.description)

        self.overexposion_threshold = overexposion_threshold  # overexposure threshold: a spatial point is judged overexposed once this many time points are overexposed
        self.weight_mean_var = weight_mean_var  # intensity/variance selection weight; the mean-intensity weight is fixed at 1, this is the variance weight, in [0, inf)
        self.space_num = space_num  # number of spatial points to select
        self.time_num = time_num  # number of time points to select
        self.framerate = framerate  # sampling frame rate
        self.timewindow = framerate * timewindow_time  # selected time window of 1 s

        self.interval = interval
        self.Type = Type

    def get_data_interval(self, start_time, end_time):
        return start_time + datetime.timedelta(seconds=self.interval[0]), end_time + datetime.timedelta(seconds=self.interval[1])

    def Overexposed_spatial_point_detection(self, inputdata, timethreshold):
        # Decide whether a spatial point is overexposed. inputdata is a (time, wavelength) 2-D array;
        # timethreshold is the number of overexposed time points above which the point counts as overexposed.
        # Returns False if overexposed, True otherwise.
        row_max_values = np.max(inputdata, axis=1)
        overexposed_rows = np.sum(row_max_values >= 4095)
        if overexposed_rows > timethreshold:
            return False
        else:
            return True

    def run(self, timestamps, rawdata):
        # Dimensionality reduction: spatially, select points whose intensity exceeds a threshold and whose variance is smallest;
        # temporally, split the whole interval into segments and average the highest-weighted 1 s window of each segment.
        # Type=1 keeps the first 1/3, Type=2 the middle 1/3, Type=3 the last 1/3, Type=4 the first 2/3, Type=5 the last 2/3, Type=6 everything.

        ############# Remove overexposed spatial points ######################
        unoverposed_indices = [i for i in range(rawdata.shape[2]) if self.Overexposed_spatial_point_detection(rawdata[:, :, i], self.overexposion_threshold)]
        rawdata = rawdata[:, :, unoverposed_indices]

        ############# Spatial compression ######################
        space_intensity_mean = np.mean(np.sum(rawdata, axis=1), axis=0)
        space_intensity_var = np.var(np.sum(rawdata, axis=1), axis=0)
        combined_score = (space_intensity_mean - np.min(space_intensity_mean)) / (np.max(space_intensity_mean) - np.min(space_intensity_mean)) \
            - self.weight_mean_var * (space_intensity_var - np.min(space_intensity_var)) / (np.max(space_intensity_var) - np.min(space_intensity_var))

        sorted_indices = np.argsort(combined_score)[::-1]
        space_selected_data = rawdata[:, :, sorted_indices[:self.space_num]]

        ############# Temporal compression ######################
        space_selected_data_intensity = np.sum(space_selected_data, axis=1)

        # Split the intensity data into time_num chunks along time; each chunk has shape (total time points / time_num, space_num)
        time_length = space_selected_data_intensity.shape[0]
        chunk_size = time_length // self.time_num
        remainder = time_length % self.time_num
        start_index = 0

        time_selected_data = None

        for i in range(self.time_num):
            end_index = start_index + chunk_size + (1 if i < remainder else 0)
            chunk = space_selected_data_intensity[start_index:end_index, :]

            window_var = np.empty(0)
            window_mean = np.empty(0)

            for j in range(len(chunk) - self.timewindow - 1):
                window_var = np.concatenate((window_var, np.expand_dims(np.sum(np.var(chunk[j:j + self.timewindow, :], axis=0)), 0)), axis=0)
                window_mean = np.concatenate((window_mean, np.expand_dims(np.sum(np.mean(chunk[j:j + self.timewindow, :], axis=0)), 0)), axis=0)

            combined_score_time = (window_mean - np.min(window_mean)) / (np.max(window_mean) - np.min(window_mean)) \
                - self.weight_mean_var * (window_var - np.min(window_var)) / (np.max(window_var) - np.min(window_var))
            sorted_indices = np.argsort(combined_score_time)[::-1]

            time_window_data = np.mean(space_selected_data[start_index + sorted_indices[0]:start_index + sorted_indices[0] + self.timewindow, :, :], axis=0)
            # print(time_selected_data.shape,time_window_data.shape,np.expand_dims( time_window_data,0).shape)
            if time_selected_data is None:
                time_selected_data = np.expand_dims(time_window_data, 0)
            else:
                time_selected_data = np.concatenate((time_selected_data, np.expand_dims(time_window_data, 0)), axis=0)

            start_index = end_index

        if self.Type == 1:
            print("time_selected_data[first 1/3].shape: ", time_selected_data[:self.time_num // 3, :, :].shape)
            return time_selected_data[:self.time_num // 3, :, :]
        elif self.Type == 2:
            print("time_selected_data[middle 1/3].shape: ", time_selected_data[self.time_num // 3:2 * self.time_num // 3, :, :].shape)
            return time_selected_data[self.time_num // 3:2 * self.time_num // 3, :, :]
        elif self.Type == 3:
            print("time_selected_data[last 1/3].shape: ", time_selected_data[2 * self.time_num // 3:, :, :].shape)
            return time_selected_data[2 * self.time_num // 3:, :, :]
        elif self.Type == 4:
            print("time_selected_data[first 2/3].shape: ", time_selected_data[:2 * self.time_num // 3, :, :].shape)
            return time_selected_data[:2 * self.time_num // 3, :, :]
        elif self.Type == 5:
            print("time_selected_data[last 2/3].shape: ", time_selected_data[self.time_num // 3:, :, :].shape)
            return time_selected_data[self.time_num // 3:, :, :]
        elif self.Type == 6:
            print("time_selected_data.shape: ", time_selected_data.shape)
            return time_selected_data
        else:
            print("Type is not 1, 2, 3, 4, 5 or 6 !")
            return None

if __name__ == "__main__":
    timestamp = np.zeros((600,))
    input_data = np.random.random((600, 224, 512)) * 4095
    tmp = ChooseFrameSpatial()
    output = tmp.run(timestamp, input_data)
    print(f"Input data shape: {input_data.shape}\nOutput data shape: {output.shape}")
BIN  src/data/features_scaling/__pycache__/max_min.cpython-312.pyc  Normal file
Binary file not shown.
30  src/data/features_scaling/max_min.py  Normal file
@@ -0,0 +1,30 @@
'''
@File    : max_min.py
@Time    : 2024/08/12 14:02:59
@Author  : Zhanpeng Yang
@Version : 0.0.1
@Contact : zhpyang@outlook.com
@Desc    : Min-max scaling of the input features
'''


import numpy as np

class FeatureScaling:
    def __init__(self):
        self.description = f"{str(self.__class__).split('.')[-2]}"
        self.flag_get_global_info = True
        self._min = 999999999
        self._max = 0

    def get_global_info(self, feature):
        tmp_max = np.max(feature)
        tmp_min = np.min(feature)
        if tmp_max > self._max:
            self._max = tmp_max
        if tmp_min < self._min:
            self._min = tmp_min

    def run(self, feature):
        feature = (feature - self._min) / (self._max - self._min)
        return feature
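This class follows a two-pass protocol, which is how `DataPreProcess` (in `src/data/pre_process.py` below) uses it: first accumulate the global min/max over every sample with `get_global_info`, then call `run` to scale each sample into [0, 1]. The following is a minimal sketch of that usage, not part of the commit; the dummy spectra shapes are only illustrative.

```
# Minimal sketch of the two-pass scaling protocol, assuming dummy spectra.
import numpy as np
# from data.features_scaling.max_min import FeatureScaling  # import path inside this repo

scaler = FeatureScaling()
samples = [np.random.random((9, 224)) * 4095 for _ in range(5)]  # placeholder spectra

for s in samples:                         # pass 1: collect global min/max
    scaler.get_global_info(s)

scaled = [scaler.run(s) for s in samples]  # pass 2: min-max scale each sample
print(scaled[0].min(), scaled[0].max())
```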
BIN  src/data/labels_scaling/__pycache__/max_min.cpython-312.pyc  Normal file
Binary file not shown.
45  src/data/labels_scaling/max_min.py  Normal file
@@ -0,0 +1,45 @@
'''
@File    : max_min.py
@Time    : 2024/08/12 14:03:38
@Author  : Zhanpeng Yang
@Version : 0.0.1
@Contact : zhpyang@outlook.com
@Desc    : Min-max scaling of the output labels
'''


import numpy as np

class LabelScaling:
    def __init__(self):
        self.description = f"{str(self.__class__).split('.')[-2]}"
        self.flag_get_global_info = True
        self._min = None
        self._max = None


    def get_global_info(self, label):
        label_dim = label.shape[0]
        if self._max is None:
            self._min = np.zeros((label_dim)) + 999999999
            self._max = np.zeros((label_dim))

        for i in range(label_dim):
            # tmp_max=np.max(label)
            # tmp_min=np.min(label)
            if label[i] > self._max[i]:
                self._max[i] = label[i]
            if label[i] < self._min[i]:
                self._min[i] = label[i]

    def run(self, label):
        for i in range(label.shape[0]):
            label[i] = (label[i] - self._min[i]) / (self._max[i] - self._min[i])
        return label

    def reverse(self, labels):
        for i in range(labels.shape[1]):
            labels[:, i] = labels[:, i] * (self._max[i] - self._min[i]) + self._min[i]
        return labels
373  src/data/pre_process.py  Normal file
@@ -0,0 +1,373 @@
'''
@File    : pre_process.py
@Time    : 2024/08/09 13:54:28
@Author  : Zhanpeng Yang
@Version : 0.0.1
@Contact : zhpyang@outlook.com
@Desc    : Data preprocessing
'''

import pandas as pd
import logging
import os,glob
import numpy as np
import datetime
import pickle
import h5py


class DataPreProcess:
    """
    Contains all data-preprocessing operations, including:
    1. From the raw binary data, extract the spectra around the target times listed in the Excel files provided by the steel plant
    2. From the spectra around the target time, select suitable, stable time and spatial points as features
    3. Scale the features and the labels
    """
    def __init__(self, raw_spectral_data_dir, raw_labels_dir, labels_name, dataset_dir, choose_frame_spatial, features_scaling, labels_scaling, train_ratio=0.8, validate_ratio=0.1) -> None:
        """Initialize the class with all parameters needed for preprocessing.

        Args:
            raw_spectral_data_dir (_type_): directory of the raw spectral data
            raw_labels_dir (_type_): directory of the raw labels
            labels_name (_type_): names of the labels to extract
            dataset_dir (_type_): directory of the generated dataset
            choose_frame_spatial (_type_): object that selects the spectral features
            features_scaling (_type_): object that scales the input features so they can be fed directly to the network
            labels_scaling (_type_): object that scales the output labels
            train_ratio (float, optional): training-set ratio. Defaults to 0.8.
            validate_ratio (float, optional): validation-set ratio; the remainder is the test set. Defaults to 0.1.
        """
        self.raw_spectral_data_dir=raw_spectral_data_dir
        self.raw_labels_dir=raw_labels_dir
        self.dataset_dir=dataset_dir
        self.labels_name=labels_name
        self.choose_frame_spatial=choose_frame_spatial
        self.features_scaling=features_scaling
        self.labels_scaling=labels_scaling
        self.train_ratio=train_ratio
        self.validate_ratio=validate_ratio

        # Load the raw labels
        self.raw_labels=self._load_raw_labels(raw_labels_dir,labels_name)

        # Load the raw-spectra cache
        self.raw_spectral_data_cache=self._load_raw_spectral_data_cache(raw_spectral_data_dir)

        # Load any csv file to determine the spectral dimensions
        self.spectral_dim,self.spatial_dim= self._get_spectral_spatial_dim(self.raw_spectral_data_cache)

        # Start converting the raw data into a dataset
        self._raw_data_2_dataset()


    def _load_raw_labels(self,raw_labels_dir:str,labels_name:list)->pd.DataFrame:
        """Read the folder of script-processed Excel files from the steel plant (all files are scanned),
        select the specified label names as output,
        and drop rows whose values are NaN or 0.

        Args:
            raw_labels_dir (str): path to the script-processed Excel files from the steel plant
            labels_name (list): columns chosen as labels

        Returns:
            pd.DataFrame: all filtered heats
        """

        raw_labels=None
        for name in os.listdir(raw_labels_dir):
            tmp_raw_labels=pd.read_excel(os.path.join(raw_labels_dir,name))

            choosed_column=["TSC_start_time","TSC_end_time"]
            choosed_column=choosed_column+labels_name

            # Keep only the columns we want as labels
            tmp_raw_labels=tmp_raw_labels.loc[:,choosed_column]

            # Rows containing NULL
            null_rows=tmp_raw_labels.isnull().any(axis=1)
            # Rows containing 0
            zeros_rows=(tmp_raw_labels==0).any(axis=1) | (tmp_raw_labels=='0').any(axis=1)

            # Drop every row that contains NULL or 0
            selected_rows=~(null_rows|zeros_rows)
            tmp_raw_labels=tmp_raw_labels[selected_rows]

            if raw_labels is None:
                raw_labels=tmp_raw_labels
            else:
                raw_labels=pd.concat([raw_labels,tmp_raw_labels],axis=0)
            logging.debug(f"Reading raw label excel file:{name}, which has {tmp_raw_labels.shape[0]} furnaces")
        logging.debug(f"Read raw label excel files, which have {raw_labels.shape[0]} furnaces in total")

        return raw_labels

    def _load_raw_spectral_data_cache(self,raw_spectral_data_dir:str)->list:
        """Build a cache of all raw spectral data files, including the start and end time recorded in each file, to speed up later reads of the raw data.

        Args:
            raw_spectral_data_dir (str): directory of the raw spectra

        Returns:
            list: the cache; it holds one entry per raw data file, each containing the start and end time as datetime plus the file path
        """
        spectral_file_paths=glob.glob(os.path.join(raw_spectral_data_dir,"*.csv"))
        cache_file_paths=glob.glob(os.path.join(raw_spectral_data_dir,"*.pkl"))

        update_flag=False

        # Regenerate if the cache file does not exist, or if the number of data files in the cache differs from the number of files in the folder
        if len(cache_file_paths)==0:
            logging.debug(f"Raw spectral data cache does not exist! Generating")
            update_flag=True
        elif len(cache_file_paths)==1:
            with open(cache_file_paths[0],"rb") as f:
                raw_spectral_data_cache=pickle.load(f)
            if len(raw_spectral_data_cache) !=len(spectral_file_paths):
                logging.debug(f"Raw spectral data cache is out of date! Regenerating, cache file number:{len(raw_spectral_data_cache)}, spectral data file number: {len(spectral_file_paths)}")
                update_flag=True
        else:
            logging.error(f"More than one 'raw_spectral_data_cache.pkl' file exists in {raw_spectral_data_dir}")

        if update_flag:
            spectral_file_paths.sort()
            raw_spectral_data_cache=[]
            for file in spectral_file_paths:
                tmp_info={}
                tmp_data=np.loadtxt(file, delimiter=",")
                start_t=datetime.datetime.fromtimestamp(tmp_data[0,0]/1000)+datetime.timedelta(microseconds=tmp_data[0,0]%1000)
                end_t=datetime.datetime.fromtimestamp(tmp_data[-1,0]/1000)+datetime.timedelta(microseconds=tmp_data[-1,0]%1000)
                tmp_info["start_t"]=start_t
                tmp_info["end_t"]=end_t
                tmp_info["file_path"]=file
                raw_spectral_data_cache.append(tmp_info)

            with open(os.path.join(raw_spectral_data_dir,f"raw_spectral_data_cache.pkl"),"wb") as f:
                pickle.dump(raw_spectral_data_cache,f)

        return raw_spectral_data_cache

    def _get_spectral_spatial_dim(self,raw_spectral_data_cache):
        data=np.loadtxt(raw_spectral_data_cache[0]["file_path"], delimiter=",").astype(np.uint64)
        if data[0,2]==229376:
            spectral_dim =224
            spatial_dim=512
        if data[0,2]==917504:
            spectral_dim =448
            spatial_dim=1024
        return spectral_dim, spatial_dim

    def _read_spectral_data(self,start_time:datetime.datetime,end_time:datetime.datetime)->np.ndarray:
        """Get the spectral data from start_time to end_time.

        Args:
            start_time (datetime.datetime): start time
            end_time (datetime.datetime): end time

        Returns:
            np.ndarray: raw spectral data
        """

        def get_spectral_data_per_file(file_path,s_t,e_t):
            data=np.loadtxt(file_path, delimiter=",").astype(np.uint64)

            if s_t is not None:
                tmp_s=datetime.datetime.timestamp(s_t)*1000
                tmp_s_index=0
                for i in range(data.shape[0]-1):
                    if data[i,0]<=tmp_s and data[i+1,0]>=tmp_s:
                        tmp_s_index=i
                        break
            else:
                tmp_s_index=0
            if e_t is not None:
                tmp_e=datetime.datetime.timestamp(e_t)*1000
                tmp_e_index=data.shape[0]
                for i in range(tmp_s_index,data.shape[0]-1):
                    if data[i,0]<=tmp_e and data[i+1,0]>=tmp_e:
                        tmp_e_index=i
                        break
            else:
                tmp_e_index=data.shape[0]

            with open(file_path[:-3]+"bin", "rb") as f:
                f.seek(data[tmp_s_index,1])
                d=f.read(np.uint64((tmp_e_index-tmp_s_index)*data[tmp_s_index,2]))
                d=np.frombuffer(d, dtype=np.uint16).reshape(tmp_e_index-tmp_s_index,self.spectral_dim,self.spatial_dim)
            return data[tmp_s_index:tmp_e_index,0],d

        timestamps=None
        raw_spectral_data=None
        for tmp_info in self.raw_spectral_data_cache:
            tmp_data=None
            if start_time<tmp_info["start_t"] and end_time>tmp_info["start_t"] and end_time<tmp_info["end_t"]:
                # The target interval overlaps the left side of this file's interval, so take data from the file start up to end_time
                tmp_time_stamp,tmp_data=get_spectral_data_per_file(tmp_info["file_path"] ,None,end_time)

            elif start_time<tmp_info["start_t"] and end_time>tmp_info["end_t"]:
                # The target interval fully contains this file's interval, so take data from the file start to the file end
                tmp_time_stamp,tmp_data=get_spectral_data_per_file(tmp_info["file_path"] ,None,None)
            elif start_time>tmp_info["start_t"] and end_time<tmp_info["end_t"]:
                # The target interval is fully contained in this file's interval, so take data from start_time to end_time
                tmp_time_stamp,tmp_data=get_spectral_data_per_file(tmp_info["file_path"] ,start_time,end_time)
            elif start_time>tmp_info["start_t"] and start_time<tmp_info["end_t"] and end_time>tmp_info["end_t"]:
                # The target interval overlaps the right side of this file's interval, so take data from start_time to the file end
                tmp_time_stamp,tmp_data=get_spectral_data_per_file(tmp_info["file_path"] ,start_time,None)
            if tmp_data is not None:
                if raw_spectral_data is None:
                    timestamps=tmp_time_stamp
                    raw_spectral_data=tmp_data
                else:
                    timestamps=np.concatenate((timestamps,tmp_time_stamp),axis=0)
                    raw_spectral_data=np.concatenate((raw_spectral_data,tmp_data),axis=0)
        return timestamps,raw_spectral_data

    def _raw_data_2_dataset(self):

        save_dir=os.path.join(self.dataset_dir,self.choose_frame_spatial.description)

        # Step 1: select the feature time and spatial points, then save
        pre_dataset_save_dir=os.path.join(save_dir,"data")

        if not os.path.exists(pre_dataset_save_dir):
            os.makedirs(pre_dataset_save_dir)

            # Regenerate if the data path does not exist; skip this part if it already exists.
            # !!! Note: if new data arrives, delete the folders under dataset (i.e. clear the cache) so everything is regenerated
            for i in range(self.raw_labels.shape[0]):

                start_time,end_time=self.choose_frame_spatial.get_data_interval(self.raw_labels.iloc[i]["TSC_start_time"],self.raw_labels.iloc[i]["TSC_end_time"])
                timestamps,raw_spectral_data=self._read_spectral_data(start_time,end_time)

                if raw_spectral_data is not None:
                    # Only record the sample if the retrieved frame rate is above 2
                    if raw_spectral_data.shape[0]>2*(end_time-start_time).total_seconds():
                        logging.debug(f"PreProcess Stage 1: [{i+1}/{self.raw_labels.shape[0]}] with {timestamps.shape[0]} frames")
                        raw_spectral_data=self.choose_frame_spatial.run(timestamps,raw_spectral_data)
                        np.savez(os.path.join(pre_dataset_save_dir,f"{timestamps[0]}_{timestamps.shape[0]}.npz",),timestamps=timestamps,raw_spectral_data=raw_spectral_data,raw_labels=self.raw_labels.iloc[i][self.labels_name].to_numpy())
        else:
            logging.info(f"Pre Dataset already exists in {pre_dataset_save_dir}")

        # Step 2: scale the data and build the dataset
        self.dataset_save_dir=os.path.join(save_dir,f"{self.features_scaling.description}_{self.labels_scaling.description}")

        if not os.path.exists(self.dataset_save_dir):
            os.makedirs(self.dataset_save_dir)
            pre_dataset_files=os.listdir(pre_dataset_save_dir)

            if self.features_scaling.flag_get_global_info:
                for name in pre_dataset_files:
                    tmp_data=np.load(os.path.join(pre_dataset_save_dir,name),allow_pickle=True)
                    self.features_scaling.get_global_info(tmp_data["raw_spectral_data"])
                    if self.labels_scaling.flag_get_global_info:
                        self.labels_scaling.get_global_info(tmp_data["raw_labels"])

            # Get the dimension information
            tmp_data=np.load(os.path.join(pre_dataset_save_dir,name),allow_pickle=True)
            feature_dim=tmp_data["raw_spectral_data"].shape
            label_dim=tmp_data["raw_labels"].shape


            # Split into training, validation and test sets
            np.random.shuffle(pre_dataset_files)
            [train_dateset_files,validate_dateset_files]=np.split(pre_dataset_files,[int(len(pre_dataset_files)*self.train_ratio)])
            [validate_dateset_files,test_dateset_files]=np.split(validate_dateset_files,[int(len(pre_dataset_files)*self.validate_ratio)])


            # Write the HDF5 files
            for dataset_name in ["train","validate","test"]:
                if dataset_name=="train":
                    file_names=train_dateset_files
                elif dataset_name=="validate":
                    file_names=validate_dateset_files
                elif dataset_name=="test":
                    file_names=test_dateset_files

                logging.info(f"Generating {dataset_name} dataset with {len(file_names)} samples")
                with h5py.File(os.path.join( self.dataset_save_dir,f"{dataset_name}.h5"), 'w') as f:
                    h5_features = f.create_dataset(
                        'features',
                        tuple([len(file_names)]+list(feature_dim)),
                        maxshape=(tuple([None]+list(feature_dim))),
                        chunks=tuple([1]+list(feature_dim)),  # set the chunk size manually to the size of one sample
                        # compression='gzip',
                        # compression_opts=9,
                        dtype=np.float32,
                    )
                    h5_labels = f.create_dataset(
                        'labels',
                        tuple([len(file_names)]+list(label_dim)),
                        chunks=tuple([1]+list(label_dim)),  # set the chunk size manually to the size of one sample
                        dtype=np.float32,
                    )
                    for i,name in enumerate(file_names):
                        tmp_data=np.load(os.path.join(pre_dataset_save_dir,name),allow_pickle=True)

                        feature=self.features_scaling.run(tmp_data["raw_spectral_data"])
                        label=self.labels_scaling.run(tmp_data["raw_labels"])
                        h5_features[i]=feature.astype(np.float32)
                        h5_labels[i]=label.astype(np.float32)
        else:
            pre_dataset_files=os.listdir(pre_dataset_save_dir)

            if self.labels_scaling.flag_get_global_info:
                for name in pre_dataset_files:
                    tmp_data=np.load(os.path.join(pre_dataset_save_dir,name),allow_pickle=True)
                    self.labels_scaling.get_global_info(tmp_data["raw_labels"])
            logging.info(f"Standardized dataset already exists in {self.dataset_save_dir}")

    def get_metric(self,outputs,labels):
        outputs=self.labels_scaling.reverse(outputs)
        labels=self.labels_scaling.reverse(labels)
        error=outputs-labels

        # print("outputs",outputs)
        # print("labels",labels)
        # print("errors",error)
        hit_rate=np.zeros(error.shape[1])
        for i,name in enumerate(self.labels_name):
            # error[:,i]=outputs[:,i]-labels[:,i]

            #["TSC_T","TSC_C","TSC_C_lab","TSC_P_lab"]

            if name =="TSC_T":
                bound=10
            elif name=="TSC_C":
                bound=0.05
            elif name=="TSC_C_lab":
                bound=0.05
            elif name=="TSC_P_lab":
                bound=0.0005

            hit_rate[i]=((error[:,i]>=-bound) &(error[:,i]<=bound)).sum() /error.shape[0]
        return error,hit_rate


if __name__=="__main__":

    logging.basicConfig(level = logging.DEBUG)

    raw_data_dir="/code/admin/20240806-NanEr-5-8-data/rawdata"
    labels_path="/code/admin/20240806-NanEr-5-8-data/labels/NanEr"
    dataset_dir="/code/admin/20240806-NanEr-5-8-data/dataset"
    labels_name=["TSC_T","TSC_C","TSC_C_lab","TSC_P_lab"]

    from choose_frame_spatial.mean import ChooseFrameSpatial
    from features_scaling.max_min import FeatureScaling
    from labels_scaling.max_min import LabelScaling
    choose_frame_spatial=ChooseFrameSpatial(interval=[-30,30])
    features_scaling=FeatureScaling()
    labels_scaling=LabelScaling()

    data_pre_process=DataPreProcess(raw_data_dir,labels_path,labels_name,dataset_dir,choose_frame_spatial,features_scaling,labels_scaling)
50  src/data/spectral_dataset.py  Normal file
@@ -0,0 +1,50 @@
import torch
import h5py
import os
import time


def load_dataset(dataset_dir):
    train_dataset=SpectralDataset(os.path.join(dataset_dir,"train.h5"))
    val_dataset=SpectralDataset(os.path.join(dataset_dir,"validate.h5"))
    return train_dataset, val_dataset

class SpectralDataset(torch.utils.data.Dataset):
    def __init__(self, hdf5_path):
        self.hdf5_path=hdf5_path
        self.hdf5_f = h5py.File(hdf5_path, 'r',
                                # rdcc_nbytes=1024**2*1024*10,  # 10 GB total cache size in bytes; should be a multiple of the chunk size
                                # rdcc_w0=0,  # cache eviction policy: closer to 0 evicts least-recently-used chunks first, closer to 1 evicts fully read/written chunks first
                                # rdcc_nslots=1e8,  # number of slots used to locate cached chunks; recommended 10x the number of cached chunks, 100x for best performance; default 521
                                )
        self.features=self.hdf5_f["features"]
        self.labels=self.hdf5_f["labels"]
    def __len__(self):
        return self.features.shape[0]

    def __getitem__(self, idx):

        # Build the input
        feature=self.features[idx]

        label=self.labels[idx]

        return feature, label



if __name__ =="__main__":

    training_data=SpectralDataset("/home/admin/20240806-NanEr-5-8-data/dataset/mean_-30_30/max_min_max_min/train.h5")
    print("Dataset size:",len(training_data))
    print("Input feature shape:", training_data[0][0].shape, "Output label shape:",training_data[0][1].shape)

    from torch.utils.data import DataLoader

    train_dataloader = DataLoader(training_data, batch_size=64, shuffle=True)

    train_features, train_labels = next(iter(train_dataloader))
    print(f"Feature batch shape: {train_features.size()}")
    print(f"Labels batch shape: {train_labels.size()}")
78  src/inference/CNN1D_FCNN.py  Normal file
@@ -0,0 +1,78 @@
import torch.nn as nn
import torch.nn.functional as F
import torchinfo
import torch

class CNN1D_FCNN(nn.Module):
    def __init__(self,):
        super(CNN1D_FCNN, self).__init__()

        self.Conv2d_1=nn.Conv1d(in_channels=1, out_channels=256, kernel_size=3,stride=1)
        self.Conv2d_2=nn.Conv1d(in_channels=256, out_channels=512, kernel_size=3,stride=1)
        self.MaxPool1d_1=nn.MaxPool1d(kernel_size=2, stride=2)

        self.Conv2d_3=nn.Conv1d(in_channels=512, out_channels=256, kernel_size=3,stride=1)

        self.Conv2d_4=nn.Conv1d(in_channels=256, out_channels=32, kernel_size=3,stride=1)
        self.Conv2d_5=nn.Conv1d(in_channels=32, out_channels=3, kernel_size=2,stride=1)
        self.flatten=nn.Flatten(start_dim=1, end_dim=2)


        # self.fc_1=nn.Linear(in_features=645,out_features=1024) #2048->1024

        # self.fc_2=nn.Linear(in_features=1024,out_features=512)
        self.fc_3=nn.Linear(in_features=315,out_features=256)
        self.fc_4=nn.Linear(in_features=256,out_features=128)
        self.fc_5=nn.Linear(in_features=128,out_features=4)


        # self.reshape_3=nn.Unflatten(dim=0,unflattened_size=(batch_size,601))


    def forward(self, x):
        # input_shape=x.shape

        x=torch.mean(x,dim=1,keepdim=True)
        x=torch.mean(x,dim=3)
        print(x.shape)

        # x=torch.unsqueeze(x, 1)
        x=F.tanh(self.Conv2d_1(x))
        x=F.tanh(self.Conv2d_2(x))
        x=self.MaxPool1d_1(x)
        x=F.tanh(self.Conv2d_3(x))
        x=F.tanh(self.Conv2d_4(x))
        x=F.tanh(self.Conv2d_5(x))
        x=self.flatten(x)
        # x=F.tanh(self.fc_1(x))
        # x=F.tanh(self.fc_2(x))
        x=F.tanh(self.fc_3(x))
        x=F.tanh(self.fc_4(x))
        x=F.sigmoid(self.fc_5(x))

        #
        # x=nn.Unflatten(dim=0,unflattened_size=(int(input_shape[0]),int(input_shape[1])))(x)
        # x=nn.Flatten(start_dim=2, end_dim=4)(x)

        # x,_=self.lstm_1(x)

        # x=x[:,-1,:]

        # x=nn.LeakyReLU()(self.fc_1(x))
        # x=nn.LeakyReLU()(self.fc_2(x))
        # x=nn.Sigmoid()(self.fc_3(x))

        return x

if __name__=="__main__":
    model=CNN1D_FCNN()

    torchinfo.summary(model,input_size=(64, 48, 224, 10))
BIN  src/inference/__pycache__/CNN1D_FCNN.cpython-312.pyc  Normal file
Binary file not shown.
BIN  src/inference/__pycache__/main.cpython-312.pyc  Normal file
Binary file not shown.
BIN  src/inference/__pycache__/test.cpython-312.pyc  Normal file
Binary file not shown.
1  src/inference/main.py  Normal file
@@ -0,0 +1 @@
np
BIN  src/inference/super_resolution.onnx  Normal file
Binary file not shown.
12  src/inference/test.http  Normal file
@@ -0,0 +1,12 @@
GET http://localhost:8000

###


POST http://localhost:8000/inference HTTP/1.1
Content-Type: application/json

{
    "type": "a",
    "a": "c"
}
17  src/inference/test.py  Normal file
@@ -0,0 +1,17 @@
from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()


@app.get("/")
async def root():
    return {"message": "Hello World"}

class Item(BaseModel):
    type: str

@app.post("/inference/")
async def inference(data:Item):
    print(data)
    return {"message": "Hello World"}
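The requests in `test.http` target `http://localhost:8000`, so the FastAPI app above has to be served first. The snippet below is a minimal sketch (not part of the commit) that assumes uvicorn is installed and that it is run from `src/inference` so the `test` module resolves; running `uvicorn test:app --port 8000` from that directory would achieve the same thing.

```
# Minimal sketch: serve the FastAPI app defined in src/inference/test.py.
import uvicorn
from test import app  # the FastAPI instance created in test.py

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
```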
56  src/main_train.py  Normal file
@@ -0,0 +1,56 @@
import hydra
import omegaconf # DictConfig, OmegaConf
import logging
import ray,os
import ray.tune
from functools import partial



logger = logging.getLogger(__name__)

from optimizer.trainable import trainable


# Load the system configuration
@hydra.main(version_base=None, config_path="../configs", config_name="main_train.yaml")
def main(hydra_cfg : omegaconf.DictConfig) -> None:
    logger.info(f"**************Hydra Configs**************\n{omegaconf.OmegaConf.to_yaml(hydra_cfg)}")

    # Get the current output directory
    hydra_output_dir = hydra.core.hydra_config.HydraConfig.get()
    hydra_output_dir=hydra_output_dir['runtime']['output_dir']


    # Get the current hyperparameter configuration
    tune_cfg={}
    for key in hydra_cfg.ray_tune.config.keys():
        tune_cfg[key]=hydra.utils.instantiate(hydra_cfg.ray_tune.config[key],_recursive_=False)

    # Scheduler
    scheduler=hydra.utils.instantiate(hydra_cfg.ray_tune.scheduler)
    result=ray.tune.run(
        partial(trainable,hydra_cfg),
        resources_per_trial=omegaconf.OmegaConf.to_container(hydra_cfg.ray_tune.run.resources_per_trial),
        config=tune_cfg,
        num_samples=hydra_cfg.ray_tune.run.num_samples,
        scheduler=scheduler,
        storage_path=hydra_output_dir,
        log_to_file=os.path.join(hydra_output_dir,"ray-tune.log"),
        name="ray-tune",
        verbose=1
    )

    # Keep the best model
    best_trial = result.get_best_trial("val_loss", "min", "last")
    print(f"Best trial config: {best_trial.config}")
    print(f"Best trial final validation loss: {best_trial.last_result['val_loss']}")


if __name__=="__main__":
    main()
78  src/model/CNN1D_FCNN.py  Normal file
@@ -0,0 +1,78 @@
import torch.nn as nn
import torch.nn.functional as F
import torchinfo
import torch

class CNN1D_FCNN(nn.Module):
    def __init__(self,):
        super(CNN1D_FCNN, self).__init__()

        self.Conv2d_1=nn.Conv1d(in_channels=1, out_channels=1024, kernel_size=3,stride=1)
        self.Conv2d_2=nn.Conv1d(in_channels=1024, out_channels=2048, kernel_size=3,stride=1)
        self.MaxPool1d_1=nn.MaxPool1d(kernel_size=2, stride=2)

        self.Conv2d_3=nn.Conv1d(in_channels=2048, out_channels=1024, kernel_size=3,stride=1)

        self.Conv2d_4=nn.Conv1d(in_channels=1024, out_channels=256, kernel_size=3,stride=1)
        self.Conv2d_5=nn.Conv1d(in_channels=256, out_channels=3, kernel_size=2,stride=1)
        self.flatten=nn.Flatten(start_dim=1, end_dim=2)


        # self.fc_1=nn.Linear(in_features=645,out_features=1024) #2048->1024

        # self.fc_2=nn.Linear(in_features=1024,out_features=512)
        self.fc_3=nn.Linear(in_features=315,out_features=256)
        self.fc_4=nn.Linear(in_features=256,out_features=128)
        self.fc_5=nn.Linear(in_features=128,out_features=4)


        # self.reshape_3=nn.Unflatten(dim=0,unflattened_size=(batch_size,601))


    def forward(self, x):
        # input_shape=x.shape

        # x=torch.mean(x,dim=1,keepdim=True)
        # x=torch.mean(x,dim=3)
        # print(x.shape)

        x=torch.unsqueeze(x, 1)
        x=F.tanh(self.Conv2d_1(x))
        x=F.tanh(self.Conv2d_2(x))
        x=self.MaxPool1d_1(x)
        x=F.tanh(self.Conv2d_3(x))
        x=F.tanh(self.Conv2d_4(x))
        x=F.tanh(self.Conv2d_5(x))
        x=self.flatten(x)
        # x=F.tanh(self.fc_1(x))
        # x=F.tanh(self.fc_2(x))
        x=F.tanh(self.fc_3(x))
        x=F.tanh(self.fc_4(x))
        x=F.sigmoid(self.fc_5(x))

        #
        # x=nn.Unflatten(dim=0,unflattened_size=(int(input_shape[0]),int(input_shape[1])))(x)
        # x=nn.Flatten(start_dim=2, end_dim=4)(x)

        # x,_=self.lstm_1(x)

        # x=x[:,-1,:]

        # x=nn.LeakyReLU()(self.fc_1(x))
        # x=nn.LeakyReLU()(self.fc_2(x))
        # x=nn.Sigmoid()(self.fc_3(x))

        return x

if __name__=="__main__":
    model=CNN1D_FCNN()

    torchinfo.summary(model,input_size=(64, 224))
76  src/model/CNN_FCNN.py  Normal file
@@ -0,0 +1,76 @@
import torch.nn as nn
import torch.nn.functional as F
import torchinfo
import torch

class CNN_FCNN(nn.Module):
    def __init__(self,):
        super(CNN_FCNN, self).__init__()

        self.Conv2d_1=nn.Conv2d(in_channels=1, out_channels=512, kernel_size=3,stride=1)
        self.Conv2d_2=nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3,stride=1)
        # self.MaxPool2d_1=nn.MaxPool2d(kernel_size=2, stride=2)

        self.Conv2d_3=nn.Conv2d(in_channels=1024, out_channels=512, kernel_size=3,stride=1)

        self.Conv2d_4=nn.Conv2d(in_channels=512, out_channels=64, kernel_size=3,stride=1)
        self.Conv2d_5=nn.Conv2d(in_channels=64, out_channels=6, kernel_size=2,stride=1)
        self.flatten=nn.Flatten(start_dim=1, end_dim=3)


        self.fc_1=nn.Linear(in_features=1290,out_features=2048) #2048->1024

        self.fc_2=nn.Linear(in_features=2048,out_features=1024)
        self.fc_3=nn.Linear(in_features=1024,out_features=512)
        self.fc_4=nn.Linear(in_features=512,out_features=256)
        self.fc_5=nn.Linear(in_features=256,out_features=4)


        # self.reshape_3=nn.Unflatten(dim=0,unflattened_size=(batch_size,601))


    def forward(self, x):
        # input_shape=x.shape

        x=torch.mean(x,dim=1,keepdim=True)

        # x=torch.unsqueeze(x, 1)
        x=F.tanh(self.Conv2d_1(x))
        x=F.tanh(self.Conv2d_2(x))
        # x=self.MaxPool2d_1(x)
        x=F.tanh(self.Conv2d_3(x))
        x=F.tanh(self.Conv2d_4(x))
        x=F.tanh(self.Conv2d_5(x))
        x=self.flatten(x)
        x=F.tanh(self.fc_1(x))
        x=F.tanh(self.fc_2(x))
        x=F.tanh(self.fc_3(x))
        x=F.tanh(self.fc_4(x))
        x=F.sigmoid(self.fc_5(x))

        #
        # x=nn.Unflatten(dim=0,unflattened_size=(int(input_shape[0]),int(input_shape[1])))(x)
        # x=nn.Flatten(start_dim=2, end_dim=4)(x)

        # x,_=self.lstm_1(x)

        # x=x[:,-1,:]

        # x=nn.LeakyReLU()(self.fc_1(x))
        # x=nn.LeakyReLU()(self.fc_2(x))
        # x=nn.Sigmoid()(self.fc_3(x))

        return x

if __name__=="__main__":
    model=CNN_FCNN()

    torchinfo.summary(model,input_size=(64, 48, 224, 10))
69  src/model/CNN_LSTM_FCNN.py  Normal file
@@ -0,0 +1,69 @@
import torch.nn as nn
import torch.nn.functional as F
import torchinfo
import torch

class CNN_LSTM_FCNN(nn.Module):
    def __init__(self,):
        super(CNN_LSTM_FCNN, self).__init__()

        # self.reshape_1=nn.Flatten(start_dim=0, end_dim=1)
        self.Conv2d_1=nn.Conv2d(in_channels=1, out_channels=256, kernel_size=3,stride=1)



        self.Conv2d_2=nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3,stride=1)

        self.Conv2d_3=nn.Conv2d(in_channels=512, out_channels=256, kernel_size=3,stride=1)

        self.Conv2d_4=nn.Conv2d(in_channels=256, out_channels=32, kernel_size=3,stride=1)

        self.MaxPool2d_1=nn.MaxPool2d(kernel_size=2, stride=2)

        self.lstm_1=nn.LSTM(input_size=3456, hidden_size=2048,num_layers=2, batch_first=True)


        self.fc_1=nn.Linear(in_features=2048,out_features=1024) #2048->1024

        self.fc_2=nn.Linear(in_features=1024,out_features=256)
        self.fc_3=nn.Linear(in_features=256,out_features=4)


        # self.reshape_3=nn.Unflatten(dim=0,unflattened_size=(batch_size,601))


    def forward(self, x):
        input_shape=x.shape
        x=nn.Flatten(start_dim=0, end_dim=1)(x)
        x=torch.unsqueeze(x, 1)
        x=self.Conv2d_1(x)
        x=self.Conv2d_2(x)
        x=self.Conv2d_3(x)
        x=self.Conv2d_4(x)

        x=self.MaxPool2d_1(x)
        x=nn.Unflatten(dim=0,unflattened_size=(int(input_shape[0]),int(input_shape[1])))(x)
        x=nn.Flatten(start_dim=2, end_dim=4)(x)


        x,_=self.lstm_1(x)

        x=x[:,-1,:]


        x=nn.LeakyReLU()(self.fc_1(x))
        x=nn.LeakyReLU()(self.fc_2(x))
        x=nn.Sigmoid()(self.fc_3(x))


        return x

if __name__=="__main__":
    model=CNN_LSTM_FCNN()

    torchinfo.summary(model,input_size=(64, 48, 224, 10))
42  src/model/FCNN.py  Normal file
@@ -0,0 +1,42 @@
import torch.nn as nn
import torch.nn.functional as F
import torchinfo

class FCNN(nn.Module):
    def __init__(self,):
        super(FCNN, self).__init__()


        self.fc1 = nn.Linear(48* 224* 10, 4)
        # self.fc2 = nn.Linear(40960, 10240)
        # self.fc3 = nn.Linear(10240, 4096)
        # self.fc4 = nn.Linear(4096, 2048)
        # self.fc5 = nn.Linear(2048, 1024)
        # self.fc6 = nn.Linear(1024, 512)
        # self.fc7 = nn.Linear(512, 256)
        # self.fc8 = nn.Linear(256, 128)
        self.fc9 = nn.Linear(128, 4)
        # self.fc4 = nn.Linear(10240, 4)



    def forward(self, x):
        x=x.reshape(x.shape[0],-1)
        x = F.relu(self.fc1(x))
        # x = F.relu(self.fc2(x))
        # x = F.relu(self.fc3(x))
        # x = F.relu(self.fc4(x))
        # x = F.relu(self.fc5(x))
        # x = F.relu(self.fc6(x))
        # x = F.relu(self.fc7(x))
        # x = F.relu(self.fc8(x))
        # x = F.sigmoid(self.fc9(x))
        # x = F.relu(self.fc2(x))
        # x = F.relu(self.fc3(x))
        # x = F.relu(self.fc4(x))
        return x

if __name__=="__main__":
    model=FCNN()

    torchinfo.summary(model,input_size=(64, 48, 224, 10))
BIN  src/model/__pycache__/CNN1D_FCNN.cpython-312.pyc  Normal file
Binary file not shown.
BIN  src/model/__pycache__/CNN_FCNN.cpython-312.pyc  Normal file
Binary file not shown.
BIN  src/model/__pycache__/CNN_LSTM_FCNN.cpython-312.pyc  Normal file
Binary file not shown.
BIN  src/model/__pycache__/FCNN.cpython-312.pyc  Normal file
Binary file not shown.
BIN  src/optimizer/__pycache__/trainable.cpython-312.pyc  Normal file
Binary file not shown.
246
src/optimizer/trainable.py
Normal file
246
src/optimizer/trainable.py
Normal file
@ -0,0 +1,246 @@
import ray.train
import ray.tune
import ray.tune.logger
from data.pre_process import DataPreProcess
from data.spectral_dataset import load_dataset
import ray
import hydra

import random
import logging
import torch
import pickle
import pathlib
import tempfile
import time, os

from torch.utils.tensorboard import SummaryWriter

import numpy as np

import matplotlib.pyplot as plt

logger = logging.getLogger("ray")


def trainable(hydra_cfg, tune_cfg):
    """
    tune_cfg is the mutable (searched) configuration passed in by Ray Tune.
    hydra_cfg is the full configuration passed in by Hydra.
    """

    # Set up the TensorBoard writer
    writer = SummaryWriter(log_dir=os.getcwd().replace("working_dirs", "driver_artifacts"))

    ##############################################
    ## Preprocess the raw data into a dataset   ##
    ##############################################
    t_1 = time.time()
    data_preprocess = DataPreProcess(
        hydra_cfg.raw_spectral_data_dir,
        hydra_cfg.raw_labels_dir,
        hydra_cfg.labels_name,
        hydra_cfg.dataset_dir,
        hydra.utils.instantiate(tune_cfg["choose_frame_spatial"]),
        hydra.utils.instantiate(tune_cfg["features_scaling"]),
        hydra.utils.instantiate(tune_cfg["labels_scaling"]),
        hydra_cfg.dataset.train_ratio,
        hydra_cfg.dataset.validate_ratio,
    )
    t_2 = time.time()
    logger.info(f"Preprocessing raw data costs {t_2-t_1}s")

    ##############################################
    ## Load the dataset into dataloaders        ##
    ##############################################
    train_dataset, val_dataset = load_dataset(data_preprocess.dataset_save_dir)
    trainloader = torch.utils.data.DataLoader(
        train_dataset, batch_size=int(tune_cfg["batch_size"]), shuffle=True, num_workers=hydra_cfg.dataset.num_worker
    )
    valloader = torch.utils.data.DataLoader(
        val_dataset, batch_size=int(tune_cfg["batch_size"]), shuffle=True, num_workers=hydra_cfg.dataset.num_worker
    )
    t_3 = time.time()
    logger.info(f"Building dataloaders costs {t_3-t_2}s")

    # Select the training device
    device = torch.device("cpu")
    if torch.cuda.is_available():
        device = torch.device("cuda:0")

    ##################################################
    ## Initialize the model, optimizer, and loss    ##
    ##################################################
    model = hydra.utils.instantiate(tune_cfg["model"])
    model.to(device)
    criterion = hydra.utils.instantiate(tune_cfg["criterion"])
    # The optimizer config is partially instantiated; bind the model parameters here
    optimizer = hydra.utils.instantiate(tune_cfg["optimizer"])
    optimizer = optimizer(model.parameters())
    t_4 = time.time()
    logger.info(f"Initializing the model costs {t_4-t_3}s")

    ##################################################
    ## Resume from an unfinished checkpoint, if any ##
    ##################################################
    checkpoint = ray.train.get_checkpoint()
    if checkpoint:
        with checkpoint.as_directory() as checkpoint_dir:
            data_path = pathlib.Path(checkpoint_dir) / "data.pkl"
            with open(data_path, "rb") as fp:
                checkpoint_state = pickle.load(fp)
            start_epoch = checkpoint_state["epoch"]
            model.load_state_dict(checkpoint_state["net_state_dict"])
            optimizer.load_state_dict(checkpoint_state["optimizer_state_dict"])
    else:
        start_epoch = 0

    t_5 = time.time()
    logger.info(f"Checking the checkpoint costs {t_5-t_4}s")

    ##################################################
    ## Write the model graph to TensorBoard         ##
    ##################################################
    sample = train_dataset[0:4][0]
    print(sample.shape)
    writer.add_graph(model, torch.tensor(sample).to(device))

    for epoch in range(start_epoch, hydra_cfg.train.max_epoch):

        ################################
        ## Training                   ##
        ################################
        logger.info(f"Start epoch {epoch+1}")
        train_loss = 0.0
        train_steps = 0
        train_hit_rate = None
        t_epoch_start = time.time()
        train_errors = None
        train_outputs = None
        train_labels = None
        model.train()
        for batch, (features, labels) in enumerate(trainloader):
            t_iteration_start = time.time()
            features = features.to(device)
            labels = labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Compute prediction and loss
            outputs = model(features)
            loss = criterion(outputs, labels)

            labels_npy = labels.cpu().detach().numpy()
            outputs_npy = outputs.cpu().detach().numpy()

            # Compute the custom metrics
            error, hit_rate = data_preprocess.get_metric(outputs_npy, labels_npy)

            # Backpropagation
            loss.backward()
            optimizer.step()

            # Accumulate the custom metrics
            train_loss += loss.item()
            train_steps += 1
            if train_steps == 1:
                train_hit_rate = hit_rate
                train_errors = error
                train_outputs = outputs_npy
                train_labels = labels_npy
            else:
                # Running mean of the per-batch hit rate
                train_hit_rate = (train_steps - 1) / train_steps * train_hit_rate + 1 / train_steps * hit_rate
                train_errors = np.concatenate((train_errors, error), axis=0)
                train_outputs = np.concatenate((train_outputs, outputs_npy), axis=0)
                train_labels = np.concatenate((train_labels, labels_npy), axis=0)

            t_iteration_end = time.time()
            print("Training Epoch:[{}/{}], Iteration:{}/{}, Loss:{}, Cost {}s".format(epoch + 1, hydra_cfg.train.max_epoch, train_steps, len(trainloader), loss.item(), t_iteration_end - t_iteration_start))
        train_loss = train_loss / train_steps

        ################################
        ## Validation                 ##
        ################################

        val_loss = 0.0
        val_steps = 0
        val_hit_rate = None

        t_epoch_train_end = time.time()
        logger.info(f"Training costs {t_epoch_train_end-t_epoch_start}s")
        model.eval()

        for batch, (features, labels) in enumerate(valloader):
            t_iteration_start = time.time()
            with torch.no_grad():
                features = features.to(device)
                labels = labels.to(device)

                outputs = model(features)
                loss = criterion(outputs, labels)

                error, hit_rate = data_preprocess.get_metric(outputs.cpu().detach().numpy(), labels.cpu().detach().numpy())

                val_loss += loss.cpu().numpy()
                val_steps += 1
                if val_steps == 1:
                    val_hit_rate = hit_rate
                else:
                    val_hit_rate = (val_steps - 1) / val_steps * val_hit_rate + 1 / val_steps * hit_rate
            t_iteration_end = time.time()
            print("Validate Iteration:{}/{}, Loss:{}, Cost {}s".format(val_steps, len(valloader), loss.item(), t_iteration_end - t_iteration_start))

        t_epoch_val_end = time.time()
        logger.info(f"Validation costs {t_epoch_val_end-t_epoch_train_end}s")

        ########################################################
        ## Save a checkpoint and log to TensorBoard each epoch ##
        ########################################################

        checkpoint_data = {
            "epoch": epoch,
            "net_state_dict": model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
        }

        with tempfile.TemporaryDirectory() as checkpoint_dir:
            data_path = pathlib.Path(checkpoint_dir) / "data.pkl"
            with open(data_path, "wb") as fp:
                pickle.dump(checkpoint_data, fp)

            checkpoint = ray.train.Checkpoint.from_directory(checkpoint_dir)

            reports = {
                "val_loss": val_loss / val_steps,
                "train_loss": train_loss,
            }
            for i, name in enumerate(hydra_cfg.labels_name):
                reports[f"train_hit_rate_{name}"] = train_hit_rate[i]
                reports[f"val_hit_rate_{name}"] = val_hit_rate[i]
                writer.add_histogram(tag=f'train_error_{name}', values=train_errors[:, i], global_step=epoch)

                # Scatter plot of predictions vs. labels for this target
                fig, ax = plt.subplots()
                ax.scatter(np.arange(train_outputs.shape[0]), train_outputs[:, i])
                ax.scatter(np.arange(train_outputs.shape[0]), train_labels[:, i])
                writer.add_figure(f'train_image_{name}', fig, epoch)
                plt.close(fig)
            ray.train.report(
                reports,
                checkpoint=checkpoint,
            )
        t_epoch_end = time.time()
        logger.info(f"Saving the checkpoint costs {t_epoch_end-t_epoch_val_end}s")

        print("Training Epoch:[{}/{}], Average Loss:{}, Cost {}s".format(epoch + 1, hydra_cfg.train.max_epoch, train_loss, t_epoch_end - t_epoch_start))
        for i, name in enumerate(hydra_cfg.labels_name):
            print(f"train_hit_rate_{name}: {train_hit_rate[i]} ", end=" ")
            print(f"val_hit_rate_{name}: {val_hit_rate[i]} ", end=" ")
        print()
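The trainable above expects the Hydra config as its first argument and the Ray Tune sample as its second. A minimal launch sketch, assuming the driver binds the config with `functools.partial` before handing it to Ray Tune; the search-space entries, resource numbers, and the `main_train.py`-style entry point shown here are illustrative and not confirmed by this commit beyond the keys `trainable` itself reads:

```python
# Hypothetical driver sketch: how trainable(hydra_cfg, tune_cfg) could be handed to Ray Tune.
from functools import partial

import hydra
from ray import tune
from ray.tune.schedulers import ASHAScheduler

from optimizer.trainable import trainable


@hydra.main(version_base=None, config_path="../configs", config_name="main_train")
def main(cfg):
    # Placeholder search space; the real entries (choose_frame_spatial, scalers,
    # model, criterion, optimizer) are instantiated from the ray_tune config
    # inside trainable() via hydra.utils.instantiate.
    search_space = {
        "batch_size": tune.choice([32, 64, 128]),
    }
    tuner = tune.Tuner(
        # partial binds hydra_cfg first, so Ray's sampled config lands in tune_cfg
        tune.with_resources(partial(trainable, cfg), {"cpu": 8, "gpu": 1}),
        param_space=search_space,
        tune_config=tune.TuneConfig(
            num_samples=1,
            scheduler=ASHAScheduler(metric="val_loss", mode="min"),
        ),
    )
    tuner.fit()


if __name__ == "__main__":
    main()
```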
71
src/scripts/Naner_label_excel_standardization.py
Normal file
@ -0,0 +1,71 @@
'''
@File    :   Naner_label_excel_standardization.py
@Time    :   2024/08/09 10:25:21
@Author  :   Zhanpeng Yang
@Version :   0.0.1
@Contact :   zhpyang@outlook.com
@Desc    :   Convert the label file exported from the steel plant's own system into a standard format for later use.
'''

import pandas as pd
import os


if __name__ == "__main__":

    raw_label_file_path = "/home/admin/20240806-NanEr-5-8-data/labels/raw_labels/NanEr/5-8月份6#炉生产数据.xls"

    save_dir = "/home/admin/20240806-NanEr-5-8-data/labels/NanEr"

    # Columns to keep, mapped from the original headers to standardized names
    select_and_rename = {
        "炉号": "Furnace_Number",
        "钢种": "Steel_Type",
        "SL1开始时间": "TSC_start_time",
        "SL1结束时间": "TSC_end_time",
        "TSC温度": "TSC_T",
        "TSC测量碳值": "TSC_C",
        "TSC化验碳值": "TSC_C_lab",
        "TSC化验磷值": "TSC_P_lab",
        "TSC化验锰值": "TSC_Mn_lab",
        "TSC化验铬值": "TSC_Cr_lab",
        "SL2开始时间": "TSO_start_time",
        "SL2结束时间": "TSO_end_time",
        "TSO温度": "TSO_T",
        "TSO测量碳值": "TSO_C",
        "TSO化验碳值": "TSO_C_lab",
        "TSO化验磷值": "TSO_P_lab",
        "TSO化验锰值": "TSO_Mn_lab",
        "TSO化验铬值": "TSO_Cr_lab",
    }

    ## Read the file
    raw_labels = pd.read_excel(raw_label_file_path)

    # Keep only the columns we need; start from row 1 because row 0 still contains header text
    selected_labels = raw_labels.loc[1:, select_and_rename.keys()]
    # Rename the columns
    selected_labels = selected_labels.rename(columns=select_and_rename)

    # The filtering below was moved into the data preprocessing step: dropping every row that
    # contains any 0 is too strict; only rows whose specified labels are 0 should be dropped.
    # # Rows containing NULL
    # null_rows = selected_labels.isnull().any(axis=1)
    # # Rows containing 0
    # zeros_rows = (selected_labels == 0).any(axis=1) | (selected_labels == '0').any(axis=1)

    # # Drop every row that contains NULL or 0
    # selected_rows = ~(null_rows | zeros_rows)
    # selected_labels = selected_labels[selected_rows]

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    save_path = os.path.join(
        save_dir,
        f"{selected_labels.iloc[0]['TSC_start_time'].strftime('%Y-%m-%d')}_{selected_labels.iloc[-1]['TSO_end_time'].strftime('%m-%d')}_{selected_labels.shape[0]}.xlsx",
    )

    selected_labels.to_excel(save_path, index=False)

    print(f"The processed file has been saved to: {save_path}")
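For a quick check of the standardized output, something like the sketch below could be used; the file name is a placeholder, since the script derives the real name from the first TSC start time, the last TSO end time, and the row count:

```python
# Hypothetical sanity check for the standardized label file.
import pandas as pd

standardized_path = "/home/admin/20240806-NanEr-5-8-data/labels/NanEr/<generated_name>.xlsx"  # use the path printed by the script
labels = pd.read_excel(standardized_path)
print(labels.columns.tolist())                                       # standardized column names
print(labels["TSC_start_time"].min(), labels["TSO_end_time"].max())  # covered time range
print(f"{len(labels)} heats")
```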
170
src/test/h5py_vs_torch_read_speed.py
Normal file
@ -0,0 +1,170 @@
import torch

import numpy as np

import h5py
import time
import os

# Make sure the output directory exists
os.makedirs("./temp", exist_ok=True)

data = np.random.random((60000, 448, 1024)).astype(np.float32)


# ##### NumPy: save, load, and iterate
# t_1 = time.time()
# np.save("./temp/data.npy", data)
# t_2 = time.time()
# save_time = t_2 - t_1
# print(f"Numpy save time:{save_time}s")
# data = np.load("./temp/data.npy")
# t_3 = time.time()
# load_time = t_3 - t_2
# print(f"Numpy load time:{load_time}s")
# for i in range(data.shape[0]):
#     tmp = data[i]
#     # print(tmp)
# t_4 = time.time()
# ergodic_time = t_4 - t_3
# print(f"Numpy ergodic time:{ergodic_time}s")


# ##### Torch: save, load, and iterate

# data_torch = torch.tensor(data)
# t_1 = time.time()
# torch.save(data_torch, "./temp/data.pt")
# t_2 = time.time()
# save_time = t_2 - t_1
# print(f"Torch save time:{save_time}s")
# data_torch = torch.load("./temp/data.pt", weights_only=True)
# t_3 = time.time()
# load_time = t_3 - t_2
# print(f"Torch load time:{load_time}s")
# for i in range(data.shape[0]):
#     tmp = data[i]
# t_4 = time.time()
# ergodic_time = t_4 - t_3
# print(f"Torch ergodic time:{ergodic_time}s")


##### HDF5: save, then iterate lazily over the on-disk dataset

t_1 = time.time()
with h5py.File("./temp/data.h5", 'w') as f:
    h5_features = f.create_dataset(
        'data',
        data.shape,
        dtype=np.float32,
    )
    h5_features[...] = data[...]
t_2 = time.time()
save_time = t_2 - t_1
print(f"HDF5 save time:{save_time}s")
hdf5_f = h5py.File("./temp/data.h5", 'r')
# load_data = hdf5_f["data"][...]
t_3 = time.time()
load_time = t_3 - t_2
print(f"HDF5 load time:{load_time}s")
for i in range(hdf5_f["data"].shape[0]):
    tmp = hdf5_f["data"][i]
t_4 = time.time()
ergodic_time = t_4 - t_3
print(f"HDF5 ergodic time:{ergodic_time}s")
for i in range(hdf5_f["data"].shape[0]):
    tmp = hdf5_f["data"][i]
t_5 = time.time()
ergodic_time = t_5 - t_4
print(f"HDF5 twice ergodic time:{ergodic_time}s")

hdf5_f.close()


##### HDF5 [()]: save, load the whole dataset into memory, and iterate

t_1 = time.time()
# Write the same array to a second file so the full in-memory read below has something to load
with h5py.File("./temp/data_tmp.h5", 'w') as f:
    f.create_dataset('data', data=data, dtype=np.float32)
t_2 = time.time()
save_time = t_2 - t_1
print(f"HDF5 [()] save time:{save_time}s")
hdf5_f = h5py.File("./temp/data_tmp.h5", 'r')
data = hdf5_f["data"][()]
# load_data = hdf5_f["data"][...]
t_3 = time.time()
load_time = t_3 - t_2
print(f"HDF5 [()] load time:{load_time}s")
for i in range(data.shape[0]):
    tmp = data[i]
t_4 = time.time()
ergodic_time = t_4 - t_3
print(f"HDF5 [()] ergodic time:{ergodic_time}s")
for i in range(data.shape[0]):
    tmp = data[i]
t_5 = time.time()
ergodic_time = t_5 - t_4
print(f"HDF5 [()] twice ergodic time:{ergodic_time}s")

# for i in range(hdf5_f["data"].shape[0]):
#     tmp = load_data[i]
# t_6 = time.time()
# ergodic_time = t_6 - t_5
# print(f"HDF5 loaded ergodic time:{ergodic_time}s")

hdf5_f.close()


# ##### HDF5 (tuned chunking/cache): save, load, and iterate

# t_1 = time.time()
# with h5py.File("./temp/data_optimzed.h5", 'w') as f:
#     h5_features = f.create_dataset(
#         'data',
#         data.shape,
#         maxshape=(None, 448, 1024),
#         chunks=(1, 448, 1024),  # set the chunk size to exactly one sample
#         # compression='gzip',
#         # compression_opts=1,
#         dtype=np.float32,
#     )
#     h5_features[...] = data[...]
# t_2 = time.time()
# save_time = t_2 - t_1
# print(f"HDF5 Optimized save time:{save_time}s")
# hdf5_f = h5py.File("./temp/data_optimzed.h5", 'r',
#                    rdcc_nbytes=1024**2*1024*20,  # total chunk-cache size in bytes (20 GiB here); should be a multiple of the chunk size
#                    rdcc_w0=0,  # eviction policy: near 0 evicts least-recently-used chunks first; near 1 evicts fully read/written chunks first
#                    # rdcc_nslots=1e8,  # hash slots used to locate cached chunks; ~10x the cached chunk count is recommended, 100x for best performance (default 521)
#                    )
# data = hdf5_f["data"]
# loaddata = data[...]
# t_3 = time.time()
# load_time = t_3 - t_2
# print(f"HDF5 Optimized load time:{load_time}s")
# for i in range(data.shape[0]):
#     tmp = data[i]
# t_4 = time.time()
# ergodic_time = t_4 - t_3
# print(f"HDF5 Optimized ergodic time:{ergodic_time}s")

# for i in range(data.shape[0]):
#     tmp = data[i]
#     # print(tmp)
# t_5 = time.time()
# ergodic_time = t_5 - t_4
# print(f"HDF5 Optimized twice ergodic time:{ergodic_time}s")
# hdf5_f.close()
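The commented-out block at the end is the interesting part of this benchmark: storing one spectrum per chunk and sizing the chunk cache accordingly is what keeps random single-spectrum reads cheap. A minimal sketch of that idea, assuming the real dataset file stores one `(448, 1024)` spectrum per sample; the dataset name, file path, and helper functions here are illustrative and not taken from this commit:

```python
# Hypothetical per-sample-chunked HDF5 layout for fast random reads.
import h5py
import numpy as np


def write_per_sample_chunks(path, features):
    """Store features with one chunk per sample so a random read touches exactly one chunk."""
    with h5py.File(path, "w") as f:
        f.create_dataset(
            "features",
            data=features.astype(np.float32),
            chunks=(1,) + features.shape[1:],  # chunk == one sample
        )


def read_one(path, i):
    """Read a single sample; only its chunk is fetched from disk."""
    with h5py.File(path, "r") as f:
        return f["features"][i]


if __name__ == "__main__":
    demo = np.random.random((16, 448, 1024)).astype(np.float32)
    write_per_sample_chunks("./temp/demo_chunks.h5", demo)
    print(read_one("./temp/demo_chunks.h5", 3).shape)  # (448, 1024)
```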