From 6e333fe733567782abce93d9752f41542dbd648b Mon Sep 17 00:00:00 2001
From: user
Date: Thu, 7 Nov 2024 14:37:51 +0800
Subject: [PATCH] stable version: prepare to train different models on
 different devices
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .gitignore                                    |   3 +-
 CHANGELOG.md                                  |  14 +-
 TODO.md                                       |  15 +
 configs/main_train.yaml                       |  18 +-
 configs/ray_tune/default.yaml                 |  40 +-
 .../__pycache__/pre_process.cpython-312.pyc   | Bin 20053 -> 11272 bytes
 .../pre_process_backup.cpython-312.pyc        | Bin 0 -> 20050 bytes
 .../spectral_dataset.cpython-312.pyc          | Bin 2690 -> 2601 bytes
 src/data/choose_frame_spatial/DBSCAN.py       | 142 ++++++
 .../DBSCAN_data_augmentation.py               | 102 +++++
 .../__pycache__/DBSCAN.cpython-312.pyc        | Bin 0 -> 5946 bytes
 .../DBSCAN_data_augmentation.cpython-312.pyc  | Bin 0 -> 5417 bytes
 .../__pycache__/FCNN_CARS.cpython-312.pyc     | Bin 0 -> 4078 bytes
 .../__pycache__/mean.cpython-312.pyc          | Bin 3790 -> 3486 bytes
 .../mean_CARS100_3.cpython-312.pyc            | Bin 0 -> 5655 bytes
 .../mean_chose_top_10_spatial.cpython-312.pyc | Bin 0 -> 4722 bytes
 .../__pycache__/meanwovar.cpython-312.pyc     | Bin 0 -> 3484 bytes
 .../meanwovar_CARS.cpython-312.pyc            | Bin 0 -> 4069 bytes
 src/data/choose_frame_spatial/mean.py         | 100 -----
 .../choose_frame_spatial/mean_CARS100_3.py    | 125 ++++++
 .../mean_chose_top_10_spatial.py              |  83 ++++
 .../mean_var_weight_reduction.py              | 132 ------
 .../choose_frame_spatial/meanwovar_CARS.py    |  78 ++++
 .../__pycache__/do_nothing.cpython-312.pyc    | Bin 0 -> 1040 bytes
 .../__pycache__/frame_max_min.cpython-312.pyc | Bin 0 -> 1194 bytes
 .../global_max_min.cpython-312.pyc            | Bin 0 -> 1659 bytes
 .../standardization.cpython-312.pyc           | Bin 0 -> 3846 bytes
 src/data/features_scaling/do_nothing.py       |  21 +
 src/data/features_scaling/frame_max_min.py    |  23 +
 .../{max_min.py => global_max_min.py}         |   0
 src/data/features_scaling/standardization.py  |  72 +++
 src/data/generate_raw_dataset_from_MySQL.py   | 202 +++++++++
 .../__pycache__/max_min.cpython-312.pyc       | Bin 2520 -> 2510 bytes
 .../standardization.cpython-312.pyc           | Bin 0 -> 4017 bytes
 src/data/labels_scaling/max_min.py            |   2 +-
 src/data/labels_scaling/standardization.py    |  75 ++++
 src/data/pre_process.py                       | 415 ++++++------------
 src/data/spectral_dataset.py                  |  19 +-
 src/data/test.ipynb                           | 126 ++++++
 src/main_train.py                             |  28 ++
 src/model/DRSN-CW.py                          | 160 +++++++
 .../__pycache__/CNN_FCNN_big.cpython-312.pyc  | Bin 0 -> 3338 bytes
 .../CNN_FCNN_small.cpython-312.pyc            | Bin 0 -> 3405 bytes
 .../__pycache__/CNN_LSTM_FCNN.cpython-312.pyc | Bin 3347 -> 3337 bytes
 src/model/__pycache__/DRSN-CW.cpython-312.pyc | Bin 0 -> 7881 bytes
 .../FCNN_DBSCAN_big.cpython-312.pyc           | Bin 0 -> 2739 bytes
 .../FCNN_DBSCAN_small.cpython-312.pyc         | Bin 0 -> 2540 bytes
 .../__pycache__/FNN_CARS.cpython-312.pyc      | Bin 0 -> 2649 bytes
 .../__pycache__/FNN_CARS_big.cpython-312.pyc  | Bin 0 -> 2790 bytes
 .../FNN_CARS_small.cpython-312.pyc            | Bin 0 -> 2537 bytes
 src/model/{ => history}/CNN1D_FCNN.py         |   0
 .../{CNN_FCNN.py => history/CNN_FCNN_big.py}  |  10 +-
 src/model/history/CNN_FCNN_small.py           |  78 ++++
 src/model/{ => history}/CNN_LSTM_FCNN.py      |  10 +-
 src/model/{ => history}/FCNN.py               |   0
 src/model/history/FCNN_DBSCAN_big.py          |  55 +++
 src/model/history/FCNN_DBSCAN_small.py        |  68 +++
 .../__pycache__/trainable.cpython-312.pyc     | Bin 11735 -> 17716 bytes
 .../__pycache__/utils.cpython-312.pyc         | Bin 0 -> 2923 bytes
 src/optimizer/trainable.py                    | 222 ++++++++--
 src/optimizer/utils.py                        |  48 ++
 src/scripts/Add_Lab.py                        | 149 +++++++
 .../Naner_label_excel_standardization.py      |   5 +-
 src/scripts/pre_dataset_visual.py             |  60 +++
 src/scripts/raw_dataset_visual.py             | 265 +++++++++++
 src/scripts/rawbindata2rawdataset.py          | 276 ++++++++++++
 66 files changed, 2650 insertions(+), 591 deletions(-)
 create mode 100644 src/data/__pycache__/pre_process_backup.cpython-312.pyc
 create mode 100644 src/data/choose_frame_spatial/DBSCAN.py
 create mode 100644 src/data/choose_frame_spatial/DBSCAN_data_augmentation.py
 create mode 100644 src/data/choose_frame_spatial/__pycache__/DBSCAN.cpython-312.pyc
 create mode 100644 src/data/choose_frame_spatial/__pycache__/DBSCAN_data_augmentation.cpython-312.pyc
 create mode 100644 src/data/choose_frame_spatial/__pycache__/FCNN_CARS.cpython-312.pyc
 create mode 100644 src/data/choose_frame_spatial/__pycache__/mean_CARS100_3.cpython-312.pyc
 create mode 100644 src/data/choose_frame_spatial/__pycache__/mean_chose_top_10_spatial.cpython-312.pyc
 create mode 100644 src/data/choose_frame_spatial/__pycache__/meanwovar.cpython-312.pyc
 create mode 100644 src/data/choose_frame_spatial/__pycache__/meanwovar_CARS.cpython-312.pyc
 delete mode 100644 src/data/choose_frame_spatial/mean.py
 create mode 100644 src/data/choose_frame_spatial/mean_CARS100_3.py
 create mode 100644 src/data/choose_frame_spatial/mean_chose_top_10_spatial.py
 delete mode 100644 src/data/choose_frame_spatial/mean_var_weight_reduction.py
 create mode 100644 src/data/choose_frame_spatial/meanwovar_CARS.py
 create mode 100644 src/data/features_scaling/__pycache__/do_nothing.cpython-312.pyc
 create mode 100644 src/data/features_scaling/__pycache__/frame_max_min.cpython-312.pyc
 create mode 100644 src/data/features_scaling/__pycache__/global_max_min.cpython-312.pyc
 create mode 100644 src/data/features_scaling/__pycache__/standardization.cpython-312.pyc
 create mode 100644 src/data/features_scaling/do_nothing.py
 create mode 100644 src/data/features_scaling/frame_max_min.py
 rename src/data/features_scaling/{max_min.py => global_max_min.py} (100%)
 create mode 100644 src/data/features_scaling/standardization.py
 create mode 100644 src/data/generate_raw_dataset_from_MySQL.py
 create mode 100644 src/data/labels_scaling/__pycache__/standardization.cpython-312.pyc
 create mode 100644 src/data/labels_scaling/standardization.py
 create mode 100644 src/data/test.ipynb
 create mode 100644 src/model/DRSN-CW.py
 create mode 100644 src/model/__pycache__/CNN_FCNN_big.cpython-312.pyc
 create mode 100644 src/model/__pycache__/CNN_FCNN_small.cpython-312.pyc
 create mode 100644 src/model/__pycache__/DRSN-CW.cpython-312.pyc
 create mode 100644 src/model/__pycache__/FCNN_DBSCAN_big.cpython-312.pyc
 create mode 100644 src/model/__pycache__/FCNN_DBSCAN_small.cpython-312.pyc
 create mode 100644 src/model/__pycache__/FNN_CARS.cpython-312.pyc
 create mode 100644 src/model/__pycache__/FNN_CARS_big.cpython-312.pyc
 create mode 100644 src/model/__pycache__/FNN_CARS_small.cpython-312.pyc
 rename src/model/{ => history}/CNN1D_FCNN.py (100%)
 rename src/model/{CNN_FCNN.py => history/CNN_FCNN_big.py} (89%)
 create mode 100644 src/model/history/CNN_FCNN_small.py
 rename src/model/{ => history}/CNN_LSTM_FCNN.py (87%)
 rename src/model/{ => history}/FCNN.py (100%)
 create mode 100644 src/model/history/FCNN_DBSCAN_big.py
 create mode 100644 src/model/history/FCNN_DBSCAN_small.py
 create mode 100644 src/optimizer/__pycache__/utils.cpython-312.pyc
 create mode 100644 src/optimizer/utils.py
 create mode 100644 src/scripts/Add_Lab.py
 create mode 100644 src/scripts/pre_dataset_visual.py
 create mode 100644 src/scripts/raw_dataset_visual.py
 create mode 100644 src/scripts/rawbindata2rawdataset.py

diff --git a/.gitignore b/.gitignore
index 59af8c0..0ad58a2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,4 @@
 dataset
-rawdata
+old_rawdata
 labels
 logs
-
diff --git a/CHANGELOG.md b/CHANGELOG.md
index bda3c34..bf86a51 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,15 @@
 # Change Log
-## v0.1.0.20240910_alpha
+## v0.0.2.20241015_alpha
 
-- Got the pipeline running end to end
\ No newline at end of file
+### [⭐️ Features]
+
+- Fetch data from the MySQL database
+
+### [🔄 Changed]
+
+- Reworked the data-processing flow: first fetch data from the database and split it into training and test sets, producing intermediate files; then run spatial-point selection and the remaining preprocessing
+
+## v0.0.1.20240910_alpha
+
+- Got the pipeline running end to end
diff --git a/TODO.md b/TODO.md
index e69de29..2492718 100644
--- a/TODO.md
+++ b/TODO.md
@@ -0,0 +1,15 @@
+-[ ] Survey all tunable hyperparameters
+
+-[ ] Survey loss designs for regression tasks
+
+-[ ] Survey model architectures usable for regression, and their tunable parameters
+
+-[ ] Survey methods for escaping local optima
+
+-[x] Convert the database data into training, validation, and test sets
+
+-[x] Run the training and validation sets through different preprocessing, models, and hyperparameters to find the best model
+
+-[x] Save the best model and its hyperparameters, and evaluate it on the test set to get the test error
+
+-[x] Train a separate model for each target parameter
diff --git a/configs/main_train.yaml b/configs/main_train.yaml
index 148d415..38b7737 100644
--- a/configs/main_train.yaml
+++ b/configs/main_train.yaml
@@ -5,16 +5,16 @@ defaults:
 
 task_name: main
 
-raw_spectral_data_dir: /code/admin/20240806-NanEr-5-8-data/rawdata
-raw_labels_dir: /code/admin/20240806-NanEr-5-8-data/labels/NanEr
-dataset_dir: /code/admin/20240806-NanEr-5-8-data/dataset
-labels_name: ["TSC_T","TSC_C","TSC_C_lab","TSC_P_lab"]
+# raw_spectral_data_dir: /data/SEMS-model-training/rawdata
+# raw_labels_dir: /data/SEMS-model-training/labels/NanEr
+dataset_dir: /data/SEMS-model-training/dataset
+# labels_name: ["TSC_T","TSC_C","TSC_C_lab","TSC_P_lab"]  # yes
 
 dataset:
-  train_ratio: 0.8
-  validate_ratio: 0.1
-  num_worker: 128
+  # train_ratio: 0.8
+  # validate_ratio: 0.1
+  num_worker: 0
 
 train:
-  max_epoch: 10000
+  max_epoch: 5000
+  checkpoint_interval: 100
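For context on the reworked flow described in the changelog entry above (my sketch, not code from this patch): stage 1 (generate_raw_dataset_from_MySQL.py) appears to write one .npz file per heat under dataset/raw_dataset/training and dataset/raw_dataset/test (and presumably a validation split), and stage 2 (pre_process.py) then applies choose_frame_spatial, features_scaling, and labels_scaling. The field names and array layout below are taken from the __main__ blocks later in this patch.

import numpy as np

# Load one intermediate heat file; allow_pickle is needed for the datetime fields.
heat = np.load("/data/SEMS-model-training/dataset/raw_dataset/test/24609591.npz",
               allow_pickle=True)
spectra = heat["rawSpectralData"]    # (time, wavelength, spatial_point), e.g. (600, 224, 512)
timestamps = heat["timestamps"]      # one ms tick per frame
print(heat["furnaceNumber"], heat["measureStartDatetime"], spectra.shape)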
diff --git a/configs/ray_tune/default.yaml b/configs/ray_tune/default.yaml
index 733ef3a..bdd6f19 100644
--- a/configs/ray_tune/default.yaml
+++ b/configs/ray_tune/default.yaml
@@ -1,40 +1,37 @@
 run:
   num_samples: 1
-  resources_per_trial:
-    cpu: 128
-    gpu: 1
+  resources_per_trial:
+    cpu: 4
+    gpu: 0.1
 
 scheduler:
-  _target_: ray.tune.schedulers.ASHAScheduler
-  metric: val_loss
-  mode: min
-  max_t: 20000
-  grace_period: 1
-  reduction_factor: 2
+  _target_: ray.tune.schedulers.FIFOScheduler
+  # _target_: ray.tune.schedulers.ASHAScheduler
+  # metric: val_loss
+  # mode: min
+  # max_t: 20000
+  # grace_period: 1
+  # reduction_factor: 2
 
 config:
   choose_frame_spatial:
     _target_: ray.tune.grid_search
     values:
-      - _target_: data.choose_frame_spatial.mean.ChooseFrameSpatial
-        interval: [-3,0]
-
+      - _target_: data.choose_frame_spatial.DBSCAN.ChooseFrameSpatial  ## or DBSCAN_data_augmentation
   features_scaling:
     _target_: ray.tune.grid_search
     values:
-      - _target_: data.features_scaling.max_min.FeatureScaling
-
+      - _target_: data.features_scaling.standardization.FeatureScaling
   labels_scaling:
     _target_: ray.tune.grid_search
     values:
-      - _target_: data.labels_scaling.max_min.LabelScaling
+      - _target_: data.labels_scaling.standardization.LabelScaling  # standardization
   model:
     _target_: ray.tune.grid_search
    values:
-      # - _target_: model.FCNN.FCNN
-      # - _target_: model.CNN_LSTM_FCNN.CNN_LSTM_FCNN
-      - _target_: model.CNN1D_FCNN.CNN1D_FCNN
+      - _target_: model.DRSN-CW.SpectralModel
+      # - _target_: model.FCNN_DBSCAN_small.SpectralModel
   criterion:
     _target_: ray.tune.grid_search
     values:
@@ -42,10 +39,15 @@ config:
   optimizer:
     _target_: ray.tune.grid_search
     values:
+      - _target_: torch.optim.Adam
+        _partial_: true
+        lr: 0.000001
       - _target_: torch.optim.Adam
        _partial_: true
         lr: 0.0001
+
   batch_size:
     _target_: ray.tune.grid_search
     values:
-      - 128
\ No newline at end of file
+      - 1024
+      - 128
diff --git a/src/data/__pycache__/pre_process.cpython-312.pyc b/src/data/__pycache__/pre_process.cpython-312.pyc
index abb675bcac768ece8252800babb484e1fcecd027..a9c1c383ba9cf493ef0603a38778bee446145291 100644
GIT binary patch
[base85 binary patch data omitted; this and the following binary hunks cover the compiled pre_process, pre_process_backup, and spectral_dataset .pyc files listed in the diffstat]
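For context on how these grid-searched `_target_` entries become live objects, here is my sketch, not code from this repo; it assumes the Hydra instantiate convention the YAML follows, with hydra-core and ray[tune] installed:

from hydra.utils import instantiate
from ray import tune

# Each grid_search axis mirrors one "values" list from configs/ray_tune/default.yaml.
search_space = {
    "optimizer": tune.grid_search([
        {"_target_": "torch.optim.Adam", "_partial_": True, "lr": 1e-6},
        {"_target_": "torch.optim.Adam", "_partial_": True, "lr": 1e-4},
    ]),
    "batch_size": tune.grid_search([1024, 128]),
}

def build_optimizer(trial_config, model):
    # "_partial_: true" makes instantiate() return a functools.partial,
    # so the trainable can bind model.parameters() afterwards.
    make_optimizer = instantiate(trial_config["optimizer"])
    return make_optimizer(model.parameters())

Two side effects of the config change are worth noting: `cpu: 4` / `gpu: 0.1` relies on Ray's fractional-GPU scheduling, so up to ten trials can share one GPU, and swapping in `FIFOScheduler` disables ASHA's early stopping so every trial runs to completion.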
diff --git a/src/data/choose_frame_spatial/DBSCAN.py b/src/data/choose_frame_spatial/DBSCAN.py
new file mode 100644
index 0000000..c1d65be
--- /dev/null
+++ b/src/data/choose_frame_spatial/DBSCAN.py
@@ -0,0 +1,142 @@
+'''
+@File    : DBSCAN.py
+@Time    : 2024/08/12 13:53:36
+@Author  : Zhanpeng Yang
+@Version : 0.0.1
+@Contact : zhpyang@outlook.com
+@Desc    : First-stage preprocessing: select suitable spectra from the continuous spectral stream as input features.
+'''
+
+
+import datetime
+import numpy as np
+from sklearn.cluster import DBSCAN
+from pathlib import Path
+
+class ChooseFrameSpatial:
+    def __init__(self, eps=0.15, min_samples=10):
+        """Note: every hyperparameter the algorithm needs is passed in here and stored as a class member; other methods must not take hyperparameters.
+        """
+        self.eps = eps
+        self.min_samples = min_samples
+        self.description = f"{str(self.__class__)[8:-2].split('.')[-2]}_eps_{eps}_min_samples_{min_samples}"  # no change needed here unless new hyperparameters are added
+
+        self.file_path = Path(__file__).absolute()
+
+    def load_state_dict(self, state):
+        self.eps = state["eps"]
+        self.min_samples = state["min_samples"]
+
+    def state_dict(self):
+        return {"eps": self.eps, "min_samples": self.min_samples}
+
+    def get_specific_data(self, measureStartDatetime, measureEndDatetime, timestamps, rawdata):
+        """
+        Fetch the spectral data that falls inside the specified time window.
+        """
+
+        if isinstance(measureStartDatetime.item(), str):
+            start_timestamp = datetime.datetime.strptime(measureStartDatetime.item(), "%Y-%m-%d %H:%M:%S").timestamp()*1000 - 1000  # 1 s before the measurement start (all timestamps in this project are in ms)
+            end_timestamp = datetime.datetime.strptime(measureStartDatetime.item(), "%Y-%m-%d %H:%M:%S").timestamp()*1000 + 1000  # 1 s after the measurement start
+        else:
+            start_timestamp = measureStartDatetime.item().timestamp()*1000 - 1000
+            end_timestamp = measureStartDatetime.item().timestamp()*1000 + 1000
+
+        start_index = 0
+        end_index = timestamps.shape[0]
+        for i in range(1, timestamps.shape[0]):
+            if timestamps[i] >= start_timestamp and timestamps[i-1] <= start_timestamp:
+                # print(f"start index: {i}")
+                start_index = i
+            if timestamps[i] >= end_timestamp and timestamps[i-1] <= end_timestamp:
+                # print(f"end index: {i}")
+                end_index = i
+
+        return rawdata[start_index:end_index, ...]
+
+    def run(self, measureStartDatetime, measureEndDatetime, timestamps, rawdata):
+
+        rawSpectralData = self.get_specific_data(measureStartDatetime, measureEndDatetime, timestamps, rawdata)
+
+        # flatten (time, wavelength, space) -> one spectrum per (time, space) sample
+        rawSpectralData = rawSpectralData.transpose(0, 2, 1)
+        rawSpectralData = rawSpectralData.reshape((rawSpectralData.shape[0]*rawSpectralData.shape[1], rawSpectralData.shape[2]))
+
+        # min-max normalize each spectrum, then cluster them with DBSCAN
+        rawSpectralData_normed = (rawSpectralData - np.min(rawSpectralData, axis=1, keepdims=True)) / (np.max(rawSpectralData, axis=1, keepdims=True) - np.min(rawSpectralData, axis=1, keepdims=True))
+        db_norm = DBSCAN(eps=self.eps, min_samples=self.min_samples).fit(rawSpectralData_normed)
+
+        labels_norm = db_norm.labels_
+        n_norm = len(set(labels_norm)) - (1 if -1 in labels_norm else 0)
+
+        if n_norm == 0:
+            return None
+
+        # pick the largest cluster
+        max_i = 0
+        max_num = 0
+        for i in range(n_norm):
+            tmp = (labels_norm == i).sum()
+            if tmp > max_num:
+                max_i = i
+                max_num = tmp
+
+        # the mean spectrum of the dominant cluster is the feature vector
+        selected_data = rawSpectralData_normed[labels_norm == max_i, :]
+        selected_data = np.mean(selected_data, axis=0)
+
+        # space_intensity_mean = np.mean(np.sum(selected_data, axis=1), axis=0)
+        # space_num = 10  # 10 spatial points with suitable mean intensity and minimal variance
+        # light_indices = np.where( (space_intensity_mean > (0.25*np.max(space_intensity_mean)+0.75*np.min(space_intensity_mean)))
+        #                           & (space_intensity_mean < (0.8*np.max(space_intensity_mean)+0.2*np.min(space_intensity_mean)))
+        #                           )
+        # if light_indices[0].shape[0] < space_num:
+        #     print('Not enough usable spatial points found!')
+        #     return None
+
+        # intensity_data = selected_data[:,:,light_indices[0]]
+
+        # # spatial_variance = np.var(np.sum(intensity_data,axis=1), axis=0)
+        # # top_10_indices = np.argsort(spatial_variance)[:space_num]
+        # space_selected_data = intensity_data[:,:,:]
+
+        # space_selected_data = np.mean(np.mean(space_selected_data,axis=2),axis=0)
+
+        # space_selected_data = (space_selected_data - np.min(space_selected_data)) / (np.max(space_selected_data) - np.min(space_selected_data))
+
+        # # Savitzky-Golay filter
+        # data_filtered = savgol_filter(space_selected_data, window_length=31, polyorder=2, axis=0)
+
+        # spec_indices = [106, 127, 118, 0, 52, 23, 113, 77, 157, 175, 195, 24, 218, 108, 8, 211, 2, 136, 11,
+        #                 36, 129, 109, 153, 188, 200, 196, 7, 19, 6, 198, 193, 221, 156, 187, 162, 204, 85, 104, 41,
+        #                 16, 185, 125, 14, 149, 91, 138, 72, 146, 35, 53, 190, 148, 75, 18, 17, 96, 167, 192, 201, 31,
+        #                 158, 183, 32, 40, 123, 145, 161, 27, 209, 216, 101, 51, 147, 58, 182, 49, 119, 13, 179, 140,
+        #                 105, 45, 55, 33, 73, 111, 97, 194, 121, 89, 38, 12, 197, 173, 160, 131, 141, 37, 208, 47]
+
+        # selected_data = data_filtered[spec_indices]
+
+        return selected_data
+
+if __name__ == "__main__":
+    raw_data = np.load("/data/SEMS-model-training/dataset/raw_dataset/test/24609591.npz", allow_pickle=True)
+    print(raw_data.keys())
+
+    print(f"heat number\t: {raw_data['furnaceNumber']}")
+    print(f"measure start\t: {raw_data['measureStartDatetime']}")
+    print(f"measure end\t: {raw_data['measureEndDatetime']}")
+    print(f"timestamps shape\t: {raw_data['timestamps'].shape}")
+    print(f"raw spectral shape\t: {raw_data['rawSpectralData'].shape}")
+
+    tmp = ChooseFrameSpatial()
+    output = tmp.run(raw_data["measureStartDatetime"], raw_data["measureEndDatetime"], raw_data["timestamps"], raw_data["rawSpectralData"])
+
+    print(f"input shape: {raw_data['rawSpectralData'].shape}\noutput shape: {output.shape}")
\ No newline at end of file
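Two spots in DBSCAN.py above could be tightened; this is my hedged sketch, not part of the patch. Because the timestamps array is sorted and in ms, the linear window scan in get_specific_data is equivalent in effect to a binary search:

import numpy as np

def get_window(timestamps, start_ms, end_ms):
    # timestamps: sorted 1-D array of ms ticks (the npz "timestamps" field)
    start_index = int(np.searchsorted(timestamps, start_ms, side="left"))
    end_index = int(np.searchsorted(timestamps, end_ms, side="right"))
    return slice(start_index, end_index)

# usage inside get_specific_data:
#   return rawdata[get_window(timestamps, start_timestamp, end_timestamp), ...]

Likewise, the largest-cluster loop in run() reduces to counts = np.bincount(labels_norm[labels_norm >= 0]); max_i = int(counts.argmax()).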
diff --git a/src/data/choose_frame_spatial/DBSCAN_data_augmentation.py b/src/data/choose_frame_spatial/DBSCAN_data_augmentation.py
new file mode 100644
index 0000000..5ea220d
--- /dev/null
+++ b/src/data/choose_frame_spatial/DBSCAN_data_augmentation.py
@@ -0,0 +1,102 @@
+'''
+@File    : DBSCAN_data_augmentation.py
+@Time    : 2024/08/12 13:53:36
+@Author  : Zhanpeng Yang
+@Version : 0.0.1
+@Contact : zhpyang@outlook.com
+@Desc    : First-stage preprocessing: select suitable spectra from the continuous spectral stream as input features.
+'''
+
+
+import datetime
+import numpy as np
+from sklearn.cross_decomposition import PLSRegression
+from scipy.signal import savgol_filter
+from sklearn.cluster import DBSCAN
+
+class ChooseFrameSpatial:
+    def __init__(self, eps=0.15, min_samples=10):
+        """Note: every hyperparameter the algorithm needs is passed in here and stored as a class member; other methods must not take hyperparameters.
+        """
+        self.eps = eps
+        self.min_samples = min_samples
+        self.description = f"{str(self.__class__)[8:-2].split('.')[-2]}_eps_{eps}_min_samples_{min_samples}"  # no change needed here unless new hyperparameters are added
+
+    def get_specific_data(self, measureStartDatetime, measureEndDatetime, timestamps, rawdata):
+        """
+        Fetch the spectral data that falls inside the specified time window.
+        """
+
+        if isinstance(measureStartDatetime.item(), str):
+            start_timestamp = datetime.datetime.strptime(measureStartDatetime.item(), "%Y-%m-%d %H:%M:%S").timestamp()*1000 - 1000  # 1 s before the measurement start (all timestamps in this project are in ms)
+            end_timestamp = datetime.datetime.strptime(measureStartDatetime.item(), "%Y-%m-%d %H:%M:%S").timestamp()*1000 + 1000  # 1 s after the measurement start
+        else:
+            start_timestamp = measureStartDatetime.item().timestamp()*1000 - 1000
+            end_timestamp = measureStartDatetime.item().timestamp()*1000 + 1000
+
+        start_index = 0
+        end_index = timestamps.shape[0]
+        for i in range(1, timestamps.shape[0]):
+            if timestamps[i] >= start_timestamp and timestamps[i-1] <= start_timestamp:
+                # print(f"start index: {i}")
+                start_index = i
+            if timestamps[i] >= end_timestamp and timestamps[i-1] <= end_timestamp:
+                # print(f"end index: {i}")
+                end_index = i
+
+        return rawdata[start_index:end_index, ...]
+
+    def run(self, measureStartDatetime, measureEndDatetime, timestamps, rawdata):
+
+        rawSpectralData = self.get_specific_data(measureStartDatetime, measureEndDatetime, timestamps, rawdata)
+
+        rawSpectralData = rawSpectralData.transpose(0, 2, 1)
+        rawSpectralData = rawSpectralData.reshape((rawSpectralData.shape[0]*rawSpectralData.shape[1], rawSpectralData.shape[2]))
+
+        rawSpectralData_normed = (rawSpectralData - np.min(rawSpectralData, axis=1, keepdims=True)) / (np.max(rawSpectralData, axis=1, keepdims=True) - np.min(rawSpectralData, axis=1, keepdims=True))
+        db_norm = DBSCAN(eps=self.eps, min_samples=self.min_samples).fit(rawSpectralData_normed)
+
+        labels_norm = db_norm.labels_
+        n_norm = len(set(labels_norm)) - (1 if -1 in labels_norm else 0)
+
+        if n_norm == 0:
+            return None
+
+        max_i = 0
+        max_num = 0
+        for i in range(n_norm):
+            tmp = (labels_norm == i).sum()
+            if tmp > max_num:
+                max_i = i
+                max_num = tmp
+
+        # unlike DBSCAN.py, keep every member of the dominant cluster instead of averaging
+        selected_data = rawSpectralData_normed[labels_norm == max_i, :]
+        # selected_data = np.mean(selected_data, axis=0)
+
+        return selected_data
+
+if __name__ == "__main__":
+    raw_data = np.load("/data/SEMS-model-training/dataset/raw_dataset/test/24609591.npz", allow_pickle=True)
+    print(raw_data.keys())
+
+    print(f"heat number\t: {raw_data['furnaceNumber']}")
+    print(f"measure start\t: {raw_data['measureStartDatetime']}")
+    print(f"measure end\t: {raw_data['measureEndDatetime']}")
+    print(f"timestamps shape\t: {raw_data['timestamps'].shape}")
+    print(f"raw spectral shape\t: {raw_data['rawSpectralData'].shape}")
+
+    tmp = ChooseFrameSpatial()
+    output = tmp.run(raw_data["measureStartDatetime"], raw_data["measureEndDatetime"], raw_data["timestamps"], raw_data["rawSpectralData"])
+
+    print(f"input shape: {raw_data['rawSpectralData'].shape}\noutput shape: {output.shape}")
\ No newline at end of file
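Because this variant returns every spectrum in the dominant cluster rather than their mean, one heat yields many training rows. A sketch of how those rows could be paired with the heat's label downstream; this is illustrative only, since the dataset wiring is not shown in this patch:

import numpy as np

def expand_heat(selected_data: np.ndarray, label: np.ndarray):
    """Tile one heat's label across all cluster members returned by run().

    selected_data: (n_frames, n_wavelengths) spectra from the dominant cluster
    label:         (n_targets,) lab values for this heat
    """
    X = selected_data
    y = np.tile(label, (selected_data.shape[0], 1))  # every frame shares the heat label
    return X, y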
mode 100644
index 0000000000000000000000000000000000000000..2cfaf00e71ff1e148bc1d2bf6ea31f27fa1f2452
GIT binary patch
[base85 binary data omitted]

diff --git a/src/data/choose_frame_spatial/__pycache__/DBSCAN_data_augmentation.cpython-312.pyc b/src/data/choose_frame_spatial/__pycache__/DBSCAN_data_augmentation.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..00e1d7284afdfe34382482b89d9d09a67997a294
GIT binary patch
[base85 binary data omitted]

diff --git a/src/data/choose_frame_spatial/__pycache__/mean.cpython-312.pyc b/src/data/choose_frame_spatial/__pycache__/mean.cpython-312.pyc
index 79af4abf4d2544f0af37490c281afd6c5916ce43..c2fed2363f092341f7d0ff93184921cc1ca67c31 100644
GIT binary patch
[base85 binary delta omitted]

diff --git a/src/data/choose_frame_spatial/__pycache__/mean_CARS100_3.cpython-312.pyc b/src/data/choose_frame_spatial/__pycache__/mean_CARS100_3.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..79e33b20fa2883d1dfec40e3e46568f3ff5c2590
GIT binary patch
[base85 binary data omitted]

diff --git a/src/data/choose_frame_spatial/__pycache__/mean_chose_top_10_spatial.cpython-312.pyc b/src/data/choose_frame_spatial/__pycache__/mean_chose_top_10_spatial.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ea294990a39fc3cb0ed418acacb4fdf06ee59e65
GIT binary patch
[base85 binary data omitted]

diff --git a/src/data/choose_frame_spatial/__pycache__/meanwovar.cpython-312.pyc b/src/data/choose_frame_spatial/__pycache__/meanwovar.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7275cb2008af3313b8cf6b26167745e0d0e907ef
GIT binary patch
[base85 binary data omitted]
[base85 binary data omitted for the remaining compiled .pyc files listed in the diffstat: FCNN_CARS.cpython-312.pyc and meanwovar_CARS.cpython-312.pyc]

diff --git a/src/data/choose_frame_spatial/mean.py b/src/data/choose_frame_spatial/mean.py
deleted file mode 100644
[the opening lines of this deleted-file hunk are garbled in the source; the recoverable remainder follows]
-        light_indices = np.where( (space_intensity_mean > (0.25*np.max(space_intensity_mean)+0.75*np.min(space_intensity_mean)))
-                                  & (space_intensity_mean < (0.8*np.max(space_intensity_mean)+0.2*np.min(space_intensity_mean)))
-                                  )
-
-        if light_indices[0].shape[0] < space_num:
-            print('No Enough Usable Space Point Found!')
-            return None
-
-        intensity_data = rawdata[:,:,light_indices[0]]
-
-        spatial_variance = np.var(np.sum(intensity_data,axis=1), axis=0)
-        top_10_indices = np.argsort(spatial_variance)[:space_num]
-        space_selected_data = intensity_data[:, :, top_10_indices]
-
-        ############# time-series compression ######################
-        # framerate = 24
-        # timewindow = 2
-
-        # time_data_intensity = np.sum(np.mean(space_selected_data,axis=2),axis=1)
-
-        # min_var = float('inf')
-        # min_index = 0
-
-        # for i in range(len(time_data_intensity)-framerate*timewindow-1):
-        #     window_var = np.var(time_data_intensity[i:i+framerate*timewindow])
-        #     if window_var < min_var:
-        #         min_var = window_var
-        #         min_index = i
-
-        # selected_data = space_selected_data[min_index:min_index+framerate*timewindow,:,:]
-
-        selected_data=np.mean(np.mean(space_selected_data,axis=0),axis=1)
-        # print("timewindow_begin=",min_index)
-
-        # if (result_dir is not None) and (filenum is not None) :
-
-        #     for i in range (selected_data.shape[2]):
-
-        #         Z=selected_data[:,:,i]
-        #         x=np.linspace(400,1000,Z.shape[1])
-        #         y=selected_data[:,1,i]
-
-        #         x, y = np.meshgrid(x, y)
-
-        #         fig, ax = plt.subplots(subplot_kw=dict(projection='3d'))
-
-        #         surf = ax.plot_surface(x, y, Z, cmap=cm.Blues,
-        #                             linewidth=0, antialiased=False)
-
-        #         ax.view_init(elev=30, azim=-60)  # change the view angle
-
-        #         ax.set_zlim(0, 4095)
-        #         ax.set_zlabel("Light Intensity")
-        #         ax.set_xlabel("Spectral Band(nm)")
-        #         ax.set_ylabel("Time (Serial Number)")
-        #         plt.savefig(f"{result_dir}{ filenum} file {i}-th spatial_point_line.png")
-        #         plt.close()
-
-        return selected_data
-
-if __name__ =="__main__":
-    timestamp=np.zeros((600,))
-    input_data=np.random.random((600,224,512))*4095
-    tmp=ChooseFrameSpatial()
-    output=tmp.run(timestamp,input_data)
-    print(f"Input shape: {input_data.shape}\nOutput shape: {output.shape}")
\ No newline at end of file
diff --git a/src/data/choose_frame_spatial/mean_CARS100_3.py b/src/data/choose_frame_spatial/mean_CARS100_3.py
new file mode 100644
index 0000000..64bfb58
--- /dev/null
+++ b/src/data/choose_frame_spatial/mean_CARS100_3.py
@@ -0,0 +1,125 @@
+'''
+@File    :   mean_CARS100_3.py
+@Time    :   2024/08/12 13:53:36
+@Author  :   Zhanpeng Yang
+@Version :   0.0.1
+@Contact :   zhpyang@outlook.com
+@Desc    :   Stage-1 preprocessing: select suitable spectra from the continuous spectral stream as the input features
+'''
+
+
+
+import datetime
+import numpy as np
+from sklearn.cross_decomposition import PLSRegression
+from scipy.signal import savgol_filter
+from scipy.ndimage import median_filter
+
+class ChooseFrameSpatial:
+    def __init__(self):
+        """Note: every hyperparameter the algorithm needs is passed in here and stored as a class member; no other method may take hyperparameters.
+        """
+
+        self.description=f"{str(self.__class__)[8:-2].split(".")[-2]}"  # no change needed unless new hyperparameters are added
+
+        print(self.description)
+
+    def get_specific_data(self,measureStartDatetime,measureEndDatetime,timestamps,rawdata):
+        """
+        Fetch the spectral data that falls inside the specified time window.
+        """
+
+
+        if isinstance(measureStartDatetime.item(),str):
+            start_timestamp=datetime.datetime.strptime(measureStartDatetime.item(), "%Y-%m-%d %H:%M:%S").timestamp()*1000  # measurement start time (all timestamps in this project are in ms)
+            end_timestamp=datetime.datetime.strptime(measureStartDatetime.item(), "%Y-%m-%d %H:%M:%S").timestamp()*1000 + 3000  # measurement start time + 3 s
+        else:
+            start_timestamp=measureStartDatetime.item().timestamp()*1000
+            end_timestamp=measureStartDatetime.item().timestamp()*1000 + 3000
+
+        start_index=0
+        end_index=timestamps.shape[0]
+        for i in range(1,timestamps.shape[0]):
+            if timestamps[i]>=start_timestamp and timestamps[i-1]<=start_timestamp:
+                # print(f"start index: {i}")
+                start_index=i
+            if timestamps[i]>=end_timestamp and timestamps[i-1]<=end_timestamp:
+                # print(f"end index: {i}")
+                end_index=i
+
+
+        return rawdata[start_index:end_index,...]
+
+
+
+
+
+    def run(self,measureStartDatetime,measureEndDatetime,timestamps,rawdata):
+
+        selected_data=self.get_specific_data(measureStartDatetime,measureEndDatetime,timestamps,rawdata)
+
+
+
+        space_intensity_mean = np.mean(np.sum(selected_data, axis=1), axis=0)
+        space_num= 10  # the 10 spatial points with suitable mean intensity and the smallest variance
+        light_indices = np.where( (space_intensity_mean > (0.25*np.max(space_intensity_mean)+0.75*np.min(space_intensity_mean)))
+                                & (space_intensity_mean < (0.8*np.max(space_intensity_mean)+0.2*np.min(space_intensity_mean)))
+                                )
+        if light_indices[0].shape[0] < space_num:
+            print('Not enough usable spatial points found!')
+            return None
+
+        intensity_data = selected_data[:,:,light_indices[0]]
+
+        # spatial_variance = np.var(np.sum(intensity_data,axis=1), axis=0)
+        # top_10_indices = np.argsort(spatial_variance)[:space_num]
+
+        time_window = 11
+        time_steps, wavelengths, spatial_points = intensity_data.shape
+        filtered_data = np.zeros_like(intensity_data)
+        for wavelength in range(wavelengths):
+            for spatial_point in range(spatial_points):
+                # extract the time series
+                time_series = intensity_data[:, wavelength, spatial_point]
+                # apply the median filter
+                filtered_time_series = median_filter(time_series, size=time_window)
+                # write the filtered series back in place
+                filtered_data[:, wavelength, spatial_point] = filtered_time_series
+
+
+        space_selected_data = filtered_data[:,:,:]
+
+        space_selected_data = np.mean(np.mean(space_selected_data,axis=2),axis=0)
+
+        # space_selected_data = (space_selected_data - np.min(space_selected_data)) / (np.max(space_selected_data) - np.min(space_selected_data))
+
+        # # Savitzky-Golay filter
+        # data_filtered = savgol_filter(space_selected_data, window_length=31, polyorder=2, axis=0)
+
+        # spec_indices = [106, 127, 118, 0, 52, 23, 113, 77, 157, 175, 195, 24, 218, 108, 8, 211, 2, 136, 11,
+        #                 36, 129, 109, 153, 188, 200, 196, 7, 19, 6, 198, 193, 221, 156, 187, 162, 204, 85, 104, 41,
+        #                 16, 185, 125, 14, 149, 91, 138, 72, 146, 35, 53, 190, 148, 75, 18, 17, 96, 167, 192, 201, 31,
+        #                 158, 183, 32, 40, 123, 145, 161, 27, 209, 216, 101, 51, 147, 58, 182, 49, 119, 13, 179, 140,
+        #                 105, 45, 55, 33, 73, 111, 97, 194, 121, 89, 38, 12, 197, 173, 160, 131, 141, 37, 208, 47]
+
+
+        # selected_data = data_filtered[spec_indices]
+
+        # print(space_selected_data.shape)
+        return space_selected_data
+        # return selected_data
+
+if __name__ =="__main__":
+    raw_data=np.load("/data/SEMS-model-training/dataset/raw_dataset/training/24604919.npz",allow_pickle=True)
+    print(raw_data.keys())
+
+    print(f"Furnace number\t:{raw_data["furnaceNumber"]}")
+    print(f"Measure start time\t:{raw_data["measureStartDatetime"]}")
+    print(f"Measure end time\t:{raw_data["measureEndDatetime"]}")
+    print(f"Timestamps shape\t:{raw_data["timestamps"].shape}")
+    print(f"Raw spectral data shape\t:{raw_data["rawSpectralData"].shape}")
+
+    tmp=ChooseFrameSpatial()
+    output=tmp.run(raw_data["measureStartDatetime"],raw_data["measureEndDatetime"],raw_data["timestamps"],raw_data["rawSpectralData"])
+
+
+    print(f"Input shape: {raw_data["rawSpectralData"].shape}\nOutput shape: {output.shape}")
\ No newline at end of file
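A note on the filtering loop in `run()` above: `scipy.ndimage.median_filter` accepts a per-axis window, so the nested Python loop over wavelengths and spatial points can be collapsed into a single call. A minimal sketch, assuming `intensity_data` and `time_window` as defined in `run()`; both forms use the filter's default reflect boundary mode, so the results should match:

    from scipy.ndimage import median_filter

    # median over an 11-frame window along the time axis only;
    # size=(time_window, 1, 1) leaves the wavelength and spatial axes untouched
    filtered_data = median_filter(intensity_data, size=(time_window, 1, 1))

Besides being shorter, this avoids roughly 224 x 10 Python-level loop iterations per furnace.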
diff --git a/src/data/choose_frame_spatial/mean_chose_top_10_spatial.py b/src/data/choose_frame_spatial/mean_chose_top_10_spatial.py
new file mode 100644
index 0000000..86136a9
--- /dev/null
+++ b/src/data/choose_frame_spatial/mean_chose_top_10_spatial.py
@@ -0,0 +1,83 @@
+'''
+@File    :   mean_chose_top_10_spatial.py
+@Time    :   2024/08/12 13:53:36
+@Author  :   Zhanpeng Yang
+@Version :   0.0.1
+@Contact :   zhpyang@outlook.com
+@Desc    :   Stage-1 preprocessing: select suitable spectra from the continuous spectral stream as the input features
+'''
+
+
+
+import datetime
+import numpy as np
+
+class ChooseFrameSpatial:
+    def __init__(self,):
+        """Note: every hyperparameter the algorithm needs is passed in here and stored as a class member; no other method may take hyperparameters.
+        """
+
+
+        self.description=f"{str(self.__class__)[8:-2].split(".")[-2]}"  # no change needed unless new hyperparameters are added
+
+        print(self.description)
+
+    def get_specific_data(self,measureStartDatetime,measureEndDatetime,timestamps,rawdata):
+        """
+        Fetch the spectral data that falls inside the specified time window.
+        """
+        start_timestamp=datetime.datetime.strptime(measureStartDatetime.item(), "%Y-%m-%d %H:%M:%S").timestamp()*1000 -1000  # measurement start time - 1 s (all timestamps in this project are in ms)
+        end_timestamp=datetime.datetime.strptime(measureStartDatetime.item(), "%Y-%m-%d %H:%M:%S").timestamp()*1000 +1000  # measurement start time + 1 s
+
+        start_index=0
+        end_index=timestamps.shape[0]
+        for i in range(1,timestamps.shape[0]):
+            if timestamps[i]>=start_timestamp and timestamps[i-1]<=start_timestamp:
+                # print(f"start index: {i}")
+                start_index=i
+            if timestamps[i]>=end_timestamp and timestamps[i-1]<=end_timestamp:
+                # print(f"end index: {i}")
+                end_index=i
+
+        return rawdata[start_index:end_index,...]
+
+    def run(self,measureStartDatetime,measureEndDatetime,timestamps,rawdata):
+
+        selected_data=self.get_specific_data(measureStartDatetime,measureEndDatetime,timestamps,rawdata)
+
+
+
+
+        space_intensity_mean = np.mean(np.sum(selected_data, axis=1), axis=0)
+        space_num= 10  # the 10 spatial points with suitable mean intensity and the smallest variance
+        light_indices = np.where( (space_intensity_mean > (0.25*np.max(space_intensity_mean)+0.75*np.min(space_intensity_mean)))
+                                & (space_intensity_mean < (0.8*np.max(space_intensity_mean)+0.2*np.min(space_intensity_mean)))
+                                )
+        if light_indices[0].shape[0] < space_num:
+            print('Not enough usable spatial points found!')
+            return None
+
+        intensity_data = selected_data[:,:,light_indices[0]]
+
+        spatial_variance = np.var(np.sum(intensity_data,axis=1), axis=0)
+        top_10_indices = np.argsort(spatial_variance)[:space_num]
+        selected_data = intensity_data[:, :, top_10_indices]
+
+
+
+
+        return selected_data
+
+if __name__ =="__main__":
+    raw_data=np.load("/data/SEMS-model-training/dataset/raw_dataset/training/24604919.npz",allow_pickle=True)
+    print(raw_data.keys())
+
+    print(f"Furnace number\t:{raw_data["furnaceNumber"]}")
+    print(f"Measure start time\t:{raw_data["measureStartDatetime"]}")
+    print(f"Measure end time\t:{raw_data["measureEndDatetime"]}")
+    print(f"Timestamps shape\t:{raw_data["timestamps"].shape}")
+    print(f"Raw spectral data shape\t:{raw_data["rawSpectralData"].shape}")
+
+    tmp=ChooseFrameSpatial()
+    output=tmp.run(raw_data["measureStartDatetime"],raw_data["measureEndDatetime"],raw_data["timestamps"],raw_data["rawSpectralData"])
+
+
+    print(f"Input shape: {raw_data["rawSpectralData"].shape}\nOutput shape: {output.shape}")
\ No newline at end of file
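Both `get_specific_data` implementations locate the window boundaries with a linear scan over `timestamps`. Since the timestamps are recorded in ascending order, a binary search yields the same indices; a sketch under that assumption (a hypothetical replacement, not part of this patch):

    import numpy as np

    # first frame whose timestamp is >= the boundary -- the index the scan converges to
    start_index = int(np.searchsorted(timestamps, start_timestamp, side="left"))
    end_index   = int(np.searchsorted(timestamps, end_timestamp,  side="left"))
    window = rawdata[start_index:end_index, ...]

Unlike the loop, `searchsorted` also returns well-defined indices when a boundary falls outside the recorded range.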
- """ - # print(str(self.__class__).split(".")) - self.description=f"{str(self.__class__).split(".")[-2]}_{interval[0]}_{interval[1]}" - - # print(self.description) - - self.overexposion_threshold = overexposion_threshold #过曝阈值:有多少时间点过曝时,判定为这个空间点过曝 - self.weight_mean_var =weight_mean_var #强度/方差选取权重,默认平均光强权重为1,此权重为方差权重,取值为[0,∞) - self.space_num= space_num #选取空间点数量 - self.time_num = time_num #选取时间点数量 - self.framerate = framerate #采样帧率 - self.timewindow = framerate * timewindow_time #选取时间窗口为1s - - - self.interval=interval - self.Type=Type - - def get_data_interval(self, start_time, end_time ): - - return start_time+datetime.timedelta(seconds=self.interval[0]),end_time+datetime.timedelta(seconds=self.interval[1]) - - def Overexposed_spatial_point_detection(self,inputdata, timethreshold): - #判别一个空间点是否过曝,inputdata是(time,wavelength)二维数组,timethreshold是时间阈值,有多少个时间点过曝则认为这个点过曝 - # 如果过曝返回False,不过曝返回True - - row_max_values= np.max(inputdata, axis=1) - overexposed_rows = np.sum(row_max_values >= 4095) - if overexposed_rows > timethreshold: - return False - else: - return True - def run(self,timestamps,rawdata): - # 降维方法:空间上选取强度达到一定的阈值且方差最小的点,时间上把整个时间段划分并选取各段权重最高的1s进行平均,Type=1取前1/3,Type=2取中部1/3,Type=3取最后1/3,Type=4取全部 - - - ############# 空间过曝点去除 ###################### - unoverposed_indices = [i for i in range(rawdata.shape[2]) if self.Overexposed_spatial_point_detection(rawdata[:, :, i],self.overexposion_threshold)] - rawdata = rawdata[:,:,unoverposed_indices] - - ############# 空间数据压缩 ###################### - space_intensity_mean = np.mean(np.sum(rawdata, axis=1), axis=0) - space_intensity_var = np.var(np.sum(rawdata, axis=1), axis=0) - combined_score = (space_intensity_mean - np.min(space_intensity_mean)) / (np.max(space_intensity_mean) - np.min(space_intensity_mean)) \ - - self.weight_mean_var * (space_intensity_var - np.min(space_intensity_var)) / (np.max(space_intensity_var) - np.min(space_intensity_var)) - - sorted_indices = np.argsort(combined_score)[::-1] - space_selected_data = rawdata[:,:,sorted_indices[:self.space_num]] - - ############# 时间数据压缩 ###################### - space_selected_data_intensity = np.sum(space_selected_data,axis=1) - - #按照时间把强度数据拆分成num_time份,每一份维度为(总时间点数/time_num, space_num) - time_length = space_selected_data_intensity.shape[0] - chunk_size = time_length // self.time_num - remainder = time_length % self.time_num - start_index = 0 - - time_selected_data =None - - for i in range(self.time_num): - end_index = start_index + chunk_size + (1 if i < remainder else 0) - chunk = space_selected_data_intensity[start_index:end_index, :] - - window_var = np.empty(0) - window_mean = np.empty(0) - - for j in range(len(chunk)-self.timewindow-1): - window_var = np.concatenate((window_var, np.expand_dims( np.sum(np.var(chunk[j:j+self.timewindow,:], axis=0)),0)), axis=0) - window_mean = np.concatenate((window_mean, np.expand_dims( np.sum(np.mean(chunk[j:j+self.timewindow,:], axis=0)),0)), axis=0) - - combined_score_time = (window_mean - np.min(window_mean)) / (np.max(window_mean) - np.min(window_mean)) \ - - self.weight_mean_var * (window_var - np.min(window_var)) / (np.max(window_var) - np.min(window_var)) - sorted_indices = np.argsort(combined_score_time)[::-1] - - time_window_data = np.mean(space_selected_data[start_index+sorted_indices[0]:start_index+sorted_indices[0]+self.timewindow, :, :],axis=0) - # print(time_selected_data.shape,time_window_data.shape,np.expand_dims( time_window_data,0).shape) - if time_selected_data is None: - - time_selected_data=np.expand_dims( time_window_data,0) - 
-            else:
-                time_selected_data = np.concatenate((time_selected_data, np.expand_dims( time_window_data,0)), axis=0)
-
-            start_index = end_index
-
-        if self.Type == 1:
-            print("time_selected_data[first 1/3].shape: ", time_selected_data[:self.time_num//3,:,:].shape)
-            return time_selected_data[:self.time_num//3,:,:]
-        elif self.Type == 2:
-            print("time_selected_data[middle 1/3].shape: ", time_selected_data[self.time_num//3:2*self.time_num//3,:,:].shape)
-            return time_selected_data[self.time_num//3:2*self.time_num//3,:,:]
-        elif self.Type == 3:
-            print("time_selected_data[last 1/3].shape: ", time_selected_data[2*self.time_num//3:,:,:].shape)
-            return time_selected_data[2*self.time_num//3:,:,:]
-        elif self.Type == 4:
-            print("time_selected_data[first 2/3].shape: ", time_selected_data[:2*self.time_num//3,:,:].shape)
-            return time_selected_data[:2*self.time_num//3,:,:]
-        elif self.Type == 5:
-            print("time_selected_data[last 2/3].shape: ", time_selected_data[self.time_num//3:,:,:].shape)
-            return time_selected_data[self.time_num//3:,:,:]
-        elif self.Type == 6:
-            print("time_selected_data.shape: ", time_selected_data.shape)
-            return time_selected_data
-        else:
-            print("Type is not 1, 2, 3, 4, 5 or 6 !")
-            return None
-
-if __name__ =="__main__":
-    timestamp=np.zeros((600,))
-    input_data=np.random.random((600,224,512))*4095
-    tmp=ChooseFrameSpatial()
-    output=tmp.run(timestamp,input_data)
-    print(f"Input shape: {input_data.shape}\nOutput shape: {output.shape}")
\ No newline at end of file
diff --git a/src/data/choose_frame_spatial/meanwovar_CARS.py b/src/data/choose_frame_spatial/meanwovar_CARS.py
new file mode 100644
index 0000000..b72bcce
--- /dev/null
+++ b/src/data/choose_frame_spatial/meanwovar_CARS.py
@@ -0,0 +1,78 @@
+'''
+@File    :   meanwovar_CARS.py
+@Time    :   2024/08/12 13:53:36
+@Author  :   Zhanpeng Yang
+@Version :   0.0.1
+@Contact :   zhpyang@outlook.com
+@Desc    :   Stage-1 preprocessing: select suitable spectra from the continuous spectral stream as the input features
+'''
+
+
+
+import datetime
+import numpy as np
+from scipy.signal import savgol_filter
+
+class ChooseFrameSpatial:
+    def __init__(self,):
+        """Note: every hyperparameter the algorithm needs is passed in here and stored as a class member; no other method may take hyperparameters.
+
+        Args:
+            interval (list, optional): time range of spectra this method depends on; the default [-30,30] takes the spectra from 30 s before the TSC start until the TSC end as the algorithm input.
+        """
+
+
+
+        # tmp=
+        # if len(tmp)>2:
+        # self.description=f"{str(self.__class__).split(".")[-2]}_{interval[0]}_{interval[1]}"
+        self.description=f"{str(self.__class__)[8:-2].split(".")[-2]}"
+
+        print(self.description)
+        # print(self.description)
+
+
+
+    def run(self,measureStartDatetime,measureEndDatetime,timestamps,rawdata):
+        ############# Spatial compression ######################
+        space_intensity_mean = np.mean(np.sum(rawdata, axis=1), axis=0)
+
+        space_num= 10  # number of minimum-variance points to keep
+
+        light_indices = np.where( (space_intensity_mean > (0.25*np.max(space_intensity_mean)+0.75*np.min(space_intensity_mean)))
+                                & (space_intensity_mean < (0.8*np.max(space_intensity_mean)+0.2*np.min(space_intensity_mean)))
+                                )
+
+        if light_indices[0].shape[0] < space_num:
+            print('Not enough usable spatial points found!')
+            return None
+
+        intensity_data = rawdata[:,:,light_indices[0]]
+
+        spatial_variance = np.var(np.sum(intensity_data,axis=1), axis=0)
+        top_10_indices = np.argsort(spatial_variance)[:space_num]
+        space_selected_data = intensity_data[:, :, top_10_indices]
+
+
+        selected_data=np.mean(np.mean(space_selected_data,axis=0),axis=1)
+
+        selected_data = savgol_filter(selected_data, window_length=11, polyorder=2, axis=0)
+
+        top_features = [106, 127, 118, 0, 52, 23, 113, 77, 157, 175, 195, 24, 218, 108, 8, 211, 2, 136,
+                        11, 36, 129, 109, 153, 188, 200, 196, 7, 19, 6, 198, 193, 221, 156, 187, 162, 204,
+                        85, 104, 41, 16, 185, 125, 14, 149, 91, 138, 72, 146, 35, 53, 190, 148, 75, 18,
+                        17, 96, 167, 192, 201, 31, 158, 183, 32, 40, 123, 145, 161, 27, 209, 216, 101, 51,
+                        147, 58, 182, 49, 119, 13, 179, 140, 105, 45, 55, 33, 73, 111, 97, 194, 121, 89,
+                        38, 12, 197, 173, 160, 131, 141, 37, 208, 47]
+        selected_data=selected_data[top_features]
+
+
+
+        return selected_data
+
+if __name__ =="__main__":
+    timestamp=np.zeros((600,))
+    input_data=np.random.random((56,224,512))*4095
+    tmp=ChooseFrameSpatial()
+    output=tmp.run(0,0,timestamp,input_data)
+    print(f"Input shape: {input_data.shape}\nOutput shape: {output.shape}")
\ No newline at end of file
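The `description` expression shared by these selectors is dense: it strips the `<class '...'>` wrapper from the class's string form and keeps the module name, which later names the dataset cache directory. An illustrative trace (module path as in this repo, when the class is imported from its package):

    s = str(ChooseFrameSpatial)
    print(s)                       # <class 'choose_frame_spatial.meanwovar_CARS.ChooseFrameSpatial'>
    print(s[8:-2])                 # choose_frame_spatial.meanwovar_CARS.ChooseFrameSpatial
    print(s[8:-2].split(".")[-2])  # meanwovar_CARS -> the subdirectory name used under dataset/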
+ """ + + + + # tmp= + # if len(tmp)>2: + # self.description=f"{str(self.__class__).split(".")[-2]}_{interval[0]}_{interval[1]}" + self.description=f"{str(self.__class__)[8:-2].split(".")[-2]}" + + print(self.description) + # print(self.description) + + + + def run(self,measureStartDatetime,measureEndDatetime,timestamps,rawdata): + ############# 空间数据压缩 ###################### + space_intensity_mean = np.mean(np.sum(rawdata, axis=1), axis=0) + + space_num= 10 #选取方差最小的点的个数 + + light_indices = np.where( (space_intensity_mean > (0.25*np.max(space_intensity_mean)+0.75*np.min(space_intensity_mean))) + & (space_intensity_mean < (0.8*np.max(space_intensity_mean)+0.2*np.min(space_intensity_mean))) + ) + + if light_indices[0].shape[0] < space_num: + print('No Enough Usable Space Point Found!') + return None + + intensity_data = rawdata[:,:,light_indices[0]] + + spatial_variance = np.var(np.sum(intensity_data,axis=1), axis=0) + top_10_indices = np.argsort(spatial_variance)[:space_num] + space_selected_data = intensity_data[:, :, top_10_indices] + + + selected_data=np.mean(np.mean(space_selected_data,axis=0),axis=1) + + selected_data = savgol_filter(selected_data, window_length=11, polyorder=2, axis=0) + + top_features = [106, 127, 118, 0, 52, 23, 113, 77, 157, 175, 195, 24, 218, 108, 8, 211, 2, 136, + 11, 36, 129, 109, 153, 188, 200, 196, 7, 19, 6, 198, 193, 221, 156, 187, 162, 204, + 85, 104, 41, 16, 185, 125, 14, 149, 91, 138, 72, 146, 35, 53, 190, 148, 75, 18, + 17, 96, 167, 192, 201, 31, 158, 183, 32, 40, 123, 145, 161, 27, 209, 216, 101, 51, + 147, 58, 182, 49, 119, 13, 179, 140, 105, 45, 55, 33, 73, 111, 97, 194, 121, 89, + 38, 12, 197, 173, 160, 131, 141, 37, 208, 47] + selected_data=selected_data[top_features] + + + + return selected_data + +if __name__ =="__main__": + timpestamp=np.zeros((600,)) + input_data=np.random.random((56,224,512))*4095 + tmp=ChooseFrameSpatial() + output=tmp.run(0,0,timpestamp,input_data) + print(f"输入数据维度:{input_data.shape}\n输出数据维度:{output.shape}") \ No newline at end of file diff --git a/src/data/features_scaling/__pycache__/do_nothing.cpython-312.pyc b/src/data/features_scaling/__pycache__/do_nothing.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..09e3c62cd0c527f6072e6da5111c33e6bb64e278 GIT binary patch literal 1040 zcmZ8fO=uHA6rS1Ln7Fp7w56>yU=%@fXp)8&G)PTR>&aXqg1HRq&L-Kq*^T=Hru0yu z+6uiX_Rvdv5GfV(Xi@4}yjWC}#e*Q}&rO?y(1Z2OZnjz<%r|e||9ms=+S^40@>YtL zpJ9Z)vqK<4OKF}0WgbyfL=;m_!$pomR}kf=5fxTAKdQ*rxkSi$1t&2#rfNzYBnzu5 z-62&~PuA*U?wVTNw`bCsp;Y>8YA_QY9LlCM+2M0yZq%_WhUq63E0SJQ^m6=$q?g6q zb;Yz)L-%T?lj-E3n7e4`wj|r0X`)iALtM^qY|SukC1s;3<}N9g9L&*pzPh#cuyOCn z=7ZJ7+TE=;3tLNb8;diInfr~o#~Y8U%L?-PcX4w2_uk&#SR&-|mTkI+36V9)vIucQR!viF_Xq`^ zOtoeMzixL)lggy5*rco(HzkdzddWy&SFjYVWU7tjwrOUk^ocaYwY-<@HDy< z{SZrijt#HJhF{1ZV%e9O_1MV!{tL6h$4K9olc#5ePmvQ%Muhw?t88Y*Q1cMLJetG> zl)(9Al;F&Eu(;u1RF)Kvo2;?Rn~pxw`QK<@>jI}02AD$MI(lZK=26h|z%5)zm#PY| zDiQ*uI2zO22)XS@n(r|?AlB?=(8++MdfiO+WFD}AtTupYw8L?sGfj~T_4u*@WBii~W1-oDaMv#`f_rxmfY;;? 
diff --git a/src/data/features_scaling/__pycache__/frame_max_min.cpython-312.pyc b/src/data/features_scaling/__pycache__/frame_max_min.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..76cd4f3d42ae7772fae58d8a1528eafe7fae0cb2
GIT binary patch

diff --git a/src/data/features_scaling/__pycache__/standardization.cpython-312.pyc b/src/data/features_scaling/__pycache__/standardization.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..84f35900f3626d7a3316d9331b3c60ab45ca4f82
GIT binary patch
+        if self._num>0:
+            self._var=((self._num-1)/(self._num))*self._var+(1/(self._num+1))*(feature-self._mean)*(feature-self._mean)
+            # var must be updated before mean
+        self._mean=(self._num/(self._num+1))*self._mean+(1/(self._num+1))*feature
+        self._num =self._num+1
+
+        # print("num:",self._num,"_mean:",self._mean,"_var:",self._var,"_min:",self._min,"_max:",self._max)
+
+
+    def run(self,feature):
+        # for i in range(label.shape[0]):
+        #     label[i]=(label[i]-self._min[i])/(self._max[i]-self._min[i])
+        feature=(feature-self._mean)/np.sqrt(self._var)
+
+        return feature
+    # def reverse(self,feature):
+    #     # for i in range(labels.shape[1]):
+    #     #     labels[:,i]=labels[:,i]*(self._max[i]-self._min[i])+self._min[i]
+
+    #     labels=labels*np.sqrt(self._var)+self._mean
+    #     return labels
+
+
\ No newline at end of file
diff --git a/src/data/generate_raw_dataset_from_MySQL.py b/src/data/generate_raw_dataset_from_MySQL.py
new file mode 100644
index 0000000..9d61979
--- /dev/null
+++ b/src/data/generate_raw_dataset_from_MySQL.py
@@ -0,0 +1,202 @@
+import os
+import shutil
+from pathlib import Path
+import pymysql.cursors
+
+import numpy as np
+import gzip
+import msgpack
+
+# oldRawDatasetDir=
+
+def getLatestData(dataNum,deviceId='DOJHBG',measureType="TSC"):
+    connection = pymysql.connect(host='localhost',
+                                 user='root',
+                                 password='Ghs@2211',
+                                 database='SEMS',
+                                 cursorclass=pymysql.cursors.DictCursor)
+    with connection:
+        with connection.cursor() as cursor:
+            # define the SQL query
+            sql = """
+            SELECT
+                f.furnaceNumber,
+                f.measureStartDatetime,
+                f.measureEndDatetime,
+                f.Temperature,
+                f.C,
+                f.P,
+                f.S,
+                f.Mn,
+                f.Ni,
+                f.Mo,
+                f.Cr,
+                ot.spectralData,
+                ot.spectralDim,
+                ot.spatialDim
+            FROM
+                furnace f
+            JOIN
+                online_test ot ON f.furnaceId = ot.furnaceId
+            WHERE
+                f.deviceId = %s AND
+                f.measureType = %s
+            ORDER BY
+                f.furnaceID DESC
+            LIMIT %s;
+            """
+            values=(deviceId,measureType,dataNum)
+            cursor.execute(sql,values)
+            result = cursor.fetchall()
+            return result
+
+
+
+
+
+def getMetadataFromMySQL(deviceId='DOJHBG',measureType="TSC"):
+    connection = pymysql.connect(host='localhost',
+                                 user='root',
+                                 password='Ghs@2211',
+                                 database='SEMS',
+                                 cursorclass=pymysql.cursors.DictCursor)
+    dataNum=0
+    with connection:
+        with connection.cursor() as cursor:
+            # define the SQL query
+            sql = """
+            SELECT
+                COUNT(*)
+            FROM
+                furnace as f
+            WHERE
+                f.deviceId = %s AND
+                f.measureType = %s
+            """
+            values=(deviceId,measureType)
+
+            cursor.execute(sql,values)
+            result = cursor.fetchall()
+            dataNum=result[0]["COUNT(*)"]
+
+            sql = """
+            SELECT furnaceNumber
+            FROM furnace as f
+            WHERE
+                f.deviceId = %s AND
+                f.measureType = %s
+            ORDER BY furnaceId DESC
+            LIMIT 1;
+            """
+            values=(deviceId,measureType)
+
+            cursor.execute(sql,values)
+            result = cursor.fetchall()
+            lastesFurnaceNumber=result[0]["furnaceNumber"]
+    return dataNum,lastesFurnaceNumber
+
+
+def saveDataset(caches, datasetDir,datasetType="train"):
+    os.makedirs(datasetDir/datasetType)
+
+    for cache in caches:
+        print(type(cache))
+        if isinstance(cache,str):
+            oldRawDatasetDir=Path("/data/SEMS-model-training/old_rawdata/dataset")
+            shutil.copy(oldRawDatasetDir/cache, datasetDir/datasetType)
+        if isinstance(cache,dict):
+            print(cache.keys())
+            spectralData=msgpack.unpackb(gzip.decompress(cache["spectralData"]))
+            # spectralData=msgpack.unpack(gzip.decompress(spectralDatawithTime["buffer"]))
+            spectralDataNumpy=np.frombuffer(spectralData["buffer"],dtype=np.uint16).reshape(int(len(spectralData["buffer"])/cache["spectralDim"]/cache["spatialDim"]/2),cache["spectralDim"],cache["spatialDim"])
+
+            np.savez(datasetDir/datasetType/f"{cache["furnaceNumber"]}.npz",furnaceNumber=cache["furnaceNumber"],measureStartDatetime=cache["measureStartDatetime"],measureEndDatetime=cache["measureEndDatetime"],timestamps=spectralData["timestamps"],rawSpectralData=spectralDataNumpy,rawLabels=np.array([cache["Temperature"],cache["C"],cache["P"],cache["S"],cache["Mn"],cache["Ni"],cache["Mo"],cache["Cr"],]),labelNames=["Temperature","C","P","S","Mn","Ni","Mo","Cr"])
+
+
+
+            # break
+
+
+
+def generateRawDataset(datasetDir="/data/SEMS-model-training/dataset/raw_dataset",datasetNum=1000,validationRate=0.2,testRate=0.1):
+
+    datasetDir=Path(datasetDir)
+
+
+    oldRawDatasetDir=Path("/data/SEMS-model-training/old_rawdata/dataset")
+    oldRawDatasetFileNames=os.listdir(oldRawDatasetDir)
+    oldRawDatasetFileNames.sort()
+    oldRawDatasetNum=len(oldRawDatasetFileNames)
+    # latestFurnaceID=oldRawDatasetFileNames[-1]
+
+    DBdataNum,lastesFurnaceNumber=getMetadataFromMySQL(deviceId='DOJHBG',measureType="TSC")
+
+
+
+
+    if os.path.exists(datasetDir):
+        # a dataset already exists, so check whether it is up to date
+        if os.path.exists(datasetDir/"validation_set"):
+            # compare the latest furnace number in the DB against the latest one in the existing dataset
+            valNames=os.listdir(datasetDir/"validation_set")
+            valNames.sort()
+            latestDatasetFurnaceID=valNames[-1]
+            if lastesFurnaceNumber==latestDatasetFurnaceID:
+                # the dataset is already current, so return without doing anything
+                return None
+            else:
+                shutil.rmtree(datasetDir)
+        else:
+            shutil.rmtree(datasetDir)
+    os.makedirs(datasetDir)
+
+    chooseDBdataNum=0
+    chooseOLDRawDataNum=0
+    if (DBdataNum+oldRawDatasetNum)>datasetNum:
+        if DBdataNum>=datasetNum:
+            chooseDBdataNum=datasetNum
+            chooseOLDRawDataNum=0
+        else:
+            chooseDBdataNum=DBdataNum
+            chooseOLDRawDataNum=datasetNum-DBdataNum
+    else:
+        chooseDBdataNum=DBdataNum
+        chooseOLDRawDataNum=oldRawDatasetNum
+
+    print(f"old data count: {chooseOLDRawDataNum}, new data count: {chooseDBdataNum}")
+
+    if chooseDBdataNum>0:
+        #
+        DBDataset=getLatestData(chooseDBdataNum,deviceId='DOJHBG',measureType="TSC")
+    else:
+        DBDataset=[]
+
+    rawDatasetCache=oldRawDatasetFileNames[-chooseOLDRawDataNum:]+DBDataset
+    print(rawDatasetCache[-1].keys())
+    datasetNum=len(rawDatasetCache)
+    valNum=int(datasetNum*validationRate)
+    testNum=int(datasetNum*testRate)
+    trainNum=datasetNum-valNum-testNum
+
+    saveDataset(rawDatasetCache[:trainNum], datasetDir,datasetType="training")
+    saveDataset(rawDatasetCache[-testNum-valNum:-testNum], datasetDir,datasetType="validation")
+
+    saveDataset(rawDatasetCache[-testNum:], datasetDir,datasetType="test")
+
+
+
+
+
+
+
+
+
+if __name__ =="__main__":
+    tmp=generateRawDataset()
+    # getMetadataFromMySQL()
+
+    # getLatestData(dataNum=1)
+
+
+
+
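For reference, the compressed `spectralData` column decodes in three steps, mirroring what `saveDataset` does with one row returned by `getLatestData` (`row` here is a hypothetical name for one such dict; the shapes follow the `reshape` in the code):

    import gzip
    import msgpack
    import numpy as np

    payload = msgpack.unpackb(gzip.decompress(row["spectralData"]))  # dict with "timestamps" and a raw uint16 "buffer"
    n_frames = len(payload["buffer"]) // (row["spectralDim"] * row["spatialDim"] * 2)  # 2 bytes per uint16 sample
    cube = np.frombuffer(payload["buffer"], dtype=np.uint16).reshape(
        n_frames, row["spectralDim"], row["spatialDim"])             # (time, wavelength, spatial)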
diff --git a/src/data/labels_scaling/__pycache__/max_min.cpython-312.pyc b/src/data/labels_scaling/__pycache__/max_min.cpython-312.pyc
index 100ec288772159d137ba1f1f03db9ae1e3314ae6..3e6a76ee16fccd9ff4a7ac9f830165a354843ddb 100644
GIT binary patch

diff --git a/src/data/labels_scaling/__pycache__/standardization.cpython-312.pyc b/src/data/labels_scaling/__pycache__/standardization.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..98e6eb917f6b9fdd29f367fe0313538022148a45
GIT binary patch

diff --git a/src/data/labels_scaling/max_min.py b/src/data/labels_scaling/max_min.py
index 181bd4b..48be106 100644
--- a/src/data/labels_scaling/max_min.py
+++ b/src/data/labels_scaling/max_min.py
@@ -33,6 +33,7 @@ class LabelScaling:
                 self._min[i]=label[i]
 
     def run(self,label):
+
         for i in range(label.shape[0]):
             label[i]=(label[i]-self._min[i])/(self._max[i]-self._min[i])
         return label
@@ -41,5 +42,4 @@ class LabelScaling:
             labels[:,i]=labels[:,i]*(self._max[i]-self._min[i])+self._min[i]
         return labels
 
-
\ No newline at end of file
diff --git a/src/data/labels_scaling/standardization.py b/src/data/labels_scaling/standardization.py
new file mode 100644
index 0000000..260a005
--- /dev/null
+++ b/src/data/labels_scaling/standardization.py
@@ -0,0 +1,75 @@
+'''
+@File    :   standardization.py
+@Time    :   2024/08/12 14:03:38
+@Author  :   Zhanpeng Yang
+@Version :   0.0.1
+@Contact :   zhpyang@outlook.com
+@Desc    :   Standardization (z-score) scaling for the output labels
+'''
+
+
+
+import numpy as np
+from pathlib import Path
+class LabelScaling:
+    def __init__(self):
+        self.description=f"{str(self.__class__).split(".")[-2]}"
+        self.flag_get_global_info=True
+        self._min=None
+        self._max=None
+        self._mean=None
+        self._var=None
+        self._num=0
+
+        self.file_path = Path(__file__).absolute()
+
+
+
+    def load_state_dict(self,state):
+        self._min=state["_min"]
+        self._max=state["_max"]
+        self._mean=state["_mean"]
+        self._var=state["_var"]
+        self._num=state["_num"]
+    def state_dict(self):
+        return {"_min":self._min,"_max":self._max,"_mean":self._mean,"_var":self._var,"_num":self._num}
+
+
+    def get_global_info(self,label):
+        label=label.astype(np.float32)
+        label_dim=label.shape[0]
+        if self._max is None:
+            self._min=np.zeros((label_dim),dtype=np.float32)+999999999
+            self._max=np.zeros((label_dim),dtype=np.float32)
+            self._mean=np.zeros((label_dim),dtype=np.float32)
+            self._var=np.zeros((label_dim),dtype=np.float32)
+            self._num=0
+
+        self._min=np.minimum(self._min,label)
+        self._max=np.maximum(self._max,label)
+
+        if self._num>0:
+            self._var=((self._num-1)/(self._num))*self._var+(1/(self._num+1))*(label-self._mean)*(label-self._mean)
+            # var must be updated before mean
+        self._mean=(self._num/(self._num+1))*self._mean+(1/(self._num+1))*label
+        self._num =self._num+1
+
+        # print("num:",self._num,"_mean:",self._mean,"_var:",self._var,"_min:",self._min,"_max:",self._max)
+        # print("_var:",self._var)
+
+
+
+    def run(self,label):
+        # for i in range(label.shape[0]):
+        #     label[i]=(label[i]-self._min[i])/(self._max[i]-self._min[i])
+        label=(label-self._mean)/np.sqrt(self._var)
+
+        return label
+    def reverse(self,labels):
+        # for i in range(labels.shape[1]):
+        #     labels[:,i]=labels[:,i]*(self._max[i]-self._min[i])+self._min[i]
+
+        labels=labels*np.sqrt(self._var)+self._mean
+        return labels
+
+
\ No newline at end of file
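`get_global_info` is a one-pass (Welford-style) recurrence: because `_var` is advanced with the old `_mean` before the mean itself is updated, it converges to the sample variance (ddof=1). A quick sanity check against numpy, a sketch assuming the class is importable from `labels_scaling.standardization`:

    import numpy as np

    rng = np.random.default_rng(0)
    xs = rng.normal(loc=5.0, size=(1000, 8))
    sc = LabelScaling()
    for x in xs:
        sc.get_global_info(x)  # stream one label vector at a time
    assert np.allclose(sc._mean, xs.mean(axis=0), atol=1e-2)
    assert np.allclose(sc._var, xs.var(axis=0, ddof=1), atol=1e-2)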
""" - self.raw_spectral_data_dir=raw_spectral_data_dir - self.raw_labels_dir=raw_labels_dir - self.dataset_dir=dataset_dir - self.labels_name=labels_name + + self.dataset_dir=Path(dataset_dir) + self.choose_frame_spatial=choose_frame_spatial self.features_scaling=features_scaling self.labels_scaling=labels_scaling - self.train_ratio=train_ratio - self.validate_ratio=validate_ratio - - #加载原始的标签 - self.raw_labels=self._load_raw_labels(raw_labels_dir,labels_name) - - #加载原始光谱的缓存 - self.raw_spectral_data_cache=self._load_raw_spectral_data_cache(raw_spectral_data_dir) - - #随便加载一个csv文件,判断光谱数据的维度 - self.spectral_dim,self.spatial_dim= self._get_spectral_spatial_dim(self.raw_spectral_data_cache) - - #正式开始原始数据转化为数据集 - self._raw_data_2_dataset() - - - - def _load_raw_labels(self,raw_labels_dir:str,labels_name:list)->pd.DataFrame: - """读取利用脚本处理后的钢厂给的excel文件所在文件夹(会扫描所有文件) - 并选择指定的label名作为output - 并去除值为NaN或0的行 - - Args: - raw_labels_dir (str): 利用脚本处理后的钢厂给的excel路径 - labels_name (list): 指定的作为Label的列 - - Returns: - pd.DataFrame: 返回所有筛选后的炉次数据 - """ - - raw_labels=None - for name in os.listdir(raw_labels_dir): - tmp_raw_labels=pd.read_excel(os.path.join(raw_labels_dir,name)) - - choosed_column=["TSC_start_time","TSC_end_time"] - choosed_column=choosed_column+labels_name + self.labelNames=None + # _raw_data_2_dataset(self) + self.run() - #只选出我们想要的部分作为标签 - tmp_raw_labels=tmp_raw_labels.loc[:,choosed_column] + def _raw_data_2_pre_dataset(self,pre_dataset_save_dir,dataset_type="test"): - # 选出有NULL的行 - null_rows=tmp_raw_labels.isnull().any(axis=1) - # # 选出有0的行 - zeros_rows=(tmp_raw_labels==0).any(axis=1) | (tmp_raw_labels=='0').any(axis=1) + rawdata_names= os.listdir(self.dataset_dir/"raw_dataset"/dataset_type) + if not os.path.exists(pre_dataset_save_dir/dataset_type): + os.makedirs(pre_dataset_save_dir/dataset_type) + print(f"[预处理]: {dataset_type}数据集选择特征时间与空间点 ") + for name in tqdm(rawdata_names): + + tmp_data=np.load(self.dataset_dir/"raw_dataset"/dataset_type/name,allow_pickle=True) - # # 每一行但凡有NULL或者0都给删了 - selected_rows=~(null_rows|zeros_rows) - tmp_raw_labels=tmp_raw_labels[selected_rows] + raw_spectral_data=self.choose_frame_spatial.run(tmp_data["measureStartDatetime"],tmp_data["measureEndDatetime"],tmp_data["timestamps"],tmp_data["rawSpectralData"]) - if raw_labels is None: - raw_labels=tmp_raw_labels - else: - raw_labels=pd.concat([raw_labels,tmp_raw_labels],axis=0) - logging.debug(f"Reading raw label excel file:{name}, which has {tmp_raw_labels.shape[0]} furnaces") - logging.debug(f"Readed raw label excel files, which has {raw_labels.shape[0]} furnaces in total") - - return raw_labels - def _load_raw_spectral_data_cache(self,raw_spectral_data_dir:str)->list: - """生成所有原始光谱数据文件的缓存,包括每个文件记录的开始及结束时间,目的为加快后面读取原始数据的速度 + flag_data_augmentation=False + ################################ + #此段代码是为数据增强,raw_spectral_data是二维数据,代表多个光谱曲线,使每个光谱曲线都对应相同的label。 - Args: - raw_spectral_data_dir (str): 原始光谱所在路径 + if flag_data_augmentation: - Returns: - list: 缓存,其中有多少个成员,就有多少个原始数据文件,每个成员包括格式为datetime的开始及结束时间,及文件路径 - """ - spectral_file_paths=glob.glob(os.path.join(raw_spectral_data_dir,"*.csv")) - cache_file_paths=glob.glob(os.path.join(raw_spectral_data_dir,"*.pkl")) + if raw_spectral_data is not None: - update_flag=False + for i in range(raw_spectral_data.shape[0]): + 
+                        np.savez(pre_dataset_save_dir/dataset_type/f"{name[:-4]}_{i}.npz",furnaceNumber=tmp_data["furnaceNumber"],measureStartDatetime=tmp_data["measureStartDatetime"],measureEndDatetime=tmp_data["measureEndDatetime"],timestamps=tmp_data["timestamps"],rawSpectralData=raw_spectral_data[i,...],rawLabels=tmp_data["rawLabels"],labelNames=tmp_data["labelNames"])
 
-        update_flag=False
-
-        # Regenerate when no cache file exists, or when the file count recorded in the cache differs from the folder contents
-        if len(cache_file_paths)==0:
-            logging.debug(f"Raw spectral data cache does not exist! Generating")
-            update_flag=True
-        elif len(cache_file_paths)==1:
-            with open(cache_file_paths[0],"rb") as f:
-                raw_spectral_data_cache=pickle.load(f)
-            if len(raw_spectral_data_cache) !=len(spectral_file_paths):
-                logging.debug(f"Raw spectral data cache is out of date! Regenerating, cache file number:{len(raw_spectral_data_cache)}, spectral data file number: {len(spectral_file_paths)}")
-                update_flag=True
-        else:
-            logging.error(f"More than one 'raw_spectral_data_cache.pkl' file exists in {raw_spectral_data_dir}")
-        if update_flag:
-            spectral_file_paths.sort()
-            raw_spectral_data_cache=[]
-            for file in spectral_file_paths:
-                tmp_info={}
-                tmp_data=np.loadtxt(file, delimiter=",")
-                start_t=datetime.datetime.fromtimestamp(tmp_data[0,0]/1000)+datetime.timedelta(microseconds=tmp_data[0,0]%1000)
-                end_t=datetime.datetime.fromtimestamp(tmp_data[-1,0]/1000)+datetime.timedelta(microseconds=tmp_data[-1,0]%1000)
-                tmp_info["start_t"]=start_t
-                tmp_info["end_t"]=end_t
-                tmp_info["file_path"]=file
-                raw_spectral_data_cache.append(tmp_info)
-            with open(os.path.join(raw_spectral_data_dir,f"raw_spectral_data_cache.pkl"),"wb") as f:
-                pickle.dump(raw_spectral_data_cache,f)
-        return raw_spectral_data_cache
-
-    def _get_spectral_spatial_dim(self,raw_spectral_data_cache):
-        data=np.loadtxt(raw_spectral_data_cache[0]["file_path"], delimiter=",").astype(np.uint64)
-        if data[0,2]==229376:
-            spectral_dim =224
-            spatial_dim=512
-        if data[0,2]==917504:
-            spectral_dim =448
-            spatial_dim=1024
-        return spectral_dim, spatial_dim
-
-    def _read_spectral_data(self,start_time:datetime.datetime,end_time:datetime.datetime)->np.ndarray:
-        """Fetch the spectral data from start_time to end_time
-        Args:
-            start_time (datetime.datetime): start time
-            end_time (datetime.datetime): end time
-
-        Returns:
-            np.ndarray: raw spectral data
-        """
-
-
-        def get_spectral_data_per_file(file_path,s_t,e_t):
-            data=np.loadtxt(file_path, delimiter=",").astype(np.uint64)
-
-            if s_t is not None:
-                tmp_s=datetime.datetime.timestamp(s_t)*1000
-                tmp_s_index=0
-                for i in range(data.shape[0]-1):
-                    if data[i,0]<=tmp_s and data[i+1,0]>=tmp_s:
-                        tmp_s_index=i
-                        break
-            else:
-                tmp_s_index=0
-            if e_t is not None:
-                tmp_e=datetime.datetime.timestamp(e_t)*1000
-                tmp_e_index=data.shape[0]
-                for i in range(tmp_s_index,data.shape[0]-1):
-                    if data[i,0]<=tmp_e and data[i+1,0]>=tmp_e:
-                        tmp_e_index=i
-                        break
-            else:
-                tmp_e_index=data.shape[0]
-
-            with open(file_path[:-3]+"bin", "rb") as f:
-                f.seek(data[tmp_s_index,1])
-                d=f.read(np.uint64((tmp_e_index-tmp_s_index)*data[tmp_s_index,2]))
-                d=np.frombuffer(d, dtype=np.uint16).reshape(tmp_e_index-tmp_s_index,self.spectral_dim,self.spatial_dim)
-            return data[tmp_s_index:tmp_e_index,0],d
-
-        timestamps=None
-        raw_spectral_data=None
-        for tmp_info in self.raw_spectral_data_cache:
-            tmp_data=None
-            if start_time<tmp_info["start_t"] and end_time>tmp_info["end_t"]:
-                # The target interval fully contains this file's interval, so take the data from file start to file end
-                tmp_time_stamp,tmp_data=get_spectral_data_per_file(tmp_info["file_path"] ,None,None)
-            elif start_time>tmp_info["start_t"] and end_time<tmp_info["end_t"]:
-                # The target interval lies inside this file's interval, so take the data from start_time to end_time
-                tmp_time_stamp,tmp_data=get_spectral_data_per_file(tmp_info["file_path"] ,start_time,end_time)
-            elif end_time>tmp_info["start_t"] and start_time<tmp_info["start_t"]:
-                # The target interval overlaps the left side of this file's interval, so take the data from file start to end_time
-                tmp_time_stamp,tmp_data=get_spectral_data_per_file(tmp_info["file_path"] ,None,end_time)
-            elif start_time<tmp_info["end_t"] and end_time>tmp_info["end_t"]:
-                # The target interval overlaps the right side of this file's interval, so take the data from start_time to the file end
-                tmp_time_stamp,tmp_data=get_spectral_data_per_file(tmp_info["file_path"] ,start_time,None)
-            if tmp_data is not None:
-                if raw_spectral_data is None:
-                    timestamps=tmp_time_stamp
-                    raw_spectral_data=tmp_data
+            ###############################
-                else:
-                    timestamps=np.concatenate((timestamps,tmp_time_stamp),axis=0)
-                    raw_spectral_data=np.concatenate((raw_spectral_data,tmp_data),axis=0)
-        return timestamps,raw_spectral_data
-
-    def _raw_data_2_dataset(self):
+            else:
+                if raw_spectral_data is not None:
+                    np.savez(pre_dataset_save_dir/dataset_type/name,furnaceNumber=tmp_data["furnaceNumber"],measureStartDatetime=tmp_data["measureStartDatetime"],measureEndDatetime=tmp_data["measureEndDatetime"],timestamps=tmp_data["timestamps"],rawSpectralData=raw_spectral_data,rawLabels=tmp_data["rawLabels"],labelNames=tmp_data["labelNames"])
 
-        save_dir=os.path.join(self.dataset_dir,self.choose_frame_spatial.description)
+    #####################
 
-        # Step 1: pick the feature time and spatial points, then save
+    def _pre_dataset_2_dataset(self,pre_dataset_save_dir,dataset_save_dir,dataset_type="test",savaFlag=True):
+
+        print(f"[Preprocess]: preprocessing the {dataset_type} set")
+        pre_dataset_files=os.listdir(pre_dataset_save_dir/dataset_type)
+
-        pre_dataset_save_dir=os.path.join(save_dir,"data")
-        if not os.path.exists(pre_dataset_save_dir):
-            os.makedirs(pre_dataset_save_dir)
-
-            # Regenerate when the data path does not exist; skip this part when it does.
-            # !!!!!!!!!! NOTE: when new data arrives, delete the folders under dataset (i.e. clear the cache) so everything is regenerated
-            for i in range(self.raw_labels.shape[0]):
-
-                start_time,end_time=self.choose_frame_spatial.get_data_interval(self.raw_labels.iloc[i]["TSC_start_time"],self.raw_labels.iloc[i]["TSC_end_time"])
-                timestamps,raw_spectral_data=self._read_spectral_data(start_time,end_time)
-
-                if raw_spectral_data is not None:
-                    # only record when the captured frame rate is above 2
-                    if raw_spectral_data.shape[0]>2*(end_time-start_time).total_seconds():
-                        logging.debug(f"PreProcess Stage 1: [{i+1}/{self.raw_labels.shape[0]}] with {timestamps.shape[0]} frames")
-                        raw_spectral_data=self.choose_frame_spatial.run(timestamps,raw_spectral_data)
-                        np.savez(os.path.join(pre_dataset_save_dir,f"{timestamps[0]}_{timestamps.shape[0]}.npz",),timestamps=timestamps,raw_spectral_data=raw_spectral_data,raw_labels=self.raw_labels.iloc[i][self.labels_name].to_numpy())
-        else:
-            logging.info(f"Pre Dataset already exists in {pre_dataset_save_dir}")
-
-        # Step 2: standardize and build the dataset
-
-        self.dataset_save_dir=os.path.join(save_dir,f"{self.features_scaling.description}_{self.labels_scaling.description}")
-
-        if not os.path.exists(self.dataset_save_dir):
-            os.makedirs(self.dataset_save_dir)
-            pre_dataset_files=os.listdir(pre_dataset_save_dir)
-
-            if self.features_scaling.flag_get_global_info:
+        if dataset_type=="training" and (self.features_scaling.flag_get_global_info or self.labels_scaling.flag_get_global_info):
             for name in pre_dataset_files:
-                tmp_data=np.load(os.path.join(pre_dataset_save_dir,name),allow_pickle=True)
-                self.features_scaling.get_global_info(tmp_data["raw_spectral_data"])
+                tmp_data=np.load(os.path.join(pre_dataset_save_dir,dataset_type,name),allow_pickle=True)
+                if self.labelNames is None:
+                    self.labelNames=tmp_data["labelNames"]
+                if self.features_scaling.flag_get_global_info:
+                    self.features_scaling.get_global_info(tmp_data["rawSpectralData"])
                 if self.labels_scaling.flag_get_global_info:
-                    self.labels_scaling.get_global_info(tmp_data["raw_labels"])
-
-            # fetch the dimension info
-            tmp_data=np.load(os.path.join(pre_dataset_save_dir,name),allow_pickle=True)
-            feature_dim=tmp_data["raw_spectral_data"].shape
-            label_dim=tmp_data["raw_labels"].shape
+                    self.labels_scaling.get_global_info(tmp_data["rawLabels"])
+            np.set_printoptions(suppress = True)
+            # print(f"[Preprocess] label global stats: _num",self.labels_scaling._num,"_mean:",self.labels_scaling._mean,"_var:",self.labels_scaling._var,"_min:",self.labels_scaling._min,"_max:",self.labels_scaling._max)
+
+        if savaFlag:
+
+            tmp_data=np.load(pre_dataset_save_dir/dataset_type/pre_dataset_files[0],allow_pickle=True)
+            feature_dim=tmp_data["rawSpectralData"].shape
+            label_dim=tmp_data["rawLabels"].shape
 
-            # split into training / validation / test sets
-            np.random.shuffle(pre_dataset_files)
-            [train_dateset_files,validate_dateset_files]=np.split(pre_dataset_files,[int(len(pre_dataset_files)*self.train_ratio)])
-            [validate_dateset_files,test_dateset_files]=np.split(validate_dateset_files,[int(len(pre_dataset_files)*self.validate_ratio)])
-
-
-            # write the HDF5 files
-            for dataset_name in ["train","validate","test"]:
-                if dataset_name=="train":
-                    file_names=train_dateset_files
-                elif dataset_name=="validate":
-                    file_names=validate_dateset_files
-                elif dataset_name=="test":
-                    file_names=test_dateset_files
-
-                logging.info(f"Generating {dataset_name} dataset with {len(file_names)} samples")
-                with h5py.File(os.path.join( self.dataset_save_dir,f"{dataset_name}.h5"), 'w') as f:
+            with h5py.File(os.path.join( dataset_save_dir,f"{dataset_type}.h5"), 'w') as f:
                 h5_features = f.create_dataset(
                     'features',
-                    tuple([len(file_names)]+list(feature_dim)),
+                    tuple([len(pre_dataset_files)]+list(feature_dim)),
                     maxshape=(tuple([None]+list(feature_dim))),
                     chunks=tuple([1]+list(feature_dim)),  # set the chunk size manually to the size of one sample, i.e. tuple([1]+list(feature_dim))
                     # compression='gzip',
@@ -299,52 +112,104 @@ class DataPreProcess:
                 )
                 h5_labels = f.create_dataset(
                     'labels',
-                    tuple([len(file_names)]+list(label_dim)),
+                    tuple([len(pre_dataset_files)]+list(label_dim)),
                     chunks=tuple([1]+list(label_dim)),  # set the chunk size manually to the size of one sample
                     dtype=np.float32,
                 )
 
-                for i,name in enumerate(file_names):
-                    tmp_data=np.load(os.path.join(pre_dataset_save_dir,name),allow_pickle=True)
+                for i,name in enumerate(pre_dataset_files):
+                    tmp_data=np.load(os.path.join(pre_dataset_save_dir,dataset_type,name),allow_pickle=True)
 
-                    feature=self.features_scaling.run(tmp_data["raw_spectral_data"])
-                    label=self.labels_scaling.run(tmp_data["raw_labels"])
+                    feature=self.features_scaling.run(tmp_data["rawSpectralData"])
+                    label=self.labels_scaling.run(tmp_data["rawLabels"])
 
                     h5_features[i]=feature.astype(np.float32)
                     h5_labels[i]=label.astype(np.float32)
-        else:
-            pre_dataset_files=os.listdir(pre_dataset_save_dir)
+
 
-            if self.labels_scaling.flag_get_global_info:
-                for name in pre_dataset_files:
-                    tmp_data=np.load(os.path.join(pre_dataset_save_dir,name),allow_pickle=True)
-                    self.labels_scaling.get_global_info(tmp_data["raw_labels"])
+    def run(self):
+
+        save_dir=self.dataset_dir/self.choose_frame_spatial.description
+
+        # Step 1: pick the feature time and spatial points, then save
+
+        pre_dataset_save_dir=save_dir/"pre_dataset"
+
+        if not os.path.exists(pre_dataset_save_dir):
+            os.makedirs(pre_dataset_save_dir)
+
+            # Regenerate when the data path does not exist; skip this part when it does.
+            # !!!!!!!!!! NOTE: when new data arrives, delete the folders under dataset (i.e. clear the cache) so everything is regenerated
+            self._raw_data_2_pre_dataset(pre_dataset_save_dir,dataset_type="training")
+            self._raw_data_2_pre_dataset(pre_dataset_save_dir,dataset_type="validation")
+            self._raw_data_2_pre_dataset(pre_dataset_save_dir,dataset_type="test")
+
+        else:
+            logging.info(f"Pre Dataset already exists in {pre_dataset_save_dir}")
+
+        # Step 2: standardize and build the dataset
+
+        self.dataset_save_dir=save_dir / f"{self.features_scaling.description}_{self.labels_scaling.description}"
+
+        if not os.path.exists(self.dataset_save_dir):
+            os.makedirs(self.dataset_save_dir)
+            self._pre_dataset_2_dataset( pre_dataset_save_dir,self.dataset_save_dir,dataset_type="training",savaFlag=True)
+            self._pre_dataset_2_dataset( pre_dataset_save_dir,self.dataset_save_dir,dataset_type="validation",savaFlag=True)
+            self._pre_dataset_2_dataset( pre_dataset_save_dir,self.dataset_save_dir,dataset_type="test",savaFlag=True)
+
+
+        else:
+
+            self._pre_dataset_2_dataset( pre_dataset_save_dir,self.dataset_save_dir,dataset_type="training",savaFlag=False)
+            self._pre_dataset_2_dataset( pre_dataset_save_dir,self.dataset_save_dir,dataset_type="validation",savaFlag=False)
+            self._pre_dataset_2_dataset( pre_dataset_save_dir,self.dataset_save_dir,dataset_type="test",savaFlag=False)
 
             logging.info(f"Standardized dataset already exists in {self.dataset_save_dir}")
 
     def get_metric(self,outputs,labels):
+        # print("outputs Before",outputs)
         outputs=self.labels_scaling.reverse(outputs)
+        # print("outputs After",outputs)
+        # print("labels Before",labels)
         labels=self.labels_scaling.reverse(labels)
+        # print("labels After",labels)
         error=outputs-labels
 
-        # print("outputs",outputs)
-        # print("labels",labels)
-        # print("errors",error)
-        hit_rate=np.zeros(error.shape[1])
-        for i,name in enumerate(self.labels_name):
-            # error[:,i]=outputs[:,i]-labels[:,i]
+        if len(error.shape)==1:
+            hit_rate=np.zeros((1))
+        else:
+            hit_rate=np.zeros(error.shape[1])
 
-            #["TSC_T","TSC_C","TSC_C_lab","TSC_P_lab"]
+        bounds=np.zeros(error.shape)
 
-            if name =="TSC_T":
-                bound=10
-            elif name=="TSC_C":
-                bound=0.05
-            elif name=="TSC_C_lab":
-                bound=0.05
-            elif name=="TSC_P_lab":
-                bound=0.0005
+        # ["Temperature","C","P","S","Mn","Ni","Mo","Cr"]
+
+
+        for i,name in enumerate(self.labelNames):
+            if name =="Temperature":
+                bounds[:,i]=10.0+np.zeros(labels[:,i].shape)
+            elif name=="C":
+                bounds[:,i]=0.05+np.zeros(labels[:,i].shape)
+            elif name=="P":
+                bounds[:,i]=0.005+np.zeros(labels[:,i].shape)
+            elif name=="S":
+                bounds[:,i]=0.01+np.zeros(labels[:,i].shape)
+            elif name=="Mn":
+                bounds[:,i]=0.05+np.zeros(labels[:,i].shape)
+            elif name=="Ni":
+                bounds[:,i]=0.25*labels[:,i]
+            elif name=="Mo":
+                bounds[:,i]=0.25*labels[:,i]
+            elif name=="Cr":
+                bounds[:,i]=0.25*labels[:,i]
 
+            # if isinstance(bound, float):
+            #     print(f"{name} outputs:{outputs[0,i]} labels:{labels[0,i]} error:{error[0,i]} bound:{bound}")
+            # else:
+            #     print(f"{name} outputs:{outputs[0,i]} labels:{labels[0,i]} error:{error[0,i]} bound:{bound[0]}")
 
-            hit_rate[i]=((error[:,i]>=-bound) &(error[:,i]<=bound)).sum() /error.shape[0]
-        return error,hit_rate
+            hit_rate[i]=((error[:,i]>=-bounds[:,i]) &(error[:,i]<=bounds[:,i])).sum() /error.shape[0]
+
+        return outputs, labels,error,hit_rate,bounds
 
 
 
@@ -355,19 +220,13 @@ if __name__=="__main__":
 
     logging.basicConfig(level = logging.DEBUG)
 
-    raw_data_dir="/code/admin/20240806-NanEr-5-8-data/rawdata"
-    labels_path="/code/admin/20240806-NanEr-5-8-data/labels/NanEr"
-    dataset_dir="/code/admin/20240806-NanEr-5-8-data/dataset"
-    labels_name=["TSC_T","TSC_C","TSC_C_lab","TSC_P_lab"]
+    datasetDir="/data/SEMS-model-training/dataset"
 
-    from choose_frame_spatial.mean import ChooseFrameSpatial
-    from features_scaling.max_min import FeatureScaling
-    from labels_scaling.max_min import LabelScaling
-    choose_frame_spatial=ChooseFrameSpatial(interval=[-30,30])
+    from choose_frame_spatial.mean_CARS100_3 import ChooseFrameSpatial
+    from features_scaling.standardization import FeatureScaling
+    from labels_scaling.standardization import LabelScaling
+    choose_frame_spatial=ChooseFrameSpatial()
     features_scaling=FeatureScaling()
    labels_scaling=LabelScaling()
-
-
-
-    data_pre_process=DataPreProcess(raw_data_dir,labels_path,labels_name,dataset_dir,choose_frame_spatial,features_scaling,labels_scaling)
\ No newline at end of file
+    data_pre_process=DataPreProcess(datasetDir,choose_frame_spatial,features_scaling,labels_scaling)
\ No newline at end of file
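The hit rate computed by `get_metric` counts, per element, the predictions whose reverse-scaled error stays inside a tolerance band: ±10 for Temperature, ±0.05 for C and Mn, ±0.005 for P, ±0.01 for S, and ±25 % of the true value for Ni, Mo and Cr. Vectorized, the loop body amounts to the following (names as in `get_metric`):

    # error and bounds have shape (N, 8); a prediction "hits" when it stays inside the band
    inside = (error >= -bounds) & (error <= bounds)
    hit_rate = inside.sum(axis=0) / error.shape[0]  # one rate per label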
diff --git a/src/data/spectral_dataset.py b/src/data/spectral_dataset.py
index 24f26ab..5834748 100644
--- a/src/data/spectral_dataset.py
+++ b/src/data/spectral_dataset.py
@@ -4,10 +4,10 @@ import os
 
 import time
 
-def load_dataset(dataset_dir):
-    train_dataset=SpectralDataset(os.path.join(dataset_dir,"train.h5"))
-    val_dataset=SpectralDataset(os.path.join(dataset_dir,"validate.h5"))
-    return train_dataset, val_dataset
+def load_dataset(dataset_dir,dataset_type="training"):
+    dataset=SpectralDataset(os.path.join(dataset_dir,f"{dataset_type}.h5"))
+    # val_dataset=SpectralDataset(os.path.join(dataset_dir,"validation.h5"))
+    return dataset
 
 class SpectralDataset(torch.utils.data.Dataset):
     def __init__(self, hdf5_path):
@@ -17,8 +17,8 @@ class SpectralDataset(torch.utils.data.Dataset):
             # rdcc_w0=0,  # cache eviction policy: values near 0 preferentially evict least-recently-used chunks; values near 1 preferentially evict fully read/written chunks
             # rdcc_nslots=1e8,  # number of hash slots for locating cached chunks; 10x the cached-chunk count is recommended, 100x for best performance; default 521
         )
-        self.features=self.hdf5_f["features"]
-        self.labels=self.hdf5_f["labels"]
+        self.features=self.hdf5_f["features"][...]
+        self.labels=self.hdf5_f["labels"][...]
 
     def __len__(self):
         return self.features.shape[0]
@@ -26,9 +26,14 @@ class SpectralDataset(torch.utils.data.Dataset):
 
         # build the input
 
+
         feature=self.features[idx]
         label=self.labels[idx]
+
+
+
+
         return feature, label
 
 
 
@@ -37,7 +42,7 @@ class SpectralDataset(torch.utils.data.Dataset):
 
 if __name__ =="__main__":
 
-    training_data=SpectralDataset("/home/admin/20240806-NanEr-5-8-data/dataset/mean_-30_30/max_min_max_min/train.h5")
+    training_data=SpectralDataset("/data/SEMS-model-training/dataset/ChooseFrameSpatial'>_-30_30/max_min_max_min/training.h5")
 
     print("Dataset size:",len(training_data))
     print("Input feature shape:", training_data[0][0].shape, "Output label shape:",training_data[0][1].shape)
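The change from `self.hdf5_f["features"]` to `self.hdf5_f["features"][...]` alters when I/O happens, not what is returned per item; an illustrative sketch:

    # h5py.Dataset handle: every __getitem__ triggers a chunked read from disk
    lazy = hdf5_f["features"]

    # [...] copies the whole dataset into a numpy array once; later indexing is pure RAM
    eager = hdf5_f["features"][...]

This trades memory for per-sample latency, which is reasonable for a dataset of this size but would not scale to arrays larger than RAM.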
diff --git a/src/data/test.ipynb b/src/data/test.ipynb
new file mode 100644
index 0000000..b72a217
--- /dev/null
+++ b/src/data/test.ipynb
@@ -0,0 +1,126 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[Preprocess]: preprocessing the training set\n",
+      "[Preprocess] label global stats: _num 549 _mean: [1591.5325       0.39140984    0.04271309    0.02187012    0.14065096\n",
+      "    0.02857196    0.00202732    0.10250819] _var: [786.9018      0.01670736    0.00040787    0.0000213     0.00179902\n",
+      "    0.00326453    0.00004455    0.00067495] _min: [1480.        0.079       0.0005      0.0003      0.0005      0.001       0.001\n",
+      "    0.001   ] _max: [1666.        0.829       0.0957      0.044       0.7264      0.823       0.152\n",
+      "    0.522   ]\n",
+      "[Preprocess]: preprocessing the validation set\n",
+      "[Preprocess] label global stats: _num 549 _mean: [1591.5325       0.39140984    0.04271309    0.02187012    0.14065096\n",
+      "    0.02857196    0.00202732    0.10250819] _var: [786.9018      0.01670736    0.00040787    0.0000213     0.00179902\n",
+      "    0.00326453    0.00004455    0.00067495] _min: [1480.        0.079       0.0005      0.0003      0.0005      0.001       0.001\n",
+      "    0.001   ] _max: [1666.        0.829       0.0957      0.044       0.7264      0.823       0.152\n",
+      "    0.522   ]\n",
+      "[Preprocess]: preprocessing the test set\n",
+      "[Preprocess] label global stats: _num 549 _mean: [1591.5325       0.39140984    0.04271309    0.02187012    0.14065096\n",
+      "    0.02857196    0.00202732    0.10250819] _var: [786.9018      0.01670736    0.00040787    0.0000213     0.00179902\n",
+      "    0.00326453    0.00004455    0.00067495] _min: [1480.        0.079       0.0005      0.0003      0.0005      0.001       0.001\n",
+      "    0.001   ] _max: [1666.        0.829       0.0957      0.044       0.7264      0.823       0.152\n",
+      "    0.522   ]\n"
+     ]
+    }
+   ],
+   "source": [
+    "from pre_process import DataPreProcess\n",
+    "\n",
+    "\n",
+    "datasetDir=\"/data/SEMS-model-training/dataset\"\n",
+    "\n",
+    "\n",
+    "from choose_frame_spatial.DBSCAN import ChooseFrameSpatial\n",
+    "from features_scaling.standardization import FeatureScaling\n",
+    "from labels_scaling.standardization import LabelScaling\n",
+    "choose_frame_spatial=ChooseFrameSpatial()\n",
+    "features_scaling=FeatureScaling()\n",
+    "labels_scaling=LabelScaling()\n",
+    "data_pre_process=DataPreProcess(datasetDir,choose_frame_spatial,features_scaling,labels_scaling)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([1591.5325    ,    0.39140984,    0.04271309,    0.02187012,\n",
+       "          0.14065096,    0.02857196,    0.00202732,    0.10250819],\n",
+       "      dtype=float32)"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "labels_scaling._mean"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Before: [1612.5325, 0.450984, 0.067271309, 0.05687012, 0.2365096, 0.06357196, 0.0034502732, 0.450250819]\n",
+      "Scaled: [0.7486169269676385, 0.46089706984319634, 1.2160017876067477, 7.584260858761072, 2.26002739629723, 0.612572700572906, 0.21319305953525988, 13.385111606844243]\n",
+      "Reversed\t: [1612.5325, 0.450984, 0.067271309, 0.05687012, 0.2365096, 0.06357196, 0.0034502732, 0.450250819]\n",
+      "Before \t: [1612.5325, 0.450984, 0.067271309, 0.05687012, 0.2365096, 0.06357196, 0.0034502732, 0.450250819]\n"
+     ]
+    }
+   ],
+   "source": [
+    "labels= [1612.5325 , 0.450984, 0.067271309 , 0.05687012 , 0.2365096, 0.06357196 , 0.0034502732 , 0.450250819]\n",
+    "print(f\"Before: {labels}\")\n",
+    "tmp=labels_scaling.run(labels)\n",
+    "print(f\"Scaled: {list(tmp)}\")\n",
+    "labels_r=labels_scaling.reverse(tmp)\n",
+    "print(f\"Reversed\\t: {list(labels_r)}\")\n",
+    "print(f\"Before \\t: {labels}\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "dl",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/src/main_train.py b/src/main_train.py
index 886ca7a..85a6ccc 100644
--- a/src/main_train.py
+++ b/src/main_train.py
@@ -5,6 +5,10 @@ import ray,os
 import ray.tune
 from functools import partial
 
+import shutil
+
+import subprocess
+
 
 
 
@@ -24,6 +28,16 @@ def main(hydra_cfg : omegaconf.DictConfig) -> None:
 
     hydra_output_dir=hydra_output_dir['runtime']['output_dir']
 
+    command = f"""
+    sudo kill -9 $(sudo lsof -t -i :6006);
+    tensorboard --logdir {os.path.join(hydra_output_dir,"ray-tune")} --host 0.0.0.0 --port 6006
+    """
+
+
+    # spawn the child process
+    tensorboard_subprocess = subprocess.Popen(command, shell=True, executable='/bin/bash',start_new_session=True)
+
+
     # fetch the current hyperparameter configuration
     tune_cfg={}
     for key in hydra_cfg.ray_tune.config.keys():
@@ -44,10 +58,24 @@ def main(hydra_cfg : omegaconf.DictConfig) -> None:
     )
 
     # save the best model
+    best_trial = result.get_best_trial("val_loss", "min", "last")
+
+
+
+
     print(f"Best trial config: {best_trial.config}")
     print(f"Best trial final validation loss: {best_trial.last_result['val_loss']}")
+    print(f"Best trial checkpoint path: {best_trial.checkpoint.path}")
+
+    shutil.copytree(best_trial.checkpoint.path, os.path.join(hydra_output_dir,"best-model"))
+
+    # tensorboard_subprocess.terminate()
+    # tensorboard_subprocess.wait()
+    # print("TensorBoard terminated")
+
+
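On the added `get_best_trial` call: Ray Tune ranks trials by the given metric and mode, and the third argument, `scope`, controls which reported value is used. A brief restatement of the call with keywords:

    # scope="last" ranks trials by the val_loss of their final reported result;
    # scope="all" would instead use each trial's best value across all reports
    best_trial = result.get_best_trial(metric="val_loss", mode="min", scope="last")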
result.get_best_trial("val_loss", "min", "last") + + + + print(f"Best trial config: {best_trial.config}") print(f"Best trial final validation loss: {best_trial.last_result['val_loss']}") + print(f"Best trial checkpoint path: {best_trial.checkpoint.path}") + + shutil.copytree(best_trial.checkpoint.path, os.path.join(hydra_output_dir,"best-model")) + + # tensorboard_subprocess.terminate() + # tensorboard_subprocess.wait() + # print("TensorBoard terminated") + + diff --git a/src/model/DRSN-CW.py b/src/model/DRSN-CW.py new file mode 100644 index 0000000..936c24a --- /dev/null +++ b/src/model/DRSN-CW.py @@ -0,0 +1,160 @@ +import torch +import torch.nn as nn + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, in_channels, out_channels, stride=1): + super().__init__() + self.shrinkage = Shrinkage(out_channels, gap_size=(1)) + # residual function + self.residual_function = nn.Sequential( + nn.Conv1d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False), + nn.BatchNorm1d(out_channels), + nn.ReLU(inplace=True), + nn.Conv1d(out_channels, out_channels * BasicBlock.expansion, kernel_size=3, padding=1, bias=False), + nn.BatchNorm1d(out_channels * BasicBlock.expansion), + self.shrinkage + ) + # shortcut + self.shortcut = nn.Sequential() + + # the shortcut output dimension is not the same with residual function + # use 1*1 convolution to match the dimension + if stride != 1 or in_channels != BasicBlock.expansion * out_channels: + self.shortcut = nn.Sequential( + nn.Conv1d(in_channels, out_channels * BasicBlock.expansion, kernel_size=1, stride=stride, bias=False), + nn.BatchNorm1d(out_channels * BasicBlock.expansion) + ) + + def forward(self, x): + + return nn.ReLU(inplace=True)(self.residual_function(x) + self.shortcut(x)) + # a = self.residual_function(x), + # b = self.shortcut(x), + # c = a+b + # return c + + +class Shrinkage(nn.Module): + def __init__(self, channel, gap_size): + super(Shrinkage, self).__init__() + self.gap = nn.AdaptiveAvgPool1d(gap_size) + self.fc = nn.Sequential( + nn.Linear(channel, channel), + nn.ReLU(inplace=True), + nn.Linear(channel, channel), + nn.Sigmoid(), + ) + + def forward(self, x): + x_raw = x + x = torch.abs(x) + x_abs = x + x = self.gap(x) + x = torch.flatten(x, 1) + # average = torch.mean(x, dim=1, keepdim=True) #CS + average = x #CW + x = self.fc(x) + x = torch.mul(average, x) + x = x.unsqueeze(2) + # soft thresholding + sub = x_abs - x + zeros = sub - sub + n_sub = torch.max(sub, zeros) + x = torch.mul(torch.sign(x_raw), n_sub) + return x + + +class SpectralModel(nn.Module): + + def __init__(self, block=BasicBlock, num_block=[3, 4, 6, 3], num_classes=8): + super().__init__() + + self.in_channels = 64 + + self.conv1 = nn.Sequential( + nn.Conv1d(1, 64, kernel_size=3, padding=1, bias=False), + nn.BatchNorm1d(64), + nn.ReLU(inplace=True)) + # we use a different inputsize than the original paper + # so conv2_x's stride is 1 + self.conv2_x = self._make_layer(block, 64, num_block[0], 1) + self.conv3_x = self._make_layer(block, 128, num_block[1], 2) + self.conv4_x = self._make_layer(block, 256, num_block[2], 2) + self.conv5_x = self._make_layer(block, 512, num_block[3], 2) + self.avg_pool = nn.AdaptiveAvgPool1d((1)) + # self.fc = nn.Linear(512 * block.expansion, 1024) + # self.fc = nn.Linear(1024 , 512) + # self.fc = nn.Linear(512, 512) + self.fc = nn.Sequential( + nn.BatchNorm1d(512), + nn.Linear(512 * block.expansion, 1024), + nn.ReLU(inplace=True),nn.BatchNorm1d(1024), + nn.Linear(1024 , 512), + 
nn.ReLU(inplace=True),nn.BatchNorm1d(512),
+            nn.Linear(512, 128),
+            nn.ReLU(inplace=True),nn.BatchNorm1d(128),
+            nn.Linear(128, 8),)
+
+    def _make_layer(self, block, out_channels, num_blocks, stride):
+        """Build one RSNet stage ("layer" here does not mean a single
+        neural-network layer such as a conv layer; one stage may contain
+        more than one residual shrinkage block).
+
+        Args:
+            block: block type, basic block or bottleneck block
+            out_channels: output channel count of this stage
+            num_blocks: how many blocks per stage
+            stride: the stride of the first block of this stage
+
+        Return:
+            an nn.Sequential making up one RSNet stage
+        """
+
+        # the first block of a stage may have stride 1 or 2,
+        # all remaining blocks always have stride 1
+        strides = [stride] + [1] * (num_blocks - 1)
+        layers = []
+        for stride in strides:
+            layers.append(block(self.in_channels, out_channels, stride))
+            self.in_channels = out_channels * block.expansion
+
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        x=torch.unsqueeze(x,1)
+        output = self.conv1(x)
+        output = self.conv2_x(output)
+        output = self.conv3_x(output)
+        output = self.conv4_x(output)
+        output = self.conv5_x(output)
+        output = self.avg_pool(output)
+        output = output.view(output.size(0), -1)
+        output = self.fc(output)
+
+        return output
+
+
+# def rsnet18():
+#     """ return a RSNet 18 object
+#     """
+#     return RSNet(BasicBlock, [2, 2, 2, 2])
+
+
+# def rsnet34():
+#     """ return a RSNet 34 object
+#     """
+#     return RSNet(BasicBlock, [3, 4, 6, 3])
+
+if __name__=='__main__':
+    import torchinfo
+    model = SpectralModel()
+    model.eval()
+    # print(model)
+    input = torch.randn(64,224)
+    y = model(input)
+    print(y.size())
+
+    torchinfo.summary(model,input_size=(64,224))
diff --git a/src/model/__pycache__/CNN_FCNN_big.cpython-312.pyc b/src/model/__pycache__/CNN_FCNN_big.cpython-312.pyc
new file mode 100644
GIT binary patch (binary .pyc payload omitted)
diff --git a/src/model/__pycache__/CNN_FCNN_small.cpython-312.pyc b/src/model/__pycache__/CNN_FCNN_small.cpython-312.pyc
new file mode 100644
GIT binary patch (binary .pyc payload omitted)
diff --git a/src/model/__pycache__/CNN_LSTM_FCNN.cpython-312.pyc b/src/model/__pycache__/CNN_LSTM_FCNN.cpython-312.pyc
index bc5094bb6bcbdcf97e672dcdc87e20631854e162..a72681485500d9fdfabde228801ce9f683babd25 100644
GIT binary patch (binary .pyc payload omitted)
diff --git a/src/model/__pycache__/DRSN-CW.cpython-312.pyc b/src/model/__pycache__/DRSN-CW.cpython-312.pyc
new file mode 100644
GIT binary patch (binary .pyc payload omitted)
diff --git a/src/model/__pycache__/FCNN_DBSCAN_big.cpython-312.pyc b/src/model/__pycache__/FCNN_DBSCAN_big.cpython-312.pyc
new file mode 100644
GIT binary patch (binary .pyc payload omitted)
diff --git a/src/model/__pycache__/FCNN_DBSCAN_small.cpython-312.pyc b/src/model/__pycache__/FCNN_DBSCAN_small.cpython-312.pyc
new file mode 100644
GIT binary patch (binary .pyc payload omitted)
diff --git a/src/model/__pycache__/FNN_CARS.cpython-312.pyc b/src/model/__pycache__/FNN_CARS.cpython-312.pyc
new file mode 100644
GIT binary patch (binary .pyc payload omitted)
diff --git a/src/model/__pycache__/FNN_CARS_big.cpython-312.pyc b/src/model/__pycache__/FNN_CARS_big.cpython-312.pyc
new file mode 100644
GIT binary patch (binary .pyc payload omitted)
diff --git a/src/model/__pycache__/FNN_CARS_small.cpython-312.pyc b/src/model/__pycache__/FNN_CARS_small.cpython-312.pyc
new file mode 100644
GIT binary patch (binary .pyc payload omitted)
diff --git a/src/model/history/CNN_FCNN_small.py b/src/model/history/CNN_FCNN_small.py
new file mode 100644
--- /dev/null
+++ b/src/model/history/CNN_FCNN_small.py
+        self.fc_2=nn.Linear(in_features=512,out_features=256)
+        self.fc_3=nn.Linear(in_features=256,out_features=128)
+        self.fc_4=nn.Linear(in_features=128,out_features=64)
+        self.fc_5=nn.Linear(in_features=64,out_features=8)
+
+
+        # self.reshape_3=nn.Unflatten(dim=0,unflattened_size=(batch_size,601))
+
+
+
+
+
+    def forward(self, x):
+        # input_shape=x.shape
+
+        x=torch.mean(x,dim=1,keepdim=True)
+
+        # x=torch.unsqueeze(x, 1)
+        x=F.tanh(self.Conv2d_1(x))
+        x=F.tanh(self.Conv2d_2(x))
+        # x=self.MaxPool2d_1(x)
+        x=F.tanh(self.Conv2d_3(x))
+        x=F.tanh(self.Conv2d_4(x))
+        x=F.tanh(self.Conv2d_5(x))
+
+        x=self.flatten(x)
+        print(x.shape)
+        x=F.tanh(self.fc_1(x))
+        x=F.tanh(self.fc_2(x))
+        x=F.tanh(self.fc_3(x))
+        x=F.tanh(self.fc_4(x))
+        x=F.sigmoid(self.fc_5(x))
+
+        #
+        # x=nn.Unflatten(dim=0,unflattened_size=(int(input_shape[0]),int(input_shape[1])))(x)
+        # x=nn.Flatten(start_dim=2, end_dim=4)(x)
+
+
+        # x,_=self.lstm_1(x)
+
+        # x=x[:,-1,:]
+ + + # x=nn.LeakyReLU()(self.fc_1(x)) + # x=nn.LeakyReLU()(self.fc_2(x)) + # x=nn.Sigmoid()(self.fc_3(x)) + + + + return x + +if __name__=="__main__": + model=SpectralModel() + + torchinfo.summary(model,input_size=(64, 20, 224, 10)) \ No newline at end of file diff --git a/src/model/CNN_LSTM_FCNN.py b/src/model/history/CNN_LSTM_FCNN.py similarity index 87% rename from src/model/CNN_LSTM_FCNN.py rename to src/model/history/CNN_LSTM_FCNN.py index 078d88c..bcfce58 100644 --- a/src/model/CNN_LSTM_FCNN.py +++ b/src/model/history/CNN_LSTM_FCNN.py @@ -3,9 +3,9 @@ import torch.nn.functional as F import torchinfo import torch -class CNN_LSTM_FCNN(nn.Module): +class SpectralModel(nn.Module): def __init__(self,): - super(CNN_LSTM_FCNN, self).__init__() + super(SpectralModel, self).__init__() # self.reshape_1=nn.Flatten(start_dim=0, end_dim=1) self.Conv2d_1=nn.Conv2d(in_channels=1, out_channels=256, kernel_size=3,stride=1) @@ -27,7 +27,7 @@ class CNN_LSTM_FCNN(nn.Module): self.fc_1=nn.Linear(in_features=2048,out_features=1024) #2048->1024 self.fc_2=nn.Linear(in_features=1024,out_features=256) - self.fc_3=nn.Linear(in_features=256,out_features=4) + self.fc_3=nn.Linear(in_features=256,out_features=8) # self.reshape_3=nn.Unflatten(dim=0,unflattened_size=(batch_size,601)) @@ -64,6 +64,6 @@ class CNN_LSTM_FCNN(nn.Module): return x if __name__=="__main__": - model=CNN_LSTM_FCNN() + model=SpectralModel() - torchinfo.summary(model,input_size=(64, 48, 224, 10)) \ No newline at end of file + torchinfo.summary(model,input_size=(64, 20, 224, 10)) \ No newline at end of file diff --git a/src/model/FCNN.py b/src/model/history/FCNN.py similarity index 100% rename from src/model/FCNN.py rename to src/model/history/FCNN.py diff --git a/src/model/history/FCNN_DBSCAN_big.py b/src/model/history/FCNN_DBSCAN_big.py new file mode 100644 index 0000000..0a05df2 --- /dev/null +++ b/src/model/history/FCNN_DBSCAN_big.py @@ -0,0 +1,55 @@ +import torch.nn as nn +import torch.nn.functional as F +import torchinfo +import torch + +class SpectralModel(nn.Module): + def __init__(self,): + super(SpectralModel, self).__init__() + self.norm=nn.BatchNorm1d(224) + self.fc1 = nn.Linear(224, 1024) + self.fc2 = nn.Linear(1024, 2048) + self.fc3 = nn.Linear(2048, 4096) + self.fc4 = nn.Linear(4096, 2048) + self.fc5 = nn.Linear(2048, 1024) + self.fc6 = nn.Linear(1024, 512) + + self.fc7 = nn.Linear(512, 128) + self.fc8 = nn.Linear(128, 8) + + + + def forward(self, x): + x=self.norm(x) + x = F.relu(self.fc1(x)) + x = F.relu(self.fc2(x)) + x = F.relu(self.fc3(x)) + x = F.relu(self.fc4(x)) + x = F.relu(self.fc5(x)) + x = F.relu(self.fc6(x)) + x = F.relu(self.fc7(x)) + x = F.relu(self.fc8(x)) + x = F.sigmoid(x) + return x +# class SpectralModel(nn.Module): +# def __init__(self,): +# super(SpectralModel, self).__init__() + +# self.norm=nn.BatchNorm1d(224) +# self.submodels = nn.ModuleList([SmallFCNNModel() for _ in range(8)]) + + + + +# def forward(self, x): +# x=self.norm(x) + +# res = [submodel(x) for submodel in self.submodels] +# x=torch.cat(res,dim=1) + +# return x + +if __name__=="__main__": + model=SpectralModel().to("cuda:0") + + torchinfo.summary(model,input_size=(64, 224)) \ No newline at end of file diff --git a/src/model/history/FCNN_DBSCAN_small.py b/src/model/history/FCNN_DBSCAN_small.py new file mode 100644 index 0000000..5b291ae --- /dev/null +++ b/src/model/history/FCNN_DBSCAN_small.py @@ -0,0 +1,68 @@ +import torch.nn as nn +import torch.nn.functional as F +import torchinfo +import torch + +class SmallFCNNModel(nn.Module): + def 
__init__(self,):
+        super(SmallFCNNModel, self).__init__()
+
+        self.fc1 = nn.Linear(224, 50)
+
+        self.fc9 = nn.Linear(50, 1)
+
+
+
+    def forward(self, x):
+        # x=self.norm(x)
+        x = F.relu(self.fc1(x))
+        x = F.sigmoid(self.fc9(x))
+        return x
+class SpectralModel(nn.Module):
+    def __init__(self,):
+        super(SpectralModel, self).__init__()
+
+        self.norm=nn.BatchNorm1d(224)
+        # self.fc1 = nn.Linear(100, 50)
+        # # self.fc2 = nn.Linear(1024, 2048)
+        # # self.fc3 = nn.Linear(2048, 4096)
+        # # self.fc4 = nn.Linear(4096, 2048)
+        # # self.fc5 = nn.Linear(2048, 1024)
+        # # self.fc6 = nn.Linear(1024, 512)
+        # # self.fc7 = nn.Linear(512, 256)
+        # # self.fc8 = nn.Linear(256, 128)
+        # self.fc9 = nn.Linear(50, 8)
+        # # self.fc4 = nn.Linear(10240, 4)
+        self.submodels = nn.ModuleList([SmallFCNNModel() for _ in range(8)])
+
+
+
+
+    def forward(self, x):
+        x=self.norm(x)
+
+        res = [submodel(x) for submodel in self.submodels]
+        # res=[self.submodels[0](x)]
+
+        # for i in range(1,len(self.submodels)):
+        #     res.append(self.submodels[i](0*x))
+
+
+
+
+        # x = F.relu(self.fc2(x))
+        # x = F.relu(self.fc3(x))
+        # x = F.relu(self.fc4(x))
+        # x = F.relu(self.fc5(x))
+        # x = F.relu(self.fc6(x))
+        # x = F.relu(self.fc7(x))
+        # x = F.relu(self.fc8(x))
+        # x = F.sigmoid(self.fc9(x))
+        x=torch.cat(res,dim=1)
+
+        return x
+
+if __name__=="__main__":
+    model=SpectralModel().to("cuda:0")
+
+    torchinfo.summary(model,input_size=(64, 224))
\ No newline at end of file
diff --git a/src/optimizer/__pycache__/trainable.cpython-312.pyc b/src/optimizer/__pycache__/trainable.cpython-312.pyc
index d2d8ea4dd18362a8f2ba0bd5f4183f4dfe571add..33eb23d7f61362aaa09305c20e30885bcde261c2 100644
GIT binary patch (binary .pyc payload omitted)
diff --git a/src/optimizer/__pycache__/utils.cpython-312.pyc b/src/optimizer/__pycache__/utils.cpython-312.pyc
new file mode 100644
GIT binary patch (binary .pyc payload omitted)
diff --git a/src/optimizer/trainable.py b/src/optimizer/trainable.py
index 753a452..f086196 100644
--- a/src/optimizer/trainable.py
+++ b/src/optimizer/trainable.py
@@ -22,6 +22,8 @@
 import numpy as np
 import matplotlib.pyplot as plt
 
+from optimizer.utils import save_inference_files
+
 logger = logging.getLogger("ray")
 
 def trainable(hydra_cfg,tune_cfg):
@@ -39,7 +41,7 @@ def trainable(hydra_cfg,tune_cfg):
     ## 原始数据预处理为数据集 ##
     ############################
     t_1=time.time()
-    data_preprocess=DataPreProcess( hydra_cfg.raw_spectral_data_dir,hydra_cfg.raw_labels_dir,hydra_cfg.labels_name,hydra_cfg.dataset_dir,hydra.utils.instantiate(tune_cfg["choose_frame_spatial"]), hydra.utils.instantiate(tune_cfg["features_scaling"]),hydra.utils.instantiate(tune_cfg["labels_scaling"]),hydra_cfg.dataset.train_ratio,hydra_cfg.dataset.validate_ratio)
+    data_preprocess=DataPreProcess( hydra_cfg.dataset_dir,hydra.utils.instantiate(tune_cfg["choose_frame_spatial"]), hydra.utils.instantiate(tune_cfg["features_scaling"]),hydra.utils.instantiate(tune_cfg["labels_scaling"]))
     t_2=time.time()
     logger.info(f"Preprocessed raw data costs {t_2-t_1}s")
 
@@ -47,12 +49,18 @@
     ## 加载数据集为dataloader ##
     ############################
-    train_dataset,val_dataset=load_dataset(data_preprocess.dataset_save_dir)
+    train_dataset=load_dataset(data_preprocess.dataset_save_dir,dataset_type="training")
+    val_dataset=load_dataset(data_preprocess.dataset_save_dir,dataset_type="validation")
+
+
+
+
+
     trainloader = torch.utils.data.DataLoader(
-
train_dataset, batch_size=int(tune_cfg["batch_size"]), shuffle=True, num_workers=hydra_cfg.dataset.num_worker,drop_last=False ) valloader = torch.utils.data.DataLoader( - val_dataset, batch_size=int(tune_cfg["batch_size"]), shuffle=True, num_workers=hydra_cfg.dataset.num_worker + val_dataset, batch_size=int(tune_cfg["batch_size"]), shuffle=True, num_workers=hydra_cfg.dataset.num_worker,drop_last=False ) t_3=time.time() logger.info(f"Dataloader costs {t_3-t_2}s") @@ -101,7 +109,7 @@ def trainable(hydra_cfg,tune_cfg): ## 生成模型架构图写入tensorboad ## ################################ sample = train_dataset[0:4][0] - print(sample.shape) + # print(sample.shape) writer.add_graph(model, torch.tensor(sample).to(device)) @@ -113,6 +121,7 @@ def trainable(hydra_cfg,tune_cfg): ################################ ## 模型训练 ## ################################ + logger.info(f"Start epoch {epoch+1}") train_loss = 0.0 train_steps = 0 @@ -121,11 +130,28 @@ def trainable(hydra_cfg,tune_cfg): train_errors=None train_outputs=None train_labels=None + train_bounds=None model.train() + + # t_1=time.time() + # for batch, (features, labels) in enumerate(trainloader): + # features=features.to(device) + # labels=labels.to(device) + + # t_2=time.time() + # print(f"DEBUDING cost{t_2-t_1}s") + for batch, (features, labels) in enumerate(trainloader): + + + t_iteration_start=time.time() features=features.to(device) labels=labels.to(device) + + + + # zero the parameter gradients optimizer.zero_grad() @@ -136,9 +162,13 @@ def trainable(hydra_cfg,tune_cfg): labels_npy=labels.cpu().detach().numpy() outputs_npy=outputs.cpu().detach().numpy() + # print("outputs_npy",outputs_npy) + # print("labels_npy",labels_npy) # 生成自定义的指标 - error,hit_rate=data_preprocess.get_metric(outputs_npy, labels_npy) + outputs_npy, labels_npy,error,hit_rate,bounds=data_preprocess.get_metric(outputs_npy, labels_npy) + # print("outputs_npy_after",outputs_npy) + # print("labels_npy_after",labels_npy) # Backpropagation loss.backward() @@ -147,16 +177,21 @@ def trainable(hydra_cfg,tune_cfg): # 记录我们自定义的指标 train_loss += loss.item() train_steps += 1 + if train_steps==1: train_hit_rate=hit_rate train_errors=error train_outputs=outputs_npy train_labels=labels_npy + train_bounds=bounds else: + + train_hit_rate=(train_steps-1)/train_steps *train_hit_rate+ 1/train_steps*hit_rate train_errors=np.concatenate((train_errors,error),axis=0) train_outputs=np.concatenate((train_outputs,outputs_npy),axis=0) train_labels=np.concatenate((train_labels,labels_npy),axis=0) + train_bounds=np.concatenate((train_bounds,bounds),axis=0) t_iteration_end=time.time() print("Training Epoch:[{}/{}],Iteration:{}/{},Loss:{}, Cost {}s".format(epoch+1,hydra_cfg.train.max_epoch,train_steps,len(trainloader),loss.item(),t_iteration_end-t_iteration_start)) @@ -168,6 +203,9 @@ def trainable(hydra_cfg,tune_cfg): val_loss = 0.0 val_steps = 0 + val_errors=None + val_outputs=None + val_labels=None val_hit_rate=None t_epoch_train_end=time.time() @@ -177,20 +215,30 @@ def trainable(hydra_cfg,tune_cfg): for batch, (features, labels) in enumerate(valloader): t_iteration_start=time.time() with torch.no_grad(): + features=features.to(device) labels=labels.to(device) outputs = model(features) loss = criterion(outputs, labels) - error,hit_rate=data_preprocess.get_metric(outputs.cpu().detach().numpy(), labels.cpu().detach().numpy()) + labels_npy=labels.cpu().detach().numpy() + outputs_npy=outputs.cpu().detach().numpy() + + outputs_npy, labels_npy,error,hit_rate,bounds=data_preprocess.get_metric(outputs_npy, labels_npy) 
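The hit-rate bookkeeping above keeps an epoch-level average incrementally, avg_n = (n-1)/n * avg_(n-1) + 1/n * x_n, which equals the plain mean of the per-batch hit rates as long as every batch contributes the same number of samples (with drop_last=False the final, smaller batch is weighted slightly more than its share). A minimal sketch with hypothetical values:

    import numpy as np

    batch_rates = [np.array([0.20, 0.40]), np.array([0.60, 0.80]), np.array([1.00, 0.00])]
    avg = None
    for n, rate in enumerate(batch_rates, start=1):
        # same update rule as train_hit_rate / val_hit_rate above
        avg = rate if n == 1 else (n - 1) / n * avg + 1 / n * rate
    assert np.allclose(avg, np.mean(batch_rates, axis=0))  # matches a plain mean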
val_loss += loss.cpu().numpy()
                 val_steps += 1
                 if val_steps==1:
                     val_hit_rate=hit_rate
+                    val_errors=error
+                    val_outputs=outputs_npy
+                    val_labels=labels_npy
                 else:
                     val_hit_rate=(val_steps-1)/val_steps *val_hit_rate+ 1/val_steps*hit_rate
+                    val_errors=np.concatenate((val_errors,error),axis=0)
+                    val_outputs=np.concatenate((val_outputs,outputs_npy),axis=0)
+                    val_labels=np.concatenate((val_labels,labels_npy),axis=0)
             t_iteration_end=time.time()
             print("Validate Iteration:{}/{},Loss:{}, Cost {}s".format(val_steps,len(valloader),loss.item(),t_iteration_end-t_iteration_start))
 
@@ -206,41 +254,157 @@
 
-
-
-
-        checkpoint_data = {
+
+
+        reports={"val_loss": val_loss / val_steps,
+            "train_loss": train_loss,
+            }
+
+        for i,name in enumerate(data_preprocess.labelNames):
+            writer.add_scalar(f"{name}/training/hit_rate",train_hit_rate[i],global_step=epoch)
+            writer.add_scalar(f"{name}/validation/hit_rate",val_hit_rate[i],global_step=epoch)
+
+        if (epoch!=0 and epoch%hydra_cfg.train.checkpoint_interval==0):
+
+
+            for i,name in enumerate(data_preprocess.labelNames):
+
+                writer.add_histogram(tag=f'{name}/training/error_histogram', values=train_errors[:,i], global_step=epoch)
+                writer.add_histogram(tag=f'{name}/validation/error_histogram', values=val_errors[:,i], global_step=epoch)
+
+                fig, ax = plt.subplots()
+                ax.scatter(train_labels[:,i],train_outputs[:,i])
+                ax.plot([train_labels[:,i].min(), train_labels[:,i].max()], [train_labels[:,i].min(), train_labels[:,i].max()], 'r--')
+                ax.plot(train_labels[:,i],train_labels[:,i]-train_bounds[:,i], 'r--')
+                ax.plot(train_labels[:,i],train_labels[:,i]+train_bounds[:,i], 'r--')
+
+
+                ax.set_xlabel('Actual Values')
+                ax.set_ylabel('Estimated Values')
+                writer.add_figure(f'{name}/training/actual_vs_estimated_value', fig , epoch)
+                plt.close(fig)
+
+
+                fig, ax = plt.subplots()
+                ax.scatter(val_labels[:,i],val_outputs[:,i])
+                ax.plot([val_labels[:,i].min(), val_labels[:,i].max()], [val_outputs[:,i].min(), val_outputs[:,i].max()], 'r--')
+                ax.set_xlabel('Actual Values')
+                ax.set_ylabel('Estimated Values')
+                writer.add_figure(f'{name}/validation/actual_vs_estimated_value', fig , epoch)
+                plt.close(fig)
+
+
+
+                fig, ax = plt.subplots()
+                ax.scatter(train_labels[:,i],train_errors[:,i])
+                ax.plot(train_labels[:,i],[0 for i in range(train_labels.shape[0])], color='r', linestyle='--')
+                ax.plot(train_labels[:,i],-train_bounds[:,i], 'r--')
+                ax.plot(train_labels[:,i],+train_bounds[:,i], 'r--')
+                ax.set_xlabel('Actual Values')
+                ax.set_ylabel('Residual error')
+
+                writer.add_figure(f'{name}/training/actual_vs_residual', fig , epoch)
+                plt.close(fig)
+
+
+
+                fig, ax = plt.subplots()
+                ax.scatter(val_labels[:,i],val_errors[:,i])
+                ax.plot(val_labels[:,i],[0 for i in range(val_labels.shape[0])], color='r', linestyle='--')
+                ax.set_xlabel('Actual Values')
+                ax.set_ylabel('Residual error')
+                writer.add_figure(f'{name}/validation/actual_vs_residual', fig , epoch)
+                plt.close(fig)
+
+        with tempfile.TemporaryDirectory() as checkpoint_dir:
+            checkpoint_data = {
                 "epoch": epoch,
                 "net_state_dict": model.state_dict(),
                 "optimizer_state_dict": optimizer.state_dict(),
-        }
-
+                "data_preprocess":data_preprocess
-        with tempfile.TemporaryDirectory() as checkpoint_dir:
+            }
             data_path = pathlib.Path(checkpoint_dir) / "data.pkl"
             with open(data_path, "wb") as fp:
                 pickle.dump(checkpoint_data, fp)
+
+            save_inference_files(model,data_preprocess,checkpoint_dir)
 
-        checkpoint = ray.train.Checkpoint.from_directory(checkpoint_dir)
-
-
reports={"val_loss": val_loss / val_steps, - "train_loss": train_loss, - } - for i,name in enumerate(hydra_cfg.labels_name): - reports[f"train_hit_rate_{name}"]=train_hit_rate[i] - reports[f"val_hit_rate_{name}"]=val_hit_rate[i] - writer.add_histogram(tag=f'train_error_{name}', values=train_errors[:,i], global_step=epoch) - - fig, ax = plt.subplots() - ax.scatter(np.arange(train_outputs.shape[0]),train_outputs[:,i]) - ax.scatter(np.arange(train_outputs.shape[0]),train_labels[:,i]) - writer.add_figure(f'train_imgage_{name}', fig , epoch) - plt.close(fig) + checkpoint = ray.train.Checkpoint.from_directory(checkpoint_dir) ray.train.report( reports, checkpoint=checkpoint, ) + else: + ray.train.report(reports,) t_epoch_end=time.time() logger.info(f"Save Checkpoint costs {t_epoch_end-t_epoch_val_end}s") print("Training Epoch:[{}/{}], Average Loss:{}, Cost {}s".format(epoch+1,hydra_cfg.train.max_epoch,train_loss,t_epoch_end-t_epoch_start)) - for i,name in enumerate(hydra_cfg.labels_name): - print(f"train_hit_rate_{name}: {train_hit_rate[i]} ",end=" ") - print(f"val_hit_rate_{name}: {val_hit_rate[i]} ",end=" ") \ No newline at end of file + # for i,name in enumerate(hydra_cfg.labels_name): + # print(f"train_hit_rate_{name}: {train_hit_rate[i]} ",end=" ") + # print(f"val_hit_rate_{name}: {val_hit_rate[i]} ",end=" ") + + ##################################################### + ## 每个epoch保存checkpoint,并记录到tensorboard ## + ##################################################### + + #测试 + + test_dataset=load_dataset(data_preprocess.dataset_save_dir,dataset_type="test") + + testloader = torch.utils.data.DataLoader( + test_dataset, batch_size=int(tune_cfg["batch_size"]), shuffle=True, num_workers=hydra_cfg.dataset.num_worker,drop_last=False + ) + + + + + test_loss = 0.0 + test_steps = 0 + test_errors=None + test_outputs=None + test_labels=None + test_hit_rate=None + logger.info(f"Test starts") + t_epoch_test_start=time.time() + + model.eval() + + for batch, (features, labels) in enumerate(testloader): + t_iteration_start=time.time() + with torch.no_grad(): + features=features.to(device) + labels=labels.to(device) + + outputs = model(features) + loss = criterion(outputs, labels) + + labels_npy=labels.cpu().detach().numpy() + outputs_npy=outputs.cpu().detach().numpy() + + outputs_npy, labels_npy,error,hit_rate,bounds=data_preprocess.get_metric(outputs_npy, labels_npy) + + test_loss += loss.cpu().numpy() + test_steps += 1 + if test_steps==1: + test_hit_rate=hit_rate + test_errors=error + test_outputs=outputs_npy + test_labels=labels_npy + else: + test_hit_rate=(test_steps-1)/test_steps *test_hit_rate+ 1/test_steps*hit_rate + test_errors=np.concatenate((test_errors,error),axis=0) + test_outputs=np.concatenate((test_outputs,outputs_npy),axis=0) + test_labels=np.concatenate((test_labels,labels_npy),axis=0) + t_iteration_end=time.time() + print("Test Iteration:{}/{},Loss:{}, Cost {}s".format(test_steps,len(testloader),loss.item(),t_iteration_end-t_iteration_start)) + + t_epoch_test_end=time.time() + logger.info(f"Test costs {t_epoch_test_end-t_epoch_test_start}s") + tmp_string="" + for i,name in enumerate(data_preprocess.labelNames): + tmp_string+=f"{name}\t:{test_hit_rate[i]*100:.2f}%\n" + writer.add_text(f"test/hit_rate",tmp_string) diff --git a/src/optimizer/utils.py b/src/optimizer/utils.py new file mode 100644 index 0000000..1781560 --- /dev/null +++ b/src/optimizer/utils.py @@ -0,0 +1,48 @@ +import shutil +import pathlib +import pickle +import torch +def 
save_inference_files(model,data_preprocess,checkpoint_dir):
+    save_dir=pathlib.Path(checkpoint_dir)/"inference"
+    print("开始在checkpoint中保存推理所需文件")
+    print("choose_frame_spatial文件路径:",data_preprocess.choose_frame_spatial.file_path.parent)
+
+    # save the files needed to re-create choose_frame_spatial at inference time
+
+
+    save_dir.mkdir(mode=0o777, parents=True, exist_ok=True)
+
+    shutil.copy(data_preprocess.choose_frame_spatial.file_path,save_dir/"choose_frame_spatial.py")
+    with open(save_dir/"choose_frame_spatial.pkl",'wb') as f:
+        pickle.dump(data_preprocess.choose_frame_spatial.state_dict(),f)
+
+
+    shutil.copy(data_preprocess.features_scaling.file_path,save_dir/"features_scaling.py")
+    with open(save_dir/"features_scaling.pkl",'wb') as f:
+        pickle.dump(data_preprocess.features_scaling.state_dict(),f)
+
+
+    shutil.copy(data_preprocess.labels_scaling.file_path,save_dir/"labels_scaling.py")
+    with open(save_dir/"labels_scaling.pkl",'wb') as f:
+        pickle.dump(data_preprocess.labels_scaling.state_dict(),f)
+
+    with open(save_dir/"labelNames.pkl",'wb') as f:
+        pickle.dump(data_preprocess.labelNames,f)
+
+
+    input_tensor = torch.rand((1,*data_preprocess.features_scaling.feature_shape), dtype=torch.float32).to("cuda:0")
+    torch.onnx.export(
+        model,                       # model to export
+        (input_tensor,),             # inputs of the model,
+        str(save_dir/"model.onnx"),  # filename of the ONNX model
+        input_names=["input"],       # rename inputs for the ONNX model
+        dynamo=True                  # True or False to select the exporter to use
+    )
diff --git a/src/scripts/Add_Lab.py b/src/scripts/Add_Lab.py
new file mode 100644
index 0000000..051d19e
--- /dev/null
+++ b/src/scripts/Add_Lab.py
@@ -0,0 +1,149 @@
+import pandas as pd
+# from tqdm import tqdm
+if __name__ =="__main__":
+
+    label_file_path="/data/SEMS-model-training/labels/NanEr/2024-05-15_08-03_2335.xlsx"
+    raw_label_lab_file_path="/data/SEMS-model-training/labels/raw_labels/NanEr/5-8月份转炉TSC和TSO化验结果.xls"
+
+    save_dir="/data/SEMS-model-training/labels/NanEr/2024-05-15_08-03_2335_.xlsx"
+
+    labels=pd.read_excel(label_file_path)
+
+    print(f"原始数据量{labels.shape[0]}")
+    labels=labels.loc[:,["Furnace_Number","Steel_Type","TSC_start_time","TSC_end_time","TSC_T","TSC_C","TSO_start_time","TSO_end_time","TSO_T","TSO_C"]]
+
+    # This filtering has since been folded into the data preprocessing step:
+    # dropping a row whenever any column is 0 is too strict; only the selected
+    # labels should be checked for 0.
+    # select rows containing NULL
+    null_rows=labels.isnull().any(axis=1)
+    # select rows containing 0
+    zeros_rows=(labels==0).any(axis=1) | (labels=='0').any(axis=1)
+
+    # drop every row that has any NULL or 0
+    selected_rows=~(null_rows|zeros_rows)
+    labels=labels[selected_rows]
+
+    print(f"删除无效数据后{labels.shape[0]}")
+
+    labels_output=labels.copy()
+
+    labels_output['TSC_P']=0.0
+    labels_output['TSC_S']=0.0
+    labels_output['TSC_Mn']=0.0
+    labels_output['TSC_Ni']=0.0
+    labels_output['TSC_Mo']=0.0
+    labels_output['TSC_Cr']=0.0
+    labels_output['TSO_P']=0.0
+    labels_output['TSO_S']=0.0
+    labels_output['TSO_Mn']=0.0
+    labels_output['TSO_Ni']=0.0
+    labels_output['TSO_Mo']=0.0
+    labels_output['TSO_Cr']=0.0
+
+
+    raw_label_lab=pd.read_excel(raw_label_lab_file_path)
+    raw_label_lab=raw_label_lab.loc[:,["炉次号","分析时间","Mn","P","S","Ni","Cr","Mo"]]
+    # print(raw_label_lab)
+
+    for i in range(labels.shape[0]):
+        # i=4
+        furnaceID=labels.iloc[i]["Furnace_Number"]
+
+        # print(raw_label_lab[raw_label_lab["炉次号"]==furnaceID])
+
+        tmp=raw_label_lab[raw_label_lab["炉次号"]==furnaceID]
+
+        if tmp.shape[0]==0:
+            print(f"炉次号{furnaceID}找不到对应的数据")
+            continue
+        elif tmp.shape[0]==1:
+            print(f"炉次号{furnaceID}找到1个")
+            labels_output.loc[i,'TSC_P']=tmp.iloc[0]["P"]
+            labels_output.loc[i,'TSC_S']=tmp.iloc[0]["S"]
+            labels_output.loc[i,'TSC_Mn']=tmp.iloc[0]["Mn"]
+            labels_output.loc[i,'TSC_Ni']=tmp.iloc[0]["Ni"]
+            labels_output.loc[i,'TSC_Mo']=tmp.iloc[0]["Mo"]
+            labels_output.loc[i,'TSC_Cr']=tmp.iloc[0]["Cr"]
+            labels_output.loc[i,'TSO_P']=tmp.iloc[0]["P"]
+            labels_output.loc[i,'TSO_S']=tmp.iloc[0]["S"]
+            labels_output.loc[i,'TSO_Mn']=tmp.iloc[0]["Mn"]
+            labels_output.loc[i,'TSO_Ni']=tmp.iloc[0]["Ni"]
+            labels_output.loc[i,'TSO_Mo']=tmp.iloc[0]["Mo"]
+            labels_output.loc[i,'TSO_Cr']=tmp.iloc[0]["Cr"]
+        else:
+            print(f"炉次号{furnaceID}找到{tmp.shape[0]}个")
+
+            # find the lab analysis closest in time to TSC
+            min_time=None
+            min_index=0
+            for j in range(tmp.shape[0]):
+                # print(j,i)
+                delta_time=tmp.iloc[j]["分析时间"]-labels.iloc[i]["TSC_end_time"]
+                if min_time is None:
+                    min_time=delta_time
+                else:
+                    if delta_time<min_time:
+                        min_time=delta_time
+                        min_index=j
diff --git a/src/scripts/pre_dataset_visual.py b/src/scripts/pre_dataset_visual.py
new file mode 100644
--- /dev/null
+++ b/src/scripts/pre_dataset_visual.py
+def plot_npz(file_path:pathlib.Path,output_folder:pathlib.Path)->None:
+
+    output_folder=output_folder
+    output_folder.mkdir(mode=0o777,parents=True,exist_ok=True)
+
+    data=np.load(file_path,allow_pickle=True)
+    # data["rawSpectralData"] here has shape (time, spectral, spatial)
+    rawSpectralData=data["rawSpectralData"]
+
+
+
+    plt.figure()
+    spectrum_band=np.linspace(400,1000,224)
+    # plt.ylim([0,4095])
+    plt.plot(spectrum_band,rawSpectralData)
+    # plt.title("Max Point Spectrum")
+    plt.savefig(output_folder/f"{file_path.stem}.png",dpi=100)
+    plt.close()
+    # plot the spectrum of the brightest point
+
+
+
+def main(raw_data_folder:pathlib.Path,output_folder:pathlib.Path)->None:
+
+    files=list(raw_data_folder.glob("*.npz"))
+
+    # plot_npz(files[0],output_folder )
+
+    p=multiprocessing.Pool(4)
+
+    for i in range(len(files)):
+        p.apply_async(plot_npz, args=(files[i],output_folder))
+
+    print('Waiting for all subprocesses done...')
+    p.close()
+    p.join()
+    print('All subprocesses done.')
+
+if __name__=="__main__":
+    raw_dataset_folder=pathlib.Path("/data/SEMS-model-training/dataset/DBSCAN_eps_0.15_min_samples_10/pre_dataset")
+    output_folder=pathlib.Path("/data/SEMS-model-training/dataset/DBSCAN_eps_0.15_min_samples_10/pre_dataset_visual")
+    main(raw_dataset_folder/"test",output_folder/"test",)
+    main(raw_dataset_folder/"training",output_folder/"training",)
+    main(raw_dataset_folder/"validation",output_folder/"validation",)
\ No newline at end of file
diff --git a/src/scripts/raw_dataset_visual.py b/src/scripts/raw_dataset_visual.py
new file mode 100644
index 0000000..74865e0
--- /dev/null
+++ b/src/scripts/raw_dataset_visual.py
@@ -0,0 +1,265 @@
+import multiprocessing.pool
+import pathlib
+import os
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.decomposition import PCA
+from sklearn.decomposition import KernelPCA
+
+from sklearn.manifold import TSNE
+from sklearn.cluster import DBSCAN
+from sklearn import metrics
+from sklearn.cluster import OPTICS
+
+import multiprocessing
+
+
+def plot_npz(file_path:pathlib.Path,output_folder:pathlib.Path)->None:
+
+    output_folder=output_folder/file_path.stem
+    output_folder.mkdir(mode=0o777,parents=True,exist_ok=True)
+
+    data=np.load(file_path,allow_pickle=True)
+    # data["rawSpectralData"] here has shape (time, spectral, spatial)
+    rawSpectralData=data["rawSpectralData"].transpose(0, 2, 1)
+    rawSpectralData=rawSpectralData.reshape((rawSpectralData.shape[0]*rawSpectralData.shape[1],rawSpectralData.shape[2]))
+    # rawSpectralData now has shape (time*spatial, spectral)
+
+    tmp=np.max(rawSpectralData,axis=1)
+    rawSpectralData=rawSpectralData[(tmp>500) & (tmp<4095),:]
+    # keep only spatial points that are neither under- nor over-exposed
+
+
+    rawSpectralData_normed=(rawSpectralData-np.min(rawSpectralData,axis=1,keepdims=True))/(np.max(rawSpectralData,axis=1,keepdims=True)-np.min(rawSpectralData,axis=1,keepdims=True))
+    # normalize every spectrum to strip intensity information
+
+
+    plt.figure()
+    
spectrum_band=np.linspace(400,1000,224) + index=np.argmax(rawSpectralData) + index=np.unravel_index(index, rawSpectralData.shape) + plt.ylim([0,4095]) + plt.plot(spectrum_band,rawSpectralData[index[0],:]) + plt.title("Max Point Spectrum") + plt.savefig(output_folder/"max_point_spectrum.png",dpi=100) + plt.close() + #画最强点的光谱 + + + + ################################ + # PCA + ################################ + # pca_norm_1D = PCA(n_components=1) + # norm_feat_1D = pca_norm_1D.fit(rawSpectralData_normed).transform(rawSpectralData_normed) + # pca_norm_2D = PCA(n_components=2) + # norm_feat_2D = pca_norm_2D.fit(rawSpectralData_normed).transform(rawSpectralData_normed) + # pca_norm_3D = PCA(n_components=3) + # norm_feat_3D = pca_norm_3D.fit(rawSpectralData_normed).transform(rawSpectralData_normed) + + # pca_1D = PCA(n_components=1) + # feat_1D = pca_1D.fit(rawSpectralData).transform(rawSpectralData) + # pca_2D = PCA(n_components=2) + # feat_2D = pca_2D.fit(rawSpectralData).transform(rawSpectralData) + # pca_3D = PCA(n_components=3) + # feat_3D = pca_3D.fit(rawSpectralData).transform(rawSpectralData) + + + # fig, axs = plt.subplots(2, 3,figsize=(15,10)) + + # axs[0,0].scatter(norm_feat_1D, np.zeros((norm_feat_1D.shape[0],)), s=0.1) + # axs[0, 0].set_title(' norm') + # axs[0,1].scatter(norm_feat_2D[:,0], norm_feat_2D[:,1], s=0.1) + # ax_3d = fig.add_subplot(2, 3, 3, projection='3d') + # ax_3d.scatter(norm_feat_3D[:, 0], norm_feat_3D[:, 1], norm_feat_3D[:, 2], s=0.01) + + + # axs[1,0].scatter(feat_1D, np.zeros((feat_1D.shape[0],)), s=0.1) + # axs[1, 0].set_title(' raw') + # axs[1,1].scatter(feat_2D[:,0], feat_2D[:,1], s=0.1) + # ax_3d = fig.add_subplot(2, 3, 6, projection='3d') + # ax_3d.scatter(feat_3D[:, 0], feat_3D[:, 1], feat_3D[:, 2], s=0.01) + # plt.savefig(output_folder/"PCA.png",dpi=100) + + # plt.close() + + + + ################################ + # KernelPCA gamma =15 + ################################ + + def plot_KernelPCA(gamma): + pca_norm_1D = KernelPCA(n_components=1, kernel='rbf', gamma=gamma) + norm_feat_1D = pca_norm_1D.fit(rawSpectralData_normed).transform(rawSpectralData_normed) + pca_norm_2D = KernelPCA(n_components=2, kernel='rbf', gamma=gamma) + norm_feat_2D = pca_norm_2D.fit(rawSpectralData_normed).transform(rawSpectralData_normed) + pca_norm_3D = KernelPCA(n_components=3, kernel='rbf', gamma=gamma) + norm_feat_3D = pca_norm_3D.fit(rawSpectralData_normed).transform(rawSpectralData_normed) + + pca_1D = KernelPCA(n_components=1, kernel='rbf', gamma=gamma) + feat_1D = pca_1D.fit(rawSpectralData).transform(rawSpectralData) + pca_2D = KernelPCA(n_components=2, kernel='rbf', gamma=gamma) + feat_2D = pca_2D.fit(rawSpectralData).transform(rawSpectralData) + pca_3D = KernelPCA(n_components=3, kernel='rbf', gamma=gamma) + feat_3D = pca_3D.fit(rawSpectralData).transform(rawSpectralData) + + + fig, axs = plt.subplots(2, 3,figsize=(15,10)) + + axs[0,0].scatter(norm_feat_1D, np.zeros((norm_feat_1D.shape[0],)), s=0.1) + axs[0, 0].set_title(' norm') + axs[0,1].scatter(norm_feat_2D[:,0], norm_feat_2D[:,1], s=0.1) + ax_3d = fig.add_subplot(2, 3, 3, projection='3d') + ax_3d.scatter(norm_feat_3D[:, 0], norm_feat_3D[:, 1], norm_feat_3D[:, 2], s=0.01) + + + axs[1,0].scatter(feat_1D, np.zeros((feat_1D.shape[0],)), s=0.1) + axs[1, 0].set_title(' raw') + axs[1,1].scatter(feat_2D[:,0], feat_2D[:,1], s=0.1) + ax_3d = fig.add_subplot(2, 3, 6, projection='3d') + ax_3d.scatter(feat_3D[:, 0], feat_3D[:, 1], feat_3D[:, 2], s=0.01) + plt.savefig(output_folder/f"KernelPCA_gamma_{gamma}.png",dpi=100) + + 
plt.close() + + # plot_KernelPCA(gamma=None) + # plot_KernelPCA(gamma=1) + # plot_KernelPCA(gamma=5) + # plot_KernelPCA(gamma=10) + # plot_KernelPCA(gamma=15) + + + + ################################ + # t-SNE + ################################ + # pca_norm_1D = TSNE(n_components=1, learning_rate='auto', init='pca') + # norm_feat_1D = pca_norm_1D.fit(rawSpectralData_normed).fit_transform(rawSpectralData_normed) + # pca_norm_2D = TSNE(n_components=2, learning_rate='auto', init='pca') + # norm_feat_2D = pca_norm_2D.fit(rawSpectralData_normed).fit_transform(rawSpectralData_normed) + # pca_norm_3D = TSNE(n_components=3, learning_rate='auto', init='pca') + # norm_feat_3D = pca_norm_3D.fit(rawSpectralData_normed).fit_transform(rawSpectralData_normed) + + # pca_1D = TSNE(n_components=1, learning_rate='auto', init='pca') + # feat_1D = pca_1D.fit(rawSpectralData).fit_transform(rawSpectralData) + # pca_2D = TSNE(n_components=2, learning_rate='auto', init='pca') + # feat_2D = pca_2D.fit(rawSpectralData).fit_transform(rawSpectralData) + # pca_3D = TSNE(n_components=3, learning_rate='auto', init='pca') + # feat_3D = pca_3D.fit(rawSpectralData).fit_transform(rawSpectralData) + + + # fig, axs = plt.subplots(2, 3,figsize=(15,10)) + + # axs[0,0].scatter(norm_feat_1D, np.zeros((norm_feat_1D.shape[0],)), s=0.1) + # axs[0, 0].set_title(' norm') + # axs[0,1].scatter(norm_feat_2D[:,0], norm_feat_2D[:,1], s=0.1) + # ax_3d = fig.add_subplot(2, 3, 3, projection='3d') + # ax_3d.scatter(norm_feat_3D[:, 0], norm_feat_3D[:, 1], norm_feat_3D[:, 2], s=0.01) + + + # axs[1,0].scatter(feat_1D, np.zeros((feat_1D.shape[0],)), s=0.1) + # axs[1, 0].set_title(' raw') + # axs[1,1].scatter(feat_2D[:,0], feat_2D[:,1], s=0.1) + # ax_3d = fig.add_subplot(2, 3, 6, projection='3d') + # ax_3d.scatter(feat_3D[:, 0], feat_3D[:, 1], feat_3D[:, 2], s=0.01) + # plt.savefig(output_folder/"t-SNE.png",dpi=100) + + # plt.close() + + + def plot_DBSCAN(eps=0.15, min_samples=10): + + db_norm = DBSCAN(eps=eps, min_samples=min_samples).fit(rawSpectralData_normed) + + labels_norm = db_norm.labels_ + n_norm = len(set(labels_norm)) - (1 if -1 in labels_norm else 0) + n_noise_norm = list(labels_norm).count(-1) + + + max_i=0 + max_num=0 + for i in range(n_norm): + tmp=(labels_norm==i).sum() + if tmp>max_num: + max_i=i + max_num=tmp + + fig, axs = plt.subplots(2, 2,figsize=(10,10)) + for i in range(labels_norm.shape[0]): + if labels_norm[i]==max_i: + axs[0,0].plot(rawSpectralData_normed[i]) + axs[0,0].set_title(f'norm data w norm cluster {max_num},{n_norm},{n_noise_norm}') + + for i in range(labels_norm.shape[0]): + if labels_norm[i]==max_i: + axs[0,1].plot(rawSpectralData[i]) + axs[0,1].set_title('norm data w norm cluster') + + + + + db_raw = DBSCAN(eps=eps, min_samples=min_samples).fit(rawSpectralData) + labels_raw = db_raw.labels_ + n_raw = len(set(labels_raw)) - (1 if -1 in labels_raw else 0) + n_noise_raw = list(labels_raw).count(-1) + + + max_i=0 + max_num=0 + for i in range(n_raw): + tmp=(labels_raw==i).sum() + if tmp>max_num: + max_i=i + max_num=tmp + + for i in range(labels_raw.shape[0]): + if labels_raw[i]==max_i: + axs[1,0].plot(rawSpectralData_normed[i]) + axs[1,0].set_title(f'norm data w norm cluster {max_num},{n_raw},{n_noise_raw}') + + for i in range(labels_raw.shape[0]): + if labels_raw[i]==max_i: + axs[1,1].plot(rawSpectralData[i]) + axs[1,1].set_title('norm data w norm cluster') + + plt.savefig(output_folder/f"DBSCAN_eps_{eps}_min_samples_{min_samples}.png",dpi=100) + + plt.close() + plot_DBSCAN(eps=0.15, min_samples=10) + # 
+
+    # print(file_path.stem,rawSpectralData.shape)
+
+
+def main(raw_data_folder:pathlib.Path,output_folder:pathlib.Path)->None:
+
+    files=list(raw_data_folder.glob("*.npz"))
+
+    # plot_npz(files[0],output_folder)
+
+    # Render the .npz files in parallel with a pool of 4 worker processes
+    p=multiprocessing.Pool(4)
+
+    for i in range(len(files)):
+        p.apply_async(plot_npz, args=(files[i],output_folder))
+
+    print('Waiting for all subprocesses done...')
+    p.close()
+    p.join()
+    print('All subprocesses done.')
+
+if __name__=="__main__":
+    raw_dataset_folder=pathlib.Path("/data/SEMS-model-training/dataset/raw_dataset")
+    output_folder=pathlib.Path("/data/SEMS-model-training/dataset/raw_dataset_visual")
+    main(raw_dataset_folder/"test",output_folder/"test")
+    main(raw_dataset_folder/"training",output_folder/"training")
+    main(raw_dataset_folder/"validation",output_folder/"validation")
\ No newline at end of file
diff --git a/src/scripts/rawbindata2rawdataset.py b/src/scripts/rawbindata2rawdataset.py
new file mode 100644
index 0000000..e31718c
--- /dev/null
+++ b/src/scripts/rawbindata2rawdataset.py
@@ -0,0 +1,276 @@
+'''
+@File    :   rawbindata2rawdataset.py
+@Time    :   2024/08/09 13:54:28
+@Author  :   Zhanpeng Yang
+@Version :   0.0.1
+@Contact :   zhpyang@outlook.com
+@Desc    :   Data pre-processing
+'''
+
+import pandas as pd
+import logging
+import os,glob
+import numpy as np
+import datetime
+import pickle
+import h5py
+
+class DataPreProcess:
+    """
+    Contains all data pre-processing steps, including
+    1. Extracting the spectra recorded around the target times from the raw binary data, based on the Excel file provided by the steel plant
+    2. Selecting suitable, stable temporal and spatial points from those spectra as features
+    3. Scaling the features and labels
+
+    """
+    def __init__(self, raw_spectral_data_dir, raw_labels_dir,labels_name,dataset_dir,train_ratio=0.8,validate_ratio=0.1) -> None:
+        """Initializes the class with every parameter needed for data pre-processing
+
+        Args:
+            raw_spectral_data_dir (_type_): directory of the raw spectral data
+            raw_labels_dir (_type_): directory of the raw labels
+            labels_name (_type_): names of the labels to extract
+            dataset_dir (_type_): output directory of the generated dataset
+            train_ratio (float, optional): ratio of the training set. Defaults to 0.8.
+            validate_ratio (float, optional): ratio of the validation set; the remainder becomes the test set. Defaults to 0.1.
+ """ + self.raw_spectral_data_dir=raw_spectral_data_dir + self.raw_labels_dir=raw_labels_dir + self.dataset_dir=dataset_dir + self.labels_name=labels_name + + self.train_ratio=train_ratio + self.validate_ratio=validate_ratio + + #加载原始的标签 + self.raw_labels=pd.read_excel(raw_labels_dir) + + #加载原始光谱的缓存 + self.raw_spectral_data_cache=self._load_raw_spectral_data_cache(raw_spectral_data_dir) + + #随便加载一个csv文件,判断光谱数据的维度 + self.spectral_dim,self.spatial_dim= self._get_spectral_spatial_dim(self.raw_spectral_data_cache) + + #正式开始原始数据转化为数据集 + self._raw_data_2_dataset() + + + + # def _load_raw_labels(self,raw_labels_dir:str,labels_name:list)->pd.DataFrame: + # """读取利用脚本处理后的钢厂给的excel文件所在文件夹(会扫描所有文件) + # 并选择指定的label名作为output + # 并去除值为NaN或0的行 + + # Args: + # raw_labels_dir (str): 利用脚本处理后的钢厂给的excel路径 + # labels_name (list): 指定的作为Label的列 + + # Returns: + # pd.DataFrame: 返回所有筛选后的炉次数据 + # """ + + # raw_labels=None + # for name in os.listdir(raw_labels_dir): + # tmp_raw_labels=pd.read_excel(os.path.join(raw_labels_dir,name)) + + # choosed_column=["TSC_start_time","TSC_end_time"] + # choosed_column=choosed_column+labels_name + + + # #只选出我们想要的部分作为标签 + # tmp_raw_labels=tmp_raw_labels.loc[:,choosed_column] + + # # 选出有NULL的行 + # null_rows=tmp_raw_labels.isnull().any(axis=1) + # # # 选出有0的行 + # zeros_rows=(tmp_raw_labels==0).any(axis=1) | (tmp_raw_labels=='0').any(axis=1) + + # # # 每一行但凡有NULL或者0都给删了 + # selected_rows=~(null_rows|zeros_rows) + # tmp_raw_labels=tmp_raw_labels[selected_rows] + + # if raw_labels is None: + # raw_labels=tmp_raw_labels + # else: + # raw_labels=pd.concat([raw_labels,tmp_raw_labels],axis=0) + # logging.debug(f"Reading raw label excel file:{name}, which has {tmp_raw_labels.shape[0]} furnaces") + # logging.debug(f"Readed raw label excel files, which has {raw_labels.shape[0]} furnaces in total") + + # return raw_labels + def _load_raw_spectral_data_cache(self,raw_spectral_data_dir:str)->list: + """生成所有原始光谱数据文件的缓存,包括每个文件记录的开始及结束时间,目的为加快后面读取原始数据的速度 + + Args: + raw_spectral_data_dir (str): 原始光谱所在路径 + + Returns: + list: 缓存,其中有多少个成员,就有多少个原始数据文件,每个成员包括格式为datetime的开始及结束时间,及文件路径 + """ + spectral_file_paths=glob.glob(os.path.join(raw_spectral_data_dir,"*.csv")) + cache_file_paths=glob.glob(os.path.join(raw_spectral_data_dir,"*.pkl")) + + update_flag=False + + + #不存在缓存文件,以及缓存文件中数据文件个数与文件夹中的文件个数不一致,均重新生成 + if len(cache_file_paths)==0: + logging.debug(f"Raw spectral data cache is not existed! Generating") + update_flag=True + elif len(cache_file_paths)==1: + with open(cache_file_paths[0],"rb") as f: + raw_spectral_data_cache=pickle.load(f) + if len(raw_spectral_data_cache) !=len(spectral_file_paths): + logging.debug(f"Raw spectral data cache is out of date! 
+    def _get_spectral_spatial_dim(self,raw_spectral_data_cache):
+        data=np.loadtxt(raw_spectral_data_cache[0]["file_path"], delimiter=",").astype(np.uint64)
+        if data[0,2]==229376:
+            spectral_dim=224
+            spatial_dim=512
+        elif data[0,2]==917504:
+            spectral_dim=448
+            spatial_dim=1024
+        return spectral_dim, spatial_dim
+
+    def _read_spectral_data(self,start_time:datetime.datetime,end_time:datetime.datetime)->tuple:
+        """Fetches the spectral data recorded between start_time and end_time
+
+        Args:
+            start_time (datetime.datetime): start time
+            end_time (datetime.datetime): end time
+
+        Returns:
+            tuple: the frame timestamps and the raw spectral data
+        """
+
+        def get_spectral_data_per_file(file_path,s_t,e_t):
+            data=np.loadtxt(file_path, delimiter=",").astype(np.uint64)
+
+            if s_t is not None:
+                tmp_s=datetime.datetime.timestamp(s_t)*1000
+                tmp_s_index=0
+                for i in range(data.shape[0]-1):
+                    if data[i,0]<=tmp_s and data[i+1,0]>=tmp_s:
+                        tmp_s_index=i
+                        break
+            else:
+                tmp_s_index=0
+            if e_t is not None:
+                tmp_e=datetime.datetime.timestamp(e_t)*1000
+                tmp_e_index=data.shape[0]
+                for i in range(tmp_s_index,data.shape[0]-1):
+                    if data[i,0]<=tmp_e and data[i+1,0]>=tmp_e:
+                        tmp_e_index=i
+                        break
+            else:
+                tmp_e_index=data.shape[0]
+
+            with open(file_path[:-3]+"bin", "rb") as f:
+                f.seek(data[tmp_s_index,1])
+                d=f.read(np.uint64((tmp_e_index-tmp_s_index)*data[tmp_s_index,2]))
+                d=np.frombuffer(d, dtype=np.uint16).reshape(tmp_e_index-tmp_s_index,self.spectral_dim,self.spatial_dim)
+            return data[tmp_s_index:tmp_e_index,0],d
+
+        timestamps=None
+        raw_spectral_data=None
+        for tmp_info in self.raw_spectral_data_cache:
+            tmp_data=None
+            if start_time<tmp_info["start_t"] and end_time>tmp_info["end_t"]:
+                # The target interval fully contains this file's interval, so take the data from the start to the end of the file
+                tmp_time_stamp,tmp_data=get_spectral_data_per_file(tmp_info["file_path"],None,None)
+            elif start_time>tmp_info["start_t"] and end_time<tmp_info["end_t"]:
+                # The target interval lies entirely within this file's interval, so take the data from start_time to end_time
+                tmp_time_stamp,tmp_data=get_spectral_data_per_file(tmp_info["file_path"],start_time,end_time)
+            elif end_time>tmp_info["start_t"] and end_time<tmp_info["end_t"]:
+                # The target interval overlaps the left side of this file's interval, so take the data from the start of the file to end_time
+                tmp_time_stamp,tmp_data=get_spectral_data_per_file(tmp_info["file_path"],None,end_time)
+            elif start_time>tmp_info["start_t"] and start_time<tmp_info["end_t"]:
+                # The target interval overlaps the right side of this file's interval, so take the data from start_time to the end of the file
+                tmp_time_stamp,tmp_data=get_spectral_data_per_file(tmp_info["file_path"],start_time,None)
+            if tmp_data is not None:
+                if raw_spectral_data is None:
+                    timestamps=tmp_time_stamp
+                    raw_spectral_data=tmp_data
+                else:
+                    timestamps=np.concatenate((timestamps,tmp_time_stamp),axis=0)
+                    raw_spectral_data=np.concatenate((raw_spectral_data,tmp_data),axis=0)
+        return timestamps,raw_spectral_data
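The reads above assume an index csv whose columns are (frame timestamp in ms, byte offset into the companion .bin file, frame size in bytes); the two magic frame sizes in _get_spectral_spatial_dim then follow from 2-byte uint16 pixels:

    # 224 spectral bands x 512 spatial points x 2 bytes per value
    assert 224 * 512 * 2 == 229376
    # 448 spectral bands x 1024 spatial points x 2 bytes per value
    assert 448 * 1024 * 2 == 917504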
+    def _raw_data_2_dataset(self):
+
+        save_dir=os.path.join(self.dataset_dir)
+
+        # Step 1: select the feature time and spatial points, and save the result
+
+        pre_dataset_save_dir=os.path.join(save_dir,"raw_dataset")
+
+        if not os.path.exists(pre_dataset_save_dir):
+            os.makedirs(pre_dataset_save_dir)
+
+        # The data is regenerated only if its folder does not exist; if it exists, this part is skipped.
+        # !!!!!!!!!! NOTE: whenever new data arrives, every folder under dataset must therefore be deleted (this clears the cache) so that everything is regenerated
+        for i in range(self.raw_labels.shape[0]):
+
+            start_time,end_time=self.raw_labels.iloc[i]['TSC_start_time']+datetime.timedelta(seconds=-10),self.raw_labels.iloc[i]['TSC_end_time']
+            timestamps,raw_spectral_data=self._read_spectral_data(start_time,end_time)
+
+            logging.debug(f"{self.raw_labels.iloc[i]['TSC_start_time']+datetime.timedelta(seconds=-10)},{self.raw_labels.iloc[i]['TSC_end_time']}")
+            if raw_spectral_data is not None:
+                # Only record a heat if the captured frame rate exceeds 2 frames per second
+                if raw_spectral_data.shape[0]>2*(end_time-start_time).total_seconds():
+                    logging.debug(f"PreProcess Stage 1: [{i+1}/{self.raw_labels.shape[0]}] with {timestamps.shape[0]} frames")
+
+                    # raw_spectral_data=self.choose_frame_spatial.run(timestamps,raw_spectral_data)
+                    np.savez(os.path.join(pre_dataset_save_dir,f"{self.raw_labels.iloc[i]['Furnace_Number']}.npz"),furnaceNumber=self.raw_labels.iloc[i]['Furnace_Number'],measureStartDatetime=self.raw_labels.iloc[i]['TSC_start_time'].strftime('%Y-%m-%d %H:%M:%S'),measureEndDatetime=self.raw_labels.iloc[i]['TSC_end_time'].strftime('%Y-%m-%d %H:%M:%S'),timestamps=timestamps,rawSpectralData=raw_spectral_data,rawLabels=self.raw_labels.iloc[i][self.labels_name].to_numpy(),labelNames=["Temperature","C","P","S","Mn","Ni","Mo","Cr"])
+        # else:
+        #     logging.info(f"Pre Dataset is existed in {pre_dataset_save_dir}")
+
+
+if __name__=="__main__":
+
+    logging.basicConfig(level = logging.DEBUG)
+
+    raw_data_dir="/data/SEMS-model-training/old_rawdata"
+    labels_path="/data/SEMS-model-training/labels/NanEr/2024-05-15_08-03_2335_.xlsx"
+    dataset_dir="/data/SEMS-model-training/dataset"
+    labels_name=["TSC_T","TSC_C","TSC_P","TSC_S","TSC_Mn","TSC_Ni","TSC_Mo","TSC_Cr"]
+
+    # from choose_frame_spatial.mean import ChooseFrameSpatial
+    # from features_scaling.max_min import FeatureScaling
+    # from labels_scaling.max_min import LabelScaling
+    # choose_frame_spatial=ChooseFrameSpatial(interval=[-30,30])
+    # features_scaling=FeatureScaling()
+    # labels_scaling=LabelScaling()
+
+    data_pre_process=DataPreProcess(raw_data_dir,labels_path,labels_name,dataset_dir)
\ No newline at end of file
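A minimal sketch of consuming one generated sample (the file name is hypothetical; the keys match the np.savez call above):

    import numpy as np

    d = np.load("/data/SEMS-model-training/dataset/raw_dataset/12345.npz")  # hypothetical furnace number
    print(d["rawSpectralData"].shape)                   # (frames, spectral_dim, spatial_dim)
    print(dict(zip(d["labelNames"], d["rawLabels"])))   # label name -> measured value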