From 78be44d9112c84eac8e20fbd65ca680faf323d2b Mon Sep 17 00:00:00 2001 From: Shivam Khandelwal <shivam@aicrowd.com> Date: Fri, 28 May 2021 03:39:11 +0000 Subject: [PATCH] Missed siammot/data/ due to gitignore --- .../build_augmentation.cpython-37.pyc | Bin 0 -> 2400 bytes .../image_augmentation.cpython-37.pyc | Bin 0 -> 6391 bytes .../video_augmentation.cpython-37.pyc | Bin 0 -> 6125 bytes .../augmentation/build_augmentation.py | 85 +++++++ .../augmentation/image_augmentation.py | 187 ++++++++++++++ .../augmentation/video_augmentation.py | 187 ++++++++++++++ .../data/adapters/handler/data_filtering.py | 140 +++++++++++ .../siammot/data/adapters/utils/data_utils.py | 62 +++++ .../data/adapters/utils/dataset_info.py | 49 ++++ .../data/build_inference_data_loader.py | 56 +++++ .../siammot/data/build_train_data_loader.py | 77 ++++++ siam-mot/siammot/data/image_dataset.py | 232 ++++++++++++++++++ siam-mot/siammot/data/ingestion/ingest_mot.py | 197 +++++++++++++++ .../siammot/data/ingestion/ingest_prim_air.py | 127 ++++++++++ siam-mot/siammot/data/video_dataset.py | 195 +++++++++++++++ 15 files changed, 1594 insertions(+) create mode 100644 siam-mot/siammot/data/adapters/augmentation/__pycache__/build_augmentation.cpython-37.pyc create mode 100644 siam-mot/siammot/data/adapters/augmentation/__pycache__/image_augmentation.cpython-37.pyc create mode 100644 siam-mot/siammot/data/adapters/augmentation/__pycache__/video_augmentation.cpython-37.pyc create mode 100644 siam-mot/siammot/data/adapters/augmentation/build_augmentation.py create mode 100644 siam-mot/siammot/data/adapters/augmentation/image_augmentation.py create mode 100644 siam-mot/siammot/data/adapters/augmentation/video_augmentation.py create mode 100644 siam-mot/siammot/data/adapters/handler/data_filtering.py create mode 100644 siam-mot/siammot/data/adapters/utils/data_utils.py create mode 100644 siam-mot/siammot/data/adapters/utils/dataset_info.py create mode 100644 siam-mot/siammot/data/build_inference_data_loader.py create mode 100644 siam-mot/siammot/data/build_train_data_loader.py create mode 100644 siam-mot/siammot/data/image_dataset.py create mode 100644 siam-mot/siammot/data/ingestion/ingest_mot.py create mode 100644 siam-mot/siammot/data/ingestion/ingest_prim_air.py create mode 100644 siam-mot/siammot/data/video_dataset.py diff --git a/siam-mot/siammot/data/adapters/augmentation/__pycache__/build_augmentation.cpython-37.pyc b/siam-mot/siammot/data/adapters/augmentation/__pycache__/build_augmentation.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bb1cf3b9ca47c6a77d5d63802e13b9cbd6604614 GIT binary patch literal 2400 zcmZt{%Wm67kV{e$MN76MyGfe|P!vH?6|kMA4bnrJM1I6kU`vKXrzz25OKEADHXqKC z3KEw*DW?MYhxXW8zh`?1&{KXPr_L-TTQ0K1%+Act?#|AO8`Y|*!1(9!-@m>C>R)7L zLkHp!K+FI!g%Mu~h)n|3Rs+q}0^Qbwf?WuTb}=Z~C4xGtZv<t#9GJEVyvB5YA*iSd zQG%+y2$BU>V#XU{FYPF-%*;0mGu_%mv+GRnE6vJ#0#`LlVyW*r!84D!QO}LNGgqvf z3pOJ^;!nLKak;oYCk;-y8^>N0w#K8t4HI$Yd~q*I=H%D@C*SbLe75K2`C88jSrj~p zxOWzXiQ_-^y;D(>CRyAG<HLvtE^n&Bup-M1<A@`*vD3SA_wIXmgwnA@;U&Ics38ku z834V#2@qG{rHo`6fDTYd2|rAgW967+MLDZkiK&?(X)P<uS<g&47qSI87qd#LGVMg= zze~Q9X>v9)UC!mK5Z;oMIX!g&P9>QEPzEpo762+zr;=4+t{yLDi;`c;mXcapPR(>7 ztuXzqp4F22aYL>xOT3aUvBFy|t0$kNwd6{wPe^h#Edc&B)za!jO+HJDX(?TtXvwwI zV8yp2xt`pZD4=mYtxw3vn2@ZIHX!z;&ItS>$CP;YP!znt8M#eem;>&Oj*<{UEQ}#U zggbE}%-BiB+`;Gy?P%;aHK7Nt15BJSQBEQ{7;y+llL-BkM}y`~QD}Ge4=k~`*Y41M z+uo*Duhs4V-+DgJn;X6M&J(M%-R}!yv)ci!z9r0l%R1<_tai5}v?mAKqO#XThVHia z+LowIxsBa}9^LPCH$;82ySLwiLoSr67dLOik=2WfkuA3Nx?8PXVQ#go)^4}8wcQhS 
zNVADlkj5gyfIQ?lR+&6UueIyQkI35?iG9h%D4pdt-os-b(j<s@a2uMWmNRIiJ9Ndj zkd54iXR@oH(62lh`mw7pfJ!S}DKIe+m^NDEB<Q05LU>75rX9vWKjknycNv-1eSnUc z!!R<3VeiObcP|MMuclTDpqDNp06TV*zU06p)%N=Sh=XIvA_V^gDMPSvH0IF6YzyEP z?t<7-;PzLj%*HM7iYS=1N`dQ8_7QRIL;m+Ir1GhLtdomCG7mQsJDDTApQtAS5!(B} za|y{hE)Plr?U^b&a0b?iI0m-iA}mI?i4xomqO>2uSs-F#JyaVYEqMy|8TE7SVjZ_q zv$Yomh?Q*e3mlro@@w=Ag$42(lzo%>mHn`fpxs4STj0NLPkRb2@D6h(P>??ceky{o z%b11zPw*0s{mEfis;!a;Q{Z%+qhSu6F4~nG_Id*sq29b0^<Sk070@36fmMva^ps{n zyK1S5px=Tfsr?KEE)rzT)A{5x7Gyf0mDAM%`q|;?L3Su;Y!^795h-mU#Y8ze_#m)Q z>DZGWhOT;COOWp~K5$Rt5K3$8{_ziV{o{TnUwWz`a7rsUkHGIcouC^el5m<X<W2b@ zq^@NfNaBsK7@?-y8Cz#8?$2pn6dNhn7E74o=$+YXI!_UbM2jQJFHxDKh`fx&Xo#Lv zSvx24IC^;G;EltN5{UT(Deiy}CJ_T9Fb4*nZgbpK0v+fGL+#oSgvq79oYCa@=EM;O zkrW3}lMI=beMxqaL=*Wf%Dzn*qkn@+94wWmC@_Y+yi1+#QC0*?)5ay$KSat@U%L$C z1M`~AsLu}!7n?fAz1rA*1FioKUfZKr=UQKq)Y3vKE`sC+QcMBPSnG?(gN_DDe!wrK zSEnFB-w;MnpB-$}Ff6te!A6ep6`&rqM4Z68J8FL5-$ggR5Z+aD0PkRMhvE`RvBTMB z`Y138SBO#5MklfVHByFXpqUZ<?#0tTk7|;L0@c4q<qc5XoI>^EbErN@^r7WJRuJ-* z2W8?40-gh4ow1D*gf_Gbl-n~<ZZyWmn?U-6SD2d$btGGC2IkXJKMYvZA7Z^3_^0*% z)!=W=Wl>bNZ-oeFGDA%GEmH0Sawg!gK>A<7KmBlge)Tqu0wA2CFZKcBH*<-_pF%Hk z;3GDel3M-`He4&<v^D;K_G&uZz#C`N0Xwgp5H)5|Vv&x<t?^4F33XbXXmKVP@d$Tk zG2+i?Kg)<4uUNu)N|4Z#ln{R6pCfx6B>?VB;Q6~0ACezVQR0I$l%VM6(@f~$7E1Uw zHIjG(C60tx;>U;Onczp%GIefdlgxm;e2isWAoDEl<GB9yGXGbyOLihAaN9+2lLPxT z)_!ATZ{xjxqIc&yZ%S&Zui(2_BwKsz-kPgircWwSxK@11i=~RLe9Dm;O3l$1;o{43 zx}1Fv%i{?Su_Xy3ha0TT863@MXCI$W`7>1ZiGso$j;+}5Fx_}&6WA>1nOgU<&tM;t a&t@yqCsKrn>SeuprFyNpTD^|n)&Bvs!V(Gq literal 0 HcmV?d00001 diff --git a/siam-mot/siammot/data/adapters/augmentation/build_augmentation.py b/siam-mot/siammot/data/adapters/augmentation/build_augmentation.py new file mode 100644 index 0000000..147c04c --- /dev/null +++ b/siam-mot/siammot/data/adapters/augmentation/build_augmentation.py @@ -0,0 +1,85 @@ +from .video_augmentation import SiamVideoResize, \ + SiamVideoColorJitter, SiamVideoCompressionAugment, SiamVideoMotionAugment, \ + SiamVideoMotionBlurAugment, SiamVideoRandomHorizontalFlip, VideoTransformer +from .image_augmentation import ToTensor, ToBGR255 + +import maskrcnn_benchmark.data.transforms as T + + +def build_siam_augmentation(cfg, is_train=True, modality='video'): + + motion_limit = 0.0 + motion_blur_prob = 0.0 + compression_limit = 0.0 + if is_train: + min_size = cfg.INPUT.MIN_SIZE_TRAIN + max_size = cfg.INPUT.MAX_SIZE_TRAIN + flip_horizontal_prob = 0.5 # cfg.INPUT.FLIP_PROB_TRAIN + brightness = cfg.INPUT.BRIGHTNESS + contrast = cfg.INPUT.CONTRAST + saturation = cfg.INPUT.SATURATION + hue = cfg.INPUT.HUE + + if modality == 'image': + motion_limit = cfg.INPUT.MOTION_LIMIT + motion_blur_prob = cfg.INPUT.MOTION_BLUR_PROB + compression_limit = cfg.INPUT.COMPRESSION_LIMIT + + else: + min_size = cfg.INPUT.MIN_SIZE_TEST + max_size = cfg.INPUT.MAX_SIZE_TEST + flip_horizontal_prob = 0.0 + brightness = 0.0 + contrast = 0.0 + saturation = 0.0 + hue = 0.0 + + amodal = cfg.INPUT.AMODAL + SIZE_DIVISIBILITY = cfg.DATALOADER.SIZE_DIVISIBILITY + to_bgr255 = cfg.INPUT.TO_BGR255 + + video_color_jitter = SiamVideoColorJitter( + brightness=brightness, + contrast=contrast, + saturation=saturation, + hue=hue, + ) + + normalize_transform = T.Normalize( + mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=to_bgr255 + ) + + transform = Compose( + [ + video_color_jitter, + SiamVideoMotionBlurAugment(motion_blur_prob), + SiamVideoCompressionAugment(compression_limit), + SiamVideoMotionAugment(motion_limit, amodal), + SiamVideoResize(min_size, max_size, SIZE_DIVISIBILITY), + 
SiamVideoRandomHorizontalFlip(prob=flip_horizontal_prob), + # PIL image + VideoTransformer(ToTensor()), + # Torch tensor, CHW (RGB format), and range from [0, 1] + # VideoTransformer(ToBGR255(to_bgr255=to_bgr255)) + VideoTransformer(normalize_transform), + ] + ) + return transform + + +class Compose(object): + def __init__(self, transforms): + self.transforms = transforms + + def __call__(self, image, target=None): + for t in self.transforms: + image, target = t(image, target) + return image, target + + def __repr__(self): + format_string = self.__class__.__name__ + "(" + for t in self.transforms: + format_string += "\n" + format_string += " {0}".format(t) + format_string += "\n)" + return format_string \ No newline at end of file diff --git a/siam-mot/siammot/data/adapters/augmentation/image_augmentation.py b/siam-mot/siammot/data/adapters/augmentation/image_augmentation.py new file mode 100644 index 0000000..adbc582 --- /dev/null +++ b/siam-mot/siammot/data/adapters/augmentation/image_augmentation.py @@ -0,0 +1,187 @@ +import torch +import random +import numpy as np +from PIL import Image +from torchvision.transforms import functional as F + +import imgaug.augmenters as iaa + +from maskrcnn_benchmark.structures.bounding_box import BoxList + + +class ImageResize(object): + def __init__(self, min_size, max_size, size_divisibility): + if not isinstance(min_size, (list, tuple)): + min_size = (min_size,) + self.min_size = min_size + self.max_size = max_size + self.size_divisibility = size_divisibility + + # modified from torchvision to add support for max size + def get_size(self, image_size): + w, h = image_size + size = random.choice(self.min_size) + max_size = self.max_size + if max_size is not None: + min_original_size = float(min((w, h))) + max_original_size = float(max((w, h))) + if max_original_size / min_original_size * size > max_size: + size = int(round(max_size * min_original_size / max_original_size)) + + if w < h: + ow = size + oh = int(size * h / w) + else: + oh = size + ow = int(size * w / h) + + if self.size_divisibility > 0: + oh = (int(oh / self.size_divisibility) * self.size_divisibility) + ow = (int(ow / self.size_divisibility) * self.size_divisibility) + + return (oh, ow) + + def __call__(self, image, target=None): + size = self.get_size(image.size) + image = F.resize(image, size) + if target is None: + return image, target + target = target.resize(image.size) + return image, target + + +class ImageCropResize(object): + """ + Crop a patch from the image and resize to its original size + """ + def __init__(self, crop_limit=None, amodal=False): + self.crop_limit = crop_limit + self.amodal = amodal + + def remove_invisible_box(self, box: BoxList): + """ + Remove boxes that are not visible (out of image boundary) after motion augmentation + """ + bbox = box.bbox.clone() + xmin_clip = bbox[:, 0].clamp(min=0, max=box.size[0] - 1) + ymin_clip = bbox[:, 1].clamp(min=0, max=box.size[1] - 1) + xmax_clip = bbox[:, 2].clamp(min=0, max=box.size[0] - 1) + ymax_clip = bbox[:, 3].clamp(min=0, max=box.size[1] - 1) + keep = (xmax_clip > xmin_clip) & (ymax_clip > ymin_clip) + + return box[keep] + + def boxlist_crop(self, box: BoxList, x1, y1, x2, y2): + """ + Adjust the coordinate of the bounding box within + image crop specified by (x1, y1, x2, y2) + """ + + w, h = (x2 - x1), (y2 - y1) + xmin, ymin, xmax, ymax = box._split_into_xyxy() + cropped_xmin = (xmin - x1) + cropped_ymin = (ymin - y1) + cropped_xmax = (xmax - x1) + cropped_ymax = (ymax - y1) + cropped_bbox = torch.cat( + 
(cropped_xmin, cropped_ymin, cropped_xmax, cropped_ymax), dim=-1 + ) + cropped_box = BoxList(cropped_bbox, (w, h), mode="xyxy") + for k, v in box.extra_fields.items(): + cropped_box.add_field(k, v) + + if self.amodal: + # amodal allows the corners of bbox go beyond image boundary + cropped_box = self.remove_invisible_box(cropped_box) + else: + # the corners of bbox need to be within image boundary for non-amodal training + cropped_box = cropped_box.clip_to_image(remove_empty=True) + return cropped_box.convert(box.mode) + + def __call__(self, image, target): + w, h = image.size + + tl_x = int(w * (random.random() * self.crop_limit)) + tl_y = int(h * (random.random() * self.crop_limit)) + br_x = int(w - w * (random.random() * self.crop_limit)) + # keep aspect ratio + br_y = int((h / w) * (br_x - tl_x) + tl_y) + + if len(target) > 0: + box = target.bbox + box_w = box[:, 2] - box[:, 0] + box_h = box[:, 3] - box[:, 1] + box_area = box_h * box_w + max_area_idx = torch.argmax(box_area, dim=0) + max_motion_limit_w = int(box_w[max_area_idx] * 0.25) + max_motion_limit_h = int(box_h[max_area_idx] * 0.25) + + # make sure at least one bounding box is preserved + # after motion augmentation + tl_x = min(tl_x, max_motion_limit_w) + tl_y = min(tl_y, max_motion_limit_h) + br_x = max(br_x, w-max_motion_limit_w) + br_y = max(br_y, h-max_motion_limit_h) + + assert (tl_x < br_x) and (tl_y < br_y) + + crop = F.crop(image, tl_y, tl_x, (br_y-tl_y), (br_x-tl_x)) + crop = F.resize(crop, (h, w)) + if len(target) > 0: + target = self.boxlist_crop(target, tl_x, tl_y, br_x, br_y) + target = target.resize(image.size) + + return crop, target + + +class ImageMotionBlur(object): + """ + Perform motion augmentation to an image + """ + def __init__(self): + motion_blur = iaa.MotionBlur(k=10, angle=[-30, 30]) + gaussian_blur = iaa.GaussianBlur(sigma=(0.0, 2.0)) + + self.blur_func_pool = [motion_blur, gaussian_blur] + + pass + + def __call__(self, image): + blur_id = random.choice(list(range(0, len(self.blur_func_pool)))) + blur_func = self.blur_func_pool[blur_id] + np_image = np.asarray(image) + blurred_image = blur_func.augment_image(np_image) + pil_image = Image.fromarray(np.uint8(blurred_image)) + return pil_image + + +class ImageCompression(object): + """ + Perform JPEG compression augmentation to an image + """ + def __init__(self, max_compression): + self.max_compression = max_compression + + def __call__(self, image): + ratio = random.uniform(0, 1) + compression = min(100, int(ratio * self.max_compression)) + np_image = np.asarray(image) + compressed_image = iaa.arithmetic.compress_jpeg(np_image, compression) + pil_image = Image.fromarray(np.uint8(compressed_image)) + return pil_image + + +class ToTensor(object): + def __call__(self, image, target=None): + return F.to_tensor(image), target + + +class ToBGR255(object): + def __init__(self, to_bgr255=True): + self.to_bgr255 = to_bgr255 + + def __call__(self, image, target=None): + if self.to_bgr255: + image = image[[2, 1, 0]] * 255 + return image, target + diff --git a/siam-mot/siammot/data/adapters/augmentation/video_augmentation.py b/siam-mot/siammot/data/adapters/augmentation/video_augmentation.py new file mode 100644 index 0000000..0f267bf --- /dev/null +++ b/siam-mot/siammot/data/adapters/augmentation/video_augmentation.py @@ -0,0 +1,187 @@ +import torch +import random +from torchvision.transforms import functional as F +from torchvision.transforms import ColorJitter as ImageColorJitter + +from .image_augmentation import ImageResize, ImageCropResize, \ + 
ImageMotionBlur, ImageCompression + + +class VideoTransformer(object): + def __init__(self, transform_fn=None): + if transform_fn is None: + raise KeyError('Transform function should not be None.') + self.transform_fn = transform_fn + + def __call__(self, video, target=None): + """ + A data transformation wrapper for video + :param video: a list of images + :param target: a list of BoxList (per image) + """ + if not isinstance(video, (list, tuple)): + return self.transform_fn(video, target) + + new_video = [] + new_target = [] + for (image, image_target) in zip(video, target): + (image, image_target) = self.transform_fn(image, image_target) + new_video.append(image) + new_target.append(image_target) + + return new_video, new_target + + +class SiamVideoResize(ImageResize): + def __init__(self, min_size, max_size, size_divisibility): + super(SiamVideoResize, self).__init__(min_size, max_size, size_divisibility) + + def __call__(self, video, target=None): + + if not isinstance(video, (list, tuple)): + return super(SiamVideoResize, self).__call__(video, target) + + assert len(video) >= 1 + new_size = self.get_size(video[0].size) + + new_video = [] + new_target = [] + for (image, image_target) in zip(video, target): + (image, image_target) = self._resize(image, new_size, image_target) + new_video.append(image) + new_target.append(image_target) + + return new_video, new_target + + def _resize(self, image, size, target=None): + image = F.resize(image, size) + target = target.resize(image.size) + return image, target + + +class SiamVideoRandomHorizontalFlip(object): + def __init__(self, prob=0.5): + self.prob = prob + + def __call__(self, video, target=None): + + if not isinstance(video, (list, tuple)): + return video, target + + new_video = [] + new_target = [] + # All frames should have the same flipping operation + if random.random() < self.prob: + for (image, image_target) in zip(video, target): + new_video.append(F.hflip(image)) + new_target.append(image_target.transpose(0)) + else: + new_video = video + new_target = target + return new_video, new_target + + +class SiamVideoColorJitter(ImageColorJitter): + def __init__(self, + brightness=None, + contrast=None, + saturation=None, + hue=None): + super(SiamVideoColorJitter, self).__init__(brightness, contrast, saturation, hue) + + def __call__(self, video, target=None): + # Color jitter only applies for Siamese Training + if not isinstance(video, (list, tuple)): + return video, target + + idx = random.choice((0, 1)) + # all frames in the video should go through the same transformation + transform = self.get_params(self.brightness, self.contrast, + self.saturation, self.hue) + new_video = [] + new_target = [] + for i, (image, image_target) in enumerate(zip(video, target)): + if i == idx: + image = transform(image) + new_video.append(image) + new_target.append(image_target) + + return new_video, new_target + + +class SiamVideoMotionAugment(object): + def __init__(self, motion_limit=None, amodal=False): + # maximum motion augmentation + self.motion_limit = min(0.1, motion_limit) + if motion_limit is None: + self.motion_limit = 0 + self.motion_augment = ImageCropResize(self.motion_limit, amodal) + + def __call__(self, video, target=None): + + # Motion augmentation only applies for Siamese Training + if not isinstance(video, (list, tuple)) or self.motion_limit == 0: + return video, target + + new_video = [] + new_target = [] + # Only 1 frame go through the motion augmentation, + # the other unchanged + idx = random.choice((0, 1)) + for i, (image, 
image_target) in enumerate(zip(video, target)): + if i == idx: + (image, image_target) = self.motion_augment(image, image_target) + new_video.append(image) + new_target.append(image_target) + + return new_video, new_target + + +class SiamVideoMotionBlurAugment(object): + def __init__(self, motion_blur_prob=None): + self.motion_blur_prob = motion_blur_prob + if motion_blur_prob is None: + self.motion_blur_prob = 0.0 + self.motion_blur_func = ImageMotionBlur() + + def __call__(self, video, target): + # Blur augmentation only applies for Siamese Training + if not isinstance(video, (list, tuple)) or self.motion_blur_prob == 0.0: + return video, target + + new_video = [] + new_target = [] + idx = random.choice((0, 1)) + for i, (image, image_target) in enumerate(zip(video, target)): + if i == idx: + random_prob = random.uniform(0, 1) + if random_prob < self.motion_blur_prob: + image = self.motion_blur_func(image) + new_video.append(image) + new_target.append(image_target) + + return new_video, new_target + + +class SiamVideoCompressionAugment(object): + def __init__(self, max_compression=None): + self.max_compression = max_compression + if max_compression is None: + self.max_compression = 0.0 + self.compression_func = ImageCompression(self.max_compression) + + def __call__(self, video, target): + # Compression augmentation only applies for Siamese Training + if not isinstance(video, (list, tuple)) or self.max_compression == 0.0: + return video, target + + idx = random.choice((0, 1)) + new_video = [] + new_target = [] + for i, (image, image_target) in enumerate(zip(video, target)): + if i == idx: + image = self.compression_func(image) + new_video.append(image) + new_target.append(image_target) + + return new_video, new_target \ No newline at end of file diff --git a/siam-mot/siammot/data/adapters/handler/data_filtering.py b/siam-mot/siammot/data/adapters/handler/data_filtering.py new file mode 100644 index 0000000..9c51b8d --- /dev/null +++ b/siam-mot/siammot/data/adapters/handler/data_filtering.py @@ -0,0 +1,140 @@ +import numpy as np + +from gluoncv.torch.data.gluoncv_motion_dataset.dataset import AnnoEntity + +from siammot.utils.entity_utils import bbs_iou + + +def build_data_filter_fn(dataset_key: str, *args, **kwargs): + """ + Get dataset specific filter function list, if there is any + """ + filter_fn = None + if dataset_key == 'CRP': + filter_fn = CRPFilter(*args, **kwargs) + elif dataset_key.startswith('MOT'): + filter_fn = MOTFilter(*args, **kwargs) + elif dataset_key == 'AOT': + filter_fn = AOTFilter(*args, **kwargs) + return filter_fn + + +class BaseFilter: + def __init__(self): + pass + + # the default filter does not filter any entity, which is technically doing nothing + def _filter(self, entity: AnnoEntity, ignored_gt_entities=None): + raise False + + def filter(self, entity:AnnoEntity, ignored_gt_entities=None): + return self._filter(entity, ignored_gt_entities) + + def __call__(self, entities: [AnnoEntity], ignored_entities=None, meta_data=None): + """ + Check each entity whether it is valid or should be filtered (ignored). 
+ :param entities: A list of entities (for a single frame) to be evaluated + :param ignored_entities: A list of ignored entities or a binary mask indicating ignored regions + :param meta_data: The meta data for the frame (or video) + :return: A list of valid entities and a list of filtered (ignored) entities + """ + valid_entities = [] + filtered_entities = [] + + for entity in entities: + if self._filter(entity, ignored_entities): + filtered_entities.append(entity) + else: + valid_entities.append(entity) + + return valid_entities, filtered_entities + + +class CRPFilter(BaseFilter): + """ + A class for filtering JTA dataset entities during evaluation + A gt entity will be filtered (ignored) if its id is -1 (negative) + A predicted entity will be filtered (ignored) if it is matched to a ignored ground truth entity + """ + def __init__(self, iou_thresh=0.2, is_train=False): + """ + :param iou_thresh: a predicted entity which overlaps with any ignored gt entity with at least + iou_thresh would be filtered + """ + self.iou_thresh = iou_thresh + + def _filter(self, entity: AnnoEntity, ignored_gt_entities=None): + if ignored_gt_entities is None: + if entity.id < 0: + return True + else: + for entity_ in ignored_gt_entities: + if bbs_iou(entity, entity_) >= self.iou_thresh: + return True + return False + + +class MOTFilter(BaseFilter): + """ + A class for filtering MOT dataset entities + A gt entity will be filtered (ignored) if its visibility ratio is very low + A predicted entity will be filtered (ignored) if it is matched to a ignored ground truth entity + """ + def __init__(self, visibility_thresh=0.1, iou_thresh=0.5, is_train=False): + self.visibility_thresh = visibility_thresh + self.iou_thresh = iou_thresh + self.is_train = is_train + + def _filter(self, entity: AnnoEntity, ignored_gt_entities=None): + if ignored_gt_entities is None: + if self.is_train: + # any entity whose visibility is below the pre-defined + # threshold should be filtered out + # meanwhile, any entity whose class does not have label + # needs to be filtered + if entity.blob['visibility'] < self.visibility_thresh or \ + not any(k in ('person', '2', '7') for k in entity.labels): + return True + else: + if 'person' not in entity.labels or int(entity.id) < 0: + return True + else: + for entity_ in ignored_gt_entities: + if bbs_iou(entity, entity_) >= self.iou_thresh: + return True + return False + + +class AOTFilter(BaseFilter): + """ + A class for filtering AOT entities + A gt entity will be filtered if it falls into one the following criterion + 1. tracking id is not Helicopter1 or Airplane1 + 2. 
range distance is larger than 1200 + """ + + def __init__(self, range_distance_thresh=1200, iou_thresh=0.2, is_train=False): + self.range_distance_thresh = range_distance_thresh + self.iou_thresh = iou_thresh + self.is_train = is_train + + def _filter(self, entity: AnnoEntity, ignored_gt_entities=None): + if ignored_gt_entities is None: + range_distance_m = np.inf + if 'range_distance_m' in entity.blob: + range_distance_m = entity.blob['range_distance_m'] + + labels = [] + if entity.labels is not None: + labels = entity.labels + + if ('intruder' not in labels) or \ + (range_distance_m >= self.range_distance_thresh): + return True + else: + for entity_ in ignored_gt_entities: + if entity_.bbox is not None: + if bbs_iou(entity, entity_) >= self.iou_thresh: + return True + return False + diff --git a/siam-mot/siammot/data/adapters/utils/data_utils.py b/siam-mot/siammot/data/adapters/utils/data_utils.py new file mode 100644 index 0000000..2d2ce35 --- /dev/null +++ b/siam-mot/siammot/data/adapters/utils/data_utils.py @@ -0,0 +1,62 @@ +import os + +from gluoncv.torch.data.gluoncv_motion_dataset.dataset import GluonCVMotionDataset +from pycocotools.coco import COCO + +from .dataset_info import dataset_maps + + +def load_motion_anno(dataset_folder, + anno_file, + split_file, + set=None, + ): + """ + Load GluonCVMotionDataset format annotations for downstream training / testing + """ + + dataset = GluonCVMotionDataset(anno_file, + root_path=dataset_folder, + split_file=split_file + ) + + if set == 'train': + dataset = list(dataset.train_samples) + elif set == 'val': + dataset = list(dataset.val_samples) + elif set == 'test': + dataset = list(dataset.test_samples) + + return dataset + + +def load_coco_anno(dataset_folder, + anno_file): + + dataset_anno_path = os.path.join(dataset_folder, anno_file) + dataset = COCO(dataset_anno_path) + return dataset + + +def load_dataset_anno(cfg, dataset_key, set=None): + dataset_folder, anno_file, split_file, modality = dataset_maps[dataset_key] + + dataset_info = dict() + dataset_info['modality'] = modality + + dataset_folder = os.path.join(cfg.DATASETS.ROOT_DIR, dataset_folder) + if modality == 'video': + dataset = load_motion_anno(dataset_folder, + anno_file, + split_file, + set) + elif modality == 'image': + dataset = load_coco_anno(dataset_folder, + anno_file) + image_folder = os.path.join(dataset_folder, split_file) + dataset_info['image_folder'] = image_folder + else: + raise ValueError("dataset has to be video or image.") + + return dataset, dataset_info + diff --git a/siam-mot/siammot/data/adapters/utils/dataset_info.py b/siam-mot/siammot/data/adapters/utils/dataset_info.py new file mode 100644 index 0000000..36527d8 --- /dev/null +++ b/siam-mot/siammot/data/adapters/utils/dataset_info.py @@ -0,0 +1,49 @@ +dataset_maps = dict() +""" +each item in the dataset maps are a list of the following info +( +dataset_folder, +annotation file name (video dataset) / path of annotation file (image dataset), +split file name (video dataset) / path of image folder (image dataset) , +modality +) +""" +dataset_maps['TAO'] = ['TAO', + 'anno_person.json', + 'splits_person.json', + 'video'] + +dataset_maps['CRP'] = ['caltech_roadside_pedestrians', + 'anno.json', + 'splits.json', + 'video'] + +dataset_maps['MOT17_DPM'] = ['MOT17', + 'anno.json', + 'splits_DPM.json', + 'video'] + +dataset_maps['MOT17'] = ['MOT17', + 'anno.json', + 'splits.json', + 'video'] + +dataset_maps['AOT'] = ['airbone_object_tracking', + 'anno.json', + 'splits.json', + 'video'] + 
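+# Hypothetical example (not part of this patch): a new video dataset would be
+# registered with the same 4-element layout described in the module docstring above:
+#
+# dataset_maps['MY_VIDEOS'] = ['my_videos',     # folder under cfg.DATASETS.ROOT_DIR
+#                              'anno.json',     # GluonCVMotionDataset annotation file
+#                              'splits.json',   # split file
+#                              'video']         # modality: 'video' or 'image'
+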
+dataset_maps['COCO17_train'] = ['mscoco', + 'annotations/MSCOCO2017_train_person.json', + 'images/train2017', # all raw images would be in dataset_root/mscoco/images/train2017 + 'image'] + +dataset_maps['crowdhuman_train_fbox'] = ['CrowdHuman', + 'annotations/annotation_train_fbox.json', + 'Images', # all raw images would be in dataset_root/CrowdHuman/Images + 'image'] + +dataset_maps['crowdhuman_train_vbox'] = ['CrowdHuman', + 'annotations/annotation_train_vbox.json', + 'Images', + 'image'] \ No newline at end of file diff --git a/siam-mot/siammot/data/build_inference_data_loader.py b/siam-mot/siammot/data/build_inference_data_loader.py new file mode 100644 index 0000000..970fca4 --- /dev/null +++ b/siam-mot/siammot/data/build_inference_data_loader.py @@ -0,0 +1,56 @@ +import torch +import torch.utils.data as data + +from gluoncv.torch.data.gluoncv_motion_dataset.dataset import DataSample +from maskrcnn_benchmark.structures.bounding_box import BoxList + + +class InferenceVideoData(data.Dataset): + """ + Split the video into small chunks (in an non-overlapping fashion) for inference + """ + + def __init__(self, video: DataSample, clip_len=1, transforms=None): + """ + Construct a data loader for inference + :param video: a video stream in DataSample format + :param clip_len: the length of video clips + :param transforms: transform function for video pre-processing + """ + self.video = video + self.video_reader = video.get_data_reader() + self.clip_len = clip_len + self.transforms = transforms + self.clip_idxs = list(range(0, len(self.video), self.clip_len)) + + def __getitem__(self, id): + video_clip = [] + # this is needed for transformation + dummy_boxes = [] + timestamps = [] + start_idx = self.clip_idxs[id] + end_idx = min(len(self.video), start_idx + self.clip_len) + for frame_idx in range(start_idx, end_idx): + (im, timestamp, _) = self.video_reader[frame_idx] + dummy_bbox = torch.tensor([[0, 0, 1, 1]]) + dummy_boxlist = BoxList(dummy_bbox, im.size, mode='xywh') + + video_clip.append(im) + timestamps.append(torch.tensor(timestamp)) + dummy_boxes.append(dummy_boxlist) + + if self.transforms is not None: + video_clip, _ = self.transforms(video_clip, dummy_boxes) + + return torch.stack(video_clip), start_idx, torch.stack(timestamps) + + def __len__(self): + return len(self.clip_idxs) + + +def build_video_loader(cfg, video: DataSample, transforms): + clip_len = cfg.INFERENCE.CLIP_LEN + videodata = InferenceVideoData(video, clip_len=clip_len, transforms=transforms) + videoloader = data.DataLoader(videodata, num_workers=4, batch_size=1, shuffle=False) + + return videoloader diff --git a/siam-mot/siammot/data/build_train_data_loader.py b/siam-mot/siammot/data/build_train_data_loader.py new file mode 100644 index 0000000..2017413 --- /dev/null +++ b/siam-mot/siammot/data/build_train_data_loader.py @@ -0,0 +1,77 @@ +import torch.utils.data + +from maskrcnn_benchmark.utils.comm import get_world_size +from maskrcnn_benchmark.data.build import make_data_sampler, make_batch_data_sampler +from maskrcnn_benchmark.data.datasets.concat_dataset import ConcatDataset + +from .video_dataset import VideoDataset, VideoDatasetBatchCollator +from .image_dataset import ImageDataset +from .adapters.utils.data_utils import load_dataset_anno +from .adapters.augmentation.build_augmentation import build_siam_augmentation +from .adapters.handler.data_filtering import build_data_filter_fn + + +def build_dataset(cfg): + """ + + """ + + dataset_list = cfg.DATASETS.TRAIN + if not isinstance(dataset_list, (list, 
tuple)): + raise RuntimeError( + "dataset_list should be a list of strings, got {}".format(dataset_list) + ) + + datasets = [] + for dataset_key in dataset_list: + dataset_anno, dataset_info = load_dataset_anno(cfg, dataset_key) + modality = dataset_info['modality'] + transforms = build_siam_augmentation(cfg, is_train=True, modality=modality) + data_filter_fn = build_data_filter_fn(dataset_key, is_train=True) + + if modality == 'image': + assert 'image_folder' in dataset_info + _dataset = ImageDataset(dataset_anno, + dataset_info['image_folder'], + transforms=transforms, + frames_per_image=cfg.VIDEO.RANDOM_FRAMES_PER_CLIP, + amodal=cfg.INPUT.AMODAL) + else: + _dataset = VideoDataset(dataset_anno, + sampling_interval=cfg.VIDEO.TEMPORAL_SAMPLING, + clip_len=cfg.VIDEO.TEMPORAL_WINDOW, + transforms=transforms, + filter_fn=data_filter_fn, + frames_in_clip=cfg.VIDEO.RANDOM_FRAMES_PER_CLIP, + amodal=cfg.INPUT.AMODAL) + datasets.append(_dataset) + + dataset = ConcatDataset(datasets) + + return dataset + + +def build_train_data_loader(cfg, is_distributed=False, start_iter=0): + + num_gpus = get_world_size() + + video_clips_per_batch = cfg.SOLVER.VIDEO_CLIPS_PER_BATCH + assert ( + video_clips_per_batch % num_gpus == 0 + ), "SOLVER.VIDEO_CLIPS_PER_BATCH ({}) must be divisible by the number of GPUs ({}) used.".format( + video_clips_per_batch, num_gpus) + + video_clips_per_gpu = video_clips_per_batch // num_gpus + + dataset = build_dataset(cfg) + num_iters = cfg.SOLVER.MAX_ITER + sampler = make_data_sampler(dataset, True, is_distributed) + batch_sampler = make_batch_data_sampler( + dataset, sampler, [], video_clips_per_gpu, num_iters, start_iter + ) + + num_workers = cfg.DATALOADER.NUM_WORKERS + collator = VideoDatasetBatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY) + data_loader = torch.utils.data.DataLoader(dataset, num_workers=num_workers, + batch_sampler=batch_sampler, collate_fn=collator) + return data_loader diff --git a/siam-mot/siammot/data/image_dataset.py b/siam-mot/siammot/data/image_dataset.py new file mode 100644 index 0000000..806e8e6 --- /dev/null +++ b/siam-mot/siammot/data/image_dataset.py @@ -0,0 +1,232 @@ +import torch +import os +from tqdm import tqdm +from PIL import Image + +import torch.utils.data as data +from pycocotools.coco import COCO +from gluoncv.utils.bbox import bbox_xywh_to_xyxy, bbox_clip_xyxy + +from maskrcnn_benchmark.structures.bounding_box import BoxList + + +class ImageDataset(data.Dataset): + def __init__(self, + dataset: COCO, + image_dir, + transforms=None, + frames_per_image=1, + amodal=False, + skip_empty=True, + min_object_area=0, + use_crowd=False, + include_bg=False, + ): + """ + :param dataset: the ingested dataset with COCO-format + :param transforms: image transformation + :param frames_per_image: how many image copies are generated from a single image + :param amodal: whether to use amodal ground truth (no image boundary clipping) + :param include_bg: whether to include the full background images during training + """ + + self.dataset = dataset + self.image_dir = image_dir + self.transforms = transforms + self.frames_per_image = frames_per_image + + self._skip_empty = skip_empty + self._min_object_area = min_object_area + self._use_crowd = use_crowd + self._amodal = amodal + self._include_bg = include_bg + self._det_classes = [c['name'] for c in self.dataset.loadCats(self.dataset.getCatIds())] + + # These are tha mapping table of COCO labels + self.json_category_id_to_contiguous_id = { + v: i+1 for i, v in enumerate(self.dataset.getCatIds()) + } + + 
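+        # Note: this maps each raw COCO category id to a contiguous label in
+        # 1..num_classes; label 0 stays reserved for the background class during
+        # training (see entity2target in video_dataset.py).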
self._labels, self._im_aspect_ratios, self._items, self._ids \ + = self._dataset_preprocess() + + self.id_to_img_map = {k: v for k, v in enumerate(self._ids)} + + def __getitem__(self, index): + img_name = self._items[index] + img_path = os.path.join(self.image_dir, img_name) + + img = Image.open(img_path).convert('RGB') + target = self._get_target(img, index) + + # for tracking purposes, two frames are needed + # the pairs would go into random augmentation to generate fake motion + video_clip = [img for _ in range(self.frames_per_image)] + video_target = [target for _ in range(self.frames_per_image)] + + if self.transforms is not None: + video_clip, video_target = self.transforms(video_clip, video_target) + + return video_clip, video_target, img_name + + def _get_target(self, img, index): + + # a list of label (x1, y1, x2, y2, class_id, instance_id) + labels = self._labels[index] + if len(labels) == 0: + assert self._include_bg is True, "The image does not has ground truth" + bbox = torch.as_tensor(labels).reshape(-1, 4) + class_ids = torch.as_tensor(labels) + instance_ids = torch.as_tensor(labels) + empty_boxlist = BoxList(bbox, img.size, mode="xyxy") + empty_boxlist.add_field("labels", class_ids) + empty_boxlist.add_field("ids", instance_ids) + return empty_boxlist + + labels = torch.as_tensor(labels).reshape(-1, 6) + boxes = labels[:, :4] + target = BoxList(boxes, img.size, mode="xyxy") + + class_ids = labels[:, 4].clone().to(torch.int64) + target.add_field("labels", class_ids) + + instance_ids = labels[:, -1].clone().to(torch.int64) + target.add_field("ids", instance_ids) + + if not self._amodal: + target = target.clip_to_image(remove_empty=True) + + return target + + def _dataset_preprocess(self): + items = [] + labels = [] + ids = [] + im_aspect_ratios = [] + image_ids = sorted(self.dataset.getImgIds()) + instance_id = 0 + rm_redundant = 0 + all_amodal = 0 + + for entry in tqdm(self.dataset.loadImgs(image_ids)): + label, num_instances, num_redundant, num_amodal\ + = self._check_load_bbox(entry, instance_id) + if not label and not self._include_bg: + continue + instance_id += num_instances + rm_redundant += num_redundant + all_amodal += num_amodal + labels.append(label) + ids.append(entry['id']) + items.append(entry['file_name']) + im_aspect_ratios.append(float(entry['width']) / entry['height']) + + print('{} / {} valid images...'.format(len(labels), len(image_ids))) + print('{} instances...'.format(instance_id)) + print('{} redundant instances are removed...'.format(rm_redundant)) + print('{} amodal instances...'.format(all_amodal)) + return labels, im_aspect_ratios, items, ids + + def _check_load_bbox(self, entry, instance_id): + """ + Check and load ground-truth labels + """ + entry_id = entry['id'] + entry_id = [entry_id] if not isinstance(entry_id, (list, tuple)) else entry_id + ann_ids = self.dataset.getAnnIds(imgIds=entry_id, iscrowd=None) + objs = self.dataset.loadAnns(ann_ids) + + # check valid bboxes + valid_objs = [] + width = entry['width'] + height = entry['height'] + _instance_count = 0 + _redudant_count = 0 + _amodal_count = 0 + unique_bbs = set() + for obj in objs: + if obj.get('ignore', 0) == 1: + continue + if not self._use_crowd and obj.get('iscrowd', 0): + continue + if self._amodal: + xmin, ymin, xmax, ymax = bbox_xywh_to_xyxy(obj['bbox']) + if xmin < 0 or ymin < 0 or xmax > width or ymax > height: + _amodal_count += 1 + else: + xmin, ymin, xmax, ymax = bbox_clip_xyxy(bbox_xywh_to_xyxy(obj['bbox']), width, height) + + if (xmin, ymin, xmax, ymax) in unique_bbs: + 
_redudant_count += 1 + continue + + box_w = (xmax - xmin) + box_h = (ymax - ymin) + area = box_w * box_h + if area <= self._min_object_area: + continue + + # require non-zero box area + if xmax > xmin and ymax > ymin: + unique_bbs.add((xmin, ymin, xmax, ymax)) + contiguous_cid = self.json_category_id_to_contiguous_id[obj['category_id']] + valid_objs.append([xmin, ymin, xmax, ymax, contiguous_cid, + instance_id+_instance_count]) + _instance_count += 1 + if not valid_objs: + if not self._skip_empty: + # dummy invalid labels if no valid objects are found + valid_objs.append([-1, -1, -1, -1, -1, -1]) + return valid_objs, _instance_count, _redudant_count, _amodal_count + + def __len__(self): + return len(self._items) + + def get_img_info(self, index): + img_id = self.id_to_img_map[index] + img_data = self.dataset.imgs[img_id] + return img_data + + @property + def classes(self): + return self._det_classes + + def get_im_aspect_ratio(self): + return self._im_aspect_ratios + + +if __name__ == "__main__": + + from siammot.configs.defaults import cfg + from siammot.data.video_dataset import VideoDatasetBatchCollator + from siammot.data.adapters.utils.data_utils import load_dataset_anno + from siammot.data.adapters.augmentation.build_augmentation import build_siam_augmentation + + torch.manual_seed(0) + + dataset_anno, dataset_info = load_dataset_anno('COCO17_train') + collator = VideoDatasetBatchCollator() + transforms = build_siam_augmentation(cfg, modality=dataset_info['modality']) + + dataset = ImageDataset(dataset_anno, + dataset_info['image_folder'], + frames_per_image=2, + transforms=transforms, + amodal=True) + + batch_size = 16 + sampler = torch.utils.data.sampler.RandomSampler(dataset) + batch_sampler = torch.utils.data.sampler.BatchSampler( + sampler, batch_size, drop_last=False) + dataloader = data.DataLoader(dataset, + num_workers=4, + batch_sampler=batch_sampler, + collate_fn=collator + ) + import time + tic = time.time() + for iteration, (image, target, image_ids) in enumerate(dataloader): + data_time = time.time() - tic + print("Data loading time: {}".format(data_time)) + tic = time.time() + print(image_ids) \ No newline at end of file diff --git a/siam-mot/siammot/data/ingestion/ingest_mot.py b/siam-mot/siammot/data/ingestion/ingest_mot.py new file mode 100644 index 0000000..cd10128 --- /dev/null +++ b/siam-mot/siammot/data/ingestion/ingest_mot.py @@ -0,0 +1,197 @@ +import argparse +import csv +import configparser +import datetime +import glob +import os + +from PIL import Image +from pathlib import Path + +from gluoncv.torch.data.gluoncv_motion_dataset.dataset import GluonCVMotionDataset, DataSample, AnnoEntity, FieldNames, SplitNames +from gluoncv.torch.data.gluoncv_motion_dataset.utils.ingestion_utils import process_dataset_splits + +# From paper, see table 5 and 6: https://arxiv.org/pdf/1603.00831.pdf +MOT_LABEL_MAP = { + 1: "Pedestrian", + 2: "Person on vehicle", + 3: "Car", + 4: "Bicycle", + 5: "Motorbike", + 6: "Non motorized vehicle", + 7: "Static person", + 8: "Distractor", + 9: "Occluder", + 10: "Occluder on the ground", + 11: "Occluder full", + 12: "Reflection", +} + +DET_OPTIONS = {"SDP", "FRCNN", "DPM"} + + +def sample_from_mot_csv(csv_path, fps, sample=None, mot17=True, has_gt=False): + if sample is None: + id_ = Path(csv_path).stem + sample = DataSample(id_) + else: + sample = sample.get_copy_without_entities() + with open(csv_path, newline='') as f: + reader = csv.reader(f, delimiter=',') + + def coord(x): + return round(float(x)) + + for row in reader: + frame_num = 
int(row[0]) + obj_id = row[1] + x = coord(row[2]) + y = coord(row[3]) + w = coord(row[4]) + h = coord(row[5]) + conf = float(row[6]) + # If not mot17 the last 3 are 3D coords which are usually -1 + # (see pg. 9 https://arxiv.org/pdf/1504.01942.pdf) + if has_gt and mot17: + label = int(row[7]) + visibility = float(row[8]) + else: + label = 1 + visibility = 1 + + label_text = MOT_LABEL_MAP[label] + + # NOTE: Actually all classes that aren't Pedestrian have confidence 0 and so should be ingested + # but are ignored at evaluation time + # i.e. (label != 1 and conf) is never true + assert not (label != 1 and conf) + has_person_label = label_text in ("Pedestrian") + + time_ms = int((frame_num - 1) / fps * 1000) + entity = AnnoEntity(time=time_ms, id=obj_id) + entity.bbox = [x, y, w, h] + blob = { + "frame_csv": frame_num, + "frame_idx": frame_num - 1, + "visibility": visibility + } + entity.labels = {} + # entity.labels["person"] = 1 + if has_person_label: + entity.labels["person"] = 1 + else: + entity.labels[str(label)] = 1 + entity.labels["vis"] = visibility + + entity.confidence = conf + entity.blob = blob + + sample.add_entity(entity) + return sample + + +def main(args, description="Initial ingestion", det_options=None, mot17=True): + if mot17: + if det_options is not None and not all(x in DET_OPTIONS for x in det_options): + raise ValueError("Det options were {} but must be only: {}".format(det_options, DET_OPTIONS)) + if det_options is None: + det_options = DET_OPTIONS + else: + print("Ingesting MOT15, ignoring det options {}".format(det_options)) + det_options = [""] + + dataset_path = args.dataset_path + out_filename = args.anno_name + + out_dataset = GluonCVMotionDataset(out_filename, dataset_path, load_anno=False) + metadata = { + FieldNames.DESCRIPTION: description, + FieldNames.DATE_MODIFIED: str(datetime.datetime.now()), + } + out_dataset.metadata = metadata + + splits = { + "train": os.path.join(out_dataset.data_root_path, "train"), + "test": os.path.join(out_dataset.data_root_path, "test"), # No gt for MOT test + } + + for det_option in det_options: + for split_name, split_path in splits.items(): + subdirs = glob.glob(os.path.join(split_path, "*" + det_option)) + for i, subdir in enumerate(subdirs): + vid_id = os.path.basename(subdir) + vid_path = os.path.join(split_path, subdir) + + sample = DataSample(vid_id) + + if mot17: + info_path = os.path.join(vid_path, "seqinfo.ini") + config = configparser.ConfigParser() + config.read(info_path) + seq_conf = config["Sequence"] + fps = float(seq_conf['frameRate']) + num_frames = int(seq_conf['seqLength']) + width = int(seq_conf['imWidth']) + height = int(seq_conf['imHeight']) + else: + # Assume 30 fps + fps = 30 + im_paths = glob.glob(os.path.join(vid_path, "img1", "*.jpg")) + num_frames = len(im_paths) + im_example = Image.open(im_paths[0]) + width = im_example.width + height = im_example.height + + rel_base_dir = vid_path.replace(out_dataset.data_root_path, "").lstrip(os.path.sep) + rel_base_dir = os.path.join(rel_base_dir, "img1") + metadata = { + FieldNames.DATA_PATH: rel_base_dir, + FieldNames.FPS: fps, + FieldNames.NUM_FRAMES: num_frames, + FieldNames.RESOLUTION: {"width": width, "height": height}, + } + sample.metadata = metadata + + gt_path = os.path.join(vid_path, "gt/gt.txt") + det_path = os.path.join(vid_path, "det/det.txt") + has_gt = os.path.exists(gt_path) + anno_path = gt_path if has_gt else det_path + + sample = sample_from_mot_csv(anno_path, fps, sample, mot17, has_gt) + + out_dataset.add_sample(sample) + + print("Done {} 
sample {}/{}, {}".format(split_name, i+1, len(subdirs), vid_id)) + + out_dataset.dump() + + return out_dataset + + +def write_data_split(args, dataset): + if dataset is None: + dataset = GluonCVMotionDataset(args.anno_name, args.dataset_path) + + def split_func(sample): + data_path = sample.data_relative_path + if data_path.startswith("train"): + return SplitNames.TRAIN + elif data_path.startswith("test"): + return SplitNames.TEST + + raise Exception("Shouldn't happen") + + process_dataset_splits(dataset, split_func, save=True) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Ingest mot dataset') + parser.add_argument('--dataset_path', default="", + help="The path of dataset folder") + parser.add_argument('--anno_name', default="anno.json", + help="The file name (with json) of ingested annotation file") + args = parser.parse_args() + + mot17 = "MOT17" in args.dataset_path + dataset = main(args, mot17=mot17) + write_data_split(args, dataset) diff --git a/siam-mot/siammot/data/ingestion/ingest_prim_air.py b/siam-mot/siammot/data/ingestion/ingest_prim_air.py new file mode 100644 index 0000000..b973d1d --- /dev/null +++ b/siam-mot/siammot/data/ingestion/ingest_prim_air.py @@ -0,0 +1,127 @@ +import argparse +import copy +import datetime +import fire +import string +import tqdm +import os +from pathlib import Path + +from gluoncv.torch.data.gluoncv_motion_dataset.dataset import GluonCVMotionDataset, FieldNames, SplitNames +from gluoncv.torch.data.gluoncv_motion_dataset.utils.ingestion_utils import process_dataset_splits +from gluoncv.torch.data.gluoncv_motion_dataset.utils.serialization_utils import save_json + + +def ingest_dataset(args, renumber_ids=True): + """ + + :param args: Input arguments + :param renumber_ids: rename track identities to integers + """ + dataset = GluonCVMotionDataset(args.anno_name, args.dataset_path, load_anno=False) + dataset.metadata = { + FieldNames.DESCRIPTION: "Initial ingestion", + FieldNames.DATE_MODIFIED: str(datetime.datetime.now()), + } + #raw_anno_paths = sorted(Path(dataset.data_root_path).glob("groundtruth.json")) + raw_anno_paths = sorted(Path('/home/ubuntu/airborne-detection-starter-kit/data/').glob("groundtruth.json")) + + for raw_anno_path in tqdm.tqdm(raw_anno_paths): + # Setting the dataset and samples to None here looks pointless but it allows the memory to be freed, otherwise + # on subsequent iterations it can actually run out of memory as it loads a new dataset while keeping the + # previous one still in memory (happened on c5.xlarge 8GB RAM) + raw_dataset = None + samples = None + # raw_sample and sample have references back to the dataset so have to unset these too + raw_sample = sample = None + raw_dataset = GluonCVMotionDataset(raw_anno_path) + raw_dataset.__version__ = 1 + set_dir = raw_anno_path.parent.parent + images_root_path = Path(dataset.data_root_path) # set_dir / "Images" + + samples = sorted(raw_dataset.samples) + with open ('/home/ubuntu/siam-mot/data/all_flights_val.txt', 'r') as f: + all_flights = f.readlines() + all_flights = [flight.rstrip() for flight in all_flights] + + for raw_id, raw_sample in tqdm.tqdm(samples): + if raw_id not in all_flights[200:]: + continue + data_path = images_root_path /raw_id + data_rel_path = str(data_path.relative_to(dataset.data_root_path)) + new_id = data_rel_path + first_img = sorted(data_path.glob("*.png"))[0] + first_timestamp = int(first_img.name.split(raw_id)[0]) + sample = raw_sample.get_copy_without_entities(new_id=new_id) + sample.metadata["orig_path"] = 
raw_sample.data_relative_path + sample.data_relative_path = data_rel_path + unique_ids = {} + + first_frame = None + for raw_entity in raw_sample.entities: + entity = copy.deepcopy(raw_entity) + orig_frame = entity.blob.pop("frame") + orig_time = entity.time + if first_frame is None: + assert raw_entity.time == first_timestamp + first_frame = orig_frame + rel_frame = orig_frame - first_frame + # rel_ts = raw_entity.time - first_timestamp + # assert rel_ts >= 0 + # rel_ts_msec = rel_ts / 1e6 + # ts_msec_round = int(round(rel_ts_msec / sample.period) * sample.period) + # print(f"frame: {raw_entity.blob.get('frame')} ts_msec: {rel_ts_msec} ts_round {ts_msec_round}") + # print() + # assert abs(rel_ts_msec - ts_msec_round) < sample.period / 10 + # entity.time = ts_msec_round + + entity.time = round(rel_frame / sample.fps * 1000) + if entity.id: + obj_type = entity.id.rstrip(string.digits).lower() + entity.labels[obj_type] = 1 + if entity.id.lower() in ("airplane1", "helicopter1"): + entity.labels["intruder"] = 1 + entity.blob["orig_id"] = entity.id + if renumber_ids: + entity.id = unique_ids.setdefault(entity.id, len(unique_ids)) + entity.blob[FieldNames.FRAME_IDX] = rel_frame + entity.blob["orig_frame"] = orig_frame + entity.blob["orig_time"] = orig_time + if entity.labels and "miss_distance_class" in entity.labels: + entity.blob["miss_distance_class"] = entity.labels.pop("miss_distance_class") + if "range_distance_m" in entity.blob: + entity.blob["range_distance_m"] = round(entity.blob["range_distance_m"], 1) + sample.add_entity(entity) + + # break + dataset.add_sample(sample, dump_directly=True) + + dataset.dump() + + return dataset + + +def write_split(dataset): + def split_func(sample): + # data_path = sample.data_relative_path + orig_path = sample.metadata['orig_path'] + if orig_path.startswith("train"): + return SplitNames.TRAIN + elif orig_path.startswith("val"): + return SplitNames.VAL + + raise Exception("Shouldn't happen") + + process_dataset_splits(dataset, split_func, save=True) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Ingest Prime Air dataset') + parser.add_argument('--dataset_path', default="/home/ubuntu/airborne-detection-starter-kit/data/val/") + #description="The path of dataset folder") + parser.add_argument('--anno_name', default="anno.json") + #description="The file name (with json) of ingested annotation file") + args = parser.parse_args() + + dataset = ingest_dataset(args, renumber_ids=True) + write_split(dataset) diff --git a/siam-mot/siammot/data/video_dataset.py b/siam-mot/siammot/data/video_dataset.py new file mode 100644 index 0000000..a9c3f6a --- /dev/null +++ b/siam-mot/siammot/data/video_dataset.py @@ -0,0 +1,195 @@ +import random +import torch +import itertools +import torch.utils.data as data +from tqdm import tqdm +from collections import defaultdict +from PIL.Image import Image + +from maskrcnn_benchmark.structures.image_list import to_image_list +from maskrcnn_benchmark.structures.bounding_box import BoxList + +from gluoncv.torch.data.gluoncv_motion_dataset.dataset import GluonCVMotionDataset, AnnoEntity + + +class VideoDataset(data.Dataset): + + def __init__(self, dataset: GluonCVMotionDataset, sampling_interval=250, clip_len=1000, + is_train=True, frames_in_clip=2, transforms=None, filter_fn=None, + amodal=False): + """ + :param dataset: the ingested dataset with GluonCVMotionDataset + :param sampling_interval: the temporal stride (in ms) of sliding window + :param clip_len: the temporal length (in ms) of video clips + 
:param is_train: a boolean flag indicating whether the dataset is used for training
+        :param frames_in_clip: the number of frames sampled in a video clip (for a training example)
+        :param transforms: frame-level transformations applied to frames before they are fed into the network
+        :param filter_fn: a callable that filters entities
+        :param amodal: whether to keep boxes that extend beyond the image boundary (they are clipped otherwise)
+        """
+
+        if dataset is None:
+            raise Exception('dataset should not be None. Call GluonCVMotionDataset to construct dataset first.')
+
+        assert is_train is True, "The dataset class only supports training"
+        assert (2 >= frames_in_clip > 0), "frames_in_clip has to be 1 or 2"
+
+        self.data = dict(dataset.train_samples)
+
+        self.clip_len = clip_len
+        self.transforms = transforms
+        self.filter_fn = filter_fn
+        self.frames_in_clip = min(clip_len, frames_in_clip)
+
+        # Process dataset to get all valid video clips
+        self.clips = self.get_video_clips(sampling_interval_ms=sampling_interval)
+        self.amodal = amodal
+
+    def __getitem__(self, item_id):
+
+        video = []
+        target = []
+
+        (sample_id, clip_frame_ids) = self.clips[item_id]
+        video_info = self.data[sample_id]
+        video_reader = video_info.get_data_reader()
+
+        # Randomly sample self.frames_in_clip frames
+        # and keep their relative temporal order
+        rand_idxs = sorted(random.sample(clip_frame_ids, self.frames_in_clip))
+        for frame_idx in rand_idxs:
+            im = video_reader[frame_idx][0]
+            entities = video_info.get_entities_for_frame_num(frame_idx)
+            if self.filter_fn is not None:
+                entities, _ = self.filter_fn(entities, meta_data=video_info.metadata)
+            boxes = self.entity2target(im, entities)
+
+            video.append(im)
+            target.append(boxes)
+
+        # Video clip-level augmentation
+        if self.transforms is not None:
+            video, target = self.transforms(video, target)
+
+        return video, target, sample_id
+
+    def __len__(self):
+        return len(self.clips)
+
+    def get_video_clips(self, sampling_interval_ms=250):
+        """
+        Split each long video into short clips of self.clip_len milliseconds.
+        Video clips are generated in a temporal sliding-window fashion.
+        """
+        video_clips = []
+        for (sample_id, sample) in tqdm(self.data.items()):
+            frame_idxs_with_anno = sample.get_non_empty_frames(self.filter_fn)
+            if len(frame_idxs_with_anno) == 0:
+                continue
+            # The video clip may not be temporally continuous
+            start_frame = min(frame_idxs_with_anno)
+            end_frame = max(frame_idxs_with_anno)
+            # make sure that the video clip has at least self.frames_in_clip frames
+            clip_len_in_frames = max(self.frames_in_clip, int(self.clip_len / 1000. * sample.fps))
+            sampling_interval = int(sampling_interval_ms / 1000.
* sample.fps) + for idx in range(start_frame, end_frame, sampling_interval): + clip_frame_ids = [] + # only include frames with annotation within the video clip + for frame_idx in range(idx, idx + clip_len_in_frames): + if frame_idx in frame_idxs_with_anno: + clip_frame_ids.append(frame_idx) + # Only include video clips that have at least self.frames_in_clip annotating frames + if len(clip_frame_ids) >= self.frames_in_clip: + video_clips.append((sample_id, clip_frame_ids)) + + return video_clips + + def entity2target(self, im: Image, entities: [AnnoEntity]): + """ + Wrap up the entity to maskrcnn-benchmark compatible format - BoxList + """ + boxes = [entity.bbox for entity in entities] + ids = [int(entity.id) for entity in entities] + # we only consider person tracking for now, + # thus all the labels are 1, + # reserve category 0 for background during training + int_labels = [1 for _ in entities] + + boxes = torch.as_tensor(boxes).reshape(-1, 4) + boxes = BoxList(boxes, im.size, mode='xywh').convert('xyxy') + if not self.amodal: + boxes = boxes.clip_to_image(remove_empty=False) + boxes.add_field('labels', torch.as_tensor(int_labels, dtype=torch.int64)) + boxes.add_field('ids', torch.as_tensor(ids, dtype=torch.int64)) + + return boxes + + +class VideoDatasetBatchCollator(object): + """ + From a list of samples from the dataset, + returns the batched images and targets. + This should be passed to the DataLoader + """ + + def __init__(self, size_divisible=0): + self.size_divisible = size_divisible + + def __call__(self, batch): + transposed_batch = list(zip(*batch)) + image_batch = list(itertools.chain(*transposed_batch[0])) + image_batch = to_image_list(image_batch, self.size_divisible) + + # to make sure that the id of each instance + # are unique across the whole batch + targets = transposed_batch[1] + video_ids = transposed_batch[2] + uid = 0 + video_id_map = defaultdict(dict) + for targets_per_video, video_id in zip(targets, video_ids): + for targets_per_video_frame in targets_per_video: + if targets_per_video_frame.has_field('ids'): + _ids = targets_per_video_frame.get_field('ids') + _uids = _ids.clone() + for i in range(len(_ids)): + _id = _ids[i].item() + if _id not in video_id_map[video_id]: + video_id_map[video_id][_id] = uid + uid += 1 + _uids[i] = video_id_map[video_id][_id] + targets_per_video_frame.extra_fields['ids'] = _uids + + targets = list(itertools.chain(*targets)) + + return image_batch, targets, video_ids + + +if __name__ == "__main__": + + from siammot.data.adapters.utils.data_utils import load_dataset_anno + + torch.manual_seed(0) + + dataset_anno, dataset_info = load_dataset_anno('MOT17') + collator = VideoDatasetBatchCollator() + + dataset = VideoDataset(dataset_anno, + frames_in_clip=2, + amodal=True) + + batch_size = 16 + sampler = torch.utils.data.sampler.RandomSampler(dataset) + batch_sampler = torch.utils.data.sampler.BatchSampler( + sampler, batch_size, drop_last=False) + dataloader = data.DataLoader(dataset, + num_workers=4, + batch_sampler=batch_sampler, + collate_fn=collator + ) + import time + tic = time.time() + for iteration, (image, target, image_ids) in enumerate(dataloader): + data_time = time.time() - tic + print("Data loading time: {}".format(data_time)) + tic = time.time() + print(image_ids) -- GitLab
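
Both the ingestion script (entity.time = round(rel_frame / sample.fps * 1000)) and VideoDataset.get_video_clips convert between frame indices and milliseconds via the sample frame rate. A small worked example of that arithmetic, assuming a hypothetical 10 fps sequence and the defaults clip_len=1000 and sampling_interval=250 shown in the patch:

    fps = 10                                              # hypothetical frame rate
    entity_time_ms = round(7 / fps * 1000)                # frame 7 -> 700 ms (ingestion timestamp)
    clip_len_in_frames = max(2, int(1000 / 1000. * fps))  # -> 10 frames per clip window
    sampling_interval = int(250 / 1000. * fps)            # -> window advances 2 frames per step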
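
The patch only shows how filter_fn is invoked inside __getitem__ (entities, _ = filter_fn(entities, meta_data=...)); the contract expected by get_non_empty_frames is not visible here. The sketch below is a hypothetical filter, not part of this patch, that keeps the "intruder" entities produced by the ingestion script and mirrors only that __getitem__ call:

    from typing import List, Optional, Tuple

    from gluoncv.torch.data.gluoncv_motion_dataset.dataset import AnnoEntity


    def keep_intruders(entities: List[AnnoEntity],
                       meta_data: Optional[dict] = None) -> Tuple[List[AnnoEntity], List[AnnoEntity]]:
        # Return (kept, dropped); __getitem__ only uses the first element
        kept = [e for e in entities if e.labels and e.labels.get("intruder")]
        dropped = [e for e in entities if not (e.labels and e.labels.get("intruder"))]
        return kept, dropped

    # dataset = VideoDataset(dataset_anno, filter_fn=keep_intruders, ...)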
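
The id bookkeeping in VideoDatasetBatchCollator can be hard to follow inside the BoxList plumbing. The stripped-down mirror below (plain lists instead of BoxList fields; names and sample values are hypothetical) shows the intent: per-video ids are renumbered so they never collide across the videos in a batch, while repeats of an id within one video keep a single uid.

    from collections import defaultdict


    def remap_ids(ids_per_frame_per_video):
        # (video_id, local id) -> globally unique uid, mirroring the collator loop
        uid = 0
        video_id_map = defaultdict(dict)
        remapped = []
        for video_id, frames in ids_per_frame_per_video.items():
            for frame_ids in frames:
                uids = []
                for _id in frame_ids:
                    if _id not in video_id_map[video_id]:
                        video_id_map[video_id][_id] = uid
                        uid += 1
                    uids.append(video_id_map[video_id][_id])
                remapped.append((video_id, uids))
        return remapped

    # remap_ids({"vidA": [[0, 1], [0]], "vidB": [[0, 1]]})
    # -> [('vidA', [0, 1]), ('vidA', [0]), ('vidB', [2, 3])]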