From e096e94ee2584a17bff1d069c1dccb6fafe5d1fa Mon Sep 17 00:00:00 2001 From: gabe-levin Date: Wed, 4 Mar 2026 14:15:28 +0100 Subject: [PATCH 1/2] feat: convert excel into csvs for easier git tracking --- .../src/space2stats_ingest/METADATA/README.md | 16 +-- .../Space2Stats Metadata Content.xlsx | Bin 57698 -> 0 bytes .../METADATA/link_new_item.py | 98 ++++++++-------- .../Space2Stats_Metadata_DDH_Dataset.csv | 19 +++ .../Space2Stats_Metadata_Feature_Catalog.csv | 111 ++++++++++++++++++ .../Space2Stats_Metadata_NADA.csv | 48 ++++++++ .../Space2Stats_Metadata_Sources.csv | 15 +++ 7 files changed, 248 insertions(+), 59 deletions(-) delete mode 100644 space2stats_api/src/space2stats_ingest/METADATA/Space2Stats Metadata Content.xlsx create mode 100644 space2stats_api/src/space2stats_ingest/METADATA/metadata_content/Space2Stats_Metadata_DDH_Dataset.csv create mode 100644 space2stats_api/src/space2stats_ingest/METADATA/metadata_content/Space2Stats_Metadata_Feature_Catalog.csv create mode 100644 space2stats_api/src/space2stats_ingest/METADATA/metadata_content/Space2Stats_Metadata_NADA.csv create mode 100644 space2stats_api/src/space2stats_ingest/METADATA/metadata_content/Space2Stats_Metadata_Sources.csv diff --git a/space2stats_api/src/space2stats_ingest/METADATA/README.md b/space2stats_api/src/space2stats_ingest/METADATA/README.md index 98db59e9..c1159d8d 100644 --- a/space2stats_api/src/space2stats_ingest/METADATA/README.md +++ b/space2stats_api/src/space2stats_ingest/METADATA/README.md @@ -23,14 +23,14 @@ Follow these steps to create the initial STAC metadata: ## Adding a New STAC Item -To add a new STAC Item, update the Excel spreadsheet with the relevant fields, and pass your new Parquet dataset to the `link_new_item.py` script. - -1. **Update Metadata File**: - - In the **Feature Catalog** sheet of `Space2Stats Metadata Content.xlsx`, add a description for each new variable in your dataset. 
- - Create an item id for the new set of variables, for example *world_pop_2025* or *nighttime_lights_2013*. - - Add a new entry in the **Sources** sheet if it doesn’t exist already. -> [!IMPORTANT] -> Make sure that the Item column in **Sources** corresponds to the same item id you created in the **Feature Catalog** sheet. This will be used to retrieve relevant information. +To add a new STAC Item, update the CSV metadata files in the `metadata_content/` folder, and pass your new Parquet dataset to the `link_new_item.py` script. + +1. **Update Metadata CSVs**: + - In `metadata_content/Space2Stats_Metadata_Feature_Catalog.csv`, add a row for each new variable in your dataset with columns: `variable`, `description`, `nodata`, and `item`. + - Create an item id for the new set of variables, for example *world_pop* or *nighttime_lights*. + - In `metadata_content/Space2Stats_Metadata_Sources.csv`, add a new row for the item with its name, description, citation, method, resolution, and optional start/end dates. +> [!IMPORTANT] +> Make sure that the `Item` column in **Sources** corresponds to the same item id you used in the **Feature Catalog**. This is used to link variables to their source metadata. 2. 
**Run *link_new_item.py* script**: - Navigate to the `METADATA` sub-directory and execute the following command: diff --git a/space2stats_api/src/space2stats_ingest/METADATA/Space2Stats Metadata Content.xlsx b/space2stats_api/src/space2stats_ingest/METADATA/Space2Stats Metadata Content.xlsx deleted file mode 100644 index 80d239931ef28f22f56046e952bbb0cc50d82009..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 57698 zcmeHwYm6jWc2=34MPkruS=m33B}+jenHp_pX636MJ>9)jW!9^^y1Ud_J+r$Qm7_8v zD`P4$BR3+ls%yZj^;$xUEhFq5{P+P_fMoG7EO;gSWBIeP#1Dj!03m_ELi@6ek(M8p zEK5f4oqHqV#yJ@gmD$rfUVFDkH5Ce{SD&Bm zSGHEIFtR5D+w&*R{nZ;MT>Zp@kGy{Gnd8|J3WXzg8d|_I3Gc6tqG)=zRtx(hXKaU6 zf9g!|W#|WEJHnsAMQv*LFYODb)>vQPsDT25WAy>UX&nvjuLehh`l@w$Geym}7fy74mAXW|3Tx_Do%&|AmQPIM116?XMojYqCZ<_N zOzQ(Crd38v`&AMXVJ2u(iO_$Sb;0H`jGR*(Vw)-QbNx4-zO}B z+f>JJ%ANCIxe})%+hrx}5pd_A-it;I@X3ws<-zSbyz|})z&!E*4`%N$T6?2l7P&b! zKN^i0;A7wScKsl?10GRbnI1l!O)l~)4`x}oXSRKnS9iyru%X?XO~EyjNy+-YGjbT2 zC!RftpeatA$f9rlrQ_tZ?o+PI`t`Bf5B$&{M%JfXYu9#jyX4e8XA{X3I&{Yvmz%sc zW2jl*JbugC^}W1ScbzK^i3MRRol-C7bcvlh9_z*fdluQ_Jd~$4l+w(=9NW<-kI3Fl z(9go{g%L*j!ts1-Z{UPsUYVx>+MB`s$Oga49rnlG&6q<5k$ahk^Vs(NEM(We?2PR3 zG!M4tPDWYSqwo?_)V5B1C>!&O^s&!Z2>p$5uqTDX{gz7#zu_CVGlJ*t1tZB!5c)C; za>zPxd^Uf3H{Q^86qf-@XPkQMPRiJ}=YntR8otlA@2~Dah~+JwL&2@YKMs}d9YO#N z_VJR+Desq)8e-EC9%&_`0tR;x*s6>b_|e;LH0q6P@^jUd8F>-Ls4qfm>ch3Ru3qQj zrb?mvN7~wKlO3WtM`%Zn{egbnq}PeLk)lylT_~G2si;W^7Okwg86!p8UZ%1-5w&n_ zbw!$DimbM@$<)QlE=QNAO~cEyH=)^B(1*~=8}L9<)lkN4&w;Nov4N9ep?REoRy}muYzdD4ai^uU4gBFr5XnPm%UH79v&)QqpqFSV;#oPI_st)Hx z!GdAScp8LJ*A7Q|4ArN2sJsc)r0H+8TQu2TO#^b6$zoJAw=hNi$f&>^;X~^ThfY7r z4tC}Z^_~7R=7jz%a)RDyaBZD?v*4MH;YXiRdj@U@hlN_u^as3a)Z6M4=dCfP$@;Vu zoPKY6(~%u>OY+VnD^7!}shZND{>qStw&l{U8n=8?XvM#(revq__gGU%9KeIn9MA~# z;oJ>ui&o10RX>PEKDf|yV7oNBVqoh!#XB_6mzJb2X_VX7-`60-pX zC+8pkJ6=KO7o?+P*Bj$xsp!;e;Dc6N^kPP%gf2nzH|hVBMAchdana>f@kB;u8$ut9 zmefjC1rvVT+i_JdNx~umI3sH{$C_(#qn!|kQJilvUZ(oY6n-F!{s;Nz27S*_)0hP6 
zss9YT)k63|_n(DXT_LW98AJ?HKb=GG(J%F6B2>C-W95{3EfLnQSPM$)6`f6#Ei&Fq zc6O>n@#@%Ge3hLY_b}^g{jRX9F|u-r;?*#*L)5^iKvy;v<)6QUf$qZVKl5~SDxBgk zwGO5yftp?C{@^Bl^g=Cg^{gWsh9JGkGiPYIgBOYYk@kdMLOMc{sK^SMiqS+%3pVqN z23|0Rr=nU~riB}~7fws4n!&Qu!pfnRhXC=hm~oS72YAh7b*pJ{B|YU$-cOe;Kzx3e zLUWPG;l%aTHp8e>MSD8bJNZ(!VI>rH#Y^oiNk!2XF`c=x1hr^yW8yd(hz)M|*ycyNyv1*3*uJh&IW zs~3G;g;x8%=LeP=!`MisD7l8{AXE@LtA_|&csKgBDyJu=exVmA%?4eA`US58@q?5A zT1E8Y!97SJsQQzA{6WHvo~aA(uTGGIq?(}NZh?l}x~ao#RP`#vH7bF6&;4M4oTuzS z=vNQ!dCrigEws83{f_)8{(tUAkw3=6z_l;@iS6OBmXwGefVS_%+SMnA4EKv+W`H4o zsS{|d=@A6Qj~bu;gYajuY25D{t@UCUS7uMA-p!NQ_}oDNnbNc3u~oIAo=js{Q&OVh z%!emb7@?^Y@%ha4pl0PX12_jSOvfk@AKB-gqZHD#7Iex$3$xSod$6TBO}yHP0HVrN-Vc1;KQg!aU+>* zH$XUiLTP_i;0zIFso(8x>xE0~6EU-E44C4I^lT-mgY<_P-1K__+eL;DR*!#8N+vP5 zK0WLpiw`pl!AQKhdT?bUrh|dMSG%Hl9s})iAcQMbCITqra2*ae@hK~bgLFBF8#&_? zpa$Yn3W?`v3LA{Vk@Mor9i(Lx$SwytY+AF4`t&HP##JX(M#6aKKXp zZNaqt>5TGG{K*|_zDSoB36T@(G>~K(g;YmekXOa}tDO=Fmtdy95+a9Z>%6oklmkmb ztszoYJ?D;voP*F>w<6!F=MvA+EKU4IN<0L8F7fqT>N#{d^>s~tJC}ZrXleR4Q~Hr& zm`{H@ulG6hE%djk`y1=I^m9Z@)4!F{kId$L`t!)=z_*aECQm-~9M96!Z>Q8l0OnHP z$eleLx}5xmnmvvD*~1YnO@Ey%9?k3USkQvp`6Iw@X@Z(S&HNz{n3iutMyzo;V;E2~ zcNFRzyu1xfH4M%CVGx*>Z-Wth(DL)gp_xAp9Q>9xsBvf&j04BCd>f41taI^Z#-Wuz z4jjC^4J|bet%7mjn3ivY5x8|B6=R7zJZt5T0|&pQ4Qd?P1>?XmE#C$skLyA>rfq2F zj{^rUZ$n#+L%U!cIHu*>V8n7=2uRb0{Bhvmx3ocx!$!e4a7@d$!HDL%5R#@1d9qDF z-_nE<;U*9*-vT3?>q1bbE!fDPNgTYq2^(rAZ4}HTj%oQe7!h3;!qT)Me&D?Pi;N@-DRO7IjKMn%Z@@+6uy1~V+kq(==;~>CqX@eSv zt^9Ein3ivYk&S^2SoXPp1^Xw*UXztCYp^wpq{d_3d>ZGQ zYyx_&8H-tL0?YElV1#r-$jW#a#%K9Pn*cAPztm^bPgFN9Tpi3K2BH9F+=SIG89)tSa25|7Q29#QEvf8Cb zzzFAtkdf&KH1g*V2QO`bnnMMKo2+)}78udo5E3$NLB8Q8z{^@tYPJbHOV6UF5zKhZ zU&r}on*c9uff|DXqfOvhz6C}sH}l2v_$=RO6X0bnC^gvxp5FcY@r>v{DvZZ5hdmM?vSA!j=|{d+7oN)1g1ldN4 zwz(8l{hT);1Y923i%`sKTY_REN84P$5ykL@%<~EXmq&5jyI_)mGuOGink7gz0<_KL zBas?W+EE_09CVJ_b324&LJr))VXt!HPctlC@HSED5`-Ix+UBB?2nRn_*}%NET>+`- zM&2&cHA(QO)kXqyA+{`#EkU}Gmu)VT6zPq8(m7-y>5Fk^QSTD88!<>J)ftz!qP>|< 
zJBKW!y_iXhx|X2Vh`=_NNJKA~bnZ0akcIT#&Z|Y8OVDkkV4DjlMR&m*;gH32FUqnx zz9pzPq7es=$yw4Um?a#tkorY9H%GNN;TuLo;!zR~&Yd@11Y{xMiVQHBJO(asEJ3l6 zjvHKF(L{+91J5Oz0~ZpFoK|eFj+_fWaFM|Y<_Ml{QEo6XdCp(Ef`TPlVI<}T7iHvg zxpLhgJex!wtj~Ib*SRa;V?*h!qoZd%YeL%%A`dCqmf8;*TG&>BR}-AsMB70Sm6iCq z0-r_!3}+fcT07IUH;fp?V|L!m(cerlXhToHFHOI|*A@5{q~A!>4K7XR=x<5-IsDS} z3w&Ks|AO=zk-EVJlxUig~e?!vG;g_ag;OmO|7o^{a z*$pl{=jh**^mF*7=@erfszzOJZ$LHdor-Qcosj{a>) zKZjqMeu1wm>R*ihO(S>lSjIo+HcANiW$Bmrx&q&V^cyL>$px$I|3+Q(zkpwweu1wm z@GVHck-wYe^0y)7uYg~geu1wm@GVHck-wYe^0y)6<*tBVntp+=EATByzmdP2T%OL& zA0aPy1^m+V3w&LHZ$bKv{N3d8G@QC}!gGM0ItD)&xbKn|K5+W()WxPH+Cg_T88|Pj zwcg3m8+WXY${71QM^;4V&C=mg9Bo(B+lST%wKWA|>e{9ew|LC58z{D>j8@PD1P;cG zMt9=P#1Uw=L+excS9$!n((R`8sj^*Bwwx# zMs96!A(Ydv&AIu?iyB?y)XJf~n zP=7-f+y<66hTZ42S3FDG%l1-t8>$*8=S2D zq%benovTKHhba^%&qyhp8IkR+LEz8ew>%4{j(b)=Yo7&EBz5Ex$_sXNL2G2cYX<|; zrT31kul>z`WL4@_><|Ky%8g?OJU!%%lsv7)<`a_w={D$lp7cQ#?gfrwcLsYw;0HLu zBKs#D*@AFeFCu`Qkgji`6D_RpGvd6ATDBCBm|LdAaU(zZ4@QrFzp=7HKLy8)s0*LI zM~)jg#*SBQP6EA5_$qmV6OA~4IAo+-bQp;^X#|%fjr;*lOuA5u z?uj4C2srNA$*z~4KSMf^MYE9;u+QN-iK!W?0vz^$!#)iz(!3iEc*p@YwH)M#gFJI^ zL?kX2k%zE6naZW z@dhZauI$4#UzpQj4M$;};U23ALW?Pmv#aoImPK0Q9kS0zq(hK!=AOEq2#2nPPH=?- zD|-=6IGu#n{|E14Clx-wNrD-;GwI`4 zE*lwdv@WX_Z_@#5(*xD*l$E(a7n%_f^YoD8gpSASJmTLePZ9^oahQi3X6d9_W6g16 zuIKk%zv5uj{juBUb?$P|Cn}6dQOsj{5(IWQa8Kv8dNVp5PURHhwzL_R2(%hXR#nSP zxzcd1T;Crfg%t-mAsiZD40YHPr@$lc3~$BC=<=GBCyXPZi-m$@EkD`m?I0OdX&`jc zT}-{Tr_yUwqi_ygfM{$$*IkOr_IPD_KJ40)ORJ(0W$~Cv0$zeu9o(o}G6yuz>}%Z6 zd0tWB+X~i1a8prEO}VCcebKqdl0D$xsd0BKLlqyuLk>JY@%VQfV45@WILu;>SYwZK z+qxs%p_+|zn0-oWwszoRm~2MFKK*DyanIiaV*$!U9Xi zFOi5W##|g>r3s=zq=|YC?uu#Z&8~2B%^ho+JP>Q?`w#|T>ME;Or$(#eyA*$-sW-M? 
zxMQpgwHB&iPnm3JxJ3uhD`*jT?xo|oBkBj_gOPMnGG;AJqb6lJd_$1E$2({rr~P6r zHLL5^A=WD@ON6}@i!R+TaM2JZ7xE?PURWz0BCCfC5uD`ahh|b!7l1%q7JH$E2=Xj1 zl@tj36*ALy;7+V5fwv3MH5j`L{x}~N=if}CZduO!KcDL zZ|U1aHx320y($TjM(Yi0$HG?4F>mP6WQH5iluI8MXE+!pOopAw&0LeKskj{Yu4-R> zy;@&yY~wb`M)U1w>rGr7D6H}WPLgi9={l0TLp31P3#B)RWq-(v&#zSeof?;6z)X)a z{w3~S%3pgNCL>(;XjcK)G!pyO5J`4lSn&2WnQRJ3)+E~$ASQK}*Q2MbT8miexE2LW z8wVtVP9^V+2-cbwh=($uLNFPHDcQ6js@fA({Rv(_yrth2>xYxH^!nk&v3`hirt^cd zk0TDO7n9Nr^HnbnK3sCsjq#y!7i!0!3_W*lLkb7dhqzK(5^&NtaIi0L5$-)@cBgJW zlsZ*|MJhN{;>n){`tUSTTgUbd+)r!C+#1pM3R)O{uL3FnQ|sS=Q)1iY#O=?* zsxzw%oQoO82sb0R6D)Z&zB(OEh0lGT({SR17>v-h9#`*Jd!qp2Yh-otyzc~oYxU>} z=gIfqy}7h@st@VUz@dl2W1UnL`+uuy z;lf*tOVxS`2*?F!`O3LAAft+;5sfU~3}$^LG$19_SZl{zK~E_Mg9<&_nzfQ}rDD$1 z?~g)@l3Eavz-2MyS6j#bX&S;c)v8)2vvV|ANvP0@FOrW-@68a4LBIxHIP@iY=lL_M zvANl*TYFCkQGKIo!J4=W>$BR}+}f%&+RgS>L#Ut!yuUl?h*^8dFj|i>y)X3G_uyl5 z`@-pZ$Z^asaQ>aFVd0N@$RTK&EVk1V@xfdtV0$AV{7WZaB!Icri^WA z15RHpD8{eLN&2*K@hbl?;&pxUV1hrPzR(>{G3R5Em7#^2;LsjB#H40ebpuh=9>9P$2}zkD8E z+SVg{d_MB5hgItkj~#M9crN$^o_(5xj@7|)*L~-b+}zeN{vdwi`Y~SD83e?V4yy{+ zf$akqe9aE{5wW1-nJV+x#?^Eau)X-Nz^y!U;gR#LBLF>iJdo}aLnHpAd64xM9^b<9 zM(o!*^qotEIOSc|&fvhpWBPFo~7J})9iIU`y#!hh(aB+y;3du!GTk+FPtca;(l2UE>>!GV- z#;L^Aul@?BsLLsOfU9capcLlbt}}tx>j-fkdjb#1+QHM_=^It+6haV5uGTf!5;S-$ zG00VLDdkzQ=QCgN0TCBm*a$wNw%IBQ2QDskArnsVSS4*cheyv&db$EIG~8rGEk$9Z z)q*Dw83P1^RO;AuZ zh;(2Y;Uc_O5U7*>9X*I$$mZ4xn@h$8`mF696Z z1wcQ~bN@&H)obx963r=uF_$fAKBbq#eRt(QYZ8&@P-5U!By9S<(-$NZu?!zAMW8IK{G_i3oVJI$a zg&tJOJBV*9&j!L5fscp;@fj=?eaW1nH}G+3mR5biv0&-rc#u%ixhLpUk+y@A$FND_ z%rFT2fP(j9>uaBTFY(5wU*#Af2mof}_@_tv`&iYfO>;3FS0{F8S1Owx^XwrLkFQG^@DPMVdRJK_m-2=LRuqNdF(jGJ4Z` zggv1B(YZatsW#oJwW}fy^-YkwV+O`TU>ZFLCV^g!)y0$~IjukvMX{^a6T0jLaywq< zcdV^VCAsUZ_SZi5#rg&e5V9bY3}4@@h4uRS#&%`Bj>c@ZH!C-M0UwCA(BN>0Ljf_L zZ+0Xg;3-9%2i7IT!vK=!v-Qid=)#AhNZM?XB*;m5BueqG$Korjsm-$&uv-RaDIn(~ zCYMB9fI#_sb1a01){9g8IyuPFBXicw0S+qluXZ{9>c0}5)8z>A^#tV>q{Xbz&{0X& 
zCF~Ku-=78Kh6TIv6MH0WzH;#Gar6(=dW6&CsAs80DT$n}Dxe>SRh-&`&|B5T;I?)l+ZJ&B^@7*0WF<3EZriUPlGfUZMVfs zi0%r9H4u%!C1JB;36f_($mjz>ecrm@6C!V2a0qDCmgTvVidcg{zODVy9I9|?N3Wy>uC2h*y;2tT1kz0@jP{L z7MG@sCJ#9Ukr{`{s`IpH62uxH>BA7E*~~6M-JYP1n`*lxPEP~74<{L-=2&M+sZ7*7 zWK3N{8>Us82;rus*#7-}0yw(53{Y~uyvX(Z&yBSAoYdqZqaJ{AyI_S@x) z+T_!Si)7-&YTgCA`9SY0`?@1=#tS|%SL4KPy^QWsfL%xBjZjp%Ua^CH<7=P$IrTT} z%}gS=S_U>b-0*!qAN)=+4@p=@TnWj{BEG>BDfV!wEqtjJZQH59K?su+0!pDmTA84g zY<9>obqdK^eS5tD)eQQ`)*D(o&-i{+)4DJ(iS<}(o!(IgtLFryeRjRt{_2;^(Q0>e zWjGeZp>vZ62VFTIj(krEIDAIB3K;-!9)x&OW;x562xBAn#CJmkGC;fHV1tqBel0S?9@(3%!GLMR%ky1?L?^PKCNK> z;CM>P7gOHjkW7U)RU{gOkz3X#B261$PF2q%@L)f16*KDMuE!k<8(IZAO7 z@lZ;sYGUfAFzh_1(lOl1Fk@9dD{F zk+mdo$=D{Dyb(sFQOEe&K0Q57o0XT^49B~r)%z%gV~C4#H+RW}O+p*dWm8mF!L!r4 zd!1efGr+rA&r*@vO7p5A-4wH31g^tp|ul5fqSktK#_z=3$q&G zzfPUfeDkOhcF$rBq@Zx2N4rzjAHv~P)*)~Zq~!5+nNVgue!ItZ(- z4~?mB%p2YWGP*VxC8<&l*K~c11SI5a22#_a>!@5cWIAR!f|kmxMD(tCgDk- zfc`Xm##&PjV|e0e%gc?`+(6L33eBLZTp$!_iVbhW!20C&N9EVpfQ`@u)UBA;!QDPZ z1dFP`5j6;ufxRKlM_w1^6@U@~2aG=@7hE+y+ku3UalH3T$r<;8VBzWQORXi222cTh)VCrL7N zx|gT3fU?x7K91{d+YMtdaQudUiwi&DLWbW;u_7o|Vkc+Z3eLw%>oJU6+y?p>3lae} zN~ciP$nz=2yl`v#t|~696zNqLRx3Nlbz09qj$P<+B-Yg zdwO=YZq5>9Yu0E1T(*auBrunVWa?O?WOf|(71jw$PAOzL~uZ2325HMojQTv{C{%D9j~9UJ#DSPA1*Y9B>v5 zE7C&~o8HAXNGTyA^@fc!YbNZC)QI3`Hk9M%uwtU+P=~3xMn{3%;*f%poz4=@ z7R%}6u*m^0DUp0+3>OtTcX=J0kM5k4l_K{Ob_B-n+rO#d_s84*EtpD1VhY2kd@6Bq zc%sefw2F91aiCL})!&u&H>7IWE)-$!mG=vm(1@<;Onv1ynqO)Lx`2X$jT7ap|8>LJ zZ<7MABj->%XK{W6g1Cdfj5vrlNX}_U2b8g&hH-G>F}shrv~L7#;(o&!7peVTP@hpY z8eD-WuaCpVZh9~iwh zCX1K&L`pR$$EYTaB~%ju3#(5BJ+RLyE3~7VsT7u?;1v=KGNAxYn$aQ}Tr)_j!%=!r z5YmRJcJB(<6A!Uw4$=`II@+AVa3YNI(3q3+s?GUCaM(RKD^vXO@NkIw7IK3M!U7GW z7~nCxYRWr;!@iw$^u;uAh)O9DMssLzZJ?Y84*LY#;6}11ILOv^b=zby@xd@6r@3|%M;SEmQF}f{laMSQhJT5%)0=9#k z9?|ClqIIWPUIQD3T0uYlXC&Fv@YVX*|adM*#-qF!&~FD zxWzqPqMVJK#%*6<4+=#3reikS!vl&=DmR zy_qv;($=vliJR7wu@#rDm(j+V`-x5mof}z!koJhGa2$rXcw})0drAXyV#2WEai@l=7VdbBRt=Q0Jmn za6jt+UBU`Zhf@#1D+}4iF~@~Nwm<&4m7n~=hgMeT=c6ktD=)m-I9>+tFH)Oeq5gd( 
zxs5NpU@(^=iSu9sKlF!@I%CTpA{gl)2^kK2Y))~UXzbxU#5!`Yv0a*YshVPw@a~I& z{nEGwH}K^m=!xB`H>(d`x2$_fTTfxO(!TWBM4i-5-^5qt2EM=g$>Z+M$+J}pc^kdi zl#cjt25%wy0zBh7*`q+3JB4!8sO5f5Y)eA~9gDI@vQ1;seQ9zYDNLkm9NbRB3Mt{^)d($bR z8mwLoXV@6(5Eyl{Xr}Cu^Gn?|=o}cq$6=KGsbAoBboc)1UppV1pKtXC{mRg;JC)Y< zhEuV(u!*$UZw{O5eVldKZ~lZD0P1G?cMrpQHy)lH8XgD^ zT_h7roa4XufByb||F1u=vO+)cqJ<;p^4#|?-`}E5u)MNG`^plM6Ut5kOwEBqO7i_x z3KYB@H$2U>zy%08shT1#_gD83%cHZHI+&I?DG%2t5MMaLz+;vNcU?RLM+2JKte8H8 zC(M9DS4hDd61>|zR6!cVPcuazd($FKS>(x1cP9f2S(?J?M0G`Ia-9Q>t(XN5-}%@d z9RI1675e$JVBKd(qYG5eFGa_GB}`YWpP^6 z^XPE&CNYmWCXDqufyB^uVW>8%6yr|UN3D+o04h*jfPAJ1e>34 zc}SCHzNI7{=Jd`K(pCrGAM}W$JnY@8WnbPqgbsqexhFPq65-^F?rN7cPSbi9(;oAm zvU~LatBl!61hfREb}2>b*_*;eK?nTneIn#iiSFu`>M;wOeRf+9$k2%H;(-IwOHBwq za1vFHR`_&yf+L=CC~XA`tw{(Pvb#9MF|I( zr*|awYslPHs$8vJt=AGocR*P^W3#s?TR?ruK++j28J3&{9rX3b6#7L76)j#%>cWnu zA_M%tUi;+ZW1O*o<%PquDHFZ#fa#t*`p|1Xgf7?rHh%V}zw&E;W@Uwbeh{zz3K?o( zre|t>{nht36bi>n$}Vz4f{qIH1f;+2@E0M3)@_Y7z)SVagL^pM(#CmH*pi5vwF!V~ zSLwuIB86IWe--RC;Wst<3MrOT4hR32l)IV?s^hTYynvcnRoX=r1&}FT9osOY_EZu) zK;VWgz<*Ur7vbmpw4!+_xTg}8Rg~`rHqPX_D5yZ&XoW4my#}=n`Qr*rK^KjuqKmMj z0h-G>-shK?f?7Tpx&*z?FLB`xb4$?s{1VPMzr>3QeaSC#U08;`^T}cHTD1ZKQ$DZTP8vn-C&c|E1rcKX<5yQ?ru!LrPe^Q z^o5DJP>ovVgV5-AAO3~r;g7wxvO+&U zoG5+iZ_ucn6

ZMX5f*NSR#1-d#}CsX@JttM@(rv z{lis-z)VV6^b=nW|8TpzvO+(!o5}oJ`%9{5%6M?@1(|?q6-g}e&P{{m7cXmf!q)xmXl)lUU^bEQ96u|8XuB#hN6R9~U75gXN$6TZJXDUuf#f!qRnV3ZMPE zl!U~QDGN)FmnrQ0d*=U=1*O-*6#Q@hVcv%!OG~TKl>Vjvm?@pukg~Y6bxiTU@Sk%n YpOZ%){?i2X5&ZWUahd=3Kj^Rj4;M;89{>OV diff --git a/space2stats_api/src/space2stats_ingest/METADATA/link_new_item.py b/space2stats_api/src/space2stats_ingest/METADATA/link_new_item.py index 9bb5f8d3..eb76db34 100644 --- a/space2stats_api/src/space2stats_ingest/METADATA/link_new_item.py +++ b/space2stats_api/src/space2stats_ingest/METADATA/link_new_item.py @@ -1,13 +1,13 @@ import argparse -import json +import csv import os from datetime import datetime from os.path import join -from typing import Dict +from typing import Dict, List import git -import pandas as pd import pyarrow as pa +from dateutil.parser import parse as dtparse # type: ignore[import-untyped] from pyarrow.parquet import ParquetFile from pystac import Asset, CatalogType, Collection, Item from pystac.extensions.table import TableExtension @@ -36,37 +36,23 @@ def get_types(parquet_file: str): return column_types -# Function to save an updated dictionary of column types. Will not be used for now. 
-def save_parquet_types_to_json(parquet_file: str): - git_root = get_git_root() - json_file = join( - git_root, "space2stats_api/src/space2stats_ingest/METADATA/types.json" - ) - df = pd.read_parquet(parquet_file, nrow=10) - - # Get the column names and their types - column_types = {col: str(df[col].dtype) for col in df.columns} +def _read_csv(path: str) -> List[dict]: + """Read a CSV file and return a list of row dicts.""" + with open(path, newline="") as f: + return list(csv.DictReader(f)) - # Save the column types to a JSON file - with open(json_file, "r+") as f: - data_types = json.load(f) # Read the existing data - data_types.update(column_types) # Update with new columns - f.seek(0) # Move to the start of the file - json.dump(data_types, f, indent=4) # Write updated data - f.truncate() - print(f"Column types saved to {json_file}") +# Function to load metadata from CSV files +def load_metadata(metadata_dir: str) -> Dict[str, object]: + feature_rows = _read_csv( + join(metadata_dir, "Space2Stats_Metadata_Feature_Catalog.csv") + ) + sources = _read_csv(join(metadata_dir, "Space2Stats_Metadata_Sources.csv")) + # Build feature_catalog as a dict keyed by variable name + feature_catalog = {row["variable"]: row for row in feature_rows} -# Function to load metadata from the Excel file -def load_metadata(file: str) -> Dict[str, pd.DataFrame]: - overview = pd.read_excel(file, sheet_name="DDH Dataset", index_col="Field") - nada = pd.read_excel(file, sheet_name="NADA", index_col="Field") - feature_catalog = pd.read_excel(file, sheet_name="Feature Catalog") - sources = pd.read_excel(file, sheet_name="Sources") return { - "overview": overview, - "nada": nada, "feature_catalog": feature_catalog, "sources": sources, } @@ -79,10 +65,10 @@ def load_existing_collection(collection_path: str) -> Collection: # Function to create a new STAC item def create_new_item( - sources: pd.DataFrame, + sources: List[dict], column_types: dict, item_id: str, - feature_catalog: pd.DataFrame, + 
feature_catalog: Dict[str, dict], ) -> tuple[Item, str]: # Define geometry and bounding box (you may want to customize these) geom = { @@ -105,12 +91,15 @@ def create_new_item( ] # Get metadata for the new item - try: - src_metadata = sources[sources["Item"] == item_id].iloc[0] - except IndexError: + src_metadata = None + for row in sources: + if row["Item"] == item_id: + src_metadata = row + break + if src_metadata is None: raise IndexError(f"Item '{item_id}' not found in the metadata sources sheet") - if pd.isna(src_metadata["End Date"]): + if not src_metadata["Start Date"] or not src_metadata["End Date"]: # Define the item item = Item( id=item_id, @@ -134,13 +123,16 @@ def create_new_item( ) else: # Define the item with a time range + def _parse_date(val): + return dtparse(str(val).strip()) + item = Item( id=item_id, geometry=geom, bbox=bbox, datetime=None, - start_datetime=src_metadata["Start Date"], - end_datetime=src_metadata["End Date"], + start_datetime=_parse_date(src_metadata["Start Date"]), + end_datetime=_parse_date(src_metadata["End Date"]), properties={ "name": src_metadata["Name"], "description": src_metadata["Description"], @@ -163,7 +155,7 @@ def create_new_item( table_extension.columns = [ { "name": col, - "description": feature_catalog.loc[col, "description"], + "description": feature_catalog[col]["description"], "type": dtype, } for col, dtype in column_types.items() @@ -205,24 +197,28 @@ def main(): # Paths and metadata setup collection_path = join(metadata_dir, "stac/space2stats-collection/collection.json") - excel_path = join(metadata_dir, "Space2Stats Metadata Content.xlsx") column_types = get_types(input_parquet) - # Load metadata and column types - metadata = load_metadata(excel_path) + # Load metadata from CSVs + metadata = load_metadata(join(metadata_dir, "metadata_content")) feature_catalog = metadata["feature_catalog"] # Find item name and metadata based on column names - feature_catalog.set_index("variable", inplace=True) - try: - 
feature_catalog = feature_catalog.loc[column_types.keys()] - except KeyError as e: - raise KeyError(f"Column '{e}' not found in the metadata feature catalog sheet") - item_ids = feature_catalog["item"].unique() - item_id = [id for id in item_ids if id != "all"] - if len(item_id) != 1: - raise ValueError(f"Expected one item name, found {len(item_id)}") - item_id = item_id[0] + for col in column_types: + if col not in feature_catalog: + raise KeyError( + f"Column '{col}' not found in the metadata feature catalog sheet" + ) + item_ids = {feature_catalog[col]["item"] for col in column_types} + item_ids.discard("all") + if len(item_ids) != 1: + raise ValueError(f"Expected one item name, found {len(item_ids)}") + item_id = item_ids.pop() + + # Filter feature_catalog to only columns in the parquet + feature_catalog = { + col: feature_catalog[col] for col in column_types if col in feature_catalog + } # Load existing collection collection = load_existing_collection(collection_path) diff --git a/space2stats_api/src/space2stats_ingest/METADATA/metadata_content/Space2Stats_Metadata_DDH_Dataset.csv b/space2stats_api/src/space2stats_ingest/METADATA/metadata_content/Space2Stats_Metadata_DDH_Dataset.csv new file mode 100644 index 00000000..bd73036d --- /dev/null +++ b/space2stats_api/src/space2stats_ingest/METADATA/metadata_content/Space2Stats_Metadata_DDH_Dataset.csv @@ -0,0 +1,19 @@ +Field,Value +Title,Space2Stats Database +Description,A global dataset of geospatial variables at the grid level (hexagon H3 level 6). +TTL,Ben Stewart +Business Unit,DECSC +Collaborator,Andres Chamorro +Classification,Public +License,Creative Commons Attribution 4.0 +, +Data Resource, +Classification,Public +Resource URL,https://space2stats.ds.io/docs +Resource Title,Space2Stats API +Description Resource,"This database contains geospatial statistics for the entire globe standardized to a hexagonal grid. The spatial unit of the dataset is the H3 level 6 (approximately 36 sq. km. per cell). 
The variables cover a wide range of geographic themes relevant to international development, including demographic, socio-economic, environmental, climate, and infrastructure. An API enables users to query, access, and aggregate statistics from the Space2Stats database. The purpose of this API is to facilitate the generation of sub-national geospatial aggregates for any administrative boundary set." +Release Note,test +Release Date,2024-08-22 +First Published Date,2024-08-22 +Maintenance and Update Frequency,No fixed schedule +Maintenance Note, diff --git a/space2stats_api/src/space2stats_ingest/METADATA/metadata_content/Space2Stats_Metadata_Feature_Catalog.csv b/space2stats_api/src/space2stats_ingest/METADATA/metadata_content/Space2Stats_Metadata_Feature_Catalog.csv new file mode 100644 index 00000000..5b6c68ee --- /dev/null +++ b/space2stats_api/src/space2stats_ingest/METADATA/metadata_content/Space2Stats_Metadata_Feature_Catalog.csv @@ -0,0 +1,111 @@ +variable,description,nodata,item +hex_id,H3 unique identifier,,all +ogc_fid,Feature unique identifier,,all +sum_built_area_m_1975,Total built area (m2) in 1975,,builtarea_ghsl +sum_built_area_m_1980,Total built area (m2) in 1980,,builtarea_ghsl +sum_built_area_m_1985,Total built area (m2) in 1985,,builtarea_ghsl +sum_built_area_m_1990,Total built area (m2) in 1990,,builtarea_ghsl +sum_built_area_m_1995,Total built area (m2) in 1995,,builtarea_ghsl +sum_built_area_m_2000,Total built area (m2) in 2000,,builtarea_ghsl +sum_built_area_m_2005,Total built area (m2) in 2005,,builtarea_ghsl +sum_built_area_m_2010,Total built area (m2) in 2010,,builtarea_ghsl +sum_built_area_m_2015,Total built area (m2) in 2015,,builtarea_ghsl +sum_built_area_m_2020,Total built area (m2) in 2020,,builtarea_ghsl +sum_built_area_m_2025,Total built area (m2) in 2025,,builtarea_ghsl +sum_built_area_m_2030,Total built area (m2) in 2030,,builtarea_ghsl +spi,"Standardized Precipitation Index (SPI), 6-month timescale",,climate +date,"Month, formatted 
as YYYY-MM-DD",,climate +cy_frequency_mean,Tropical Cyclone Frequency,,cyclones +drought_spei_1_5_rp100_mean,"Drought hazard (SPEI ≤ -1.5, 100-year return period)",,drought +fires_density_mean,Fire Density,,fires +pop,"Sum of Gridded Population, 2020",,flood_exposure_15cm_1in100 +pop_flood,"Sum of population exposed to floods greater than 15 cm, 1 in 100 return period",,flood_exposure_15cm_1in100 +pop_flood_pct,"Percent of population exposed to floods greater than 15 cm, 1 in 100 return period",,flood_exposure_15cm_1in100 +landslide_susceptibility_mean_2023,Landslide Susceptibility Index,,landslide_susceptibility +sum_viirs_ntl_2012,Sum of VIIRS nighttime lights brightness for 2012,,nighttime_lights +sum_viirs_ntl_2013,Sum of VIIRS nighttime lights brightness for 2013,,nighttime_lights +sum_viirs_ntl_2014,Sum of VIIRS nighttime lights brightness for 2014,,nighttime_lights +sum_viirs_ntl_2015,Sum of VIIRS nighttime lights brightness for 2015,,nighttime_lights +sum_viirs_ntl_2016,Sum of VIIRS nighttime lights brightness for 2016,,nighttime_lights +sum_viirs_ntl_2017,Sum of VIIRS nighttime lights brightness for 2017,,nighttime_lights +sum_viirs_ntl_2018,Sum of VIIRS nighttime lights brightness for 2018,,nighttime_lights +sum_viirs_ntl_2019,Sum of VIIRS nighttime lights brightness for 2019,,nighttime_lights +sum_viirs_ntl_2020,Sum of VIIRS nighttime lights brightness for 2020,,nighttime_lights +sum_viirs_ntl_2021,Sum of VIIRS nighttime lights brightness for 2021,,nighttime_lights +sum_viirs_ntl_2022,Sum of VIIRS nighttime lights brightness for 2022,,nighttime_lights +sum_viirs_ntl_2023,Sum of VIIRS nighttime lights brightness for 2023,,nighttime_lights +sum_viirs_ntl_2024,Sum of VIIRS nighttime lights brightness for 2024,,nighttime_lights +ghs_11_count,Total number of cells in very low density areas,,urbanization_ghssmod +ghs_12_count,Total number of cells in low density rural areas,,urbanization_ghssmod +ghs_13_count,Total number of cells in rural 
areas,,urbanization_ghssmod +ghs_21_count,Total number of cells in suburban grid cells,,urbanization_ghssmod +ghs_22_count,Total number of cells in semi-dense urban clusters,,urbanization_ghssmod +ghs_23_count,Total number of cells in dense urban clusters,,urbanization_ghssmod +ghs_30_count,Total number of cells in urban centres,,urbanization_ghssmod +ghs_total_count,Total number of cells in all categories in GHS database,,urbanization_ghssmod +ghs_11_pop,Total population in very low density areas,,urbanization_ghssmod +ghs_12_pop,Total population in low density rural areas,,urbanization_ghssmod +ghs_13_pop,Total population in rural areas,,urbanization_ghssmod +ghs_21_pop,Total population in suburban grid cells,,urbanization_ghssmod +ghs_22_pop,Total population in semi-dense urban clusters,,urbanization_ghssmod +ghs_23_pop,Total population in dense urban clusters,,urbanization_ghssmod +ghs_30_pop,Total population in urban centres,,urbanization_ghssmod +ghs_total_pop,Total population based on GHS-Pop population,,urbanization_ghssmod +sum_f_00_2025,"Total population female, ages 0 to 1, 2025",,world_pop +sum_f_01_2025,"Total population female, ages 1 to 5, 2025",,world_pop +sum_f_05_2025,"Total population female, ages 5 to 10, 2025",,world_pop +sum_f_10_2025,"Total population female, ages 10 to 15, 2025",,world_pop +sum_f_15_2025,"Total population female, ages 15 to 20, 2025",,world_pop +sum_f_20_2025,"Total population female, ages 20 to 25, 2025",,world_pop +sum_f_25_2025,"Total population female, ages 25 to 30, 2025",,world_pop +sum_f_30_2025,"Total population female, ages 30 to 35, 2025",,world_pop +sum_f_35_2025,"Total population female, ages 35 to 40, 2025",,world_pop +sum_f_40_2025,"Total population female, ages 40 to 45, 2025",,world_pop +sum_f_45_2025,"Total population female, ages 45 to 50, 2025",,world_pop +sum_f_50_2025,"Total population female, ages 50 to 55, 2025",,world_pop +sum_f_55_2025,"Total population female, ages 55 to 60, 2025",,world_pop 
+sum_f_60_2025,"Total population female, ages 60 to 65, 2025",,world_pop +sum_f_65_2025,"Total population female, ages 65 to 70, 2025",,world_pop +sum_f_70_2025,"Total population female, ages 70 to 75, 2025",,world_pop +sum_f_75_2025,"Total population female, ages 75 to 80, 2025",,world_pop +sum_f_80_2025,"Total population female, ages 80 to 85, 2025",,world_pop +sum_f_85_2025,"Total population female, ages 85 to 90, 2025",,world_pop +sum_f_90_2025,"Total population female, ages 90 and above, 2025",,world_pop +sum_m_00_2025,"Total population male, ages 0 to 1, 2025",,world_pop +sum_m_01_2025,"Total population male, ages 1 to 5, 2025",,world_pop +sum_m_05_2025,"Total population male, ages 5 to 10, 2025",,world_pop +sum_m_10_2025,"Total population male, ages 10 to 15, 2025",,world_pop +sum_m_15_2025,"Total population male, ages 15 to 20, 2025",,world_pop +sum_m_20_2025,"Total population male, ages 20 to 25, 2025",,world_pop +sum_m_25_2025,"Total population male, ages 25 to 30, 2025",,world_pop +sum_m_30_2025,"Total population male, ages 30 to 35, 2025",,world_pop +sum_m_35_2025,"Total population male, ages 35 to 40, 2025",,world_pop +sum_m_40_2025,"Total population male, ages 40 to 45, 2025",,world_pop +sum_m_45_2025,"Total population male, ages 45 to 50, 2025",,world_pop +sum_m_50_2025,"Total population male, ages 50 to 55, 2025",,world_pop +sum_m_55_2025,"Total population male, ages 55 to 60, 2025",,world_pop +sum_m_60_2025,"Total population male, ages 60 to 65, 2025",,world_pop +sum_m_65_2025,"Total population male, ages 65 to 70, 2025",,world_pop +sum_m_70_2025,"Total population male, ages 70 to 75, 2025",,world_pop +sum_m_75_2025,"Total population male, ages 75 to 80, 2025",,world_pop +sum_m_80_2025,"Total population male, ages 80 to 85, 2025",,world_pop +sum_m_85_2025,"Total population male, ages 85 to 90, 2025",,world_pop +sum_m_90_2025,"Total population male, ages 90 and above, 2025",,world_pop +sum_pop_2015,"Total population, 2015",,world_pop 
+sum_pop_2016,"Total population, 2016",,world_pop +sum_pop_2017,"Total population, 2017",,world_pop +sum_pop_2018,"Total population, 2018",,world_pop +sum_pop_2019,"Total population, 2019",,world_pop +sum_pop_2020,"Total population, 2020",,world_pop +sum_pop_2021,"Total population, 2021",,world_pop +sum_pop_2022,"Total population, 2022",,world_pop +sum_pop_2023,"Total population, 2023",,world_pop +sum_pop_2024,"Total population, 2024",,world_pop +sum_pop_2025,"Total population, 2025",,world_pop +sum_pop_2026,"Total population, 2026",,world_pop +sum_pop_2027,"Total population, 2027",,world_pop +sum_pop_2028,"Total population, 2028",,world_pop +sum_pop_2029,"Total population, 2029",,world_pop +sum_pop_2030,"Total population, 2030",,world_pop +sum_f_2025,"Total female population, all ages, 2025",,world_pop +sum_m_2025,"Total male population, all ages, 2025",,world_pop diff --git a/space2stats_api/src/space2stats_ingest/METADATA/metadata_content/Space2Stats_Metadata_NADA.csv b/space2stats_api/src/space2stats_ingest/METADATA/metadata_content/Space2Stats_Metadata_NADA.csv new file mode 100644 index 00000000..48aa97a8 --- /dev/null +++ b/space2stats_api/src/space2stats_ingest/METADATA/metadata_content/Space2Stats_Metadata_NADA.csv @@ -0,0 +1,48 @@ +Group,Field,Value +Identification,Title,Space2Stats Database +Identification,Identifier,GLO_2024_SPACE2STATS_GEO_v01 +Identification,Hierarchy level,dataset +Identification,Edition,v.1 +Identification,Edition Date,2024-09-06 +Identification,Status, +Identification,Language,ENG +Identification,Characterset,utf-8 +Identification,Date, +Identification,2024-09-06,creation +Identification,Graphic overview, +Identification,Responsible party,"Ben Stewart (Task Leader), Andres Chamorro (Collaborator), Development Data Group (DECDG), World Bank" +Identification,Presentation form,API +Identification,Series name,Space2Stats Hexagonal Grid Database +Identification,Citation, +Identification,Abstract,"This database contains geospatial statistics 
for the entire globe standardized to a hexagonal grid. The spatial unit of the dataset is the H3 level 6 (approximately 36 sq. km. per cell). The variables cover a wide range of geographic themes relevant to international development, including demographic, socio-economic, environmental, climate, and infrastructure. An API enables users to query, access, and aggregate statistics from the Space2Stats database." +Identification,Purpose,The purpose of this API is to facilitate the generation of sub-national geospatial aggregates for any administrative boundary set. +Identification,Point of contact,"Andres Chamorro (Collaborator) +World Bank, Development Data Group (DECDG) +achamorroelizond@worldbank.org" +Identification,Resource maintenance, +Identification,Update frequency,As needed +Identification,Descriptive keywords,? +Identification,Spatial representation type,vector +Spatial extent,Place,Global +Spatial extent,East,180.00 +Spatial extent,West,-180.00 +Spatial extent,North,89.99 +Spatial extent,South,-89.99 +Spatial extent,Reference system, +Spatial extent,Code,4326 +Spatial extent,Code space,EPSG +Constraints,Access constraints,unrestricted +Constraints,Use constraints,unrestricted +Constraints,Use limitations,The information contained in this dataset is for general information purposes only. +Distribution,Distribution format, +Distribution,Name,json +Distribution,Specification,API response. 
Docs: https://space2stats.ds.io/docs +Distribution,Distributor,Development Seed +Data quality,Lineage statement,TBD: description of dataset creation +Data quality,Lineage process step,TBD: methodology +Data quality,Processor,GOST and DevSeed +Metadata,Metadata standard,ISO 19115-1:2014 +Metadata,Date stamp,"45,541" +Metadata,Language,ENG +Metadata,Contacts,"Andres Chamorro (collaborator) +World Bank, Development Data Group (DECDG)" diff --git a/space2stats_api/src/space2stats_ingest/METADATA/metadata_content/Space2Stats_Metadata_Sources.csv b/space2stats_api/src/space2stats_ingest/METADATA/metadata_content/Space2Stats_Metadata_Sources.csv new file mode 100644 index 00000000..d45dfbfe --- /dev/null +++ b/space2stats_api/src/space2stats_ingest/METADATA/metadata_content/Space2Stats_Metadata_Sources.csv @@ -0,0 +1,15 @@ +Theme,Name,Description,Methodological Notes,Source Data,Citation source,Organization,Method,Resolution,Item,Start Date,End Date +Demographics,Population,Gridded population disaggregated by gender.,Global raster files are processed for each hexagonal grid using zonal statistics.,"WorldPop gridded population, 2020, Unconstrained, UN-Adjusted, https://www.worldpop.org/methods/top_down_constrained_vs_unconstrained/","Stevens FR, Gaughan AE, Linard C, Tatem AJ (2015) Disaggregating Census Data for Population Mapping Using Random Forests with Remotely-Sensed and Ancillary Data. 
","World Pop, https://www.worldpop.org/methods/populations",sum,100 mts,world_pop,, +Socio-economic,Nighttime Lights,Sum of luminosity values measured by monthly composites from VIIRS satellite.,Monthly composites generated by NASA through the Lights Every Night partnership.,"World Bank - Light Every Night, https://registry.opendata.aws/wb-light-every-night/",tbd,"NASA, World Bank",sum,500 mts,nighttime_lights,, +Climate,Flood Area,"Area where flood depth is greater than 50 cm, 1-in-100 or 1000 return period.","Flood data combines fluvial, pluvial, and coastal flood exposure using the maximum value. Return period indicates likelihood of disaster (1 in 100 years).","Fathom 3.0 High Resolution Global Flood Maps Including Climate Scenarios, https://datacatalog.worldbank.org/search/dataset/0065653/Fathom-3-0---High-Resolution-Global-Flood-Maps-Including-Climate-Scenarios",Wing et al. (2024) A 30 m Global Flood Inundation Model for Any Climate Scenario. https://doi.org/10.1029/2023WR036460,"Fathom, https://www.fathom.global/",sum,30 mts,,, +Climate,Population Exposed to Floods,"Population where flood depth is greater than 15 cm, 1-in-100 return period.",Flood data is intersected with population grid to estimate population exposed.,"Fathom 3.0 High Resolution Global Flood Maps Including Climate Scenarios, https://datacatalog.worldbank.org/search/dataset/0065653/Fathom-3-0---High-Resolution-Global-Flood-Maps-Including-Climate-Scenarios",Wing et al. (2024) A 30 m Global Flood Inundation Model for Any Climate Scenario. 
https://doi.org/10.1029/2023WR036460,"Fathom, https://www.fathom.global/",sum of intersect,30 mts and 100 mts,flood_exposure_15cm_1in100,, +Conflict,Number of Conflict Events,Sum of conflict events (ACLED).,Conflict data is filtered for event types and then aggregated by hexagon (count).,"Armed Conflict Location and Event Data (ACLED), https://acleddata.com/data/",https://acleddata.com/article-categories/general-methodology/,"ACLED, https://acleddata.com/",count,point data,,, +Conflict,Number of Conflict Fatalities,Sum of estimated fatalities from conflict events (ACLED).,Conflict data is filtered for event types and then aggregated by hexagon (sum of fatalities).,"Armed Conflict Location and Event Data (ACLED), https://acleddata.com/data/",https://acleddata.com/article-categories/general-methodology/,"ACLED, https://acleddata.com/",sum,point data,,, +Urbanization,Urbanization by population and by area,"Urbanization is analyzed using the GHS-SMOD dataset, including comparisons with population",Global raster files are processed for each hexagonal grid using zonal statistics.,Global Human Settlement Layer (https://human-settlement.emergency.copernicus.eu/degurbaDefinitions.php),"Pesaresi M., Ehrlich D., Ferri S., Florczyk A.J., Freire S., Halkia S., Julea A.M., Kemper T., Soille P. and V. Syrris. Operating procedure for the production of the Global Human Settlement Layer from Landsat data of the epochs 1975, 1990, 2000, and 2014. Publications Office of the European Union, EUR 27741 EN, 2016. doi: 10.2788/253582.",Copernicus Emergency Management Service,sum,1000m,urbanization_ghssmod,, +Urbanization,Built area,Built area (in m2) in 5-year epochs. Source data report built area as total m2 built in 100m pixels at a global scale.,Zonal statistics are calculated as simple sum of built area in metres squared.,https://human-settlement.emergency.copernicus.eu/datasets.php,"Pesaresi M., Schiavina M., Politis P., Freire S., Krasnodębska K., Uhl J. 
H., Carioli A., Corbane C., Dijkstra L., Florio P., Friedrich H. K., Gao J., Leyk S., Lu L., Maffenini L., Mari-Rivero I., Melchiorri M., Syrris V., Van Den Hoek J., Kemper T. Advances on the Global Human Settlement Layer by joint assessment of Earth Observation and population survey data, International Journal of Digital Earth 17 (1), 2024 10.1080/17538947.2024.2390454 +",Copernicus Emergency Management Service,sum,100m,builtarea_ghsl,, +Climate,Standardized Precipitation Index (SPI),Index for a given timescale measuring drought severity based on precipitation anomalies (SPI) ,Processed by Benny Instanto (GOST). The SPI was constructed with a 6-month timescale window based on precipitation data from CHIRPS. The resulting index is standardized (-3 to +3) and stored as a netcdf (~5km resolution). The netcdf was converted to h3 using the h3ronpy raster_to_dataframe function which uses the centroid value for each cell.,CHIRPS3,tbd,"Climate Hazards Center, World Bank",sample,5km,climate,1/1/70,1/1/70 +Landslides,Landslide Susceptibility Index,Global landslide susceptibility raster (relative index) intended for regional screening and landslide nowcasting workflows.,"Derived from a heuristic/fuzzy overlay of static factors including terrain slope, geology, proximity to faults and roads, and forest loss.",NASA GPM Landslides Program — Global Landslide Susceptibility Map (GeoTIFF). (https://gpm.nasa.gov/sites/default/files/downloads/global-landslide-susceptibility-map-2-27-23.tif),"Stanley, T., & Kirschbaum, D. B. (2017). A heuristic approach to global landslide susceptibility mapping. Natural Hazards, 87(1), 145–164. doi:10.1007/s11069-017-2757-y",NASA,mean,30 arc-seconds (~1km),landslide_susceptibility,1/1/2017,12/31/2017 +Fires,Fire Density,Fire density (number of fires/km²),count of fires per km2 occurred into pixels ,NASA MODIS Fire data (https://firms.modaps.eosdis.nasa.gov/active_fire/ ),"L Giglio, W Schroeder, CO Justice. 
Remote sensing of environment 178, 31-41, 2016. 1625, 2016. The Collection 6 MODIS burned area mapping algorithm and product.",NASA,mean,0.1 deg,fires,1/1/2003,12/31/2025 +Climate,Tropical Cyclone Frequency,Global tropical cyclone frequency summarized to H3 level 6 hexagons as mean frequency per hexagon.,Derived by converting an IBTrACS-based global cyclone frequency raster to H3 and computing zonal mean value within each hexagon.,"International Best Track Archive for Climate Stewardship (IBTrACS), NOAA NCEI, https://www.ncei.noaa.gov/products/international-best-track-archive","Knapp, K. R., et al. (2010). The International Best Track Archive for Climate Stewardship (IBTrACS). https://doi.org/10.25921/82ty-9e16",NOAA NCEI,mean,0.017333333 degree (~1.9km),cyclones,1/1/1969,12/31/2009 +Drought,"Drought hazard (SPEI ≤ -1.5, 100-year return period)","Global drought hazard raster: expected number of months with 6‑month SPEI ≤ -1.5 for a 100‑year return period, summarized to H3 level 6 as mean months per hexagon.","Extreme-value analysis of monthly 6‑month SPEI time series (1902–2018) from the Global SPEI Database (CSIC). Per ~0.5° grid cell, months below the threshold are modeled for selected return periods using Poisson–Generalized Pareto point process models; H3 aggregation uses zonal mean within each hexagon.",Global Drought Hazard (HDX download): https://data.humdata.org/dataset/30b85665-4c3d-4dc3-b543-3a567a3dea37/resource/6744572e-d5d1-4033-9d64-c87dc565586a/download/global-drought-spei-1.5-return-period-100-years.tif; Dataset metadata: https://geo.btaa.org/catalog/30b85665-4c3d-4dc3-b543-3a567a3dea37; Source SPEI database: https://spei.csic.es/spei_database/#map_name=spei06,Institute for International Law of Peace and Armed Conflict (2023). Global Drought Hazard. Humanitarian Data Exchange. 
https://geo.btaa.org/catalog/30b85665-4c3d-4dc3-b543-3a567a3dea37 (CC-BY 4.0),Institute for International Law of Peace and Armed Conflict; Humanitarian Data Exchange; CSIC (SPEIbase),mean,~0.5 degree (~55 km),drought,1/1/1998,12/31/2018 \ No newline at end of file From 077913ef520b18ce5d7b0b4a2ff285eb36804c17 Mon Sep 17 00:00:00 2001 From: gabe-levin Date: Wed, 4 Mar 2026 14:16:24 +0100 Subject: [PATCH 2/2] fix: stac items to match new metadata for GeoE3 layers --- .../space2stats-collection/collection.json | 18 ++++++------------ .../drought/drought.json | 2 +- .../space2stats-collection/fires/fires.json | 16 +++++++++------- .../landslide_susceptibility.json | 8 +++++--- 4 files changed, 21 insertions(+), 23 deletions(-) diff --git a/space2stats_api/src/space2stats_ingest/METADATA/stac/space2stats-collection/collection.json b/space2stats_api/src/space2stats_ingest/METADATA/stac/space2stats-collection/collection.json index 3350ccdf..f9b2e3bd 100644 --- a/space2stats_api/src/space2stats_ingest/METADATA/stac/space2stats-collection/collection.json +++ b/space2stats_api/src/space2stats_ingest/METADATA/stac/space2stats-collection/collection.json @@ -46,12 +46,6 @@ "type": "application/json", "title": "Built area" }, - { - "rel": "item", - "href": "./landslide_susceptibility/landslide_susceptibility.json", - "type": "application/json", - "title": "Landslide Susceptibility Index" - }, { "rel": "item", "href": "./world_pop/world_pop.json", @@ -64,12 +58,6 @@ "type": "application/json", "title": "Tropical Cyclone Frequency" }, - { - "rel": "item", - "href": "./cyclones/cyclones.json", - "type": "application/json", - "title": "Tropical Cyclone Frequency" - }, { "rel": "item", "href": "./drought/drought.json", @@ -81,6 +69,12 @@ "href": "./fires/fires.json", "type": "application/json", "title": "Fire Density" + }, + { + "rel": "item", + "href": "./landslide_susceptibility/landslide_susceptibility.json", + "type": "application/json", + "title": "Landslide Susceptibility 
Index" } ], "Title": "Space2Stats Database", diff --git a/space2stats_api/src/space2stats_ingest/METADATA/stac/space2stats-collection/drought/drought.json b/space2stats_api/src/space2stats_ingest/METADATA/stac/space2stats-collection/drought/drought.json index 405918d8..fa85b098 100644 --- a/space2stats_api/src/space2stats_ingest/METADATA/stac/space2stats-collection/drought/drought.json +++ b/space2stats_api/src/space2stats_ingest/METADATA/stac/space2stats-collection/drought/drought.json @@ -48,7 +48,7 @@ "method": "mean", "resolution": "~0.5 degree (~55 km)", "themes": "Drought", - "start_datetime": "2002-01-01T00:00:00Z", + "start_datetime": "1998-01-01T00:00:00Z", "end_datetime": "2018-12-31T00:00:00Z", "table:columns": [ { diff --git a/space2stats_api/src/space2stats_ingest/METADATA/stac/space2stats-collection/fires/fires.json b/space2stats_api/src/space2stats_ingest/METADATA/stac/space2stats-collection/fires/fires.json index c2cb5513..89ab2f53 100644 --- a/space2stats_api/src/space2stats_ingest/METADATA/stac/space2stats-collection/fires/fires.json +++ b/space2stats_api/src/space2stats_ingest/METADATA/stac/space2stats-collection/fires/fires.json @@ -41,13 +41,15 @@ ], "properties": { "name": "Fire Density", - "description": "Average global fire-event density (expected fire events per 0.1° pixel per year; values scaled ×100), averaged over 1997–2010.\n", - "methodological_notes": "UNEP/GRID-Europe aggregated monthly World Fire Atlas (ESA-ESRIN) fire detections into a global mean fire-density raster for 1997–2010.", - "source_data": "World Fire Atlas (WFA, ESA-ESRIN) monthly products (modified algorithm 1), compiled and processed by UNEP/GRID-Europe into global fire density (1997–2010).", - "sci:citation": "UNEP/GRID-Geneva/Europe. 
Fires density 1997–2010 (derived from ESA-ESRIN World Fire Atlas).", - "method": "sum", - "resolution": "0.1 decimal degree (~11 km)\n", + "description": "Fire density (number of fires/km²)", + "methodological_notes": "count of fires per km2 occurred into pixels ", + "source_data": "NASA MODIS Fire data (https://firms.modaps.eosdis.nasa.gov/active_fire/ )", + "sci:citation": "L Giglio, W Schroeder, CO Justice. Remote sensing of environment 178, 31-41, 2016. 1625, 2016. The Collection 6 MODIS burned area mapping algorithm and product.", + "method": "mean", + "resolution": "0.1 deg", "themes": "Fires", + "start_datetime": "2003-01-01T00:00:00Z", + "end_datetime": "2025-12-31T00:00:00Z", "table:columns": [ { "name": "hex_id", @@ -60,7 +62,7 @@ "type": "float64" } ], - "datetime": "2026-02-25T20:20:37.073528Z" + "datetime": null }, "links": [ { diff --git a/space2stats_api/src/space2stats_ingest/METADATA/stac/space2stats-collection/landslide_susceptibility/landslide_susceptibility.json b/space2stats_api/src/space2stats_ingest/METADATA/stac/space2stats-collection/landslide_susceptibility/landslide_susceptibility.json index a7003d6f..3dff7f0e 100644 --- a/space2stats_api/src/space2stats_ingest/METADATA/stac/space2stats-collection/landslide_susceptibility/landslide_susceptibility.json +++ b/space2stats_api/src/space2stats_ingest/METADATA/stac/space2stats-collection/landslide_susceptibility/landslide_susceptibility.json @@ -41,13 +41,15 @@ ], "properties": { "name": "Landslide Susceptibility Index", - "description": "Global landslide susceptibility raster (relative index) intended for regional screening and landslide nowcasting workflows. 
", + "description": "Global landslide susceptibility raster (relative index) intended for regional screening and landslide nowcasting workflows.", "methodological_notes": "Derived from a heuristic/fuzzy overlay of static factors including terrain slope, geology, proximity to faults and roads, and forest loss.", "source_data": "NASA GPM Landslides Program — Global Landslide Susceptibility Map (GeoTIFF). (https://gpm.nasa.gov/sites/default/files/downloads/global-landslide-susceptibility-map-2-27-23.tif)", - "sci:citation": "Stanley, T., & Kirschbaum, D. B. (2017). A heuristic approach to global landslide susceptibility mapping. Natural Hazards, 87(1), 145–164. doi:10.1007/s11069-017-2757-y ", + "sci:citation": "Stanley, T., & Kirschbaum, D. B. (2017). A heuristic approach to global landslide susceptibility mapping. Natural Hazards, 87(1), 145–164. doi:10.1007/s11069-017-2757-y", "method": "mean", "resolution": "30 arc-seconds (~1km)", "themes": "Landslides", + "start_datetime": "2017-01-01T00:00:00Z", + "end_datetime": "2017-12-31T00:00:00Z", "table:columns": [ { "name": "hex_id", @@ -60,7 +62,7 @@ "type": "float64" } ], - "datetime": "2026-02-08T18:21:53.658868Z" + "datetime": null }, "links": [ {