From e1a01d48f4c7bd953330fef0b490752e10fe7585 Mon Sep 17 00:00:00 2001 From: Alexey Zakharenkov <35913079+alexey-zakharenkov@users.noreply.github.com> Date: Tue, 9 Feb 2021 10:09:15 +0300 Subject: [PATCH 1/5] land_area and coastline_length estimation for regions, and improved mwm size prediction model - with coastlines, land only area and broader region sizes (up to provinces) --- web/app/config.py | 19 ++- web/app/data/model_with_coastline.pkl | Bin 0 -> 24029 bytes web/app/data/scaler_with_coastline.pkl | Bin 0 -> 656 bytes web/app/mwm_size_predictor.py | 3 +- web/app/subregions.py | 204 ++++++++++++++++--------- 5 files changed, 148 insertions(+), 78 deletions(-) create mode 100644 web/app/data/model_with_coastline.pkl create mode 100644 web/app/data/scaler_with_coastline.pkl diff --git a/web/app/config.py b/web/app/config.py index 7377eed..dd3912d 100644 --- a/web/app/config.py +++ b/web/app/config.py @@ -12,6 +12,10 @@ OSM_PLACES_TABLE = 'osm_places' # transit table for autosplitting results AUTOSPLIT_TABLE = 'splitting' +# table with land polygons (i.e. 
without ocean), split into smaller overlapping pieces +LAND_POLYGONS_TABLE = 'coasts2' +# coastline split into smaller chunks +COASTLINE_TABLE = 'coastlines' # tables with borders for reference OTHER_TABLES = { #'old': 'old_borders' @@ -30,12 +34,13 @@ DAEMON_LOG_PATH = '/var/log/borders-daemon.log' # mwm size threshold in Kb MWM_SIZE_THRESHOLD = 70*1024 -# Estimated mwm size is predicted by the 'model.pkl' with 'scaler.pkl' for X -MWM_SIZE_PREDICTION_MODEL_PATH = '/app/data/model.pkl' -MWM_SIZE_PREDICTION_MODEL_SCALER_PATH = '/app/data/scaler.pkl' +# Estimated mwm size is predicted by the 'model*.pkl' with 'scaler*.pkl' for X +MWM_SIZE_PREDICTION_MODEL_PATH = '/app/data/model_with_coastline.pkl' +MWM_SIZE_PREDICTION_MODEL_SCALER_PATH = '/app/data/scaler_with_coastline.pkl' MWM_SIZE_PREDICTION_MODEL_LIMITATIONS = { - 'area': 5500 * 1.5, - 'urban_pop': 3500000 * 1.5, - 'city_cnt': 32 * 1.5, - 'hamlet_cnt': 2120 * 1.5 + 'land_area': 700_000, + 'city_pop': 32_000_000, + 'city_cnt': 1_200, + 'hamlet_cnt': 40_000, + 'coastline_length': 25_000, } diff --git a/web/app/data/model_with_coastline.pkl b/web/app/data/model_with_coastline.pkl new file mode 100644 index 0000000000000000000000000000000000000000..4fde43d5d775e390044cdbe62a361fad2cbd82c3 GIT binary patch literal 24029 zcmdtKc{o;G|Mzd6LS#s#giHy|D%IMkG^!{`Nyf<3X{eNtP)f*98q7oH%rnbrI7O6< zQ5iB;6d6*)v%2nUuk-%tcOCcr{r>eFkK=f?+vk4o&syuV=C#+}Zp_B^mR6>Qb~cLk z7p)ZyjI9jq?M>~uv~{$&C=7DfDU8=BOtQLc3=9mGrgk=_RupDkW^`g_WJY26>t_?w zb9Sbt6xIXGx~%BJIYVn}LkgQN3;MsIqk}Dl{jZ;mZB5PAQaCiwuYdjhWyHbOio&S@ zju;(k7)_YbWkgrhDO~#rwS^42Oz6bMk+Oh(2Rr)3^n$&Gm8}hh`%vBjT~73ay}6x* zjirUnISS7$-391}3wE|fhDH`v77k7nUT>Q09xG0PNrQ)(N`PWs7$N>gV$`W-(Bg)d7 zlx12JQP~5Gzkgc(x6s7F>4GU2WyN34GuuKD^S*VHvQmp8E{ihfw|BQwBs4WO|4Pqa z{~Ta&phy~dt20nmIZ&jG{>o2-f90UG2B?1c`|r_T-mrJPaKYBj!GI!TzS#V)c*vTg zNXThXR-;H9%477T$eaHwV2h2EHNOK^(4r`!fX)95Qpw0$tqT>Y-%gepP}p@q;voJO2Om?LZvj);qhYY10_8{#Iy z2XPgbi06oEL=EBvq7Ko7XhXCk 
zIuJdGLBts12jUmvw*wdvEC_Z42Z9^Hi{L|uB95GxT92uZ{$gcL#sA%|FlP(&yp z)*{v;HX^nnb|L^`7vc!wDB=|2EW#9FhA>B1AkHH!5mpFmgbl(LaREU=1R%l@R74q~ z8PSR8LVQBZAUIHl@*@@_ln@&bn-R*0eTYK{ErcGz0AYl%M%+O7BZ3iOh^L5Wh)P5Q zq7~7Gc!zk8=s9RU&Lh*yXvL_6XmVgxaZU_=F)8?geh191)Efw+aZ ziwHx+BMK3(5ekU_{IHXu|G06`#B z5qgMo2y?`Fgbl(CaRYG&;e+r)1Rw$tQHXd%5+WIqhsZ}1AW9IWh;l?7;x(cL@dnY3 zc#G&p^dm+QzYrX#N8m*8Ab1gCh?NL&gakqou?iuDkVeQLWD# zG$0xgO^9Yh3!)YA2GNG-M0`Xrq7Ix3!HZaokU*?PY(Z>8Y)9-s>_h+rflx>6L+nQ! zMjS;PL+Bt*AWkAI5a$s#2wTJj1O;J-utzu`91#~0mk^f`PKcX`TZr3;1Vk;O9?^*S zfEYjsqv6C>gc-sW;f}bC$VO1sI#AXb>GGl0h3r2sni@OU+SwaW)|36{hTr|@MlH&w zfAymWSiLBl&BeXdN_U|I=qL^<`Sk(4L5kAkEHiPi`#_GTXBXH$92ef4Gz(m1dlm(T zm%)aiGm2AYZ-I><<*kC~6!;MP(zH?REqGm@@yKDEhah!{#fz3q1b2e*tYc+!YiEF% zUeTg{k@w(ec&=NmF$>ml#szR|8BYT9v84&Z5ry!uV^Zge{0jJfsL$iQ7!8v4nBotL zs(Lmy6O&tDP^j+!Hq@Op0A8d49C$fka*7eLURi<;T^oyIK<@x z5UpIR*7->g>uwuQE?U`K3G360Pdd6L;&9(wG`*~^261{_EhRs&P-w86PwP-r<`b-2 zcRBi^@tZQNGl*T6x9VLwEb{(60#a5z%2KDS)-M-h}!1maG z1-hgztHe6(2b`MviZs~Jx@ecdjX>DUUGMpWRtf`3sZWjF;;>HVD1-2V&`JoEyF(+j z9%EfQ*KF6gVldXp3yW>?sieWLFWhI@$}@1dN7ix##J0siG9CWAUukP6(ctX?6-zfO zcbtwlU8V1f(xR|Er~c+{_6MbK*X6ed)jvAJi$hg!Il~@+<+=6-HJ{3{&bv0Ep7wwS zNAE?L)a2g9@ynZP65nrHjdiQIPdHe3RziV|&jO=YUBcmBEec*VQ&5X_*PZ0ADt$tJ zPPEt~xFZRt!yv%5Ot<|p)*bBDI+Ht933Zig?JqT4!8(tK<6GHP>Gl!~hRwrr%i)!$ z$4wXSiNw18gP!Yc?v!AAWf$)8u5F;fw0%uqxeuFQ-TdvUI;(8t-jMzgq=0le;Z+eB(g% zd#|m#lAc59#~ZwwFVOwGUFOr*Qv)=3a!)Z~{)k>qPmN{Rhy<77aK$ryeK&dC14-ORvvV&2j)A*7Njb)0#D_9hUZ`B zux@OCVIlj95Gd6byo9~>7`PnXAEi@N27*f?4y<|Fhjl_Jn}<6s%HW&DKVNI_&4Sr` zKc0wB$cH1+QyZ@;dt;rI=b?n#FY{oFtd&h@YYiaFRWkRL&U1%9W1V~7Lg5V|fk0u6 z@^flII%E$u=vbVV4Z;ovr^q(eV4Z{QqWk_EA>11s5#+cb4U}G5AERHH4$EhSnp_y( zVcnbWxW%4@-M1jw zP8#$rVp^d~_j8=#_$~Q6AAtxZ`^bcM2@EC9Sz8%=27}ed9W8HGLbARI%JrTW=b*vj zgvrtGdiB7-$EozB*+=mF`eubCn!`ALPMd7>^S*R}cMtr;H?B;8g-biiCr%9kh94Di ze#?uo&M>pF_|^9!$oy?n*H*zGkfn5fN{{FW8&7jmTT}##o(En%KF2xbmAD8}VwhrovfXLE`G#!4ze0G2+88 z!;RxhR}xai} z$3}GbY3$SP7zY9mBELFc9s>G-Bkfg-If;eCYtQtb7a)$l3p=4V(GO}uEFZlpWgv>B 
zCF@H1N3lJhtNzb6uHqmLck26V25=G|i{@l*om@%eJQaS(YV{a?UOBaUoM9S-GVS~E z?j6K_R?Ug`*3TOP;&)43({8^7KksfB-Qqe8`1*_U^)8Kq(RWHG7aVT^k=E*^>I_Uo zTI<-D;r@?+`}^^Tstf_bupwoOa6bcafBXbQbk`H2eG(*}^?o^CB}}y1oC#}>5F!$- zoGqIsIf!CLpG(n+OhicGi>JrN`@uTP+*_(I7Z8Tqf=ZtxG2wnSdUgRv-8v@X{)D$x zp2!ez@#*Rc+09Gz%ni;CbFdM&zs?v=$JBv=UlYTdit}OW&veT+mIm;pX>rCal}6AQ zBY5BPdljIZ7PWoF^BOGX+$OZtn+o06y)yH93Zb3;q1qd@u8_@q!a3kwIrd*>ze(xt z=}dUvL+3}FVJQ5@UN&=dDus zvR!7snP&qa+dn~L75?f7)Xs5Bgo%oE0kYi<_6v6JsDA>iE`;htM$~|Q(_JYneKata zlIr{HbS!XJe0BKTyA)vSZ}7`IE)G`hli^Fc)C*XSyM>QyMgZqbRbJ<3mB1lCd2yYM zC%91miuYY-93aO{+b^DC^=In>{zshmA3Y0U{bKu`g!Mz<(4B~P{Kw;=R!3~`?nRTp z>*5dNi7&o@Y&S05$(>PWaDfte$1byR6oILiFD`H#{{qy1W*x|m>IbhnpEma#EeDZ) z8Y|>=UV|4~f)CFsg}`{*w0!ZaJz#R9;JT27G;B{{QG0QwZ64HLOiWs>|AE_knSqkh zwH#kTvP$4e=VO9ItfjByG*dfRQ}1iT+K~^)_Ak6C&}em3H@K_6DMrem0h|ih#4)Fn z4Ep$<=*tj97! zv7}3Z#(~=d$yg_eZEVKC?ZutADfL=F`uT~2g8Qz#L9qI3%PH=PUf{)-J<9to2hL1t zq`ptdz~zegCA}w!I~ShxCJgzJ^_j1{MT08&xi8ftOk!aHiJ)n{2lI3 z+;RCM%Yn_ok_PS18F-wd{k7-*kNbH*ZAr$D?FB{f;iOcwq<0=gK845d90W?k*jlT~*DPcKTYQn|X&)-9&e$1v z@wkcQ^QvvZT}40PmuBGcB=^@b{;ZA)@U3H#RY#bg81PFxR{gmE#*_#>F7D0(v7=pirP6I+@9?48 z-C1vOI*fvHJ7Y4VL6bvx%S>zmu1A|ao-g;%h{N$)OwBl8cia#9vkabp%n0FwL*Kik z%A#@qQQP;SBY{63x_=EY+<71l@>Ga^cN6)7^Fx@~@T2I0IWR`4b)1e9Acpz2tXT9l z3)cG6Hk?|@MUef%wd5NwK6kyw@v~Slb|6DA2d;3xIgm&F2*~kY7Mr8*uBd9@P-Ygn z_@^W>yY;D=+H^KFSt7JJ{w)(hw&UmS3^!VFwu8fGi6c&IJBhtVy{3=BMgW_fgq3>p z@%X;GW4B1phf28qUS|PEQY3tJ$jKqdwH*A+o$8b-@4)qgdBMid@w%0u?)YZDf}MiI z8IK0nrTePj>yCo zaU)nwSxdY7dM$CBf21X%{t@60T{Z#6Sg>EHR-)ntGR>fyqVnRyigkq6tM4jF?2#b% zr1q+u6(hJ_4#~eXj9$q{P+Pt2rWUdjwo~Wg8>Q30k(As|dqWurvY)=dU{lJy^fQnY zTeLJuc_UG*WGeKEp#^jYX?CXF{SHp^Cc-KHPLRCM_^IX>CSvEiq{Di?OvLBoh9>V{ z2LlaOcemgyy8o72%Otss=RxyzCLDQBU*mo?GQhw!3o6 zCl`LSE5vcCG*q`_GF z_?`^cweM$B?luo>ZzM(N$wi(&b%k2FsZG>c9KWH$Y!1r)KXoqU8}$5nu3;US4y*U7 zzM=0Mar}JGuW;i(k%x6;xWVl3p7pwJtg{_fZg}4F4C{EbS=*Ohr;q>0bbK2i*q+$j zfMmXrxGd}kjh6A5Z?;xJ`SGlp36?lq-)3)Y(z&Tq z0aZrbUGh8l3CSnTx0upskT5Ub?Vsj`+qLQ2o<1y&74WidfI^Vy7o3jp+1U)OyAe1Y 
zR>PIP=KgiK9C+V%f289*};~k#k>V z^|1Oth!&ga?Ns-UI2=$cE-CfJ@SQ_i}s&-8pF{Y-H- z>W^+n!|BK@l5L+#D)}S5N+OT>WKv6TxIL{C-KG_lQ2&y6rEm8Hj-N@W?K9q!8Q9*F z{gRD~HrM0wV)?0_Tz`**=L2=veUD94Z z0H9-)$Qn8dZ*OUdIic%qc})Bflns!G6B9=n%+Jv-+;%@hZ^@V z6dkAQC^mId>bv*|qf~8}{8@nTB=Yvh+6xd$u4BnvlV!juTJ-2?Awi-o>hYHCtqjBg zQCn6c{@0+I!_WV1 z-=FkF@rd=QkZXu2k@^1i zTK2}rc%0o4Rl6!gst=e-v4y>^=>oTpO+P$zU<_0r`K4Rr-Gb*wWcwF)S(A;0V-grY z<5SysY!;l!5!6}|Q;FwgwO?PSKGORMHgBBX)701kI#gAD?M(g&%tBM#8<|=Fxt^o= z=~z-lE)_~U7#!3F<6yO2zvS2SAuzqm$Zx`zir3}I`sVKNL{+a`gitqjw+>Vv25Q<1 zr3RN3;rw7;=)`QF63tN%L-1~llFDe=LPV@8wa{kt*5H?d`y9Vg@m|t1mKMZ)? z*j=X{O#p4*C!1IgHRE*HQ+a4QyTYJe$*r50(t1Eg;TgF`_+Ex&{+@W)FYtZGFQ7ee>HgMCG}H6p+K z3$RFWT(go^ipP@+^p5j|gwy*E>Y^(b9G9~Z)+*d`+ZMjT^F}GQV2}KDWl$#|xrQrm z0O*}g4I5RM1fm8@&*-0S!0DYozArl(eCoi>O+sYQJI0LBjAIhlmkD_CX)i;FMx<8`=`w8Wy11788lsopbDmxh7R zbx(?k#A}=mFS)yK1JAqxTa=5}DQ#mQ21Ism+=9lrRQ;W2Lp9O-;_vlDvOJURbFJPE z%Qv@e;mN1nhV^+Rcs@fucbR_N*WY!w6uPE9`*h?@0gM>9bMJvA-5xnkB<*FIz2TRh zD1^oXOWZO(LmVzS-pFhBx%9@Q0NUG>-Ms8s3eVpZve_?Diq~1maJ57^F(l%`XS?~7F|c$(@ZpsV<)MQyZ&KNWAn zlNN!x>T!SS$aEa86Z19U=mAq-LWDiO7U6I;4@=)vRe1$qXqZ}$QZ~GnwIj15(+z0k zSI3sy{%J4buJ30WdLD8@;AMuby%doUCAh3Hx)eWOAjj=wxR2}JChYfb1-ryM)Vu$G z(xH@P^G1t}3PU|U*!nd+1TJIY@u5MwWxsLkDc` z^k#rM%iC=yA2fk4dV#(bEPvYDZ8uvqsZ$Dtx7k!kJf{MIdKdO1i%PIxNI#R|z7Uo9 z%pFz_eq5Puv^)LJx=*tD?{`hU11t5qziGb-#eNZ4>sS*%SOy&{_v^oPErIWs{N&a@ zp9US)^FA>_&vVFfFh5+ra#8lH`Y(XDnXiQC7P@Z!`0vhwU>UdZ2f)VexpJI5T}RqG zFMg}C@)i}c$mp@MEUN+4rC(mISEK95@~7Lnl#4b8;YzCjhUlo*KtB2-hshGfmdIF!zDJ$%kh~wz2O(C z^9oSKwUIW0+6~e#WO*jT<+oQ(66LM{raEQ5jgS8H^Qpbj;j0-Q!Ug-5Y|23S>wI#f zBIj0myEfl{JQ~}mWf4C>__F=lUQ_gXRPNXEP~grmaJ9H|VjpxHoT zrwKiNq&=Ct@k>p(^I_Q{o4O(QLQrDB>}J+RZ(sPhq|TjfZpG=9H<*NdDNo_qL#B^* z`qAx?Q~VR_IWQ}P^X)P!zI_{r;fHCPfaR>Vp{J9 zd>40tZyt?~ssue9WO~VT%!FxazSnvK#CUe4udk;+za({g5-zTh6^?>MyXSUn8Bd2( zKi^Q+dgS16d#!?ct}|DGzy}JU6(tcM*Nxzk;CT!}i<7yM!~Qwkw>KS!eFdpNLuT?+ zmJMAu-_I(m7H;n{3j_MQb`8xK(RHMs$#fiaQICkQVk6f3NN-M)E5Pw1pL5!#To2T+ 
zP6Epg$%wC_Wx@MO0ax$cr`sdvp`^VO0r8My%eE3b!=mq``D`QB1e}#0U9yhYv-2!p z@&!I(P=c-Ec<<=2u!YNfJTSW#RlQ#s$RBT$#<`x1|ZT zdIyPpK0?ItEl0|R?@xpD3!{ZkhZhsa5@*ed9KM2&8sbq|N{)D4R8*o_KQ*ZgDjoZ1 z6W&=0Ue|fmte6}D2aaExHv5nT$mhT7-(~HrxkAP5uK(K2=j6s{(7*em)({#Wlj~;G zfW1MS6ZH0;oDZ_DUvqP92*mR|GCwA7WZdJ53V;G{F2_11{rpr zN9FYOCVS?K+Xh#~fW3!4q%0ju1LQh2`P?N*d%N<)(JaVzUSWK72@PIfGG$T|1abVx zbqg}zn)D6?Y`ZxNHnTY-2v6R}`*_~&yu~4Zvk0g6VmsG&op2g#y6wwu!%xG0A=isZ zd-alwk6qoI0l6M+<=eFNJ|Ndw$^M+wZP;#g#_LKu7?wx~WiN=q>39@#{iO{Xecwp> zN2kD4G;b4Vl`&-Sj|b$sqlfoh0qY~7IDYmX2M)3i%>qi;G8SK#N<3ePUvf9~q4{Ij zmarmxPqQ1`k+ye3l2Hwap+2ho;Y7vj=#|o?5f*5?N5*d`%)V`pKrWmXJSWJ+){DcP z|GYLLXe_;@x1JfA%5$)V8!R#lt!=E_#W zo1EVQi0xmEFB=VAFA;<#~Gr>{?Ou_9%)k=|A$h3+cZutK?_# ziK)<$rrmwKARd>OqKH8Tmi6>?mB?h(rAk>9Fpk(N^&#jz){)Nxd}s|Z&qS(0QC>#L zuZkwH+R@%B@_QC;4@Qiu2Uagmgl{eVEhIF1K)T$bx3WP?3FcQd@ymD8+dp!g!>{?; z$l!A*xL}jJqqrvzjx+SDWVK}ha=zuZNr2~<_AQ)mw5`4gx79NN*UJ{$vOSKtTphRT zS0OW1ek;7x22k&v!C>TJH)Vr|Q@M@8w8v@ch^yUtk#|mKBUg z@yCLJH<#0B0WX1lDZ4_$XcV@0BW$7Gpg0Y_vP|aO+7N^D!|%cE9cjm&VI5fx?p}Sw zBUxMYM|;IN-8C(6D;qpG=wlD^X;3HM>z>|*CY*1AJB5^^rT#2e;`+<7Ehp&ZitH~J z$36@-R}P1#o=LCw;~xjo$s48`B1ZA^Rm*o8Y*m)@@otZk*CAugF>sMD$+!ePcX2Tr z5$-Yi1m^EwQ~7>X$u5WQYYS4S$Eyh?3+uNVJ(#i`#&?i?u zrY#kaxjIiJx62Ff_nE(bW%44B*FJ6x^qraReUY1kpW7e0HJ#A&=zp#EvZ@`=i zo+LFK-<9l*(?PCJx?k*_(JCke%xdDErEfEUn^b*Uze0h_pH><4VA-(M@Bre4!^|xD!C7UjGupG-MgZc9GLXoAb77`2p%8I zUw^TnWBkWF&yHwO7xCBq6llZJCIs9NwQ(anA7A{Z%Y%&mg+c1jIqsDwR#IglgnL0BXGZsQ}+(eFJYYaLn$~A9-t&&e? 
z1-6WXI*n^14ZZYnJK4UpJ8ADbKr4Y$mTHRxU-1x<&rjZ6I6MZJZn(D9&xjN2UiaS0 zc*u#vz1=GBH{HokOiC@vAA7-0G>11b`W#w8guI;6YYAIMNZl_x#`onbFmDgFDErb2 zHf?oVx_@p0#9fqW@>|19+#8U62v_kFO+7AD%K%0qZSlo1XK#6;xXnXuTQLLCer+sN zp;L-bzofpz*kuI{SLV3RvcMPL!BoT2xwE0)KL%Jrece)`Y*5gp3`svGoi+;ksX@ag8i~|uY`iX))3Z- zM?JD_I>kqL7)cv`I{yM(R&s2hrZW;BUAGHK-|fXZ{;OvUcSL^$ol_F}XX87-LH6x# zD&{Q2&F!^i+!lN|y|mZ6&#*{;04pvoIF=?=3z~}Cn)RQu5XZ_kK75hdhwVX`4hhG+ zNpMq@rIF&?gwp{;6Ia9<)BWw9S)izor4X--7hZjq;1<*f|&oQ?AE@;?3P)Jg54tVlyZ4Qq4(4%6kD$ z$A{k5rz@idfaC_T&(!=b)IaDx5J1n{`D|`Yzu1k+!{6@*tSnjd*>Q-8xMOE}<(bk5 z-ak1Qqd!%BO&aThw@qfMPc?%wR;s1hi)J9(-1m*`=M3;r`os|Zh7Id(uFsatPizIJ z%EXJ$pnYg$Iat2eoHJ!+A=brIq$U#LSs;D4wr-kqF(k*6M6h z{telnp2NCY^!?9!B~;8cBiJ=q<*(RE_sjhC=M}B(P zIesSN$9%m%T-!Ate0tlo*Ys929*@soKP?sMJ^G_R8XDCunVl|A$ImN#ww5nabIHcx zs&)u#mA7ZX&V(Nh7P}H4IZr0{zmVbHc51ifHqQkQGMC>SKah_1i|)N2B-XzVJ@@## zJXeN1@T$1x0e5ZoS;{|`gTs}4am?jMSqly~GC)1}RlGkqJ0esOG@g&gf8>4;(%yjD zTG=%&@j&DT?@Nal36R|PYM#Nx>^JO>!yWfBJi1pg8m!~0WW01L0g~hU`R|<=KjJw$ zXqF6KotDo}x{{3do2?&nHZigE$ML(Ka(6+^x+EZ_O7)7;Nx-`K>kOxL?96XlG=bcn z4M!U)vhcntnYmq>+Xc#S{8r6*&2^r4g$xDidumsuV848fC*&K8B5}Au$C!;b_1uAG z?{g*BKaRuu$g+=6)%-(ZurBzKiTOvvd_c7Sbble}gVQ^IJ;LA_ZL&T-3Je}xS!28? 
z59eEfW2}jWVH%F#D=Ghm6cr2LRqS)v`ePi<4<`+_vazvDth=!wW_Pqp6_Ea1|HErh z8m_Pr~Ch z<5s(1$z}BY_r4c5#K?b2hi4owb(@aH;Bqj2I+- z9~3#JVSSuYavs z;RnCA6$ZIB#$*4jvGy>pDlEW$iM=^FX((L=rM_>`6KBc9<=*^;Nt($M`uM;&efoZ( zC3;^@kCW4IFdUbIeSH=2J2d*Sy@A>Cu5Z>^aKtxUh$fYd_b0}S+!PWLj==FdA)}+u z-uf7hU%B3!;+%@Zoj>2-!8gMk$XNi{ckPjx9f-pIGu%BEWn=#xhkMA!X}5V<8657H z+*!RY9=DTot}IHFS2I|5$7C+}LvAjt{qWeBVNVqH-%_^@;?BkntdowOdoUbQ2IV7I zk6bQK#o^9>U#Rx1NK#K-ITYn#7`-Q#iv99xh9Nh7bu_lOe(TALwGrh|?!ciPdyb}K zzl@IyM(!W;gJgN0c@iupostLlB#y4;XRpHRdJi)gI@g`2kFTzz-h6uf!3Yo(|2!nN zkKV7&?=RndjQDjSHw`?!e@(2!oUSAHuaV)d_E;X0x4RS;u5orgYmkEdH~;;-%YFhW zvnge;=Vo`GrkXt<-xpfc&TPB=Mg<6el=W$He+!U*A>W?qjN?i8iz~1A3)l>$1m@kiu%*uH#+tr`e`_Snd&s}58dd*_UxMsV+Msv;d@C1 zO=sc(_DdL}`dQuESy;Di@IHq??JFQCu~s=aIUA?r9fkYBJD(7ozd8;NWljV(f-HmW z7gEuFWpZ3cj>E|KZCYwte_AmU4wSL`^KoVZa$k7cz}H-+$!_4^@bQz7@)&TDkXw8t z1MMSu`pj|kcqv{7IkwKBar3Gs9KXfKyY{<%NC4I2H_n9HcH(t7Xy>%YCXznyQGDc_ z<#zHFc(alU&JT@(h1!~pvbIb_?9=$~fg!?pd@5D#?j(B!g6&POTZ&zqfsSa%c$32{ z5EowM_kAyYe7|5r9z%}ub8!EvRiovCFL*udgd~5}=o}Ze_tGiF@-)Z*XLMMWd9Ulo zy5*@~Qin7cux`-HtQDSk0n#|kgHOEb#ru&S&dMLC77)R@BM;{m7$2Gd4Y$@OX!!i| z_%u`1`&3j$G)#KchnemG^X*BFVaQ?cQ@d#=U+|)0%aCqS70WTtUmmK^7 zv5skld*#QKG^lh)Wmk$+GVW){^*S>96xhW-Y$iS03X+&=$30)a$NZ|ysk z(BM^7G!?BIlj{r_Je3vHQu_O4Ui^NY%VescjAYaJk+B4-(v~B7q_>!hEqD1MwzFudq@8G%n z`Jeso65)op&m$}E4}+@*j=8llr2vOX-UDYSb6`AI{mUbPXE=US%f<6gZ+Zc`LT>0v zu(|-D)p>*KmyClsk){*T!IfC|^Z839c7r}pcVgp7QMnSZ>cLpxr3d3cQI%JBmBf3j z+rd~~&@tEn+`@hQPWP7J`Z3_;b09j2-Vd%dX7Gv%Yyz^1r@SRsJi+hTko(}t^xjdn z-KGH0doC(W3-m8m;BuwcTP0f2-;Kll{>e-D_BRL&w~vU1J7nVZ$;q>AP765GsG{6_JRNX9_fPE zr~2lD`H+mC-jjT`;J|cn`&wb7cVs#q=Nzq!3#5wVV4csJN8+ZR3&BWXtKrwY3_PEi z-#!N{d3^fL=Uix=eBB^1y#V)b`~7x(G)MbV$Z(kt-_V^fe+`0^UAJ+s&BE>Y;=StL zRr2)pq3KT}_iN;{AXuwAs;-fN+wn*Pt~<1|sn}k6-JUhyj)y{y>e$E8U+Cj!a$h7F zZpKM*4Yh7hSZmHEDd8K7%ky=4+Tg4cef+#Xb#pkcZ44}sguL!Uiuk>W1Uc5QoNe^? 
zTq92TU6UM(0-61N$5*5G!^wFvnQvscc`Me0w0l4p>AJ))i#Z;*2lMywwEpT)F8=Nh z%2GDpkvkKJ+xuYiX0PRkUVtzSWzX}@ad7*W1X<7eArQOhSlq@HHGr+ydB>MWV}PM4 ztkxom-fuDb26iPr%><81?lm9p49EQ&`92~Uzq9MU*W1}P0)kmir*i!ryiUFO&VhJ# zqb@+|HfeS|`l2=l#7|t|8c3+d^^JDR%lN+4FxE-m4Gp@QI|_FBW}>`*WUGy=NoLih#npsdZlEwIK0|xVQS&PN4k3ygv3NeVt+c`T@7H z;h|pD47emZGo*_~U$-FFcS--%>o3-5^ht$0$IpE_N25QVnZJHuR^lwlav=>eY-_&k zu0_|8`@%_k2~YGr0yY)H3z?gAd1Z=ldq004sHDuHY2`XI*!6MU(lax3d*u58q&;qz zH-2Z5(x47Em#wcbT{nMyd{^qO1rm8T;7nfFm;BWwINbUB3RZ}+aBtt44`GB$X$Ko! zNA7zi!`+qZzWOP(0LEE537Kvz#`fl~Ta>=vsq^exEUeY)Xqo%BeJ%6XeYW$RTs%_V z2o9CnG|N9J!u5?D7rS^k7%WBK<#-i!@Wa&Fm!Lngs6gu9_6d^qSX+emZkB~mr*6>p zbkIZi$z79(BsX%_|q8-u^UfkN3ze)?qF?@ynoXi>KR{*D=wCuN7ZqPIFn9{P&Z zZzww*DBxc=n>ZR;8T|f)oB@R(Z&v+%vzit~{rAnkzd+_i+4a|VQ_$zujBFdPEPME! zsQB?}iOR1fRF!YKR-?1xR1^;eHxEVX8lN{8su!@STK7Hwl4Qz)e?Evv`3VX?MOI^8Ak@FZCy8e?{siTZbZ$m{=jzW-#0 zbJb1>)sa%`^L)*(OYPEy;w+akQ{As~8}9zWOtlJ3|2`czLAXAU)*O?ZDHTb)ma#;I zpL$8DO<;QB10h~9vbWi(rS#~w$=Z;zHb<~`H6Q^cT3-cKHH zgOXm^RezQ<-?PywoGZzU-}jT($>V$-wdWhZHVg6BH~4((Zq+up>33xS+_)~*_wf`GwfHAI(+OC+O<)Oy{mM7I!jla8P+{M zQhF~)>)BV;2_i`G@?fITa#c13&th&fF6u22?rYS2%c#_dQwodjV*gR?VqI2pP0Nz@ zRkJykiHW!gQw3HFE1W$uJ3ky%GTbj$>XueiO_s)N)ai-Y`=ONa=s1VckPvC-e|#MM za_Wd+<;?u>sOBT7_a$5B{`j4Y$A38D`2Gjo|2V#+y?bV(;&BS&1bIIHxXd8(&Izv( ze4Ru1+m=0H3jY}1{_97$ds_NS=iB)&&s7DT#Wdbzj{Mj6FO#JH;GGjug`x@X3xg&I z>C%(mmTR%9y2LaeW8C?Sw1fTFKG%F(;}o`!f1e+}fi^*F8b$CQug};2r}HyeGu9%P zR+9F}BlYk5R6Snua44*tA!eU#IhK$tN&b$H{;oRG^0q%qNqt<&!j$rjf0noZ5RX5? 
z)!SDcFxoDt`aQ4Oa|QcC>Ykm2_E>i$?@n6ab|-o%xbU7eW}RIoF_v6+0*O>7sgfvw&IRS8ldaKV%`(NfoeGa!mWiOrEPg zZBaR9KPaWz7gyJ{SD!)E+kI(ZgPN48x2{hPmu_Y0Gr~qYkRV;dPU`1hS4}bRa}aN-#^rXNzsURk+atFB{o=!m z>#W&SS-jkrBsI#;zn|2R$NAUipZ_P_f4V;3-v4vwl-&-LJ^z~eS=gX&a2uOmK%bu0 zAg6wNe^34PX;Jq7HTCnP96<9vbq?B&O70I=m{chT9VmzX{+9LcuYs#lG|3we|Gx2v z7Uk&g8_|cewa|^`=+oEQ|0Y+X93$n&f6H~WC@21w>;5f2Y4q2p#?34o41RwK+!1}w zQ||!7U(;NBbHfX!2EVURP93V?{{1!2(|?K4*T8W?^vM9u{0_iCi*okwgc$yR6Jq2* yG5%Lj2LCn#HZfQK>yx4ee}A~$0DZXJ-U59T-Sn^Tcdu1kzd><5#mv!2@&5p-J<0 z6hELgx#bvoQ4e0kL+)NgMEpR|vml6gkP4neX%9NPO;6%G4DgByz@#+w5pniUL^&-KnN{O#Kt`2=_6b*#3kIP<(t3DxI-($MfvR9YQ3MF#6 zT4D~S6xSu66JSmd+nGxit9wL(IO^dhP8OtQV;%NtkVvT!By#N>>?;%sOMiHUhbl1W zH}bI;2|nZn1NUH1gCP{n7`1okidi@k(2Z#@>xbxh9NeKJ~5LcIK0b+@sE~=OuJsDc<<9^jREL zE~j6AYGzlJj}x}@s&++zNd{9!xZfJ4Iwb93J485Y^ZaQIj&<{o+vD7JWphi|cwbw5 zy7Bepg`p?8^FKedrl6&~oxFNC@?bvq_|miXY-c(LCm5VGP#@n$uxp~jsa@k#U_7Hi zx@$Ze8sCa-i^;Kt-1o@JcJlW-<>HT#^jua^HZ$?Xtv4;D-g#&t)==OygE<2Ya$~*w Y*Yp~pzEg2Q4)e~+nFTomXR3z$7pENIpa1{> literal 0 HcmV?d00001 diff --git a/web/app/mwm_size_predictor.py b/web/app/mwm_size_predictor.py index c69a14b..8526879 100644 --- a/web/app/mwm_size_predictor.py +++ b/web/app/mwm_size_predictor.py @@ -6,7 +6,8 @@ class MwmSizePredictor: - factors = ('urban_pop', 'area', 'city_cnt', 'hamlet_cnt',) + factors = ('city_pop', 'land_area', 'city_cnt', 'hamlet_cnt', + 'coastline_length') def __init__(self): with open(config.MWM_SIZE_PREDICTION_MODEL_PATH, 'rb') as f: diff --git a/web/app/subregions.py b/web/app/subregions.py index 5305530..db21dee 100644 --- a/web/app/subregions.py +++ b/web/app/subregions.py @@ -6,6 +6,8 @@ MWM_SIZE_PREDICTION_MODEL_LIMITATIONS, OSM_TABLE as osm_table, OSM_PLACES_TABLE as osm_places_table, + LAND_POLYGONS_TABLE as land_polygons_table, + COASTLINE_TABLE as coastline_table, ) from mwm_size_predictor import MwmSizePredictor @@ -19,8 +21,9 @@ def get_subregions_info(conn, region_id, region_table, :param next_level: admin level of subregions to find 
:return: dict {subregion_id => subregion data} including area and population info """ - subregions = _get_subregions_basic_info(conn, region_id, region_table, - next_level) + subregion_ids = _get_geometrical_subregion_ids(conn, region_id, + region_table, next_level) + subregions = _get_regions_basic_info(conn, subregion_ids) _add_mwm_size_estimation(conn, subregions, need_cities) keys = ('name', 'mwm_size_est') if need_cities: @@ -31,130 +34,191 @@ def get_subregions_info(conn, region_id, region_table, } -def _get_subregions_basic_info(conn, region_id, region_table, - next_level): - cursor = conn.cursor() +def _get_geometrical_subregion_ids(conn, region_id, region_table, next_level): region_id_column, region_geom_column = ( ('id', 'geom') if region_table == borders_table else ('osm_id', 'way') ) - cursor.execute(f""" - SELECT subreg.osm_id, subreg.name, - ST_Area(geography(subreg.way))/1.0E+6 area - FROM {region_table} reg, {osm_table} subreg - WHERE reg.{region_id_column} = %s AND subreg.admin_level = %s AND - ST_Contains(reg.{region_geom_column}, subreg.way) - """, (region_id, next_level) - ) - subregions = {} - for rec in cursor: - subregion_data = { - 'osm_id': rec[0], - 'name': rec[1], - 'area': rec[2], - } - subregions[rec[0]] = subregion_data - return subregions + with conn.cursor() as cursor: + cursor.execute(f""" + SELECT subreg.osm_id + FROM {region_table} reg, {osm_table} subreg + WHERE reg.{region_id_column} = %s AND subreg.admin_level = %s AND + ST_Contains(reg.{region_geom_column}, subreg.way) + """, (region_id, next_level) + ) + return list(rec[0] for rec in cursor) + +def _get_regions_basic_info(conn, region_ids): + """Gets name, land_area for regions in OSM borders table""" + if not region_ids: + return {} -def _add_population_data(conn, subregions, need_cities): - """Adds population data only for subregions that are suitable + region_ids_str = ','.join(str(x) for x in region_ids) + with conn.cursor() as cursor: + cursor.execute(f""" + SELECT 
reg.osm_id, reg.name, + ST_Area( + geography( + ST_Intersection( + reg.way, + ( + SELECT ST_Union(c.geom) + FROM {land_polygons_table} c + WHERE c.geom && reg.way + ) + ) + ) + ) / 1.0E+6 land_area + FROM {osm_table} reg + WHERE osm_id in ({region_ids_str}) + """ + ) + regions = {} + for osm_id, name, land_area in cursor: + region_data = { + 'osm_id': osm_id, + 'name': name, + 'land_area': land_area, + } + regions[osm_id] = region_data + return regions + + +def _add_population_data(conn, regions, need_cities): + """Adds population data only for regions that are suitable for mwm size estimation. """ - subregion_ids = [ - s_id for s_id, s_data in subregions.items() - if s_data['area'] <= MWM_SIZE_PREDICTION_MODEL_LIMITATIONS['area'] + region_ids = [ + s_id for s_id, s_data in regions.items() + if s_data['land_area'] <= MWM_SIZE_PREDICTION_MODEL_LIMITATIONS['land_area'] ] - if not subregion_ids: + if not region_ids: return - for subregion_id, data in subregions.items(): + for region_id, data in regions.items(): data.update({ - 'urban_pop': 0, + 'city_pop': 0, 'city_cnt': 0, 'hamlet_cnt': 0 }) if need_cities: data['cities'] = [] - subregion_ids_str = ','.join(str(x) for x in subregion_ids) + region_ids_str = ','.join(str(x) for x in region_ids) with conn.cursor() as cursor: cursor.execute(f""" SELECT b.osm_id, p.name, coalesce(p.population, 0), p.place FROM {osm_table} b, {osm_places_table} p - WHERE b.osm_id IN ({subregion_ids_str}) + WHERE b.osm_id IN ({region_ids_str}) AND ST_Contains(b.way, p.center) """ ) - for subregion_id, place_name, place_population, place_type in cursor: - subregion_data = subregions[subregion_id] + for region_id, place_name, place_population, place_type in cursor: + region_data = regions[region_id] if place_type in ('city', 'town'): - subregion_data['city_cnt'] += 1 - subregion_data['urban_pop'] += place_population + region_data['city_cnt'] += 1 + region_data['city_pop'] += place_population if need_cities: - subregion_data['cities'].append({ 
+ region_data['cities'].append({ 'name': place_name, 'population': place_population }) else: - subregion_data['hamlet_cnt'] += 1 + region_data['hamlet_cnt'] += 1 -def _add_mwm_size_estimation(conn, subregions, need_cities): - for subregion_data in subregions.values(): - subregion_data['mwm_size_est'] = None +def _add_coastline_length(conn, regions): + if not regions: + return - _add_population_data(conn, subregions, need_cities) + for r_data in regions.values(): + r_data['coastline_length'] = 0.0 - subregions_to_predict = [ + region_ids_str = ','.join(str(x) for x in regions.keys()) + + with conn.cursor() as cursor: + cursor.execute(f""" + WITH buffered_borders AS ( + -- 0.001 degree ~ 100 m - ocean buffer stripe to overcome difference + -- in coastline and borders + SELECT id, ST_Buffer(geom, 0.001) geom + FROM {borders_table} + WHERE id IN ({region_ids_str}) + ) + SELECT bb.id, + SUM( + ST_Length( + geography( + ST_Intersection( + bb.geom, + c.geom + ) + ) + ) + ) / 1e3 + FROM {coastline_table} c, buffered_borders as bb + WHERE c.geom && bb.geom + GROUP BY bb.id + """) + for b_id, coastline_length in cursor: + regions[b_id]['coastline_length'] = coastline_length + + +def _add_mwm_size_estimation(conn, regions, need_cities): + for region_data in regions.values(): + region_data['mwm_size_est'] = None + + _add_population_data(conn, regions, need_cities) + _add_coastline_length(conn, regions) + + #from pprint import pprint as pp + #pp(regions) + regions_to_predict = [ ( s_id, - [subregions[s_id][f] for f in MwmSizePredictor.factors] + [regions[s_id][f] for f in MwmSizePredictor.factors] ) - for s_id in sorted(subregions.keys()) - if all(subregions[s_id].get(f) is not None and - subregions[s_id][f] <= + for s_id in sorted(regions.keys()) + if all(regions[s_id].get(f) is not None and + regions[s_id][f] <= MWM_SIZE_PREDICTION_MODEL_LIMITATIONS[f] - for f in MwmSizePredictor.factors) + for f in MwmSizePredictor.factors + if f in 
MWM_SIZE_PREDICTION_MODEL_LIMITATIONS.keys()) ] - if not subregions_to_predict: + if not regions_to_predict: return - feature_array = [x[1] for x in subregions_to_predict] + feature_array = [x[1] for x in regions_to_predict] predictions = MwmSizePredictor.predict(feature_array) - for subregion_id, mwm_size_prediction in zip( - (x[0] for x in subregions_to_predict), + for region_id, mwm_size_prediction in zip( + (x[0] for x in regions_to_predict), predictions ): - subregions[subregion_id]['mwm_size_est'] = mwm_size_prediction + regions[region_id]['mwm_size_est'] = mwm_size_prediction def update_border_mwm_size_estimation(conn, border_id): + regions = _get_regions_basic_info(conn, [border_id]) + + if math.isnan(regions[border_id]['land_area']): + e = Exception(f"Area is NaN for border '{name}' ({border_id})") + raise e + + _add_mwm_size_estimation(conn, regions, need_cities=False) + mwm_size_est = regions[border_id].get('mwm_size_est') + # mwm_size_est may be None. Python's None is converted to NULL + # during %s substitution in execute(). with conn.cursor() as cursor: - cursor.execute(f""" - SELECT name, ST_Area(geography(geom))/1.0E+6 area - FROM {borders_table} - WHERE id = %s""", (border_id,)) - name, area = cursor.fetchone() - if math.isnan(area): - e = Exception(f"Area is NaN for border '{name}' ({border_id})") - raise e - border_data = { - 'area': area, - } - regions = {border_id: border_data} - _add_mwm_size_estimation(conn, regions, need_cities=False) - mwm_size_est = border_data.get('mwm_size_est') - # mwm_size_est may be None. Python's None is converted to NULL - # duging %s substitution in execute(). 
cursor.execute(f""" UPDATE {borders_table} SET mwm_size_est = %s WHERE id = %s """, (mwm_size_est, border_id,)) - conn.commit() + conn.commit() def is_administrative_region(conn, region_id): From 32844a62f248f552d601eb7d199e051808ad5a5e Mon Sep 17 00:00:00 2001 From: Alexey Zakharenkov Date: Fri, 9 Apr 2021 10:29:19 +0300 Subject: [PATCH 2/5] Metalevels in admin levels; country autodivision by metalevels and mwm size estimation --- .gitignore | 1 + web/app/auto_split.py | 95 +++---- web/app/borders_api.py | 58 +++- web/app/borders_api_utils.py | 24 +- web/app/countries_division.py | 502 ++++++++++++++++++--------------- web/app/countries_structure.py | 286 ++++++++++++++++--- web/app/simple_splitting.py | 186 ++++++++++++ web/app/static/borders.js | 46 ++- web/app/subregions.py | 131 +++++---- web/app/templates/index.html | 2 + 10 files changed, 947 insertions(+), 384 deletions(-) create mode 100644 web/app/simple_splitting.py diff --git a/.gitignore b/.gitignore index c266415..67a6a07 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ __pycache__ *.pyc .idea +.DS_Store nohup.out diff --git a/web/app/auto_split.py b/web/app/auto_split.py index 56d2adb..447cc4a 100644 --- a/web/app/auto_split.py +++ b/web/app/auto_split.py @@ -6,18 +6,19 @@ OSM_TABLE as osm_table, MWM_SIZE_THRESHOLD, ) -from subregions import get_subregions_info +from subregions import ( + get_regions_info, + get_subregions_info, +) class DisjointClusterUnion: """Disjoint set union implementation for administrative subregions.""" - def __init__(self, region_id, subregions, next_level, mwm_size_thr=None): + def __init__(self, subregions, mwm_size_thr=None): assert all(s_data['mwm_size_est'] is not None for s_data in subregions.values()) - self.region_id = region_id self.subregions = subregions - self.next_level = next_level self.mwm_size_thr = mwm_size_thr or MWM_SIZE_THRESHOLD self.representatives = {sub_id: sub_id for sub_id in subregions} # A cluster is one or more subregions with common 
borders @@ -33,6 +34,22 @@ def __init__(self, region_id, subregions, next_level, mwm_size_thr=None): 'finished': False, # True if the cluster cannot be merged with another } + def try_collapse_into_one(self): + sum_mwm_size_est = sum(s_data['mwm_size_est'] + for s_data in self.subregions.values()) + if sum_mwm_size_est <= self.mwm_size_thr: + a_subregion_id = next(iter(self.subregions)) + self.clusters = {} + self.clusters[a_subregion_id] = { + 'representative': a_subregion_id, + 'subregion_ids': list(self.subregions.keys()), + 'mwm_size_est': sum_mwm_size_est, + 'finished': True + } + return True + else: + return False + def get_smallest_cluster(self): """Find minimal cluster.""" smallest_cluster_id = min( @@ -143,15 +160,14 @@ def calculate_common_border_matrix(conn, subregion_ids): return common_border_matrix -def find_golden_splitting(conn, border_id, next_level, mwm_size_thr): - subregions = get_subregions_info(conn, border_id, osm_table, - next_level, need_cities=True) - if not subregions: - return - if any(s_data['mwm_size_est'] is None for s_data in subregions.values()): +def combine_into_clusters(conn, regions, mwm_size_thr): + """Merge regions into clusters up to mwm_size_thr""" + + if any(s_data['mwm_size_est'] is None for s_data in regions.values()): return - dcu = DisjointClusterUnion(border_id, subregions, next_level, mwm_size_thr) + dcu = DisjointClusterUnion(regions, mwm_size_thr) + all_subregion_ids = dcu.get_all_subregion_ids() common_border_matrix = calculate_common_border_matrix(conn, all_subregion_ids) @@ -172,57 +188,40 @@ def find_golden_splitting(conn, border_id, next_level, mwm_size_thr): return dcu -def get_union_sql(subregion_ids): - assert(len(subregion_ids) > 0) - if len(subregion_ids) == 1: - return f""" - SELECT way FROM {osm_table} WHERE osm_id={subregion_ids[0]} - """ - else: - return f""" - SELECT ST_Union( - ({get_union_sql(subregion_ids[0:1])}), - ({get_union_sql(subregion_ids[1:])}) - ) - """ +def 
split_region_at_admin_level(conn, region_id, next_level, mwm_size_thr): + subregions = get_subregions_info(conn, region_id, osm_table, next_level) + if not subregions: + return + dcu = combine_into_clusters(conn, subregions, mwm_size_thr) + save_splitting_to_db(conn, region_id, next_level, dcu) -def save_splitting_to_db(conn, dcu: DisjointClusterUnion): +def save_splitting_to_db(conn, region_id, next_level, dcu: DisjointClusterUnion): with conn.cursor() as cursor: # Remove previous splitting of the region cursor.execute(f""" DELETE FROM {autosplit_table} - WHERE osm_border_id = {dcu.region_id} + WHERE osm_border_id = {region_id} AND mwm_size_thr = {dcu.mwm_size_thr} - AND next_level = {dcu.next_level} + AND next_level = {next_level} """) - for cluster_id, data in dcu.clusters.items(): - subregion_ids = data['subregion_ids'] - subregion_ids_array_str = ( - '{' + ','.join(str(x) for x in subregion_ids) + '}' - ) - cluster_geometry_sql = get_union_sql(subregion_ids) + for cluster_id, cluster_data in dcu.clusters.items(): + subregion_ids = cluster_data['subregion_ids'] + subregion_ids_str = ','.join(str(x) for x in subregion_ids) + subregion_ids_array_str = '{' + subregion_ids_str + '}' cursor.execute(f""" INSERT INTO {autosplit_table} (osm_border_id, subregion_ids, geom, next_level, mwm_size_thr, mwm_size_est) VALUES ( - {dcu.region_id}, + {region_id}, '{subregion_ids_array_str}', - ({cluster_geometry_sql}), - {dcu.next_level}, + ( + SELECT ST_Union(way) FROM {osm_table} + WHERE osm_id IN ({subregion_ids_str}) + ), + {next_level}, {dcu.mwm_size_thr}, - {data['mwm_size_est']} + {cluster_data['mwm_size_est']} ) """) conn.commit() - - -def split_region(conn, region_id, next_level, mwm_size_thr): - dcu = find_golden_splitting(conn, region_id, next_level, mwm_size_thr) - if dcu is None: - return - save_splitting_to_db(conn, dcu) - - ## May need to debug - #from auto_split_debug import save_splitting_to_file - #save_splitting_to_file(conn, dcu) diff --git 
a/web/app/borders_api.py b/web/app/borders_api.py index cd04e34..047ae64 100755 --- a/web/app/borders_api.py +++ b/web/app/borders_api.py @@ -20,6 +20,7 @@ import config from borders_api_utils import * from countries_structure import ( + auto_divide_country, CountryStructureException, create_countries_initial_structure, ) @@ -28,6 +29,7 @@ borders_to_xml, lines_to_xml, ) +from simple_splitting import simple_split from subregions import ( get_child_region_ids, get_parent_region_id, @@ -233,15 +235,22 @@ def prepare_sql_search_string(string): @app.route('/search') def search(): query = request.args.get('q') - sql_search_string = prepare_sql_search_string(query) + # query may contain region id or a part of its name + try: + region_id = int(query) + search_value = region_id + is_id = True + except ValueError: + search_value = prepare_sql_search_string(query) + is_id = False with g.conn.cursor() as cursor: cursor.execute(f""" SELECT ST_XMin(geom), ST_YMin(geom), ST_XMax(geom), ST_YMax(geom) FROM {config.BORDERS_TABLE} - WHERE name ILIKE %s + WHERE {'id =' if is_id else 'name ILIKE'} %s ORDER BY (ST_Area(geography(geom))) - LIMIT 1""", (sql_search_string,) + LIMIT 1""", (search_value,) ) if cursor.rowcount > 0: rec = cursor.fetchone() @@ -249,6 +258,38 @@ def search(): return jsonify(status='not found') +@app.route('/simple_split') +@check_write_access +@validate_args_types(id=int) +def simple_split_endpoint(): + """Split into 2/4 parts with straight lines""" + region_id = int(request.args.get('id')) + with g.conn.cursor() as cursor: + cursor.execute(f""" + SELECT name, mwm_size_est + FROM {config.BORDERS_TABLE} + WHERE id = %s""", (region_id,)) + if cursor.rowcount == 0: + return jsonify(status=f"Region {region_id} not found") + name, mwm_size_est = cursor.fetchone() + if mwm_size_est is None: + mwm_size_est = update_border_mwm_size_estimation(g.conn, region_id) + if mwm_size_est is not None: + return jsonify(status='MWM size estimation was updated') + else: + return 
jsonify(status="Cannot esitmate region mwm size") + region = { + 'id': region_id, + 'name': name, + 'mwm_size_est': mwm_size_est, + } + + if simple_split(g.conn, region): + g.conn.commit() + return jsonify(status='ok') + return jsonify(status="Can't split region into parts") + + @app.route('/split') @check_write_access @validate_args_types(id=int) @@ -863,7 +904,7 @@ def export_poly(): borders_table = request.args.get('table') borders_table = config.OTHER_TABLES.get(borders_table, config.BORDERS_TABLE) - fetch_borders_args = {'table': borders_table, 'only_leaves': True} + fetch_borders_args = {'table': borders_table, 'only_leaves': False} if 'xmin' in request.args: # If one coordinate is given then others are also expected. @@ -994,6 +1035,15 @@ def border(): return jsonify(status='ok', geojson=borders[0]) +@app.route('/auto_divide_country') +@validate_args_types(id=int) +def auto_divide_country_endpoint(): + country_id = int(request.args.get('id')) + errors, warnings = auto_divide_country(g.conn, country_id) + if errors: + return jsonify(status='
'.join(errors[:3])) + return jsonify(status='ok', warnings=warnings[:10]) + @app.route('/start_over') def start_over(): try: diff --git a/web/app/borders_api_utils.py b/web/app/borders_api_utils.py index e152919..ea5f800 100644 --- a/web/app/borders_api_utils.py +++ b/web/app/borders_api_utils.py @@ -8,7 +8,7 @@ BORDERS_TABLE as borders_table, OSM_TABLE as osm_table, ) -from auto_split import split_region +from auto_split import split_region_at_admin_level from subregions import ( get_parent_region_id, get_region_country, @@ -70,6 +70,9 @@ def fetch_borders(**kwargs): for rec in cursor: region_id = rec[8] country_id, country_name = get_region_country(g.conn, region_id) + if country_id is None: + # This means region_id was deleted from the DB meanwhile. + continue props = { 'name': rec[0] or '', 'nodes': rec[2], 'modified': rec[3], 'disabled': rec[4], 'count_k': rec[5], 'comment': rec[6], @@ -152,7 +155,7 @@ def get_clusters_for_preview_one(region_id, next_level, mwm_size_thr): """, splitting_sql_params ) if cursor.rowcount == 0: - split_region(g.conn, region_id, next_level, mwm_size_thr) + split_region_at_admin_level(g.conn, region_id, next_level, mwm_size_thr) cursor.execute(f""" SELECT subregion_ids[1], @@ -258,7 +261,7 @@ def divide_into_clusters(region_ids, next_level, mwm_size_thr): """, splitting_sql_params ) if cursor.rowcount == 0: - split_region(g.conn, region_id, next_level, mwm_size_thr) + split_region_at_admin_level(g.conn, region_id, next_level, mwm_size_thr) free_id = get_free_id() counter = 0 @@ -395,7 +398,7 @@ def find_potential_parents(region_id): return parents -def copy_region_from_osm(conn, region_id, name=None, parent_id='not_passed'): +def copy_region_from_osm(conn, region_id, name=None, parent_id='not_passed', mwm_size_est=None): errors, warnings = [], [] with conn.cursor() as cursor: # Check if this id already in use @@ -407,21 +410,18 @@ def copy_region_from_osm(conn, region_id, name=None, parent_id='not_passed'): return errors, warnings 
name_expr = f"'{name}'" if name else "name" - parent_id_expr = f"{parent_id}" if isinstance(parent_id, int) else "NULL" + parent_id_sql = None if parent_id == 'not_passed' else parent_id cursor.execute(f""" INSERT INTO {borders_table} - (id, geom, name, parent_id, modified, count_k) - SELECT osm_id, way, {name_expr}, {parent_id_expr}, now(), -1 + (id, geom, name, parent_id, modified, count_k, mwm_size_est) + SELECT osm_id, way, {name_expr}, %s, now(), -1, %s FROM {osm_table} WHERE osm_id = %s - """, (region_id,) + """, (parent_id_sql, mwm_size_est, region_id,) ) if parent_id == 'not_passed': assign_region_to_lowest_parent(conn, region_id) - try: - update_border_mwm_size_estimation(conn, region_id) - except Exception as e: - warnings.append(str(e)) + return errors, warnings diff --git a/web/app/countries_division.py b/web/app/countries_division.py index da64d89..ace19ed 100644 --- a/web/app/countries_division.py +++ b/web/app/countries_division.py @@ -1,243 +1,293 @@ import itertools - # admin_level => list of countries which should be initially divided at one admin level unilevel_countries = { - 2: [ - 'Afghanistan', - 'Albania', - 'Algeria', - 'Andorra', - 'Angola', - 'Antigua and Barbuda', - 'Armenia', - 'Australia', # need to be divided at level 4 but has many small islands of level 4 - 'Azerbaijan', # has 2 non-covering 3-level regions - 'Bahrain', - 'Barbados', - 'Belize', - 'Benin', - 'Bermuda', - 'Bhutan', - 'Botswana', - 'British Sovereign Base Areas', # ! 
include into Cyprus - 'British Virgin Islands', - 'Bulgaria', - 'Burkina Faso', - 'Burundi', - 'Cambodia', - 'Cameroon', - 'Cape Verde', - 'Central African Republic', - 'Chad', - 'Chile', - 'Colombia', - 'Comoros', - 'Congo-Brazzaville', # BUG whith autodivision at level 4 - 'Cook Islands', - 'Costa Rica', - 'Croatia', # next level = 6 - 'Cuba', - 'Cyprus', - "Côte d'Ivoire", - 'Democratic Republic of the Congo', - 'Djibouti', - 'Dominica', - 'Dominican Republic', - 'East Timor', - 'Ecuador', - 'Egypt', - 'El Salvador', - 'Equatorial Guinea', - 'Eritrea', - 'Estonia', - 'Eswatini', - 'Ethiopia', - 'Falkland Islands', - 'Faroe Islands', - 'Federated States of Micronesia', - 'Fiji', - 'Gabon', - 'Georgia', - 'Ghana', - 'Gibraltar', - 'Greenland', - 'Grenada', - 'Guatemala', - 'Guernsey', - 'Guinea', - 'Guinea-Bissau', - 'Guyana', - 'Haiti', - 'Honduras', - 'Iceland', - 'Indonesia', - 'Iran', - 'Iraq', - 'Isle of Man', - 'Israel', # ! don't forget to separate Jerusalem - 'Jamaica', - 'Jersey', - 'Jordan', - 'Kazakhstan', - 'Kenya', # ! level 3 doesn't cover the whole country - 'Kiribati', - 'Kosovo', - 'Kuwait', - 'Kyrgyzstan', - 'Laos', - 'Latvia', - 'Lebanon', - 'Liberia', - 'Libya', - 'Liechtenstein', - 'Lithuania', - 'Luxembourg', - 'Madagascar', - 'Malaysia', - 'Maldives', - 'Mali', - 'Malta', - 'Marshall Islands', - 'Martinique', - 'Mauritania', - 'Mauritius', - 'Mexico', - 'Moldova', - 'Monaco', - 'Mongolia', - 'Montenegro', - 'Montserrat', - 'Mozambique', - 'Myanmar', - 'Namibia', - 'Nauru', - 'Nicaragua', - 'Niger', - 'Nigeria', - 'Niue', - 'North Korea', - 'North Macedonia', - 'Oman', - 'Palau', - # ! 
'Palestina' is not a country in OSM - need make an mwm - 'Panama', - 'Papua New Guinea', - 'Peru', # need split-merge - 'Philippines', # split at level 3 and merge or not merge - 'Qatar', - 'Romania', # need split-merge - 'Rwanda', - 'Saint Helena, Ascension and Tristan da Cunha', - 'Saint Kitts and Nevis', - 'Saint Lucia', - 'Saint Vincent and the Grenadines', - 'San Marino', - 'Samoa', - 'Saudi Arabia', - 'Senegal', - 'Seychelles', - 'Sierra Leone', - 'Singapore', - 'Slovakia', # ! split at level 3 then 4, and add Bratislava region (4) - 'Slovenia', - 'Solomon Islands', - 'Somalia', - 'South Georgia and the South Sandwich Islands', - 'South Korea', - 'South Sudan', - 'South Ossetia', # ! don't forget to divide from Georgia - 'Sri Lanka', - 'Sudan', - 'São Tomé and Príncipe', - 'Suriname', - 'Switzerland', - 'Syria', - 'Taiwan', - 'Tajikistan', - 'Thailand', - 'The Bahamas', - 'The Gambia', - 'Togo', - 'Tokelau', - 'Tonga', - 'Trinidad and Tobago', - 'Tunisia', - 'Turkmenistan', - 'Turks and Caicos Islands', - 'Tuvalu', - 'United Arab Emirate', - 'Uruguay', - 'Uzbekistan', - 'Vanuatu', - 'Venezuela', # level 3 not comprehensive - 'Vietnam', - # ! don't forget 'Wallis and Futuna', belongs to France - 'Yemen', - 'Zambia', - 'Zimbabwe', - ], - 3: [ - 'Malawi', - 'Nepal', # ! one region is lost after division - 'Pakistan', - 'Paraguay', - 'Tanzania', - 'Turkey', - 'Uganda', - ], - 4: [ - 'Austria', - 'Bangladesh', - 'Belarus', # maybe need merge capital region with the province - 'Belgium', # maybe need merge capital region into encompassing province - 'Bolivia', - 'Bosnia and Herzegovina', # other levels - 5, 6, 7 - are incomplete. - 'Canada', - 'China', # ! don't forget about Macau and Hong Kong of level 3 not covered by level 4 - 'Denmark', - 'Greece', # ! has one small 3-level subregion! - 'Hungary', # maybe multilevel division at levels [4, 5] ? - 'India', - 'Italy', - 'Japan', # ? 
About 50 4-level subregions, some of which requires further division - 'Morocco', # ! not all regions appear after substitution with level 4 - 'New Zealand', # ! don't forget islands to the north and south - 'Norway', - 'Poland', # 380(!) subregions of AL=6 - 'Portugal', - 'Russia', - 'Serbia', - 'South Africa', - 'Spain', - 'Ukraine', - 'United States', - ], - 5: [ - 'Ireland', # ! 5-level don't cover the whole country - ], - 6: [ - 'Czechia', - ] + 2: [ + 'Afghanistan', + 'Albania', + 'Algeria', + 'Andorra', + 'Angola', + 'Antigua and Barbuda', + 'Armenia', + 'Australia', # need to be divided at level 4 but has many small islands of level 4 + 'Azerbaijan', # has 2 non-covering 3-level regions + 'Bahrain', + 'Barbados', + 'Belize', + 'Benin', + 'Bermuda', + 'Bhutan', + 'Botswana', + 'British Sovereign Base Areas', # ! include into Cyprus + 'British Virgin Islands', + 'Bulgaria', + 'Burkina Faso', + 'Burundi', + 'Cambodia', + 'Cameroon', + 'Cape Verde', + 'Central African Republic', + 'Chad', + 'Chile', + 'Colombia', + 'Comoros', + 'Congo-Brazzaville', # BUG whith autodivision at level 4 + 'Cook Islands', + 'Costa Rica', + 'Croatia', # next level = 6 + 'Cuba', + 'Cyprus', + "Côte d'Ivoire", + 'Democratic Republic of the Congo', + 'Djibouti', + 'Dominica', + 'Dominican Republic', + 'East Timor', + 'Ecuador', + 'Egypt', + 'El Salvador', + 'Equatorial Guinea', + 'Eritrea', + 'Estonia', + 'Eswatini', + 'Ethiopia', + 'Falkland Islands', + 'Faroe Islands', + 'Federated States of Micronesia', + 'Fiji', + 'Gabon', + 'Georgia', + 'Ghana', + 'Gibraltar', + 'Greenland', + 'Grenada', + 'Guatemala', + 'Guernsey', + 'Guinea', + 'Guinea-Bissau', + 'Guyana', + 'Haiti', + 'Honduras', + 'Iceland', + 'Indonesia', + 'Iran', + 'Iraq', + 'Isle of Man', + 'Israel', # ! don't forget to separate Jerusalem + 'Jamaica', + 'Jersey', + 'Jordan', + 'Kazakhstan', + 'Kenya', # ! 
level 3 doesn't cover the whole country + 'Kiribati', + 'Kosovo', + 'Kuwait', + 'Kyrgyzstan', + 'Laos', + 'Latvia', + 'Lebanon', + 'Liberia', + 'Libya', + 'Liechtenstein', + 'Lithuania', + 'Luxembourg', + 'Madagascar', + 'Malaysia', + 'Maldives', + 'Mali', + 'Malta', + 'Marshall Islands', + 'Martinique', + 'Mauritania', + 'Mauritius', + 'Mexico', + 'Moldova', + 'Monaco', + 'Mongolia', + 'Montenegro', + 'Montserrat', + 'Mozambique', + 'Myanmar', + 'Namibia', + 'Nauru', + 'Nicaragua', + 'Niger', + 'Nigeria', + 'Niue', + 'North Korea', + 'North Macedonia', + 'Oman', + 'Palau', + # ! 'Palestina' is not a country in OSM - need make an mwm + 'Panama', + 'Papua New Guinea', + 'Peru', # need split-merge + 'Philippines', # split at level 3 and merge or not merge + 'Qatar', + 'Romania', # need split-merge + 'Rwanda', + 'Saint Helena, Ascension and Tristan da Cunha', + 'Saint Kitts and Nevis', + 'Saint Lucia', + 'Saint Vincent and the Grenadines', + 'San Marino', + 'Samoa', + 'Saudi Arabia', + 'Senegal', + 'Seychelles', + 'Sierra Leone', + 'Singapore', + 'Slovakia', # ! split at level 3 then 4, and add Bratislava region (4) + 'Slovenia', + 'Solomon Islands', + 'Somalia', + 'South Georgia and the South Sandwich Islands', + 'South Korea', + 'South Sudan', + 'South Ossetia', # ! don't forget to divide from Georgia + 'Sri Lanka', + 'Sudan', + 'São Tomé and Príncipe', + 'Suriname', + 'Switzerland', + 'Syria', + 'Taiwan', + 'Tajikistan', + 'Thailand', + 'The Bahamas', + 'The Gambia', + 'Togo', + 'Tokelau', + 'Tonga', + 'Trinidad and Tobago', + 'Tunisia', + 'Turkmenistan', + 'Turks and Caicos Islands', + 'Tuvalu', + 'United Arab Emirate', + 'Uruguay', + 'Uzbekistan', + 'Vanuatu', + 'Venezuela', # level 3 not comprehensive + 'Vietnam', + # ! don't forget 'Wallis and Futuna', belongs to France + 'Yemen', + 'Zambia', + 'Zimbabwe', + ], + 3: [ + 'Malawi', + 'Nepal', # ! 
one region is lost after division + 'Pakistan', + 'Paraguay', + 'Tanzania', + 'Turkey', + 'Uganda', + ], + 4: [ + 'Austria', + 'Bangladesh', + 'Belarus', # maybe need merge capital region with the province + 'Belgium', # maybe need merge capital region into encompassing province + 'Bolivia', + 'Bosnia and Herzegovina', # other levels - 5, 6, 7 - are incomplete. + 'Canada', + 'China', # ! don't forget about Macau and Hong Kong of level 3 not covered by level 4 + 'Denmark', + 'Greece', # ! has one small 3-level subregion! + 'Hungary', # maybe multilevel division at levels [4, 5] ? + 'India', + 'Italy', + 'Japan', # ? About 50 4-level subregions, some of which requires further division + 'Morocco', # ! not all regions appear after substitution with level 4 + 'New Zealand', # ! don't forget islands to the north and south + 'Norway', + 'Poland', # 380(!) subregions of AL=6 + 'Portugal', + 'Russia', + 'Serbia', + 'South Africa', + 'Spain', + 'Ukraine', + 'United States', + ], + 5: [ + 'Ireland', # ! 5-level don't cover the whole country + ], + 6: [ + 'Czechia', + ] } # Country name => list of admin levels to which it should be initially divided. # 'Germany': [4, 5] implies that the country is divided at level 4 at first, then all # 4-level subregions are divided into subregions of level 5 (if any) multilevel_countries = { - 'Brazil': [3, 4], - 'Finland': [3, 6], # [3,5,6] in more fresh data? # division by level 6 seems ideal - 'France': [3, 4], - 'Germany': [4, 5], # not the whole country is covered by units of AL=5 - 'Netherlands': [3, 4], # there are carribean lands of level both 3 and 4 - 'Sweden': [3, 4], # division by level 4 seems ideal - 'United Kingdom': [4, 5], # level 5 is necessary but not comprehensive + 'Brazil': [3, 4], + 'Finland': [3, 6], # [3,5,6] in more fresh data? 
# division by level 6 seems ideal + 'France': [3, 4], + 'Germany': [4, 5], # not the whole country is covered by units of AL=5 + 'Netherlands': [3, 4], # there are Caribbean lands of both levels 3 and 4 + 'Sweden': [3, 4], # division by level 4 seems ideal + 'United Kingdom': [4, 5], # level 5 is necessary but not comprehensive } country_initial_levels = dict(itertools.chain( ((country, ([level] if level > 2 else [])) - for level, countries in unilevel_countries.items() - for country in countries), + for level, countries in unilevel_countries.items() + for country in countries), multilevel_countries.items() )) + +# The dict value is a tuple of 2 items. +# First: array of admin levels of mandatory, non-coarsable regions. +# Second: array of lower admin levels at which units may be merged. +# Each "admin level" may be a number or a tuple of numbers - "meta admin_level". +# E.g. in Japan [6,7] is a county metalevel: rural counties are (7), cities are (6) +# and may be divided into (7)-subregions. Any level of a metalevel may be +# non-comprehensive in the sense that it need not cover the whole upper-metalevel unit, +# but altogether they should. 
+country_levels = { + 'Afghanistan': ([], [4, 5]), + 'Australia': ([4], [6]), + 'Austria': None, + 'Belarus': ([4], [6]), + 'Belgium': None, + 'Brazil': None, + 'China': None, + 'France': None, + 'Germany': ([(4, 5)], [6]), + 'Greece': ([(3, 4)], [5, 6]), + 'India': None, + 'Indonesia': None, + 'Italy': None, + 'Iran': ([4], [5, 6]), + 'Ireland': None, + 'Japan': ([(4, 5)], [(6, 7)]), + 'Mali': ([4], [6]), + 'Netherlands': ([3, 4], [8]), + 'Norway': None, + 'Russia': None, + 'Slovakia': ([(3, 4)], []), + 'Spain': None, + 'Sweden': None, + 'Finland': None, + 'Tanzania': ([3, 4], [5]), + 'Turkey': ([3, 4], [6]), + 'United Kingdom': None, + 'United States': ([4], [6]), +} + +# Transform each metalevel to list if it is not +country_levels = { + c: ([ + [[ml] if isinstance(ml, int) else ml for ml in mls] + for mls in mls_sequence + ] + if mls_sequence is not None + else None + ) + for c, mls_sequence in country_levels.items() +} diff --git a/web/app/countries_structure.py b/web/app/countries_structure.py index 5567cb4..2a0bc7a 100644 --- a/web/app/countries_structure.py +++ b/web/app/countries_structure.py @@ -1,13 +1,29 @@ +import itertools + +import config + +from auto_split import( + combine_into_clusters, +) from borders_api_utils import ( copy_region_from_osm, divide_region_into_subregions, + get_free_id, get_osm_border_name_by_osm_id, ) from config import ( BORDERS_TABLE as borders_table, + MWM_SIZE_THRESHOLD, OSM_TABLE as osm_table ) -from countries_division import country_initial_levels +from countries_division import country_levels +from simple_splitting import simple_split +from subregions import ( + get_regions_basic_info, + get_regions_info, + get_geometrical_subregions, + update_border_mwm_size_estimation, +) class CountryStructureException(Exception): @@ -19,52 +35,248 @@ def _clear_borders(conn): cursor.execute(f"DELETE FROM {borders_table}") -def _make_country_structure(conn, country_osm_id): - country_name = get_osm_border_name_by_osm_id(conn, 
country_osm_id) +def checksum_area(conn, regions, region_id): + """Returns True if the sum of subregion areas (almost) equal + to the region area. + """ + region = regions[region_id] + children = [r for r in regions.values() if r['parent_id'] == region_id] + regions_without_area = [r for r in itertools.chain(children, [region]) + if 'land_area' not in r] + regions_without_area_ids = [r['id'] for r in regions_without_area] + regions_info = get_regions_basic_info(conn, regions_without_area_ids, osm_table) + for r_id, r_data in regions_info.items(): + regions[r_id]['land_area'] = r_data['land_area'] + + children_area = sum(r['land_area'] for r in children) + has_lost_subregions = (children_area < 0.99 * region['land_area']) + return not has_lost_subregions + + +def _amend_regions_with_mwm_size(conn, regions): + region_ids_without_size = [s_id for s_id, s_data in regions.items() + if 'mwm_size_est' not in s_data] + extra_regions = get_regions_info(conn, region_ids_without_size, osm_table) + for s_id, s_data in extra_regions.items(): + regions[s_id]['mwm_size_est'] = s_data['mwm_size_est'] + + +def auto_divide_country(conn, country_id): + country_name = get_osm_border_name_by_osm_id(conn, country_id) + metalevels = country_levels.get(country_name, None) + if metalevels is None: + e, w = copy_region_from_osm(conn, country_id) + conn.commit() + return e, w + + regions = { + country_id: { + 'id': country_id, + 'name': country_name, + 'al': 2, + 'parent_id': None + } + } + + all_metalevels = metalevels[0] + metalevels[1] + fill_regions_structure(conn, regions, country_id, all_metalevels) + non_mergeable_metalevels = metalevels[0] + + for metalevel, lower_metalevel in list(zip(all_metalevels[:-1], all_metalevels[1:]))[::-1]: + if lower_metalevel in non_mergeable_metalevels: + break + # Find regions at metalevel that composed of subregions at lower_metalevel + region_ids_at_metalevel = [r['id'] for r in regions.values() + if r['al'] in metalevel] + for region_id in 
region_ids_at_metalevel: + if checksum_area(conn, regions, region_id): + regions[region_id]['has_lost_subregions'] = False + children = [r for r in regions.values() + if r['parent_id'] == region_id] + mergeable_children = {ch['id']: ch for ch in children + if 'clusters' not in ch} + _amend_regions_with_mwm_size(conn, mergeable_children) + dcu = combine_into_clusters(conn, + mergeable_children, config.MWM_SIZE_THRESHOLD) + regions[region_id]['mwm_size_est'] = sum(ch['mwm_size_est'] + for ch in children) + if len(children) == len(mergeable_children): + # If the sum of subregions is less than mwm_size_thr + # then collapse clusters into one despite of geometrical connectivity + dcu.try_collapse_into_one() + + if len(dcu.clusters) == 1 and len(children) == len(mergeable_children): + regions[region_id]['merged_up_to_itself'] = True + for ch in children: + regions[ch['id']]['merged'] = True + else: + real_clusters = { + cl_id: cl_data + for cl_id, cl_data in dcu.clusters.items() + if len(cl_data['subregion_ids']) > 1 + } + regions[region_id]['clusters'] = real_clusters + for cluster in real_clusters.values(): + for s_id in cluster['subregion_ids']: + regions[s_id]['merged'] = True + else: + regions[region_id]['has_lost_subregions'] = True + + warnings = [] + save_country_structure_to_db(conn, regions) + conn.commit() + return [], warnings + + +def save_country_structure_to_db(conn, regions): + parent_ids = set(r['parent_id'] for r in regions.values() if r['parent_id'] is not None) + leaf_ids = set(regions.keys()) - parent_ids + for leaf_id in leaf_ids: + regions[leaf_id]['is_leaf'] = True - copy_region_from_osm(conn, country_osm_id, parent_id=None) + def save_clusters_to_db(conn, region_id): + assert('clusters' in regions[region_id]) + free_id = get_free_id() + with conn.cursor() as cursor: + parent_name = regions[region_id]['name'] + counter = 0 + for cl_id, cl_data in regions[region_id]['clusters'].items(): + if len(cl_data['subregion_ids']) == 1: + subregion_id = 
cl_data['subregion_ids'][0] + subregion_name = regions[subregion_id]['name'] + cursor.execute(f""" + INSERT INTO {borders_table} (id, name, parent_id, geom, + modified, count_k, mwm_size_est) + VALUES ( + {subregion_id}, + %s, + {region_id}, + ( + SELECT way FROM {osm_table} + WHERE osm_id = {subregion_id} + ), + now(), + -1, + {cl_data['mwm_size_est']} + ) + """, (subregion_name,)) + else: + counter += 1 + subregion_ids_str = ','.join(str(x) for x in cl_data['subregion_ids']) + cursor.execute(f""" + INSERT INTO {borders_table} (id, name, parent_id, geom, + modified, count_k, mwm_size_est) + VALUES ( + {free_id}, + %s, + {region_id}, + ( + SELECT ST_Union(way) FROM {osm_table} + WHERE osm_id IN ({subregion_ids_str}) + ), + now(), + -1, + {cl_data['mwm_size_est']} + ) + """, (f"{parent_name}_{counter}",)) + free_id -= 1 - if country_initial_levels.get(country_name): - admin_levels = country_initial_levels[country_name] - prev_admin_levels = [2] + admin_levels[:-1] - prev_level_region_ids = [country_osm_id] + def save_region_structure_to_db(conn, region_id): + r_data = regions[region_id] + if r_data.get('merged') == True: + return + copy_region_from_osm(conn, region_id, + parent_id=r_data['parent_id'], + mwm_size_est=r_data.get('mwm_size_est')) + if r_data.get('has_lost_subregions') or r_data.get('is_leaf'): + region_container = {k: v for k, v in regions.items() if k == region_id} + region_data = region_container[region_id] + mwm_size_est = update_border_mwm_size_estimation(conn, region_id) + region_data['mwm_size_est'] = mwm_size_est + if (mwm_size_est is not None and + mwm_size_est > MWM_SIZE_THRESHOLD): + simple_split(conn, region_data) + else: + children_ids = set(r['id'] for r in regions.values() + if r['parent_id'] == region_id) + children_in_clusters = set(itertools.chain.from_iterable( + cl['subregion_ids'] for cl in r_data.get('clusters', {}).values())) + standalone_children_ids = children_ids - children_in_clusters + if 'clusters' in r_data: + 
save_clusters_to_db(conn, region_id) + for ch_id in standalone_children_ids: + save_region_structure_to_db(conn, ch_id) - for admin_level, prev_level in zip(admin_levels, prev_admin_levels): - current_level_region_ids = [] - for region_id in prev_level_region_ids: - subregion_ids = divide_region_into_subregions( - conn, region_id, admin_level) - current_level_region_ids.extend(subregion_ids) - prev_level_region_ids = current_level_region_ids + + country_id = [k for k, v in regions.items() if v['parent_id'] is None] + assert len(country_id) == 1 + country_id = country_id[0] + + save_region_structure_to_db(conn, country_id) + conn.commit() + + +def fill_regions_structure(conn, regions, region_id, metalevels): + """Given regions tree-like dict, amend it by splitting region_id + region at metalevels. + """ + leaf_ids = [region_id] + for metalevel in metalevels: + for leaf_id in leaf_ids: + fill_region_structure_at_metalevel(conn, regions, leaf_id, metalevel) + leaf_ids = [ + r_id for r_id in + (set(regions.keys()) - set(r['parent_id'] for r in regions.values())) + if regions[r_id]['al'] in metalevel + ] + + +def fill_region_structure_at_metalevel(conn, regions, region_id, metalevel): + """Divides a region with "region_id" into subregions of specified admin level(s). + Updates the "regions" tree-like dict: + region_id : {'id': region_id, 'al': admin_level, 'parent_id': parent_id} + """ + + def process_subregions_of(region_id): + subregion_ids_by_level = [] + # "regions" dict is used from the closure + for sublevel in (lev for lev in metalevel if lev > regions[region_id]['al']): + subregions = get_geometrical_subregions( + conn, region_id, osm_table, sublevel + ) + subregion_ids = list(subregions.keys()) + subregion_ids_by_level.append(subregion_ids) + for s_id in subregion_ids: + # As a first approximation, assign all found subregions + # of all sublevels to the region. This may change in deeper recursion calls. 
+ if s_id not in regions: + regions[s_id] = { + 'id': s_id, + 'name': subregions[s_id], + 'parent_id': region_id, + 'al': sublevel, + + } + else: + regions[s_id]['parent_id'] = region_id + + for layer in subregion_ids_by_level: + for s_id in layer: + process_subregions_of(s_id) + + process_subregions_of(region_id) def create_countries_initial_structure(conn): _clear_borders(conn) with conn.cursor() as cursor: - # TODO: process overlapping countries, like Ukraine and Russia with common Crimea cursor.execute(f""" - SELECT osm_id + SELECT osm_id, name FROM {osm_table} WHERE admin_level = 2 """ ) - for country_osm_id, *_ in cursor: - _make_country_structure(conn, country_osm_id) - conn.commit() - return - - -def _get_country_osm_id_by_name(conn, name): - with conn.cursor() as cursor: - cursor.execute(f""" - SELECT osm_id FROM {osm_table} - WHERE admin_level = 2 AND name = %s - """, (name,)) - row_count = cursor.rowcount - if row_count > 1: - raise CountryStructureException(f'More than one country "{name}"') - rec = cursor.fetchone() - if not rec: - raise CountryStructureException(f'Not found country "{name}"') - return rec[0] - + for country_osm_id, country_name in cursor: + # Only create small countries - to not forget to create them manually + if country_name not in country_levels: + auto_divide_country(conn, country_osm_id) diff --git a/web/app/simple_splitting.py b/web/app/simple_splitting.py new file mode 100644 index 0000000..5f41c54 --- /dev/null +++ b/web/app/simple_splitting.py @@ -0,0 +1,186 @@ +import json + +from borders_api_utils import ( + get_free_id, +) +from config import ( + BORDERS_TABLE as borders_table, + MWM_SIZE_THRESHOLD, +) +from subregions import ( + update_border_mwm_size_estimation, +) + + +def simple_split(conn, region): + """Split region {'id', 'name', 'mwm_size_est'} (already present in borders table) + into 2 or 4 parts""" + + mwm_size_est = region['mwm_size_est'] + #print(f"simple_split, size = {mwm_size_est}, 
MWM_SIZE_THRESHOLD={MWM_SIZE_THRESHOLD}") + + if mwm_size_est is None or mwm_size_est > 2 * MWM_SIZE_THRESHOLD: + return split_into_4_parts(conn, region) + else: + return split_into_2_parts(conn, region) + + +def split_into_2_parts(conn, region): + bbox = get_region_bbox(conn, region['id']) + width = bbox[2] - bbox[0] + height = bbox[3] - bbox[1] + split_vertically = (width > height) + + if split_vertically: + mid_lon = (bbox[2] + bbox[0]) / 2 + min_lat = bbox[1] + max_lat = bbox[3] + line_sql = f"LINESTRING({mid_lon} {min_lat}, {mid_lon} {max_lat})" + position_tag = f"(ST_XMin(geom) + ST_XMax(geom)) / 2 < {mid_lon}" + name_tags = ('west', 'east') + else: + mid_lat = (bbox[3] + bbox[1]) / 2 + min_lon = bbox[0] + max_lon = bbox[2] + line_sql = f"LINESTRING({min_lon} {mid_lat}, {max_lon} {mid_lat})" + position_tag = f"(ST_YMin(geom) + ST_YMax(geom)) / 2 < {mid_lat}" + name_tags = ('south', 'north') + + free_id = get_free_id() + ids = (free_id, free_id - 1) + + with conn.cursor() as cursor: + with conn.cursor() as insert_cursor: + cursor.execute(f""" + SELECT ST_AsText(ST_CollectionExtract(ST_MakeValid(ST_Collect(geom)), 3)) AS geom, + {position_tag} AS is_lower + FROM ( + SELECT + (ST_DUMP( + ST_Split( + ( + SELECT geom FROM {borders_table} + WHERE id = {region['id']} + ), + ST_GeomFromText('{line_sql}', 4326) + ) + ) + ).geom as geom + ) q + GROUP BY {position_tag} + ORDER BY 2 DESC + """) + if cursor.rowcount < 2: + return False + for i, ((geom, is_lower), b_id, name_tag) in enumerate(zip(cursor, ids, name_tags)): + insert_cursor.execute(f""" + INSERT INTO {borders_table} (id, name, parent_id, geom, + modified, count_k, mwm_size_est) + VALUES ( + {b_id}, + %s, + {region['id']}, + ST_GeomFromText(%s, 4326), + now(), + -1, + NULL + ) + """, (f"{region['name']}_{name_tag}", geom) + ) + for b_id in ids: + update_border_mwm_size_estimation(conn, b_id) + return True + + +def split_into_4_parts(conn, region): + bbox = get_region_bbox(conn, region['id']) + mid_lon = 
(bbox[2] + bbox[0]) / 2 + mid_lat = (bbox[3] + bbox[1]) / 2 + min_lat = bbox[1] + max_lat = bbox[3] + min_lon = bbox[0] + max_lon = bbox[2] + position_tag_X = f"(ST_XMin(geom) + ST_XMax(geom)) / 2 < {mid_lon}" + position_tag_Y = f"(ST_YMin(geom) + ST_YMax(geom)) / 2 < {mid_lat}" + line_sql = ( + "LINESTRING(" + f"{min_lon} {mid_lat}," + f"{max_lon} {mid_lat}," + f"{max_lon} {min_lat}," + f"{mid_lon} {min_lat}," + f"{mid_lon} {max_lat}" + ")" + ) + + # 4 quadrants are defined by a pair of (position_tag_X, position_tag_Y) + name_tags = { + (True, True) : 'southwest', + (True, False) : 'northwest', + (False, True) : 'southeast', + (False, False): 'northeast' + } + + + with conn.cursor() as cursor: + with conn.cursor() as insert_cursor: + query = f""" + SELECT ST_AsText(ST_CollectionExtract(ST_MakeValid(ST_Collect(geom)), 3)) AS geom, + {position_tag_X}, + {position_tag_Y} + FROM ( + SELECT + (ST_DUMP( + ST_Split( + ( + SELECT geom FROM {borders_table} + WHERE id = {region['id']} + ), + ST_GeomFromText('{line_sql}', 4326) + ) + ) + ).geom as geom + ) q + GROUP BY {position_tag_X}, {position_tag_Y} + """ + cursor.execute(query) + if cursor.rowcount < 2: + return False + + free_id = get_free_id() + used_ids = [] + for geom, is_lower_X, is_lower_Y in cursor: + name_tag = name_tags[(is_lower_X, is_lower_Y)] + insert_cursor.execute(f""" + INSERT INTO {borders_table} (id, name, parent_id, geom, + modified, count_k, mwm_size_est) + VALUES ( + {free_id}, + %s, + {region['id']}, + ST_GeomFromText(%s, 4326), + now(), + -1, + NULL + ) + """, (f"{region['name']}_{name_tag}", geom) + ) + used_ids.append(free_id) + free_id -= 1 + for b_id in used_ids: + update_border_mwm_size_estimation(conn, b_id) + return True + + +def get_region_bbox(conn, region_id): + """Return [xmin, ymin, xmax, ymax] array for the region from borders table""" + with conn.cursor() as cursor: + cursor.execute(f""" + SELECT ST_AsGeoJSON(BOX2D(geom)) + FROM {borders_table} + WHERE id = %s + """, (region_id,)) + 
geojson = json.loads(cursor.fetchone()[0]) + bb = geojson['coordinates'][0] + # bb[0] is the [xmin, ymin] corner point, bb[2] - [xmax, ymax] + return bb[0] + bb[2] + diff --git a/web/app/static/borders.js b/web/app/static/borders.js index 3ade574..4fd5a0e 100644 --- a/web/app/static/borders.js +++ b/web/app/static/borders.js @@ -598,6 +598,17 @@ function bDisable() { }); } +function bSimpleSplit() { + if (!selectedId || !(selectedId in borders)) + return; + $.ajax(getServer('simple_split'), { + data: { + 'id': selectedId + }, + success: makeAnswerHandler(updateBorders) + }); +} + function bDelete() { if (!selectedId || !(selectedId in borders)) return; @@ -977,18 +988,31 @@ function updatePointList(data) { a.onclick = (function(id, name) { return function() { pPointSelect(id, name); - return false - } + return false; + }; })(b['id'], b['name']); - list.append(a, $('
')); - $(a).text(b['admin_level'] + ': ' + b['name'] + ' (' + Math.round(b[ - 'area']) + ' км²)'); + list.append(a); + $(a).text(b['admin_level'] + ': ' + b['name'] + ' (' + + Math.round(b['area']) + ' км²)'); + if (b['admin_level'] == 2) { + var auto_divide_link = document.createElement('a'); + auto_divide_link.href = '#'; + auto_divide_link.onclick = (function(id) { + return function() { + pAutoDivideCountry(id); + return false; + }; + })(b['id']); + $(auto_divide_link).text("!!autodivide!!"); + list.append("
   ", auto_divide_link); + } + list.append("
"); } } -function pPointSelect(id, name1) { +function pPointSelect(id, osm_name) { var name = $('#p_name').val(); - name = name.replace('*', name1); + name = name.replace('*', osm_name); $.ajax(getServer('from_osm'), { data: { 'name': name, @@ -999,6 +1023,14 @@ function pPointSelect(id, name1) { bPointCancel(); } +function pAutoDivideCountry(id) { + $.ajax(getServer('auto_divide_country'), { + data: {'id': id}, + success: makeAnswerHandler(updateBorders) + }); + bPointCancel(); +} + function bPointCancel() { $('#point').hide(); $('#actions').show(); diff --git a/web/app/subregions.py b/web/app/subregions.py index db21dee..7690aec 100644 --- a/web/app/subregions.py +++ b/web/app/subregions.py @@ -12,6 +12,21 @@ from mwm_size_predictor import MwmSizePredictor +def get_regions_info(conn, region_ids, regions_table, need_cities=False): + """Get regions info including mwm_size_est in the form of + dict {region_id => region data} + """ + regions_info = get_regions_basic_info(conn, region_ids, regions_table) + _add_mwm_size_estimation(conn, regions_info, regions_table, need_cities) + keys = ('name', 'mwm_size_est') + if need_cities: + keys = keys + ('cities',) + return {region_id: {k: region_data[k] for k in keys + if k in region_data} + for region_id, region_data in regions_info.items() + } + + def get_subregions_info(conn, region_id, region_table, next_level, need_cities=False): """ @@ -21,72 +36,78 @@ def get_subregions_info(conn, region_id, region_table, :param next_level: admin level of subregions to find :return: dict {subregion_id => subregion data} including area and population info """ - subregion_ids = _get_geometrical_subregion_ids(conn, region_id, - region_table, next_level) - subregions = _get_regions_basic_info(conn, subregion_ids) - _add_mwm_size_estimation(conn, subregions, need_cities) - keys = ('name', 'mwm_size_est') - if need_cities: - keys = keys + ('cities',) - return {subregion_id: {k: subregion_data[k] for k in keys - if k in subregion_data} - for 
subregion_id, subregion_data in subregions.items() - } + subregions = get_geometrical_subregions(conn, region_id, + region_table, next_level) + subregion_ids = list(subregions.keys()) + return get_regions_info(conn, subregion_ids, osm_table, need_cities) -def _get_geometrical_subregion_ids(conn, region_id, region_table, next_level): +def get_geometrical_subregions(conn, region_id, region_table, next_level): region_id_column, region_geom_column = ( ('id', 'geom') if region_table == borders_table else ('osm_id', 'way') ) with conn.cursor() as cursor: cursor.execute(f""" - SELECT subreg.osm_id + SELECT subreg.osm_id, subreg.name FROM {region_table} reg, {osm_table} subreg WHERE reg.{region_id_column} = %s AND subreg.admin_level = %s AND ST_Contains(reg.{region_geom_column}, subreg.way) """, (region_id, next_level) ) - return list(rec[0] for rec in cursor) + return {s_id: name for s_id, name in cursor} -def _get_regions_basic_info(conn, region_ids): +def get_regions_basic_info(conn, region_ids, regions_table, need_land_area=True): """Gets name, land_area for regions in OSM borders table""" if not region_ids: return {} + region_id_column, region_geom_column = ( + ('id', 'geom') if regions_table == borders_table else + ('osm_id', 'way') + ) region_ids_str = ','.join(str(x) for x in region_ids) - with conn.cursor() as cursor: - cursor.execute(f""" - SELECT reg.osm_id, reg.name, + land_area_expr = ( + 'NULL' if not need_land_area + else f""" ST_Area( geography( ST_Intersection( - reg.way, + reg.{region_geom_column}, ( SELECT ST_Union(c.geom) FROM {land_polygons_table} c - WHERE c.geom && reg.way + WHERE c.geom && reg.{region_geom_column} ) ) ) - ) / 1.0E+6 land_area - FROM {osm_table} reg - WHERE osm_id in ({region_ids_str}) + ) / 1.0E+6 + """ + ) + with conn.cursor() as cursor: + cursor.execute(f""" + SELECT reg.{region_id_column}, reg.name, + ST_Area(reg.{region_geom_column}) / 1.0E+6 area, + {land_area_expr} land_area + FROM {regions_table} reg + WHERE 
{region_id_column} in ({region_ids_str}) """ ) regions = {} - for osm_id, name, land_area in cursor: + for r_id, name, area, land_area in cursor: region_data = { - 'osm_id': osm_id, + 'id': r_id, 'name': name, - 'land_area': land_area, + 'area': area, } - regions[osm_id] = region_data + if need_land_area: + region_data['land_area'] = land_area + regions[r_id] = region_data return regions -def _add_population_data(conn, regions, need_cities): +def _add_population_data(conn, regions, regions_table, need_cities): """Adds population data only for regions that are suitable for mwm size estimation. """ @@ -106,13 +127,18 @@ def _add_population_data(conn, regions, need_cities): if need_cities: data['cities'] = [] + region_id_column, region_geom_column = ( + ('id', 'geom') if regions_table == borders_table else + ('osm_id', 'way') + ) + region_ids_str = ','.join(str(x) for x in region_ids) with conn.cursor() as cursor: cursor.execute(f""" - SELECT b.osm_id, p.name, coalesce(p.population, 0), p.place - FROM {osm_table} b, {osm_places_table} p - WHERE b.osm_id IN ({region_ids_str}) - AND ST_Contains(b.way, p.center) + SELECT b.{region_id_column}, p.name, coalesce(p.population, 0), p.place + FROM {regions_table} b, {osm_places_table} p + WHERE b.{region_id_column} IN ({region_ids_str}) + AND ST_Contains(b.{region_geom_column}, p.center) """ ) for region_id, place_name, place_population, place_type in cursor: @@ -129,7 +155,7 @@ def _add_population_data(conn, regions, need_cities): region_data['hamlet_cnt'] += 1 -def _add_coastline_length(conn, regions): +def _add_coastline_length(conn, regions, regions_table): if not regions: return @@ -138,14 +164,20 @@ def _add_coastline_length(conn, regions): region_ids_str = ','.join(str(x) for x in regions.keys()) + region_id_column, region_geom_column = ( + ('id', 'geom') if regions_table == borders_table else + ('osm_id', 'way') + ) + with conn.cursor() as cursor: cursor.execute(f""" WITH buffered_borders AS ( -- 0.001 degree ~ 100 m - 
ocean buffer stripe to overcome difference -- in coastline and borders - SELECT id, ST_Buffer(geom, 0.001) geom - FROM {borders_table} - WHERE id IN ({region_ids_str}) + SELECT {region_id_column} id, + ST_Buffer({region_geom_column}, 0.001) geom + FROM {regions_table} + WHERE {region_id_column} IN ({region_ids_str}) ) SELECT bb.id, SUM( @@ -166,15 +198,13 @@ def _add_coastline_length(conn, regions): regions[b_id]['coastline_length'] = coastline_length -def _add_mwm_size_estimation(conn, regions, need_cities): +def _add_mwm_size_estimation(conn, regions, regions_table, need_cities): for region_data in regions.values(): region_data['mwm_size_est'] = None - _add_population_data(conn, regions, need_cities) - _add_coastline_length(conn, regions) + _add_population_data(conn, regions, regions_table, need_cities) + _add_coastline_length(conn, regions, regions_table) - #from pprint import pprint as pp - #pp(regions) regions_to_predict = [ ( s_id, @@ -202,16 +232,16 @@ def _add_mwm_size_estimation(conn, regions, need_cities): def update_border_mwm_size_estimation(conn, border_id): - regions = _get_regions_basic_info(conn, [border_id]) + regions = get_regions_basic_info(conn, [border_id], borders_table) if math.isnan(regions[border_id]['land_area']): - e = Exception(f"Area is NaN for border '{name}' ({border_id})") + e = Exception(f"Area is NaN for border '{regions[border_id]['name']}' ({border_id})") raise e - _add_mwm_size_estimation(conn, regions, need_cities=False) + _add_mwm_size_estimation(conn, regions, borders_table, need_cities=False) mwm_size_est = regions[border_id].get('mwm_size_est') # mwm_size_est may be None. Python's None is converted to NULL - # during %s substitution in execute(). + # during %s substitution in cursor.execute(). 
with conn.cursor() as cursor: cursor.execute(f""" UPDATE {borders_table} @@ -219,6 +249,7 @@ def update_border_mwm_size_estimation(conn, border_id): WHERE id = %s """, (mwm_size_est, border_id,)) conn.commit() + return mwm_size_est def is_administrative_region(conn, region_id): @@ -248,12 +279,14 @@ def get_region_country(conn, region_id): possibly itself. """ predecessors = get_predecessors(conn, region_id) - return predecessors[-1] + return predecessors[-1] if predecessors is not None else (None, None) def get_predecessors(conn, region_id): """Returns the list of (id, name)-tuples of all predecessors, - starting from the very region_id. + starting from the very region_id, and None if there is no + requested region or one of its predecessors in the DB which + may occur due to other queries to the DB. """ predecessors = [] cursor = conn.cursor() @@ -265,9 +298,7 @@ def get_predecessors(conn, region_id): ) rec = cursor.fetchone() if not rec: - raise Exception( - f"No record in '{borders_table}' table with id = {region_id}" - ) + return None predecessors.append(rec[0:2]) parent_id = rec[2] if not parent_id: diff --git a/web/app/templates/index.html b/web/app/templates/index.html index 669416f..0713c43 100644 --- a/web/app/templates/index.html +++ b/web/app/templates/index.html @@ -227,6 +227,8 @@
+ +

From 2e1922393c6c5a4fa1f7c8b0ab4bfe6cfc3839da Mon Sep 17 00:00:00 2001 From: tatiana-yan Date: Fri, 21 May 2021 10:16:50 +0300 Subject: [PATCH 3/5] Fix usage before assignment. --- web/app/borders_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/app/borders_api.py b/web/app/borders_api.py index 047ae64..ef06e79 100755 --- a/web/app/borders_api.py +++ b/web/app/borders_api.py @@ -298,6 +298,7 @@ def split(): line = request.args.get('line') save_region = (request.args.get('save_region') == 'true') borders_table = config.BORDERS_TABLE + warnings = [] with g.conn.cursor() as cursor: # check that we're splitting a single polygon cursor.execute(f""" @@ -346,7 +347,6 @@ def split(): new_ids.append(free_id) counter += 1 free_id -= 1 - warnings = [] for border_id in new_ids: try: update_border_mwm_size_estimation(g.conn, border_id) From af23bd423a3d6216925be257e7e65d01d9748a27 Mon Sep 17 00:00:00 2001 From: tatiana-yan Date: Tue, 25 May 2021 11:07:57 +0300 Subject: [PATCH 4/5] Fix Cote d'Ivoire processing. 
--- web/app/borders_api_utils.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/web/app/borders_api_utils.py b/web/app/borders_api_utils.py index ea5f800..0df25ab 100644 --- a/web/app/borders_api_utils.py +++ b/web/app/borders_api_utils.py @@ -409,16 +409,18 @@ def copy_region_from_osm(conn, region_id, name=None, parent_id='not_passed', mwm errors.append(f"Region with id={region_id} already exists under name '{name}'") return errors, warnings - name_expr = f"'{name}'" if name else "name" parent_id_sql = None if parent_id == 'not_passed' else parent_id - cursor.execute(f""" + query = f""" INSERT INTO {borders_table} (id, geom, name, parent_id, modified, count_k, mwm_size_est) - SELECT osm_id, way, {name_expr}, %s, now(), -1, %s + SELECT osm_id, way, {'%s' if name is not None else 'name'}, %s, now(), -1, %s FROM {osm_table} WHERE osm_id = %s - """, (parent_id_sql, mwm_size_est, region_id,) - ) + """ + args = (parent_id_sql, mwm_size_est, region_id) + if name is not None: + args = (name,) + args + cursor.execute(query, args) if parent_id == 'not_passed': assign_region_to_lowest_parent(conn, region_id) From 9951b0767b8c9829a95bf17e06f8f8deb42a3778 Mon Sep 17 00:00:00 2001 From: Alexey Zakharenkov Date: Thu, 2 Sep 2021 03:08:05 +0300 Subject: [PATCH 5/5] land/coastline tables absence checks --- web/app/config.py | 4 +++- web/app/countries_structure.py | 3 ++- web/app/subregions.py | 12 +++++++++--- web/app/utils.py | 27 +++++++++++++++++++++++++++ 4 files changed, 41 insertions(+), 5 deletions(-) create mode 100644 web/app/utils.py diff --git a/web/app/config.py b/web/app/config.py index dd3912d..d4ddc25 100644 --- a/web/app/config.py +++ b/web/app/config.py @@ -13,8 +13,10 @@ # transit table for autosplitting results AUTOSPLIT_TABLE = 'splitting' # table with land polygons (i.e. 
without ocean), split into smaller overlapping pieces -LAND_POLYGONS_TABLE = 'coasts2' +# TODO: prepare this table during docker container setup +LAND_POLYGONS_TABLE = 'land' # coastline split into smaller chunks +# TODO: prepare this table during docker container setup COASTLINE_TABLE = 'coastlines' # tables with borders for reference OTHER_TABLES = { diff --git a/web/app/countries_structure.py b/web/app/countries_structure.py index 2a0bc7a..15b3aee 100644 --- a/web/app/countries_structure.py +++ b/web/app/countries_structure.py @@ -24,6 +24,7 @@ get_geometrical_subregions, update_border_mwm_size_estimation, ) +from utils import is_land_table_available class CountryStructureException(Exception): @@ -64,7 +65,7 @@ def _amend_regions_with_mwm_size(conn, regions): def auto_divide_country(conn, country_id): country_name = get_osm_border_name_by_osm_id(conn, country_id) metalevels = country_levels.get(country_name, None) - if metalevels is None: + if metalevels is None or not is_land_table_available(conn): e, w = copy_region_from_osm(conn, country_id) conn.commit() return e, w diff --git a/web/app/subregions.py b/web/app/subregions.py index 7690aec..15f74c1 100644 --- a/web/app/subregions.py +++ b/web/app/subregions.py @@ -10,6 +10,10 @@ COASTLINE_TABLE as coastline_table, ) from mwm_size_predictor import MwmSizePredictor +from utils import ( + is_coastline_table_available, + is_land_table_available, +) def get_regions_info(conn, region_ids, regions_table, need_cities=False): @@ -69,7 +73,7 @@ def get_regions_basic_info(conn, region_ids, regions_table, need_land_area=True) ) region_ids_str = ','.join(str(x) for x in region_ids) land_area_expr = ( - 'NULL' if not need_land_area + 'NULL' if not need_land_area or not is_land_table_available(conn) else f""" ST_Area( geography( @@ -111,9 +115,11 @@ def _add_population_data(conn, regions, regions_table, need_cities): """Adds population data only for regions that are suitable for mwm size estimation. 
""" + print(regions) region_ids = [ s_id for s_id, s_data in regions.items() - if s_data['land_area'] <= MWM_SIZE_PREDICTION_MODEL_LIMITATIONS['land_area'] + if s_data.get('land_area') is not None and + s_data['land_area'] <= MWM_SIZE_PREDICTION_MODEL_LIMITATIONS['land_area'] ] if not region_ids: return @@ -156,7 +162,7 @@ def _add_population_data(conn, regions, regions_table, need_cities): def _add_coastline_length(conn, regions, regions_table): - if not regions: + if not regions or not is_coastline_table_available(conn): return for r_data in regions.values(): diff --git a/web/app/utils.py b/web/app/utils.py new file mode 100644 index 0000000..b6b5b39 --- /dev/null +++ b/web/app/utils.py @@ -0,0 +1,27 @@ +import psycopg2 + +from config import ( + COASTLINE_TABLE as coastline_table, + LAND_POLYGONS_TABLE as land_polygons_table, +) + + +def is_land_table_available(conn): + with conn.cursor() as cursor: + try: + cursor.execute(f"""SELECT * FROM {land_polygons_table} LIMIT 2""") + return True + except psycopg2.Error as e: + conn.rollback() + return False + + +def is_coastline_table_available(conn): + with conn.cursor() as cursor: + try: + cursor.execute(f"""SELECT * FROM {coastline_table} LIMIT 2""") + return True + except psycopg2.Error as e: + conn.rollback() + return False +