From 37645ef2b79b2baf2c13857bd6713a5bfbb2e2ae Mon Sep 17 00:00:00 2001
From: Ben Jeffery <ben.jeffery@bdi.ox.ac.uk>
Date: Mon, 3 Feb 2025 10:29:17 +0000
Subject: [PATCH 01/10] Large scale inference docs

---
 docs/_static/ancestor_grouping.png | Bin 0 -> 31844 bytes
 docs/_toc.yml                      |   1 +
 docs/api.rst                       |  26 +++
 docs/inference.md                  |   2 +-
 docs/large_scale.md                | 136 ++++++++++++++++
 tsinfer/formats.py                 |   2 +-
 tsinfer/inference.py               | 248 ++++++++++++++++++++++++++++-
 7 files changed, 405 insertions(+), 10 deletions(-)
 create mode 100644 docs/_static/ancestor_grouping.png
 create mode 100644 docs/large_scale.md
diff --git a/docs/_static/ancestor_grouping.png b/docs/_static/ancestor_grouping.png
new file mode 100644
index 0000000000000000000000000000000000000000..a79c75b5a3bde6aaaad800d8d5a983fbd52dac34
GIT binary patch
literal 31844
zcmeFZby!tf*FL-vK^jpIrBjgZ?vn2A?vmQ1GzdsYN_VFM(yf9Z-6bhVw={fn<2g@#
z?{$5@_qx8{f6wu_Wv?~oTyu^&#y#$F&!u6C@-NX*@K7KS2)dM{m@))%4+??64I?3f
zD@YcZXb{NL8BbMB7iA+TiKCN)xs|OMiHnD$8Ht&@l{p0BK2x4z<%G}uDfrd|r{(_l
zdQFV4Z|VZ;u5RRG6V0^LhAZs9ky)`MBhM|N2{j(w-a_Zw&Wc}nCCOCJ*qmIgW_S@a
zGu>W18#LKJ_watLeROcW%edD>_=AyrFl_z!qJ49V`^x?HI+%FSXurgp=Uw^fgQH1I
z&)57fFP(BG-MvR3$DVBpV+&N$2PeYrnTE{;KW*xMd!QUOXDlrE2Kx&ZhT67aT-M!k
z=G381N-<|($F4KypF(19!es(+{kz{gjg@w8O|EQB?W6QSUtOGSy!~ZebUqvS`de>z
zDdTj?-cWtzbnk(W<i(@ai-W73ONMwx;=Jgs?m>-#-%8Khnir%kE3tgST5ope-7>jZ
zBi_+wSL{h$zh6EIUwNK1l;6)>vx)B;tWvXHcVwe_l7s!p;PqjX2<AJN`Is&Y@;S$h
zb(|-n-w*1i+_6@|H#|m_Bls(*;<Vb9z9xFkeCyfr9LZ3*dMe}2FnjGZIR2J)uRWXD
z@ngHj&DrGj2JWs}tEeKKl#lyQP6>omBWh>QRVJ24ySzm|k@uq)Dg}FO$O6Tc)FWK(
z@fsAqr@RsiIHurvWw>-c9+<szij0Jn#DMmkf<<WJSEyp31M{bcNO|wFI)=ts<IsYX
zm(63C3st{GitD9R*6b7D$*kISU-~^urY<g!u=m8Hpj{WauCW~(qAp%lmZmS)+%Owc
zb*8VKP)7*Q%}>;$E&nu8cEUlY^~rcVCD|39|Mz{%M18NV2`}P68;k+Iv)_w;%;srU
zYg%(<)pDsmQR3%i&Alb1^g2F~gwUDA?usV6S)b^~82K)TFA6i<wvSmE^4;Q^&J%SU
zn%pi4iM6~6PwEx~d$ipx=mnBT{CCK0nrCOCRql64DGf+!x^TxF6ksb3DRCb?af<3q
zb6WGtqm?#|5$6u1>v{U@JJn~WuIRajv)&?>Q-h!VwO2AfmIeFMR5ovzoZHrSn#$~k
zhdfz+Z;Bv{Q#D4LNU+^>sI>_;wOuSKWZrf?*5ZfQ%bIuC^MAMhuK#UfQ{WL@>F)O?
zDF)F6`t|i`{4v6<jPEs>&t~SK1zyTGYb~)HZwQ*bd=R^Rjv^WNZq5ekk{o?~A_bd*
zF1Kv1rYYwHlfOcf9B)nW7W6u=<E*@)$fy167e~nsP_^0W1h3Hw)t+Xxy<wv<_2bIu
zqKkFWU%##!t}i;eFTEYM5%VKIhU0w-adJYF=-xe0CAu(dvV-)>S%p+hZkDUY6xMw@
z40-t-9z~Urir6BFr;^dc?#Lou-Zy3b@wxGfifFtxtFW_%<-V6+MweP`8=*{RugZ>|
z=H)c;UTnsAUhiehI}kg5kSk2DetsZeCM)7vF)obO$JL8REMRiqm*LQ@TYFAA(q-=H
zCW0eXILcW{b*`CwTfbtyhZDvSOGSocFO5fvL0=t$>OEt}V<h{)?-p^7G8Mw}v(snL
zZ(_5YJA?<GbsW~1GtGKHx)vojOY_Y(91nJnet)t1{)RD;6al%-c?j`}H*uEk(WKe-
z2kqc$byOj`2MPS}?ah;6*xj^ryEwR&%BDRj9@C`!<5=+dDUZip5gP=Y#fa!j3AQH1
znr`8GTNT8nb1FOOYva<2Z7!sT6YjmgaHmyEIV*kVJ%3Z9y%!lz80n>z%z3;);hl_<
z!ceG@Oq8B4cYl0pdgOOp<JeoCv6@pc5n=yVmAajLv~a!q^+y(=dgfd{)5QuXS(>IZ
z`q^KC&vx=hM$jI%G%0E!E(JRm2SgH4`yU-__d<ug-IppYg*Bwj9MCNDSVa9xP29Rw
zoNnU`eXk9%#l*8ip0i+OiHPeOiuNmfz|Cy;Get^B*2(jc&lJI*R0(QjcPt1FAx5kG
zIiLJWKF)hs*Su#~uOlvz(X(jMmUfcVC-!%ZXny0;-hS@8?{0~9I)&_Jj`Kc>Vko}C
zX^NjM#XIfQ1HVYm;McQvkX1xdJ9=}EEX64cE1oo(Z@ri1lcbC*9QARZ;m1APiV;2@
zT`q61$zPOQh*BN;O!)4jQxigzSdbx#B2B7gK8rxim$s~%jcknv*As%f&hXiBL}LZ&
z1<Wg6)&ygI3x?a@Vm*?w8DdTXw4Xg=to|_jesYI1k8~wtgvM$6c5&M0HGF#gJp^KY
zels_*M_Iv{Y{3<TmY$v;6Q3E;HGc_yV9)qHiwA86A%<wrs;3<(C0fqUsqG}0+DUGj
zszy-?W2@-=N2C^0;3VVI;e;-(O(HWCe*c#NkGlD~E=bRt5sC6Wd@7bRIRdMoafbT@
zd&w$KtB6|==kOo8=jUO4_82$Mo%&{-)3^vXlZ-uk0G)YSuSN2@tQ9x;rKwCU<>V2I
z%Nozw0b)X`yQAXh#9{J>=S<^)SG6j`-|p4Fl3+Ex+4x4@<Fosi@|R)I2+zP1-E8Q^
zG-^QcZ+M~`M8X*h7e~P*f61_p*7!ai{A_`T`{fLd0myT~HAJKg<fVCZMFm4I#h}q{
zoA75wg0)#E34-vCPB|Y8)s;}oUfTsFh@H0J68RhIG^VfER?uN8^$#QC2Udks&NL|5
z=00eBc!}(Nj`khRC-NNx?PY8ziT{*QY&v}Kl2M=N#fn|J?&G`yoN#C10C#4-o>JKt
zA%3D8@w#*T#>YlesU*hrSUFfPSNUsRkG1Sm#X9&A;b|y^^rSvG?r?FXF+}f$f0kgM
zs~@e%!#OTBr{!cM`k>;A8$@hwyuU9~0)J?t>hV6sW<{K-N*??VE1}K;2PUI$3ccZ;
zaH}kZmhuYeeR|CXB241KO4rEZF;}Oi758KWQNR26VaZnpc0#g9=%YueEf;9v3fDXo
z+@LF>@R*4`kWI?tEt_;A=eM`<?!CE#3HMCs+^aoE%yc49-bm`bce1A(-<;=;ae1M8
zkN3BH-1`J?0&5wRxN!YTN@nV{tGe;J5%DRqR_cwepAQWqvE~E5c7+Qh=p8h7M}!UO
z_bP;!@Ya)J-jH6`E24TqXQF!iT`iZZEnJhHoe-=Gj}oSnlgLs(iG8C;D%2Bxwojyt
zra4Eoj*QKGni%8v0^<cb)2xe84i=Ie*<f{n4wiTIseAlviO80XmH^rkUCA&Nuc~v9
zH%@kxl3UuIIx{r~kv128s8vyP1%lq2qYj$GkvH3)<uj2if)Z$~nqHNVVsTU7ilK|R
z=bp$Dx{q3i3<-8mZRy@igir;CVb~;K)4K?V7g5)zlA#w76j(4kKZmv_G9VOVC!s5l
z?Ph<1B*Z2cv#!d<VGm4NpsV5*tmM=^%-wQo4&+1|-A+Jyi_Yzjo2y<Qo1JU2`_tT9
z<@a=<)0Z3^20SHg5_9SRhtP+*U()DC5GRx(Lt;WZWnKz1k*vKk&QRVS(nXQ9d*s%c
zLHI=a`)-<S?v#Dhx*Fq%k;S)Lq3p<uGUa`{hY8#K@3pv)72!8mh>FU@WgL5%ohyXi
zL#L=eFJ%o6SN%FfsLgiHZW0odrXCSF_Te@pE(*2IBooVHTr2aCu(vR4BxcMOF*YYP
zqc$&Sv6sUZ;(B)vE$^Ns^7@eevBwndTr<tMGu*KJ*ACwI961M)$13>(6-JTDfntj7
zl6rJRE~2NI5{1O{lVr$6LKI}8<yofNpGVU1-m6ZN$N0ULK;XlY%s}(E-api49sZFE
zZ}re7h4yP$U;beBI#Ew7+Pk<S8VH>qT?7r+mvW92vx@ZJTtzKE-Yu(A8;HCU*ACvb
zo-|l~kWYG6y7~b(;&FTYZf}4KN{zFVsYVYr>dWjysUOlWK8I8Uekx3Ix_`<bA=NiC
zFl&MmldAfy<xEWV0SP>>OB!3(PEi2D=5iCYk@3B+%*2j}oP_VBkxWd{!=ELj_&39?
z!^3ALzWuR!^XMj;GZ8H>ipb$Rb2RVTGhejPL%6V{i+8ch{6VjxT{g*izaGSQzZ>az
zNjf>C_EUtOgy*N=J9SxWJ5D1ti7#`!D#Up=Yln|ANs!oH$)>m%zZzq7EoCa2nt)@<
zBlC%xIh>>v3O;>eI&qKelQJ)PzAQzIHj}2@v6HaM($R-bwMnEjVkbC<!k)*zgjHL1
zql-N`;mV90kG{k)W4NL{G#TSCvuGd^)nlYsI4^o{IV4F#&dPd><@!K^WQ(fdCGsMN
zsn5@+!u4SbkaupPgh*#pmC*OehY*A=TlY-yL`<Zx=Y}O|p{R8GZX8IQE<uDCUp~Km
zTNxBypnZ@0gpn;;r2Ua`9I3KuxG16S7<^#H1a7j{s_$_S0Y@H-V~9%0ccM#{Ce!;B
z#5!Nz*Dg1GTo|(1x#t`Hnlz?hBCymSWsIbkA=pMlocZafm{kGA72C3D5%%I(RsFHF
z#Pddz>YhzSS79Mk{6Rc&PsB2Ms?#EN_<`vTydcTMgH*CWWU1);%GueBc(KYK`W2n%
zd(u#T#m5ERgwv`@4IH4yJo-pm6!EP~E)OsH$8WT7uCKivBk%5sBC0%Nn(w?nMr8fK
z<+-2qI?F&s$>;dxCyM3bGHhaQZi-F*s)j8qZH+Nylq1osO4U7GO^i66m_H(n`5%#B
zy42y>-y5~OxQCyKD!BB6`8DSzwjb@U`$UgE(PJgCbUt{qGg1OI!C3UXx3NMWvb`Fc
z7u5C4`oWWkQ5Ii%rP){QkP{K}fX%Ft?09op3KYz@Ob<|&@FkJhk~OtHAU}aG4oE|Y
zMs%v(&S}AKP;cf4xsS}vH~>Evrp*?X;o;Xpc3N)x2uW31*Vqk-rRfugo4Q)0crZQ_
zX85_g*jR|krVY;h+vwTA5n*(a@eK4OH&muBGjnIej{`YAEyk>P#^0#F#Jmf8J1TNA
zQ)a6vlb6o?YpQ4>kj~6a@<osn&6~$bi=*O#CJ#O(KCJ2%I_of13i>VirdW@m!oTV<
zP33_jhlW<`^D_+csi;wHtYu!|#C<C>^LWgxlp(ozxbJ?ZBNzBR_1uOIi^#eG)<RFW
zzY4o#FX<Zm!YRR+r&2<Pdwbk-FX;Y`UoVy@{D?F8VMhc_jnvSSmJf<1GQ62<dPfjn
zO8n3t651r=hM&-<aK|Xu9{7ZHk+MAPdnxnD;)`&C|3exbh@+^hD!v|^0MX;0BV{hB
zzthN!q#o2xGJAfO(tcdOzX%<XFU{s6MS2=GGO&hpR^2tis->$P{*Wwi?6}B6q{xZJ
zVhqJq)VRQ(+>`T=+LAfoT@J+gl?*(=eeC!O7VFO72oiBWCHwdd+eh9MV?A_jk1pOw
z(Cb(`AI|%ZDW$&5Ko#}J<3ajJ_o{t^x{)IbC&ipJ$4`PKlOmg9K+PxhYaawxrDN3C
zt}F{Tuxy_EAp?n82!~53T_@g(R;xgGO}HZ)HC%|KVfzT))AIG67cn(N`5ad!7j4@o
zR~Dal^uPC$7Y8E#Qc)tFm_%$u2{P-+(L@TE`)MDq!i9>ym9NW<Vsp=`F*Mot%)8`Q
z=Z`ab7T546!aZdah?Pe<24PQsyfUU#L=>VJKpS?xBDFy>R+!CQD*gRSve>DvW*~%N
z^kX_>m6>QWqnN|Y(+5(>CB#7@9^Zo&UE?^Pm<x=JdExS8K_4b*z^~o9{}f^N#D-%<
z*JfDts+WzaC5aypqF#5^n~>fLQ^bEGnKa&RgTgX1#<D_+VySuYUTd_oX=GWON4djP
z(>?tDff7b}53VT2s_!@MEPsa~vd?*_B<_&tufNjn#mP3YK3_%L%qZ^MCBQUSjQQn7
zfXeba(LU}C+%!wGChpw8m-dln(;{-)$Jp&SaZfrNzZS84Aaib3xKKj7C;CXX+Vrpr
zZbfeeWoG;>5=Ukh$uINIKR9kgN~VJ%CYQ%rB(nSiQFG{6d$oV5SE<J&sH4>$H)09H
z(LFJc(|!BtJ_H4uE!@UKvl(91d12E$2h-VbTS>hjmRTF&;zaI(4qH~iw6)&F62YUV
z=NZPmCo7VQE|-XC+;jb0@EwE_Vo$<^g`ROr=_G|f+h^O!*(u`Y4+w<=n$+-fv~;|i
zao63_vL2_aDZMLVH^gawnwHi7^Fu3E6Mb|jBLduhzuP<0Lwqhq%byM1RC6J?$zy{2
zy?GmRXur_|cm3&+8E%Sl=`lBa=@@G=`6JaLRFP+fH$GucRau!uX*Z57g=@dZ*2%K6
zWwJMpT){Fnk_mT@`kdt;Cq5RUUPci0^7O5de-Y;GHU?YyJxgIYx!4%;%MJ~{t7DAC
zMfWV0XfeDh?i#|H!PS_KAXg#cRnx2sx2mwgEmbzpL&F}S#ZRab4N|->gJV|P$(bf`
z4h?VAVk~B=wIhGycs^Ru-*_<396;VT#z^Ui))AoHXA-8!Jl&Y;_dQ;ty>V2UPu4xz
zrHycwOBnYXrWLZ-rPvQ+f<tEor3ejLB_$|3A6K|(pcJp-3RC9eJwyep{Xv-#%oKs9
zN12?+yly0q<d)e@{G&N<Z~}Gx-(6|r1e-p5*N6;1JlV93eyn6FRFsYqai5ei5v~Do
zjbz~ASC`$@b{;}i_<}D%7$k^U@rm`uVz`q<HOfP=G=%74ge*AP@efDXEvajW<`9cN
z6%n?DtZbvNcnxXIbPa`&PC#2Ujjd-<l+K-yh8P+B;>ri<+<gjaweQoP#FVPn%Ufd=
zM!YlXgrYde;07pn#P1LN78|lIU5<gbi$?X*RX)KWMNP`R#*aj2UMZ+4h}^rUTKq_i
zHt61g!N$|_CIPQ0%ZT&Uy=fEjH@kZRYtl7+@|N=xHG0)z$w(Y_9cS2uyRY(M!bsLM
zF-R}gcZt3GM9h^-GkR<ov;r1Hh0j>amvzH81WGMF@0;KE$`^FeT7RGyg4npx&LHU%
z{`jk>UAp(}vuB}?vC46!K2!E+y{SljfGAeDKOb_F>;KG;zJ$^0)B;lQ8t?I}_T|Bw
z8WW<xdmSX>7yiBu_`6L?qrQ8uCe@jLo*_O=H=ftV5P7dCt4ONyJhXA=(ca3?`=^KD
z#h*2|R_xIu9$*O*A`#eC^7oanwYsHZd7U*3ZR~o=4+Ia~*N<UWtbVVo+$KP~Q~#oo
zh)`D0kdYKZ;#n%X_BA^SuCYstpU+0VzAPJuQ{!~}#$)1I9N8e?tVH$+Y4Br+6gpe)
zjPn<bb~vU77?Dd9XEiUwR2t>+VwaR2wNEMP#;T9=dg36ZK({1v)vm?_9`Q+^7`}U)
zgCH1m+Nxb=P=)2XO@W&6Q=jZOo|Hm3qL%U`pH+Q7Pv{nhRH*H9PKzhKS*kyNFs;Cb
zTUzDT&3wfE)W*;l>k69?;TWSdl_I0JfnT0p9=Y0h{yc-Wd4>i5O*{Lp7H=rcL9~>{
zDuPyu*khe2b-dD*=HVN?)zo)anyKN1k=p2AH^vg)c)GYuj}_d1V0CgY$CLs2Ir$8$
zF*?0fZL(N+cbEiR-@{nc=>l^^=fPe>4s92awZ}~&4I-<&D@e(3$MCMQ^r#dJ;;#-)
z-qAfsUS}4TdW%&rYwoZY<=*aCJ~DR5;=cA>xUmkdFJkvte5>dzQ@Wr0M76Fl;oWOd
zO>0WD9s;XI)rST;wC{Jc)JNtEHc93&^bWX1KNW4uaKuEvJpG>i7XLbhf1D^6LqLdL
zNC(0zNiI>#I;u?`LA?JhR{f16K^m1CdbmXHiB1u{L2ZkqaV$sUsmpo$&-!$i5*MzS
zbEFl8c%{$3F81fdag)@gd9QEEzR62DL^wqVA!%20Z3Q6T6Cua6<Fg@;Iv{B9?@opH
ztXDoq*L)(9$NKRK(f;6SuE;eLYM<PSeqUI`-uh^islt7oT<0Nie;}KWJ>mmrIMrmj
z_xJhds^eN{v~Z-@vvG?sc6!OZ-gEIIQ)dRG1^igEJeq#S%vPP4XuA2nP1o2p^6R@)
z3^kpa&DWN1syceq;(Ge(Io?np<tV;;EXqmk5KDwaX!^`JLx0v-R+P47Cb3C7jlqzW
z?ETWK=gs$Ie{a8%vLXv{)wy4tGP5TdqqAydNE+1sQe<W~ed8Aq-ql=&yEu>E*DN9h
zuSH%;?@LdFkSf6_RV>yWb2Vk*ujdR_K;TRZqJqS^qn3o87Dly-Ghp<89JhuDB&sn`
z<zQ4Le_?L7FbFaJZa9)Y!*mZN{F9g@X4-mXq5~-lUy*$W8iGoWnbLgZJ>mN@9$dmt
z(5$_mp*C*>&vC_6Q5B=_h{Kc2pp)tf*V)y{*3gz$5aMe;Tf$p*eXK8VhDP$_Tk&kv
zh_||cD=X7DevN<4vW?PigA`dyKoCxWw2NR3$M-n+i9$Scs}mi+?50G6*IZOqXQa()
zIg$_wsY}Y?9Evcvdtsjv=eV(wAZJSRZ;9x`2)yP60uMd&2-KOf<25$!W2S4Gk4Did
z&F2-N?YusWe)5c3WI&Jw{pnM-6xF7NbG7z#Bb$Z;J{es1OXn<G-51Z8Z1FK0g(nh4
zKW9I8#orD7xX&lLQ9QV2t%hkFF3jZl&=SksjI6>e?vrmrS0`z~?!=SBHn?w1;tgo#
zX-(kBBiKq*R8dM)^zTy+aJG@=^NwG#OA$L*K_#Cify|A1IVPJ&>mkmoF&gY_ks=&z
zS0;}QVm2#DWnBV<$$&t?2(<^e7J7ub2*UdhKI=IuyS#g}Cxb$7+_LXZygu*jz9%45
zl$31l8VR3bpIR)Y``uLIOFOOi8<HS>w4U!QYZNcSqE1{D=2fB08RHFmgPR)8PvE`D
z4(M2AQ{4mIJx4jX_8y07DXE7IhIPv}k{pmv;zTcS;p#qdH=mL%Tnp>^Y@8jLz+o<~
zLY_euRgxlqF7nkR;pmA)xvp8i*C8&V^F=Sqq!l{3zc`(601|>)ykbb?(;c1{Pn2DV
z_7;hqt{*jEJ0-ODeDFhI>!~2V!hAhA(W@ZuF4e&`biK?)OfF=IYdJ=+emV6DkIL`%
zS^bF4jWpr=bBdOUk(`%8miYDc_Ex<_jt%f)8EDh-dfyaede;3<SuYCcnA}m1%RQWJ
zGFFb^BuC3)Iomr(nY6REPcZg&tuklZ`1{YVA^6KqLD|A56yP`t&k7tnY0Am+m^j!m
z7@0a4n=!cCIf7#;2!v12-O<Rz#>|Dp*v!Jpo}X;5sf~=p%9Ni>gI$hE&Qa9N(n`|P
z$xOvlUe(0Y#)R9HOi%!Y&z%PhU}xrHMB;8|YwyhC&QEqXE)Vz_cA1fk<gST}4L_Nt
zoFa*+gOeEv8v`2y6TP^*l^YA001648lc_n6vY5nQLx6AmWR@;2jy#NvP$-lE%F5v2
zWWmVH&CSio#KOqJLJwNdJA2r>7`fBiJCnnP_%nu>nX`$Lm7|N5gFOjsOe13lR~LRV
zGH{>d@A=s|%E|p>czfr+tN`r6=x*f5$jrdRXlKXxuRWYy#NEIke@*Cr?BT2mPVX3%
z&72)volMNc-OTJ=$p3W+Q<H!6cXV~Ky<3i{38R^<nH^~A44%sTZ;yN_C8zk09xxMF
zSlKz=^#aEJx0Wte=KnO-zs(JH=WaRwIuX$QALIU8>%Z52*BG>tlj9L{FmZ)VPfCoR
z4EB5;QwI|(Q=YpYIoP<^Ik;H3>Di3f%;?#eOw8zwxy@PWjm+81nM{nBO;|Y0{$(gB
zduJCTdlNI*P+)KdD=-c>lexJW3zr!^H<uY3JsSrz7=n$JgWlMfo5S3U+nANx-1J|D
zP;jyWUTI|eud{*;WeSF3GUjAAVdmnXXEWzE2Sc%P(;IP`8qsqYvly|Ma5I~5a&g=Z
zWop7B;oxLv1h&)4&d9=y(b3-G?gq@@Ji>}n{A4T)O#i&1XlvwR4m$9Y$y(XFy8rWr
zs+FCYii;7<n#`Q6oSdxSlZ_Ktg@yH>jnvJYoPj68#$;w<VEr>LY+HE1Vt`?dV15b)
zxa$Gd!XxTrX5`}Fr0U>c%TETI5(%v5oqLn;{n->rD`(Kc1LpC6`MiqRt3TiVgn+Hp
z-7ON5JGbRAGWqimXCpVWKjsAe{@gOLGy)1Ci0{7~>hJ4T|HEc+va@oVGO@DJvlz3o
z)3X_~n9&<Cu`<z{n6j`Mb8>R9ns6E2E#n`fJ3E-WK#iQtge|~I!D@g5-K~a%`p%Ru
z{=GHS(hO!NCKh&jCT4mTHdSVJ9`HXa8!ZzP4-*p^<6jGAghln=E9PVTf7yiZuED>+
z0O<GU8h{soR*e6EtG{dpGvoio|Nh#G|BFumL;v?7|110c*IfTK*Z;}_|EtFT^{)S#
z>wjf||5fAvde{GN=0f>r!DD6*SP&GHGYCaG2%r=~G?sZO2Dyd(%4#W$16Pn8CAFL(
z5Y)%8-*AwWw5Q<W0~aYd@dq1lPcbpc(0-TkK_Da$DKTMH_nGZ^4{z*~>+XZoHAJa<
zaL7TlWJybs4=L^$he=@J&{%#cCDY`?3wp#@=47chmZ@DLp+xuP)2C%AewreyhtWt*
z_k$e86|rPH(UD_fOkexbO;@d^Q1E)SW<<XvYU1VPZ91C&w(Hf#nz=WH*2Q!mOs4Ij
z7?>R7;V$?Ps)q-kOBmoIh!=cx5`qsT@D3t`WDX7j@pF9e_r-s<`HznO(&nEX|D(;H
z!S62qv(0~Yyqmy(bo|dY|D4r-&itPp|Jmlxa`5Pao);NtX(dlo=#@@QX(AxJ3x$J3
zKE}q*%gQ1N4ho{?;?g7~CH**U8@2Z&wf}h+6G2j}s=j_jcsNQzVq!^iv#yz$8C#wd
z4h{|tHMQdH%{9)WM_lhh9W6R%YHipphe+`5K_JA~#Ke{6{V6Z#>E*1ftb%wYKYaKQ
zyLyHL9K}1Z-+XjE)3;(Eo%g|ap6cjfS@I9GPau%iZe-{UIUiq|1TgfS1r%hYq?ANN
z0$5mB?iu^oAwGy5-`Zd53Xh6fdk?nAoBr37?M#DnA&=YkQ>HE1!E|JQRRQ#W?Am!0
zvG3KpKco2$m+Nrifqf(v7e*-IezhFb+1W{51c9_q24EoI6xns%ZS*;^=Qj9ZX&`#Y
z8SM98JLEy`)^`tL_%0k}<10SBPq<KN;?FVE=T$mj))9I6QnwdLW4WPhHi?u9?-CNk
zYw6O4L2o}ZgO9&uc$AT#yJCaDnSyv>F6B4vh$}`xI@jRZWKu^5fg~0cRT+#pzpQtr
zx_bzgTi>HyzUukA5%QM?I_E#41qWFFYJi8dFFQ__C7UU9C{lwt?qWc(uxb#N2GVB<
zo6O)KRcv#?p`%ZNcu$RB<ASH98`wdQf$s{{lh2)_djcn~;+hK%9tLe7kVVcV2DbQ5
zBy$bUe@!HW*K-%l^|QSeY~ew?Z8N)fPaSczSR(lv#H%U_Tg8)NFkfx=?}UZKAYL4C
zIk8n8FPO)w=0Cu!A=9OYbS)Re%V`Xo!^D*HD%Vn!<9Zk|4s0TZ!Q+D4Nfg&t9vqRv
zU{Y|9#VuQ^Hj}n<CvtGXIWzP1MHtwx_Hak&;o)1P0Yz{zS3aF*7zqN=Bw00Wyd2N_
zz=pUq;1duSXaype&M?lW&Q=kLAIz6?8Rispm$9{>&E|KOiEN{JiUM5Drqx8@cjmsF
zOucidUX$D1XpeTc3yD#~ZG}zhk;oBK9^&G@B!@r@rRsS-zUv*aUYVyaudDzs&j*1~
zYdtQlrKJ_bTRFQwMUM*cqYRM$JvS|KRqB4QRoSK<@g!p-(zLD&8Cw+$5!su;t3y5h
zF^Csx?|!#ztNNoSeMyXyMM}BYxVTiQ{RLkla5Yk?$jL<y4i3WKyis&=`lw!^t3}Uw
zwW-lI;<27yIBDRU?zkR#=b|N!8cS-Gh4uBCVPRptJv~JSH{uabUhwnlRh##Zq_Eq>
zzkgqZMJ`cPRW-ysm}Ky}c)V0&>-uzIawS@@;$&;GiI=H*@Ph!Ae43)B=F=t%IEdp1
zob*c?k9EXucPIlU4@at#v-5bi=!5Q>{e8#a(9l%RZLhtS!qSSyUS@i3ZmsonXokM~
z5`vVnxOgy+>t<flZlla-p+e=Sx1gOyi`VG{ecjyhCyxB}fz=QYXLM&#uA+l_AbN9h
za|wN&*S=egIZ4aN)Owy+=NA;HBNS(mfIn$?d39o2*7thek;mrc<d9wO`QEDR{b-q=
zDNyZ85sC&Lryk6^m6=<R(e!ny30m3P*N20Tzs1JS!J(eY>5xc)gN<$Z{XKJR-r=E>
zTnd{Mm?_cHz!nGJox_X8;Ct?pk&}a+{F!*NF;iPQQLeK-&0$szU@8L@Isu)}&&#7G
zCC#27L$02wvC3UsGz3F($uR<WYjWFZE|i-t+pM#j<$$3F>XfO^pq{#VA}|-g!->3~
zLBYXR?S62*-@m)amKGH$nwu9OViIk$+x6OOJ#K{V$-}%#y#DB8*GxosczjaQdVPyY
zH^$G^RqE%@CDM7J>YkpBU!#dD{ox;24`m6b@IouXBO=(1I`4O;GiGOjY1-7Uyl;w+
z#|{n&p`oWQ|M8<qtyKL5CFPe_yubxw*<N3{)9cnNu6&Ig&EWT9b6%7ExxP;O;)Rq<
zB3-e%>C$w&tW&WDKz$KWQDhGYq<-q_RP}h1V^jXh+|FEs3L&o>T3Y*ea}X(fPsEzH
z(QtyAAi^-yPgI>N)|OQ+zUO7dCt86;a6h!H3=;~1VLLIHHADms(w;-o`GdJLor(z?
zyu1Gg<AnHa?eS28O?DT90>DK&6exl7y3_Go^7Q_lKc|q0fcHQK9>d_p&+%pb5&2!1
z3`6MxpFRdK<wuNwg~;s2jY1=9c!wj7-Op&Ki-g#lpnC;a0LCcZ4Ec-HANkeSbHR3n
zHUO!64r`as|L$oQQ9p;MU{JfCOCkbL#^W9W4uB7wU9Ri*rPFA!GA+Hmz1Vno(meoD
zzhH!BW*3d*#)>a|;7FC@h3@8Q0~~876nX1}=kK`wj9$AY3p~>C!y-zF88V;)?Y;p>
zqS+63`8t@;#3dv~9s+}ES|T)on?xYGB*A_|I@8<UL`LR=e)~%P9N5xG4j$LG9H|3b
zD`<D=F+L&T%N>Z5%$?ICG9tnZCum$fZ3!SvsQ!rR%a<=~VB^7ER1_MpUWtZCE)f_%
zd7Vqf^<DyiHtyY;o&tm>pKHMX>q`Lan+N3X8}{xLy-fYl{k#6ua|l}aFS)LtmQ2?<
z5Jv(FM~Cpr3x-%58ymx(NrwtOX2;r{dkZsQ85;C6R%iMHMMXG>pW0GX0LtHA#FwJ1
zweBoSj0jt={lYIUQQ$RC{-h3j31RSMB0DR<aUJUp3YF?n&8e~f?rxJnz{?P30+I}{
zA|nC6`!7@A*&t7)D#0GLMaZj5@l7yPgxmADsL^F3$7{DC^lu0}!PuU9Aw`3Ve$?dZ
zwf&*n<>q|dYqbFx`uwi1&CWuVTC8c%nTGI}Pj<649!KVp1l+NX>-~2QS1awd_l_bV
zG|02xM!VI!K_U__ANbC+EeE}RbNvk8vuDr3qoXy%#DZeu;%c3KN*7-4HLMf$C$n~&
zY~9f-A|qZUYfNbNkH@<pIuiiizJ2=^Z_l8{^4-YjD7(!BUAb1Z1b}BeX5%0fR8&f-
z+w(!+vD}xKC%=ZfOt#wFS)Sh+bPFM!i&Uzls(yVJU>=Y6S=EZ0yL)|SSC>s1bhYQ5
z>#n@d?`aEms}Tyh_e{dEYz{y8e2#}-YF8Qh7bs*_IJBNiI5Z!G{P5hO2cWj`IqY$G
zc=*WoWY+n~3R~^xueJU>MpSb8o1?`mVe!8aIC9=hTU)yXWCmJx_F8MgfPnavl#=aj
zTd9#WZs!{3b@}W4E=<phjY8P7>kxUvGI&)EkB(lzNYWRACNuiohQ9uOwNMN~f;yY6
z+S0)vEnaGX8Uh|!xB>XxOtVMzWDV`oK#<$c%-ya;sO=`Q(+R9Gu&^jADJj_-wCUJ4
z<O?4$YJC&~aW#a1t4s1tzn<Cm#+#CYf*Pd5a_w3c*fPK)<CX?C=Rdx9@gmA^4BKv|
zM#<5!3IzovtpFDry9i`)x!38ZKfEeerrP&fFXf>NK6MOjS57-qA4E4cHVTqN0UNyY
zLGgNLg<4wx*?=WZt^NyMkEv<snPC>--~j9%Tm2T7+I$6&5TpX(*y91-y<fewm1D5{
z2{!lFVIL#aT*ITg$O3Uvt@DW#SRIDu*!50MPS}qh=K?xQ3*dOAJ9@I+A0Ebp`_p+f
zKqN<PP8Wo9=e~@Vjup7vW}Wvw>W|HP|Nd#AwLxck#NBS$AS5+7)Y)2E7LAOI#3v@I
zuQ!*fmn%z4hp)K(+#Nbt>9$$w494TMPy4++otw;RK_e(=ASETG{n1pamMr4yQy!Nh
zm>atMQY_bPP>y&)A4_33-&iZmA05OS_a_8sO3i4SE$g-cf!^XLusK})#xRZU)%f-;
zT7G{1#!lVBgbZWLYSI4n*-EtQv{@1s42Yqn+YPLAyA4MOfc9&DvQ1ta%rq$HS{d}c
zaSlurul+-L7Coa(cIk-L2h_$ts-qwy4--P?vSVXo3m7BhN?*fvVbQZAK{Pu^Yu0(a
z#9FHq9gj`YX0k#pi9ruAP9Yd<FknEbpnVd*H>FJ^F8~XuY}#wp5xl*sMnrq6xZ{5N
z`uYNES2qj>P`agub03xTct@YcRy6JMNR7n5P$3uv_=?sQ({HZg;lT~7(A+C~Gn&-(
zc!r*Sj4obB3F3WxXNN{d-04rX`Gcb|uXg#0i@RsAcJch~U=J1APo|@J!Ply+NGi49
zd)u-$kX|a6${`i|=+PrK*G<)Q5SRr?fPMeLoQSUZZ`N8<^m>h|$jHdr4NmzNCtHPJ
zo>pJNaW?0h>IANjRYv<hJll3I)9~<U@I2Yfx10Sm465LINh4dTTKX%kk4)iHwj3Y{
zk5YN=UYzb#0v7ixj4H!1MZo96#pm!Fm3FmxWbx0tYJ`vMcy*(@Yz;N=smw@$T&l}f
zd7XNyz^i4kpKEI=`WW4F$wKST0UQT4jXbL3i>@SahOQ-B&`?pmoNt0cQ!?V>asfC#
z`rC|*Wyf5Rj^R>q&o@5J+K>L;nPoTac_=laVc?TbmFcYv;%LnowR>*75|n-EJgy_V
z^G#N0!2bCkkAh5g<v9Yg8=TiSlPxk{nMMIPod--Xg~cp<W8isb`Zvdyux$k-nA>Uf
z4aEx<mX96L`7k=uj;iZ2EY9P;pG7{VhS<~<in)~&!9WJWD?JtD>0Gv+i>_Ord~LVs
zk7hLV<@&<jS7)z$4!+=mpcW4Pi({{3FQHVQaW}VUyIo(LL~S)RH1rM*LgyG587E^Y
zWyZ1t&yV&V?aD5r!E*n@U6D>~oJG5s_;?CX)u#5Vuk22O`gG%bJrl(B%rlELVUV{M
zjk`s<;9JoFP>^;pjn&#{z@o__!=qrOwW^A#)^1k4qod=ldb*AZ4MOn7ci9*bk5yGt
zT8jOtq-gYqXar_7(NwnlP7%2HMc%!0x!a^buk&dvhE((zpvgaszpdaKOF?<v`SfD~
zk?++j05Y<%@#*R1xcctG3j8PWFe+*_T9B-d(PSo<1G5|M#ZjhJ;OI7BZ1at-aTFkR
zX-?`oN7^^-aG(?Nr~*TBwh(yk$}*t^-i67=?^#MI9UbVWt731nw5BKY+LjBgKCyWU
z0_S2$x=vvfFUZJ5x}ykL?|MyDTVTSN0c~)IbnM5bV>Qf(%--4VhEkVrlu5s!ZI|gV
z|18(3+XP^kL}52quLO#hKR^rMIU7cj^5giGC_7a9)IpIlwAc|?YcWXFax}ne4X}v9
z>SS}g@#bPnjxjWYmlc`o4H_E6Pliy?zRi#tJ2a7AXEPXAzlh^{DU{e=(?0t}|A&$+
zGL!vUHU<dE!#2&GrKoJhG;iPsjT;52Y`{}@W@=?(kE4CBL=9e_Q!Wi801vj15*Gha
zTSvW<T1OG04F|cO68L~pDwJG%=m`~UCb0Aj3Z?>J@1ndsu~?U1L!|sSXDdOx8YOe<
zJJ!RqPFA`QAwwR=b|T%*XOTH0xzPe89!G0dH&^E<{92A=Qlg<4<6sCRXoczYGa6eq
zK?sS^u(L)#Sb@t@Fs^zk5g}oDAk}Py<9Z32`LMl8;QR-O9|RLB`Vu3HUfz1LBK`32
zute?_*l`^ASZ8{vma?KS4p^qpGk}5mDLgS>xvWO=q*8cXOVrCWNkPE~0(k3W(qNMR
zbS{^7uX*&?=_$$=3R)Bz7>B-J))h)Ab&zgM?_2VZIsqoU=+9Ds8chR~Yz(bu#)SNy
zs=&Rn0oQOH<Ujodz>}qgAk}CZ&g>?)s!9>Iuupas{0*;e&T&iT!x`nD^Z!7E2tkUs
z7pu-L2hFRk)(6rBUWJ59gOmXZF%>1HcK{p5a>To$Wb)Prf<RW2Nwk5bG$g}rk<RhA
z=8jx0zhYoD8_1ABLHE0luP;w&DJaAM9Vkc?ZDdNmpzmOYLi~g<Sh3Iool3SUn&bh!
zk*M7OM5h1-??(p#1#I2n0Vw2eOh8KV-*V*Y%1W~a7%MF$Rm|aYw!ftP!5HWu<7F?3
zvd-Zka0GxAJ1Z+I=Y0J5(Rx%NjVs%vI|^&p`yNat@&9%9%By~58(SU`9WPHq-y?tK
z7`DCbkfRkiWkn;A*RNYC`|{T@AP-}nFeXwEE`z2-KF3O+SOsqE+IY4cK`E7-oP6<n
z*5TwERpxtrjEznh<<WY)%e4l&y=lFSa?}XS77c|Jw=mpHb!aWd6gU$C-A6%=V7HxO
z8tuzoAD|e3@!p85ErZy+#~q(zMky&NH^44CjIda${Yl&6ug?Gnw7)K_`C>&wLjxE`
zVP~g^nyM<-JQO(Fquu6oVIa$p`1#B-3*(NWVJvtbMcUUdA;Qu)&-Hy{P}KbV`SZ@!
zk2}kF*ira_wIS*}j9q&Z8z4mSxE!-Usvav;Anwl;TsSt_t6#xDC*V@V=dc~RxxRE=
zY=`S!06+W*+h^Z%SXG~nser$pJ(Z7(zTe~;{`T$I;m^MDW)Ei^T-;KHOaVMzw=(TI
zd%ZmqP&OCTExgtN3%41u$=d1(y1MeV`O9gm3<df4UI4+dKb14dlsJBh32oF15*HVj
zkd!pJIw{o1lnrR!1kjhhgJMqK?ELU29=mn?^7682^>oeI>U-}(05gi--pyoWWbtWf
zWeKzzJoCQ$i^7!8pO=6wZv^~syg&|LWzc6*54y42v^UWij0zJV$_?6nYz)%5^?5!V
z{|KDVl`Fe4r+MEMEP$4oS?MvA{Mgh7lhXM%%VDzo!a|!L%^uS7@)JNSV>kYcWH<d$
z93=7tidx%g<*kWw6=7ljuTg}hHWdvLfCrxloQ`W4&`YIoy~;63={JuvdWud{L^79*
zY^08(%JhaALNb)b7sXF=%3H_(^Bwt1m$QAN?wvon%On>}3SoCz?a@v;*{&Y6hQR=n
zQ2@ndaR)4+&}&u+fslr#a02}Ulp7bz5sWVV?2}`cJkaf_r4;uLb#rsxni5ch0R?g#
zL?<>O;m%nqtA%`SqRKz{KaC^;!V7kGwdv{U*t~#%dxYFhf54F5pjDUO>o;5N6@y;m
z*tx|M{=Z5rhu;5z{OjA{+SuBbaX9>-le#$QCU%{*tK&v_vD>)CaI!O7*HL*|nE6`S
z%BpzucsTdv<G+}Pw~DDL5PRmFgBG%c1LNc3MvJ=b7h3g9zP=p>M1GxXLcU}su!rQU
z08I2oQqAulaGZ7iqtggKdnSj^)&O`J3I>Kcu>9yTn0_r_liw<xH^+*3w#uqiH8qRY
z`crLyA_A-S5HSdf%=!{RDg}WGbHFFxVUPn!;9Zlb<Jb8AR&(Z7CzC;Heo5pZ1}j7q
z6=MPd0$e%}a4iSYxbGOx&nlgX`x<!%DKMgjq$*X}ww+K3<e(lak|kJC0Vo(VGxO$L
zLp6-11HlFsVS$L~BfzE9bal6jGVH>Eeg*Ud>5r$6c7L?!fIUzJ^ti^X?+GkHC@FSu
zfwEbXk&;q8J3AW)X{B|i_TwdL00dvn$tx%%QA838>Z9T^jMcwdsR4PnKaE@E{QO*6
zT6zQ!BH2h2jZDBPEkP*4OazOU!VFJoBO@cvp9vZ;t^=d0iRjB~f&i{TmB<KOyg$Eh
z-h0$;^W@^&W0WHf2;^%1qnWaqqtobV22k-q$us`J<YlEpM!rl^36Lj2(1D6DqoVmx
z8k8SufGkYph(`dnhnuX8!4D;Z>EI+;s)$Xg_Br&{OdyuJPweL#t&Z0EXWD$*w1Lo?
z#SE3gH5YkrX7LBie`Vh0q05Gcg<0)?!<NK-uIUzn0+S#ToyQfaTLb1lKG#xFgel^F
zkPmKl<0tqIE?X)VW@hJ~nV}UTlkcR|)3QM;h@T&`t>*X%WJ8#I4uJ%wiomUm@f!{g
z*K|v*XLwa$5O9%5d9J>D>2mx_sWY9|F$7c^-btLq&UIF!&%3h?Fa@uwK+TuZEF0Cp
z+R@0W3Oi=0N&yX$;^Xs>A3y&UfXWbK^b}S>kAqd?t*3K4KL@PjP9kxv4MxSao+#70
z07bT0_4F|a7FvotPrmADTfjYXVFa&UdoY9l<SV{I+FJx+P?W09ezHrgkp=Pz>@WbS
znNd)_PAe6h#3a#$x)@pW)|iW$rtw0Ag!cDO(;3l?uYlqU^5?FywRMTIiVC|~FSgI^
z^(neGB0sd?lT-vuP6v{gQ!C4xgOPk0cF$u=kZ`Pi4Q0pXRkmJ8!$jN%#NJsl3=Kh!
z>(YtXs`np^RslsSmJJjmAmZc{_zuKsH%=FP+cq4#?X}{{sEe2aU~-Ni^vZqwB<TK3
zE=DSc-AH-e4{f^(AWbBsrrLON2k~OE6eS`+&YH~oODY`ZB)!I#zOxRh%FD~wZh#jQ
z(Mz(->dKNInyU!|9St01=z_`@h?PKLEvTtstMo~adoDL(S2te(FvX@PhBS7&dyfBn
zHBMSV0nqnYH3Xcy^M=Hugn;m8tmf1}Zy4k|dQ0TK2q&eitD9y%m{I;hrQqSNZhv?{
zcou8QFj&^D>#v7)LlBT4M+@XeYph0HR(r6dlub=d1CvDSS&aZ!o=D3E`{Fy%>ND<2
z{EPeOc_gxH0$0vL5D<|^!I=cx*as7}NuU|>&jmSJ1auUGf3^f19{Emj#oY%lj;?%B
zD+Xs@9}8f++jg=XQ6mhW#`Q`B*TDM&D8wY{opAw!>y3!Mn-28j;iw9rE%7liS)eMf
zw96S9QiNqH%Dx<!CaGB}jv6h{Qu)>yIx5GxQR#baQ|k;4i|Fp^XLKf4>+zH7!2*6+
z8d}=X00h+IX^TvlG(G+s59kinz{_B2vLB-RZbQkIheu7Q6fDIztY^?l&4B!zTaC0d
zKmiL3zXU*>bUzXR`Sq@RY9|DIwZ>&bnUI$o$wd?s6LxX~nOssVkeID=$b?Z#AaaR<
zbBBi4=Ra|!VB22;%J)>@M6lu?0;{6s0BV8A$Q%Paq5JsN%2%szv6KZ#UChve*FpQV
zz+9SJaS8&%z%fhFXGdDRpfCn=S6)cL1{o4sPk)Ke#$=N=W1?XN&K*%OF~Q!a;2_{^
zZ-YWJAp{-2pzZd0FKQO-C{V^cH;W77V`GgyxwjM9{Q&|%-d}XOw5F$Q;xN+w14Thf
zOqt|ZP*LE|k6>)ogsCndTLG#d04s1VPxmH4H4G#qN+`A}fh}Qa;w6fuEyVBE{pwYv
z`?FT3iw`hR1th=|urcgs&x*krMkFDxIw0TH3$6T11GR!;WqS=a4Q(+Yh>)|2qd{mZ
z(AjFF$i-g1-0FBhY&*h&1bt4x<MIYpvsoYY(vF1V42+r&<murp0U_yP%iy*I*zb=D
z0Zs+JeE$4i9}xlPxr$4ne7Ys@LhHlL(i(qMI*_?RMdrENu<im#8zGmY5McRf13X)F
zAgRwCJN;@ndSruWP~vQb2=QaQ_^mV3jsWx~Hv2iQmg5l_kWUjS0QcHDZM$s&*N^9+
z3wN;nyw-kxX5{A{e>Hq08Yl<)`uaw}y&WXCSZo4<vg2bHz%W5}T^hJwib7+HgQMo4
z@9_*B9kS2g9X*Q3-~?vG6III#xL7JUsj|G=5ul!f#Q1$=9f=fplebbO=|S(`?*det
z*9bo@1D^lNr#JoutO^1S=yLOIa|Nb;0<qR}IgAQce^pP9Rp{}`WRlmv1w>(=xyfyu
z?^-vCj1zYH!I9c;B|8u5@5&i(&N;`xuaWrRsKM>)3n~&+20`IK0Piw#@bI@k8G(RQ
z6Dh^(w(WS%f(+FU$l!(eDHNAOqYNiW^cr1A=gv#1e;@;!X*GSisHxC{Z`E?!ohx3r
z`CUgtN2e+&`NsVF`|O3QHO>O!Hm_5ApYxwdup3`q1SkETvt{6V{6ZF}6Ll`<^Ex5J
zJrKykP^m`$Xxpv8V(N2H3^@bkOj$V=h^_2~?GQZ>L_8~RSvP&Q0bO;y+^M6bqmu=>
z)D5q^%e)$>VEu;y;4lIX(rVoIWG3qg07Z?<-#diU*}zX=d9>J()8s1}i~wNr?fV4;
z;0bCIuFQEvJcb35ce_pHI3GXvG^4u1zf)@WgJtU@0Dsg10{U23STM%hn?P%$JFz)l
z3evj$1$M#9Xrgh~NxjKDLtx71`S~^A0O*qcNER&S{41k+<^RzUrBy2^0=L#u9VTci
zn~g6T99mA)TTUi)<n$i^Mve+Qw`*-*J(p|w_J2x_tpGv4_>~)7W7u&|E>l1+Ruxo#
z;OtTwP~}OWyvVi<a67NvDY`ImsLsIvoTaII*>|R+YU%ySig-Z7ldrSrPsyLIw%~N<
z1{JP$i)Sq?g>=_|GvL3^DzD4b>*kyy7#e?(UH`6Yvl}{G|DK!+<3ARI8L_J6S)Fv9
z=~C*rs<5{M7Ek0uFbv{SQ%f-jdARH+Cu}q_L6(KRMga;1`B4)SlUi_8@keq4N4muZ
zZLgzNPHqmOeTy6xT2B~mfw(pdu-v7OzK+ex2O0Xq_{4e!C{^>H?8^o0MktEmx)VG^
z?&x&R+@UV6uA^}6!Nzw0j*~;#qN!JSFwqkdx&E_5KK4&;QM7p%r1c+qdc7&;!dS7&
z$<@&yrw8}(oJXuMcrV7VwyccKwQPg09v%D1lL9z+L~2%66%9St1PTCiBA^CN;q!2c
zOKJlLCa`nc(NN;oMc`0CgSN>1bRLQf0@MSh0sgR^tM}9F2gnF;g4=!%Nc14-14^d^
z<b;Hbj0%{(z1Z%L$7UG=(_ELAjbEPFs?>n$3J^-WLni=`TKce3xvr#Kx!Tt^)BPNr
z3!tE*s{!-|NVS;;{Xb{IB{*7aUqR?{CY3P)o!CzZyi?(FRd33+=$?Hv5NFNJ8dzu5
zV?Vvr71~!+NcdbKV>@=fN}UuKr9&7{9Y)8i`s>5fwj_P;;W-m;imCJ1>H(h8frIT&
z4h!;tVD)|ZENM40#wT`v(MDSO%BdFbqs~O!&3>pL*E|qwgJAF8P|76!`r)<5-a-JJ
z1w>`hus{6owqAqT<DRahmdlFc$u=GZoAzX^=7iD`vvLmC%`v<}3sX}|uuhOPxuDM*
z^0PWIZ~XoIx^J1ir#k{`{BiNPo$`KotP{fMvE?de{OjxONj)aI2PwNi-B*{DeY<f!
z4c=aet+7pJ^L9Vk2n!o&be#~qoivaU4cQlH=k-2Kcj-bO@!C88vU%3a3<Y5dr72=U
z69sj|OtnR{KAX-ja18lyp9h-gwp(L>iRvtn)qE&AVICKP4*ty_=xbY1XvfztQa%@w
zncl{jvAj^}1z$$~^E@^m&!2_dd&tdQZMS-tKA3!qDy7S`-E(RgCppTXo*Ljgl0_#L
z&~5QN0Yb2{nG@n7mHA+z+fJ;$&$x%DmDTu3Wt&^;UIb%FMKnHD%enbZU)^+GH*uXW
z+z;P2{${BW#<r<jijAYeouo-UDthhDUiTIb&sJ*u5rEG7Gxq1hU0JTzSM`(#I9CD>
z_*R!fw=m2@lyF+*+Z<+U8Uyw61#gskzkL|I98;-1?ZMh!w9@B$%7_F_414ni902<=
zn}j514vvsVk9jX--rC+Y$fNrg?sn$qJo#aIWIog01xmtAg66544@N3lTF;gS)NFUJ
zNHW-@QUo(49+WBT>)-O#BJe_A(8OxjEVcxAsj8k`(|CFbfAMm~t}wN**aA#UMI8PI
zRcCq~GDZP9gf?9td&#Hm`A$;qOPSkq%spJ)YW{JxqPVz~)BeRB1e~^;lgd)L*^g!s
zu!BLEz{L5(J(tL;JzD;21M4?C1e~L_;VHia0~Z4yv;L2wBDy*o7dMxYI&Hnnkt&7f
zLJ1^zhI3rbM{@5KU9W%7+GOsv!_ACEY=iO>5N>5afYsF13)0dET7J*iY>wokf@%iT
zlXK3XHkv?e`dY$R^DM+=Ip}iYXQECX?{1ODHBG3h)$AwhnF_t;+nXA~w(|luW3lTa
zdR<LduWQfCD#600ZSWW8WdPxkgw+v^u3H6>_?$EVSctv%{V$F;Mi<Snc4d(;F;62I
zNg(W7L)kYTot>MlzE@`{g)b$vFK2Y>Du`C~z2rR4f6fp)F9+3FL^u4*bdCb&A-3eL
zYnOZY9JibcQ{8HOEmam9$}=OVd?#Zno8a9KfvbJd>&u1P3GjAJxlvaLyQfDGukV%-
zL~(w1{$voaZ)eQ;NX{8Xg&hR$@ujYTI=u#)-yi$(t}l0lD-O4D5D>=KdF>WQ@dB-t
zl=?H-Q#oZ&pGdA}4q8MrhBSo|eQa|$oin-lfQ%gJHPE!1Gf&R~N+&|W*Saw6&!)1a
z;Dbr`#M#Swc)yD|&r<%k<2XGx=Yu6%<=Wh#T&080Us+d5yM4NrxjH6Ft+naL(NCM{
z;xhE}sZUZV*IYU$PKu+dPOZu&^7{Vz{5$I!akZ7o&YrX0?6LFUxh?BIB$8+P=NI;^
z=jM7|JGC&Nb#ZYi_cd3dw|Qhy1o0lK(5u~xZQ1^~X;D<P7h!+<`Z8&mdwuM>o^`Nn
zhTi&Vt?#?UBpG30bW~N*P<1_U3|FFPKA{JdvYD8=ZH`QOeQ@!%7p}e>>6ftZNvI&>
z+Rpe#RGV4Wp{crv+mH(_HASNp0vtbA`{rSNO6^(P`EKG#qF;y@@xOP=uW#n&8Z(=!
zs%|DNo(>qik31i-wXE72&Rv^t{P9qFigG)J<3kCd@1@R<$HXGw6gLq^HD(90HO5SB
z+0uIbX=BiLKTCDi9$Nt+*=}~*S17Qp@~rca61+mPuh1Rk@%!8c#cjSxW@+h%uY@15
zugik-IJQfnoXKVR@sKFG4KK7+fn+Y*NT1*e9}mv5>?Kk0W#6wk=lM&|*Ed>?$I4b)
z>KkVVGwZX>T8*oZ2I~y|pU%z$s;RBp_px9F5k;kQ6a+*8l`1Wuf}%8~*MKO!D!m0k
z!E<Pq11Kd3NG}oTT||gdrG_4g5_$^|T9P;8cfa%Qc;nqW?t2U!5=nM;_FikQx#s-+
z|M*u^R^Mpl1z^<P*kcsEM2B3{Z7{E>Yl1{_<4NJ%lJS#{p~@(S0LF$NDrd|-z@ix^
z>eW~iExa|uF3UP%+!?mfyY^K=ajco=K*P%n%^2$H8G_kCa=`LCIS)OZ2zEXZbJHDC
z5x3t)0Qn6Way)vS-P}xj+#Z;jhrif`YnNW#-nUObL18FxW}&{_L^o;)uUkhg>8UDn
zZ#>nzlq=HDP^lauzq9-OdwUsgKo2pYXDIBtn<Q-EWX$o-tccq2HMfynGo7%noA|nT
zD#yk`uIw#mXXjpRZfl8$=I9Bgno@%Qq!+H=XGdG8Kozb#q<{dSkhf-Yn+{e^wWY_3
z<T`VPN?3*3^rh9h?H}jl@qa1x$8f!#wW+UQBw~N+*&FjWvUUFo7pL`4Jec{shiDfq
zf1hk+Tx`4PFyEC<^~un}QflKh!%k1fijd|>_#U0+LIO;{sXxVa_xtU*9u6wYb}9=G
z4^J6BCtI0Abi#`|)IE&mDilMn**+nWm&Bl>I(cwlVW3+qYVz^B#gT=$3m1wRpojeM
zk@9o??9h{YIU_FamuS_H)=Snm7iHpIteZ0(2+=nXduPNc)g!MUspq@;J!dGBCk)!~
zOlVBLNm85vrxy8P#f_bf`{o!j_vGm_f*y;lHb=&fX`HxUQ~JE!_5D-(BaF&{)GN{!
zK{a@%_QWOyYR_ou!jJ<-P`vw^Jci>Yb%l8@*vPGr=2A53JGyFCH7xg5@{kvGOH&tp
zQP{CsA;|7&U$=sF1Z6eJE50<5M!}9p4-{uxrT{om`l#x_A+J863l|C>R;UJ`)aPRv
zv=h7t>vbU`TBu|n((aDJd}o8-5D#3eGdw(tuY&*TruCZ3(xhLvWt>XdUd}*dPK^04
zSr%0+hRp;s=rtao_VM7N?+%r+%mYxw-*Qyg{&K(oA<-FCH5!P){Y;i#4Mk+;IQ48v
zIoT$}>+)ohaY(7%LMYag*2XMmW^>JG#-!C<l7j1_S6Yzu?=yCKn(yIIivGeaf8o>G
zTM|vU%8up5VvPaR4`;ivRhqtw*C2nLJ(x}#XYMkxQS;l_G%#TcrTkopoUyyMGoLe;
zt3M#d>z6m)QZbt|+s)&ro|E-)Nw~DxC2&%1sr%ETjVEmQM~z4D3WSqd{bFgUv&m%{
zf2|@u&)~*N9Nm-njw?`#DXXPJIT<59nLyr9umWgm5v7B7p}(^nCt`kSec@sxTjwUR
zAnT>)Qh_3+zc2%f=l1GZ8TVQZTL{|IHQQVJl7Bv~Bnc<e*+@L;(%T&9LkQ@)!Q49<
z;@fy?gQu?>4P>|`@&NOBX_JL+GatsIW6U%@{f*;WxrzbvsGoOZdla?Q)kj)n6xej_
z)tN0yT=-6+Zkd#A>nETo9&O!tY}wQnkN*!UPaVZ`eTVVJ_VNw89wF+PLta@ARg_h3
z_)P#qx)-BSxq2u2<Hrr3K$_oRcHwpTa&eq`d2XVhWXXmPkMC`0l~%@)a#2Pne-@N~
z@fi#ktIS-WFNAfdx}3R&upz(tgUkgrHZ0Golq~I(pMU>x`pMd5m;K9~w>${Ef8cry
zQ$N|Xo~)6Jsn0Js7u|J#hxo|adf_a!5cffPG;4?Cx%GJG%DVyy_pCJn4e~UU<U$B@
zo?R}1+PAgw@~mVB;3kyEBeKL9u{uZQG@Xo(Z6_D^;#YLEN~}(u4DmSN)_&x@GgsW8
zL=3SgfEGYTdOdpd#H`9&i*hM&CcP3Z#LR6>CiS|{zrUF1%QZdTz(i$P()Qsp)1Xql
zo(yesx-27=%WpVA`z8<lT9s#)(t~Xx9jK?dCT62&da@UHpu}a@6w%~zabd_H*}Qwj
zxg<#rAJg12S4-=`)KtzBmUh$9eV&x^9bepA4^X2AVYSXd54WFVANAC{aEYUu-)0D`
zHh$2_a~>>xoh&xuVL%~mEX;Aem1kLmz%=|gU-%1RT=XRr*gOA@j$L1<%JoX2@DuiV
zIZZGHqh;N{?N%dqmHaB!_BCh8A=c;Z6oVwCen@;hDyEw(WqF-~v#lI#V8#cat;qYi
z6^EC}hn~sxw29bc$#}f%^j&M|nACVmvuiHs7d5j*UhMG<eWBx5KYRb)cm=-JHKnF<
zuy@E<r*vs~xk;fEO?yKa^mhtzCGzs7BI=J}?hi$4ir3^=`^L!kl5~xQp|p|=m$pyQ
zrO0}{<H=lpNts?iRlpv|_}|}C-1;ycJu^B|6)1C4RhFO$xDlm?gp@E3h*>xaWyqzq
znzyP!E<R&>vfl@JE;AI!)V%$fWb^i|c;@VOdlCk%z;%OorYV9wO+VjwCf=a{wmjlc
zpR|{-yPwA6xgJWhGuz>*zPHS+C4aVJ)=)pkKL1g#t9HpkpnImyK@Dfej}tHVCR#%i
zW*c&>#X_Rs@VlR}!AH~}LZ~8(cWRTp(~f&H-jH>Lw|_PmrInleSJ7(T;n!q10xoEs
zX{2=KJ4p>V27{k7Rg}YCwmrn4&GkdP?l|t^r`XEv*-&Igsm>#x`@h}~6bw>^4XwlC
zo~Fl&p$hJ$sa_Bj)r87lxC2|*mH!j0p3wt}Pkj7sCT)BVdSkU^y@QmARI_UOp+7nW
z;Y7=C;?GK6@kf7A99h?HU#lXU+d)cSe-vPCdOMK4Br(&sGgSqu&G<Z{qGIbd4(Kr2
ze|^CMMIQd)t=HDpcknfz*#)B<&?MVTyCEyt=~v+{-F;GKYtt&d(Jli#-DfQb_{4I|
z_MDGF;nS+2P^RAKdpS#Q)5CaPj|Rk*k~~Atj-b&LlEUf>mjc2yqVD>tM&k5f_fLg&
z)}iR<QC!zj;kw9CA^o;bqH}`2zywNW#Pa^#p}3PLpb<~+K+xGWXr<`D-x8>Vd`ZH(
znJBBAY~+&53u&9**}|7Br4$_WbeN(|<>G3S*R*3vp=Epmnr6)N$NEE_z6l)2F)^To
zMGlR$9t(76>Pj!pth#xtYN-N3)RGA;jOX#9(asPdzP5GGz7JVK7ZfoIEf1h>0)h<B
z-L`8bcPjV%UkUFahd$|NWy4%$r*zAeicW5X1a}P9<zlw(cw3Dp#EO;#el(Ji^Zqe9
zH{0xLo)Ez}(|D%FfpYh*SzYV5Z=p#Km?L9<mr!yxbe~JFs-N{EFX{nM^}xz1DFfHk
zWKX&<j)Yz&y%p=`RtTJ5R@tcU-H?OKo<rMRPl(E@PPoMV2df@#-AliftKnDW(u-v}
za`}gCk@bL|e;uW7*UGfw=-ij1KFsndTFIY{@XZSz{U39N3TP)R5+1+#_U*Qcipn)A
z2j^nzHuPla-8DK(L8sG9Lb%7<yOX&~C{}SXl5Q0x1h2MGbq*d41E1EO_sUaK$=A9|
zI$V0o9kK}L<8DVQSW`&eEuv(xx}OT<eNe@?bNhBy$?Y$BQci<(@eM;J(`R#S?J+&S
z&3=(Nk-Fn%LV2kXQqR5IdcU{N%q&+oC3@yvsdg%>I#u;(05c+R@#0;mqdIPr9Sq+B
zD)+v%RUHa6HGg3LsnC2P`~O^xO_;pMaEj@0geo_QHB3@RGtn!mV$GAFtNKhjH=|ki
zqT|3s^-zh9;Ldup&XWA@<s}CHq2Y=GR7PXffle9t{LSCL-_z8L;<p202)^&sLJnxL
zzJ%({7Ku1lf;WdIN`gDU_w+vCNda|WmpM?vS+38;HMtDB#7t^dOY|0vsXG-0pDg<_
zSI5Xdv>lQ&4aA5;NSb4vKQSpoY^Qi%+;}>N&+J!OGrxSVJQHGj-zB_R&3oG)&I5qF
zXQOG<SwdYMU+el30VS@*V}2D%*9w%i?)Ym@G5}(LPI4MlR&+0ZTIL5a&9dTr!K#9W
zjK_m~NV-S;y8~+i{AS`%yEcAnp`_UZFGJ^JxfT7r7Z{=iH8Ao6xt<M}C`=V%JuXE$
zOGQmja=|1(tn)6BmNDp$Y7(4j3yhP|@U!mM7+s;btc*6vVSSaDO+5`7cpxeEsvTz%
z>AL2jVBO)|Z<Xv9g}z+gl#Q#Xuw{L!dO@sgY$y(kGTaW?zn-0yb(s#f#ED7!4LUZw
zjOhKI-Mf@0`>5E2rWn11nj|4FpKG(*j_4UuAcrRUGac=y`EtI+ZjF+8;qL9rjNgUs
z-dV*&l6%OVdEug;ItYbnmcs#F>p0|c(E89h|FGUAqhG6;8bzq4uRS#j#V$*Q9UUCB
z?R6_o@}_`rnn{^+(=}qERrT_hqk9V5cik_4*I_tyORVZkeuwllF;Y?!=|||$EUQZL
z<h<|dTtHd<sa(!OrjR53h9O65Nk+%&V+#UlgC+X3iHBCJ`8<0a2u01mkUM#l#{@;<
zj!CI|E%a{|u*9_)z{b`Wiky?ya7bCLyLYXs+=^g-1iYfKh|(a&)#kdI+Ml>1!tv<c
zvn~jZ%vOdQ6FpEtcspy7v)3|))b21Xww^P`J?!qh(2Zm8A7>bq2J~JeQA}W@auyjM
zKnfY{wfxL3l~;yF_pVEienbWCx>*ID?Tr1Xdi&(;-tcfc<nhJh{noTeH;($aM45YJ
zinZp!QfIT?^*OAKKREUC-TENq)Y^fzkKX9k>JHrVMb9Q{`5i&N`WI$0^494Anb8Zb
zUy`IB<J4xxcM57N?HL=3I$~=#uXD&7Hn(qJ@P_7^DtTwzdfA2j8B8#0ie8~dJ`at!
z-e9T|oo0V_*rm?QL*Z&y;EYDS=DB{*6dc{Vz<8?m^B;$L@>^RvuC&JSnx>>o5@OsI
zo#0e0zO-mf^Ah`zyUlG5$gVJ=>2x{8s7yKD=R5ay-q3r2y3p4zwQn)E!%wozkPDBu
zeEVc%R`{a(Z+(rE5Zju~KmwI?`=nr>LPxKG2}xBGbII%;ZpXWxK@V@!mLp4Tw|*lm
ze8_8=;CgWX-caea;Xj&C3G83{L-uhbY$d$%ty;O!;;GqP+UV~7{e60sNpXSC=Z-hs
zry1FLr!)KUmvXj0IXIA-j<Gp<zWfO_56|TZ*})%H=#Yc>UD+t^$$bY4*yVjze-6?P
z@;4k}mX2so8j(IOy^l@&#g!1Y27{ZtV)x@h+HriI@}|?TAC-v>+6>G^UGwbs<8bT1
zd>(SGrB<%u(^+{@>m_Gc>XD~fH`IF+yjMvbc!jqsE0ci46a*-E%=AT)TSM5svPq`C
zPxbsIMQnSqLj3WEoac`N%ce^1&%;G?&^&?VL9WCA=u<Dp9dh5sr74&0JAQk-z4fsW
zV&AyvXGPTX%w7cTm=_2?YbO@PPzEo?P0a>&Z~pnPC|h30u%$!RAfY27j$Z+DwbL!Y
zFM@Q9yDmbj3j%}P4=mwuF7l<LRK5eD;mShzCe^pf#Xg@Iy<VL=OJfMwrbKQv22j>}
zGnqK}BY8yDXMf34e#I@sDfr$ttGd$=dd&da_h?Xp=riXVG4N>bGr7;1ZlOmE1^Tmo
zJV)lm@CV5H22;x&Q}`ab=QDB&B2C3M-OQ3&nk(&72d&WsqmsjMZ|mCJ0{Ww`d599d
za;Z8_gM<T7eb4lk@VuKGt18+)JbgL=J5CEvS0cneq}^J;YatKh7Z5RWc(POk;@W$5
z^K(d)J-$7#=|y)NMnqxvqx-LzjUCP>qd()J)lWfkjO+hXX*X74)q^T3TCu$Led_9t
z>~SjbhNi^A*Ojx6((MB$4D-nywOh(<)E%oxV^8mQoSNN$sgWr$_3g=Qs_T&nm=onD
zj*cTkBiUpK?`wS~OoW1qxWU{xpF>_ejWalHzIN^LRQ@5uMGnI=N@Moj8LtQ*{9}S$
z!w|aWqs6ZJzNx0Am4U3%B0j4J_So)5Ym1LZVdRc4j=6X)h&gvnqMauu=XP5q8Al|y
z6&)HDQf@fz4<ZD<Ik5;^;A?<7NL<&EE*4mfEivk^uzs^{b*#W=B^jPmH?do~e3e!A
z?QtHSc6Q--u0?x-NLAjP5);xVKr38dwx|hVEkqp?D-oScOpK8|F>3lMQ=?;E>h>8l
zE&m~#@ZrOH-b~Tln@<Y1mqtyOWVsHz6e!pfcWrNsc=S<6gpVHS6MN<4{%cC??+X8X
z(Sn7iZv-zoQ(`kDEp9&3y?;(4QAos?e|<?zMWxVkkNL`;TOHhoycGWo{wpK9gxF^N
zWWSu6Y|*AgNJ9$0+0KZKr0C9sPC*!cwcmLMEmd;tRXUE<$X6ZmxPsbarLu~{;#Yf8
zvY8AA{HiUUQh+lZ$U+!YL1}BWIuyCM>Gp-YdX9WRV=SHVhA-I+dBp3KhQEKq6uPWK
z#o4URf-fl7CyeP$I})`$vXoecQmZ6lX1ftClGf^zO_q)$@vg*Q|9}b+L53*b#cE1O
z6Dm+eVk<n!Vyq-q3|py)t|bM~$mk8^XM^hHU<x)vX=4tB-+e3I@nNKz3)Ez%KeDtV
zHg^xc%eU;_^CihE?S1N>$$zTq@VXevA`ud@!$uUtAXghESRN`<Bv;<b#UBsCA-@H4
z@D3mH`eT?ORB6^ERgfuY3H=hpr!ajUf!6fUXNQKQ-e{-^EjKf@wT(yYJ%(^T>ID^*
z@pft0<+0qZxu?No@=Ow8&bgQucuA?eC^UxQlq%xyqY<OU)oRh$6-HK%a$)ilDPx2v
zwaM|iGJ{fi$eVC6D9L~B7S^J9Rc0m^&<as#r@HFp>w}&9=;8dC9BO&MW<y6>kNS&v
zH#8-`Oe;5@SiAE#deLI=$P`;>v3s_J{ofSd>(|>zW^O9Io?Z=1y>ZGZNB64TyxCh?
z=W5wNI|#*s#F`_RmoYKv!^5WOl2dDspUIW0IOb6%>Z@`yppg<-J^gs_z?7M9_8N6(
zW3G%~PLk&hH1wdX<I_p)X(I?V4kShj7!*8&5+yE^{XpQ;#!uR0S)5_#$bIZ}@*g2P
zKf4B3m531kGgV+Op2@j1SgHg1;Yze-)%}cp9!9#mA5_3k8wKe^2x2@+VA<>xD8B(@
z`7J7kSr_19USe<5L>DG^(+lhC^8-47DdTgowX+*Un@dbUum+*TO`vg?1uc<xK)%Jm
z2b-R@Wn#Om1gzgZNZ3a<IHoJaLDr$9+p<?l0YtFbqcx4&JkXtIm8bgx_~inYY~Pvs
zTs4X-8_4yq`B+#&8zAfhg7QDoa^V6%HmLtwwj}31{T`T7mc_l#BcIgoMU3hFkHlOx
z)-W3d*iJwHs8I*SNYwmnu#E`S^-*Lu-k+M^y}-Q_)n$nAZ~u3@nl8=BC&8ZWezf9t
z7VM~XCMo~*Htjc|q@y)bU{(biPUx#k;i7rVKD<9CC42of>*=oYvhfJ<B<6PYKwkkD
z{U9GPOYacgCo*|HC3d{GaQN!Tjrm#k9T&3$cVE{|zrH!yu(S{nmCmwz^WVy^|5ke|
z0YW9G{*THabp?&qD@E$2RV3RLQ1H>qGM6r7O%D~O^Jm*QKAb%BfNi7h%_p>1SyX7K
z1IZ|WUyai*F_)?4!0p$ip4V;-NNdFMQ{1fuaqv4tioHxx;zA`&nOoNL`?-&%q;}a0
z#_KU)tIa76i8fjtii4QP6-PSMdfrAFy|$dTAQr9Po%myc<MK^TY^{+=LAm~Uk<`R8
z<_2x*l)Op|tAWW>3%ic)<F&UB*#_4~tWPZ=h1f%7i=R;UtbEHPtx@7GO3sIsR2wb(
zB^}eJWYK(uk(dA@XXgUwy#L(v8+&??qa^X4-g|pJQc9|?^Dyg)6RDu@P^E9g0F<Ot
zEq!;4{!GzUFEr9U+rPCM{$U_;sUr9L*p}8^QnEhwO_^WvV^nnWMVhyHlGeLGdz+Pt
zl1pTNwXVHV2g09Y9@m(NkP>}XmvlwYtvW4l@NzvHd`kDLFgEz@;qkV41xVu7@<@^x
zg*Ew6q5V;BXmU4f4=0}nawscHs7!${-I+!Q^P=;`0Mtf&#{Spp<mH^R-M^)jSkPu&
zkd`(8=19X>)9{~tAg&l#L8$rJFOSum7u5m=Ri8tLjK<9VL%a6>qC^HU2F(5cZ~hdz
z?!Rp*F1>~j5XCJQAMXX={(6*)oX_e*`sT|4A7=&9mEYjpGRtrKhc9MhzxB_ApE)Vo
z{tO^*`slS}{@cW%_odb1?jAss%Xf;;LVS8AcNAjF3;+o4=?}--d9~>}{Uxvj&;^wO
z9g#;U3jc@#ee64JV`Ka8kB=g$hAgi&xfFz>W+8%c05P_wr1n{rFTFD<Zr%ElAJm}^
zn4c8#`P^70gi_E&(1Sx;W}h|Ph~n?wTwpjH`tt4p{Tv)@>i2d6CO@6rgSZV8@3vmm
z7OAo@FpqbxX}d6Rw9Xlh|DOyqC}1YQEC=FG5XXCf1O@ijh0gF~+&2B6A%Xa3_Ta+s
zFVhaFxXlBT(#^;dcrrLd(tp#nPM+s;GCp$Mief0;7rkyZQq6*+BYB%(9sxJ$fBS#@
z+s*_V_v%&3=BBsG&6`3X=D%oAuvUpL_uqOP$su{i&CLz`K8&lq3%e<ymbO!^?|@vZ
zp^IdCdUWX!upNfZ>j0OktE>Arhyb%qTx#^-j5SHXXqh*JGU<SDmhK4vKykCkoU~0l
zSCZGL4+sP31gl4{o`GFUtcWIw3)-M`E)K*%P<l;)b;6i|fkBhByVk8$Iq}y;IvPmV
zSkYyjAkj{oG6KP2y8_u1{0MSk*J@nunvy=Z(REjh&dI<Sz7OXEV*9*kB(<N2&y@l^
z^%=+j>8QnKqij1y!2}qv8LR=;Jdi}CtA(UT+~0sQAs>){Y~X8vH;zI1mwv_J4E&bh
zg@BUmPSjik30VZYxO<<GO}0^ywJflc>zPQ3T4tZk_O@3@hxix^U4YAS`n2^W!;hc)
z%iYqhfMPZc5OY=F(G6<aX;gAuQwX!7Z8gwmq5P9I<k2UUG?4XOhT9zp-VdCx@`<ZP
z^Ef4X^cgilOPGsf>N)U8^}&PGl{RSt8_Iv>Io+^ia(fQzHH?M<!Uy51e5DDDwmGHP
z=*N$Q&%iELxBKbWoHTIMlP>6;ghI=P1AKylxbofAmPmlJIpHQn`7DB<BTXk$Qx&WU
z3hjG^C+U<=C?oqsd0k#!>vVT!^sD%I`#iI%zD<C58~}uE@`iB9MRFw6j@XV?SG>Yt
zoPSPDadaSQ)ht|Gj+cx+ef{L&2WW95tZ(1?D?M@x0aKp@B~}^>2Zz05|Ht}Fe69ct
zGYK%!hy+O_JG}q;7P<X0qZKgFs$lRC37W0Ol`<%$)ftw-_~Hc6?uv88uhUkb>}die
zS0pf196mlifhz;|4GpK)Y6Act4dv&%c(Fx{O6KvKzmn!L*VhD7-gdsfu)!O|kP#qp
zV?%GXpmwLK>hWL*@DL0YRDlti^n`LD%b7DaAd|-VNym17F#}ChC$NK)H0lzKtNR#_
zyzR5FFi!fFWooz_0kinV>lMIeWT&<OLuE>6vz{L;X}QeU;0*$_NGOoA!+rv;Z5=Hz
z4ZtsF6b@uL@DdOd%Jm6KNc7Op2QJ}Y@%NVxf$oso>k635tK{URx5V@~^Q)<;sa$jJ
zga9%-tVy1}1pvgd^QlI-&TRt&-|W5KYe2EDfehk-`_kyhCQOh>6lK-`tW@UaWI$}r
zcjZc9(~f_@2>k>m0X)xI4@B=cz5`U!4b<bD5N!fi>bx3H8L)wv#XVlibKY<gX7Jk3
z(g9S1vk|k9h)6GBeKGOz#wBQZq1)Nn+0E_k8}HUZ_YwvmVp1QUtgP|(@86@=zqPmP
z!~B(W9aF-AVm2E@*np^Bgdyua{v4$4Pg=$QLlkrH7?<muEH?zu*Pw{zQ=<cPnu#*w
z#ZkcZ+=t+q)K>`8?yty54_`_cGjAUtncSqr%7|v4;q|~cKu;s3N424ZOLx$ao;$Aw
z(SjZra%zsD4RH{P=rw-G-+JL`S>P4(U94|`?s5W{?0N!WR~4S22V}4$=*jDL9l?C4
z7fIXF(tx55!~wC=ma{iP=>!{uk{EvADK%jTrbAjeQ$}FodJ^WlFj8dzgqkL;{52~l
zXD%p3l8vI9@y2-HpG0CV99)we1{O&I0!?<9_*l2uC*?O_6w#xNEim-6Fp(y=8oqq#
z`Shqo7GGG4M-9o-KO{`=OD36gkveGJwL$AopO6p|{Gjx^ytSUQ2KU9bEnYGnG&+$(
znPONgaDzE5YMMod#9*v+!Ez}9k};6zd?n(Yfb{g8NssG+3L9N|41qQ?L;YPU-rTQk
zNzvrAn0X!)zGq`ioVREjiwYmI>!SB<`bkPl7U~t*HGQ=c6%k3$wW8cJRDL!>oxbz-
zl*;M6t1VeV!RlXv3skjFo4q)6{!*5D(yJe*45C^;eLAXp{gHIqC1eM$0=K-O!j>OK
z)TU#HA!KF5%ZB(w(X-u#P_!gnw^_A62tXnAB-v8|I~yuWN(gXLDD+-=0M;xGVpt~Q
zue6<zthPfhJBSUSgxD1js2Vwj#fL*|9^YJ<ysf2m>(Qfh)YpyaPQLOHXVAc(yL|b_
z5;Rexf)Psh@1KBx6$}{?7kHH@D=WuG<(HJ4f~@UZTN?v{Ur?}tNU|g^j=*uqx$>Y>
zJ_Gv;WvqG4u0RGbgySH3mwEj25Qy&%!ka<_PNOdhamf6MZzH@GQVE?PWcDmd1N)Rv
z&|$TGUGYGnS#NoJ%W!aV>S58_Ylp?mtF_m+`lJQ}o3N!I3rm#uIRUe58g$;mVBdmF
z&CZ?)yF$&R(9Dy6{P=<LO_5?_1QU?w4db6-m|p>~*AXN!B)2^%XALPSsRt06e$1`I
zpRByRub>8akDs$FG?d`AG7)%l7}n%0EEaYbkc7X4tinDV>?xe(c3LWa)rTCE@mk~p
zP;yS1$cmuPPSvFoCr@4h-xr9?H`$fRnGXBH<z{~{;KyCx=jExdYn0~YF=}S&h`RxF
z4h~zUr*Lz4tlLc2`SZszyZ_9{5HJNlg&7@l^QgW)eMa`HH)ozucYTVBi-iK9p<`J#
z1dCA_iX-HO1+>A0L`79WV-HK29#)R)*#7<dvmWH-<~BfoO7BWvPfwPnF2J*gfX)=P
zfHy6P#0CU<)tZ=?tc*7tta}0O6Q?94C6|3`n)%M3XMj{o8?&RcI9eMJ4*N9tyw^!v
zPby3oV!0{<cBCLq?oJKwMLd80e0rcr5M*|92E@OormWp(KZx?mdM!r48P_!@XSiv8
zPc}KyXe-eSTE$<W{<;dr8CTw^=M3CKhe*4O-uCb)(KRqQiU3u4IDFFrv*hoYMS(YJ
z5TVXWI}g7E3KjjN(hnw|X{EcGsA>5h0o!Z$G`w8w?N5X^o)foeXHD)91Ohr{Y;25h
zbtIrZn4t@FQ!C!`Iq65^6B6|9Y8vM2Ul0?E%Qe9M)jh+S**)Mq5Ddb%aQs{hpvZR(
z4g00zMBtXi4h|X*z?c(5mHr_pcncI8zu>eVPc|_@$Yn{=6ofUU*WDD^<yo57XVu!=
zd?FghM_wYt<>Uyn1Lpa*4SxTvV~@XMoEYkKjky>oIbMQy%{PEn&qEgp_B<bq@7!o1
z#&#ql!+Y&xPF>V5_S2Z7ZNMh{Jg8%gj-+R0-EOrm0@1**%PKBidzyEsHlPa5{|X!9
zRqths3ZTJJWzjIi_r4$D17nMKtF_ShEHu3IK!W2ublYZUXLX0Xsk;^GAnv$+{W`yd
zgf1Ah{RA~<jv^k5iUYpx&raLFp?mhdrDeydrOJQq9tiW+d}!2~htM)Da~|P`a~04`
z6w<=T-*d`LNl}KqG9OHDP_3Sa#uoUM405-CG16c47RNyZy&ZwF1Op71cjpg;D1(6;
zKb*&f`C=}ETYt%k;ABAX1hnSykorGfy?S*O9`(w)8h5~M{LeLL7)7Y2tpP^>2k3Ub
z#wwC*0^&VB#Na{^^5-IWWKDQhHQJZlv9Nd#jtoYS8QOn8kx$f`bEnmQ1{8|r74MkK
z;tTjP9Hu{H373PhdhUVSt()+5>A(F6LOx)d)wzrlnD0Z3jL#Y$JZ<@PU0Hb#j2WMx
z7t^%Yn6RK1#nXUJyH=Q9ey0vWP36IZa}lb1_V)H4Aw{~Ws`_BK+-+eL-_;cy6olYW
zI=5=VRl#>o17u>)%&|Do>s^736lAhMk6G?+uE==IvO+qGK*%N?gCv_x+L;S1b%G&#
zL?B=RKYsG$zL0uE)kqXf1K}ZOTiYPb%n<yX-e7!u{QK^iu@!Fwf+zYQyc$F%*CGDy
z-M8-w*o$oXD++}Dz62g$NyHAfjd`O>rsc;WdE2t)P14MiN0m8q!!3OgAJ5T>QQ!ps
z6W6MdZ_UlY5cqeuNN{7GLgs#K>8_`z6v##+AW8h=(j`V%>22-oEMLEP(a6ox*^z_A
zhQ0XgT$Vf7W-Shb>-dQi!7zq}7GM_LQdK?RD8-qle-ZMOE07GqU^*u%dhBkTXyXU9
zH!qWu3F84qRh}Z?^7H)b*PC22t_Tp;-Eece^5$Q6ifRJWCKNxXQWr>pmEmBz0_cHy
zv8AP_MdR-FPCc&BHZd6*?{Jq*Y9_&2(%@W1OItY3LczLTXJE0@bacGrF3U|H{|izH
zOo-#)o@h@S%^z9{oLgA?<%fMC<W${z=caM}E4g!Bi98Cv7Cl+_Ag?+IHyA%o#^G>j
zIXQQF_4V|?adAIF2`t$VW8dEhHqSW;8Wel1OsM?Y0=qc3oed|rq2g*c*Kn>sqh3hZ
z;S0=QfZSG7s|Rcryeyu9HQqQ#G|D~ZPIDc@c@Ywlk|N<ieAlkA^WBQNsit-quC?pp
z@HGTVl^S}C`>*P1c~}C{3JX~vL{X<o`>?2JIKj^k3N3&vznswa_Oj*PlmEI(vgZT@
z4)pf+!f0%SH(A+Sv0{r<<>%+OfDrnvwRLyVPf-1B5UdR6kVsw_-kqJDLYFRmg)17P
z2w6%4*b0rg%2<>Q6@r2?f=8ipq!e^KW1dxR#CC2*E}7$yLSJTMREH~jMb>U7?Zmlr
z+ZSNaK_2`WKELJFciu`brm^*e-xIE|oDb$(Y8{J9%L$l%TP#uk>A%mN_M%V2uM<)9
e85-DLhtT{ncE7#Z;u`Ft5o#)$Hw&&?1phBb_eJFZ

literal 0
HcmV?d00001

diff --git a/docs/_toc.yml b/docs/_toc.yml
index 8f15ad5a..6ba9d46d 100644
--- a/docs/_toc.yml
+++ b/docs/_toc.yml
@@ -14,6 +14,7 @@ parts:
 - caption: Inference
   chapters:
   - file: inference
+  - file: large_scale
 - caption: Interfaces
   chapters:
   - file: api
diff --git a/docs/api.rst b/docs/api.rst
index e3a5ebb6..0b9f9b8a 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -16,6 +16,11 @@ File formats
 Sample data
 +++++++++++
 
+.. autoclass:: tsinfer.VariantData
+    :members:
+    :inherited-members:
+
+
 .. autoclass:: tsinfer.SampleData
     :members:
     :inherited-members:
@@ -60,6 +65,27 @@ Running inference
 
 .. autofunction:: tsinfer.post_process
 
+*****************
+Batched inference
+*****************
+
+.. autofunction:: tsinfer.match_ancestors_batch_init
+
+.. autofunction:: tsinfer.match_ancestors_batch_groups
+
+.. autofunction:: tsinfer.match_ancestors_batch_group_partition
+
+.. autofunction:: tsinfer.match_ancestors_batch_group_finalise
+
+.. autofunction:: tsinfer.match_ancestors_batch_finalise
+
+.. autofunction:: tsinfer.match_samples_batch_init
+
+.. autofunction:: tsinfer.match_samples_batch_partition
+
+.. autofunction:: tsinfer.match_samples_batch_finalise
+
+
 *****************
 Container classes
 *****************
diff --git a/docs/inference.md b/docs/inference.md
index e6ebac21..d6b846fc 100644
--- a/docs/inference.md
+++ b/docs/inference.md
@@ -300,4 +300,4 @@ The final phase of a `tsinfer` inference consists of a number steps:
        section
     2. Describe the structure of the output tree sequences; how the
        nodes are mapped, what the time values mean, etc.
-:::
+:::
\ No newline at end of file
diff --git a/docs/large_scale.md b/docs/large_scale.md
new file mode 100644
index 00000000..ee343d8c
--- /dev/null
+++ b/docs/large_scale.md
@@ -0,0 +1,136 @@
+---
+jupytext:
+  text_representation:
+    extension: .md
+    format_name: myst
+    format_version: 0.12
+    jupytext_version: 1.9.1
+kernelspec:
+  display_name: Python 3
+  language: python
+  name: python3
+---
+
+:::{currentmodule} tsinfer
+:::
+
+(sec_large_scale)=
+
+# Large Scale Inference
+
+tsinfer scales well and has been successfully used with datasets up to half a
+million samples. Here we detail considerations and tips for each step of the
+inference process to help you scale up your analysis. A snakemake pipeline
+which implements this parallelisation scheme is available at https://github.com/benjeffery/tsinfer-snakemake.
+
+(sec_large_scale_ancestor_generation)=
+
+## Data preparation
+
+For large scale inference the data must be in [VCF Zarr](https://github.com/sgkit-dev/vcf-zarr-spec)
+format, read by the {class}`VariantData` class. [bio2zarr](https://github.com/sgkit-dev/bio2zarr)
+is recommended for conversion from VCF. [sgkit](https://github.com/sgkit-dev/sgkit) can then
+be used to perform initial filtering.
+
+
+## Ancestor generation
+
+Ancestor generation is generally the fastest step in inference and is not yet
+parallelised out-of-core in tsinfer. However it scales well on machines with
+many cores and hyperthreading via the `num_threads` argument to
+{meth}`generate_ancestors`. The limiting factor is often that the
+entire genotype array for the contig being inferred needs to fit in RAM.
+This is the high-water mark for memory usage in tsinfer.
+Note the `genotype_encoding` argument: setting this to
+{class}`tsinfer.GenotypeEncoding.ONE_BIT` reduces the memory footprint of
+the genotype array by a factor of 8, for a surprisingly small increase in
+runtime. With this encoding, the RAM needed is roughly
+`num_sites * num_samples * ploidy / 8` bytes.
+
+## Ancestor matching
+
+Ancestor matching is one of the more time consuming steps of inference. It
+proceeds in groups, progressively growing the tree sequence with younger
+ancestors. At each stage the parallelism is limited to the number of ancestors
+whose possible inheritors are already matched, as all possible inheritors
+of a sample must be matched in an earlier group. For a typical human data set
+the number of ancestors per group varies from single digits up to approximately
+the number of samples.
+The plot below shows the number of ancestors matched in each group for a typical
+human data set:
+
+```{figure} _static/ancestor_grouping.png
+:width: 80%
+```
+
+There are five tsinfer API methods that can be used to parallelise ancestor
+matching. 
+
+Initially {meth}`match_ancestors_batch_init` should be called to 
+set up the batch matching and to determine the groupings of ancestors.
+This method writes a file `metadata.json` to the `work_dir` that contains
+a JSON encoded dictionary with configuration for later steps, and a key
+`ancestor_grouping` which is a list of dictionaries, each containing the
+list of ancestors in that group (key:`ancestors`) and a proposed partitioning of
+those ancestors into sets that can be matched in parallel (key:`partitions`).
+The dictionary is also returned by the method.
+The partitioning is controlled by the `min_work_per_job` and `max_num_partitions`
+arguments. Ancestors are placed in a partition until the sum of their lengths exceeds
+`min_work_per_job`, when a new partition is started. However, the number of partitions
+is not allowed to exceed `max_num_partitions`. It is suggested to set `max_num_partitions`
+to around 3-4x the number of worker nodes available, and `min_work_per_job` to around
+2,000,000 for a typical human data set.
+
+Each group is then matched in turn, either by calling {meth}`match_ancestors_batch_groups`
+to match without partitioning, or by calling {meth}`match_ancestors_batch_group_partition`
+many times in parallel followed by a single call to {meth}`match_ancestors_batch_group_finalise`.
+Each call to {meth}`match_ancestors_batch_groups` or {meth}`match_ancestors_batch_group_finalise`
+outputs the tree sequence to `work_dir`, which is then used by the next group. The length of
+the `ancestor_grouping` in the metadata dictionary determines the group numbers that these methods
+will need to be called for, and the length of the `partitions` list in each group determines
+the number of calls to {meth}`match_ancestors_batch_group_partition` that are needed (if any).
+
+{meth}`match_ancestors_batch_groups` matches groups, without partitioning, from
+`group_index_start` (inclusively) to `group_index_end` (exclusively). Combining
+many groups into one call reduces the overhead from job submission and start
+up times, but note on job failure the process can only be resumed from the
+last `group_index_end`.
+
+To match a single group in parallel, call {meth}`match_ancestors_batch_group_partition`
+once for each partition listed in the `ancestor_grouping[group_index]['partitions']` list,
+incrementing `partition_index`. This will match the ancestors, placing the match data in
+the `work_dir`. Once all are complete, a single call to
+{meth}`match_ancestors_batch_group_finalise` will then insert the matches and
+output the tree sequence to `work_dir`.
+
+At any point the process can be resumed from the last successfully completed call to
+{meth}`match_ancestors_batch_groups`, as the tree sequences in `work_dir` checkpoint the
+progress.
+
+Finally after the final group, call {meth}`match_ancestors_batch_finalise` to
+combine the groups into a single tree sequence.
+
+The partitioning in `metadata.json` does not have to be used for every group. As early groups are
+not matching to a large tree sequence it is often faster to not partition the first half of the
+groups, depending on job set up and queueing time on your cluster.
+
+Calls to {meth}`match_ancestors_batch_group_partition` will only use a single core, but 
+{meth}`match_ancestors_batch_groups` will use as many cores as `num_threads` is set to.
+Therefore this value and cluster resources requested should be scaled with the number of ancestors,
+which can be read from the metadata dictionary.
+
+
+
+## Sample matching 
+
+Sample matching is far simpler than ancestor matching as it is essentially the same as a single group
+of ancestors. There are three API methods that work together to enable distributed sample matching.
+{meth}`match_samples_batch_init` should be called to set up the batch matching and to determine the
+groupings of samples. Similar to {meth}`match_ancestors_batch_init`, it has `min_work_per_job` and
+`max_num_partitions` arguments to control the level of parallelism. The method writes a file
+`metadata.json` to the directory `work_dir` that contains a JSON encoded dictionary with
+configuration for later steps. This is also returned by the call. The `num_partitions` key in
+this dictionary is the number of times {meth}`match_samples_batch_partition` will need
+to be called, with each partition index as the `partition_index` argument. These calls can happen
+in parallel and write match data to the `work_dir` which is then used by
+{meth}`match_samples_batch_finalise` to output the tree sequence.
\ No newline at end of file
diff --git a/tsinfer/formats.py b/tsinfer/formats.py
index c485d792..95e23a44 100644
--- a/tsinfer/formats.py
+++ b/tsinfer/formats.py
@@ -2308,7 +2308,7 @@ class VariantData(SampleData):
         the inference process will have ``inferred_ts.num_samples`` equal to double
         the number returned by ``VariantData.num_samples``.
 
-    :param Union(str, zarr.hierarchy.Group) path_or_zarr: The input dataset in
+    :param Union(str, zarr.Group) path_or_zarr: The input dataset in
         `VCF Zarr <https://github.com/sgkit-dev/vcf-zarr-spec>`_ format.
         This can either a path to the Zarr dataset saved on disk, or the
         Zarr object itself.
diff --git a/tsinfer/inference.py b/tsinfer/inference.py
index 9693e0ed..b948d060 100644
--- a/tsinfer/inference.py
+++ b/tsinfer/inference.py
@@ -592,7 +592,7 @@ def match_ancestors(
 
 
 def match_ancestors_batch_init(
-    working_dir,
+    work_dir,
     sample_data_path,
     ancestral_state,
     ancestor_data_path,
@@ -613,11 +613,78 @@ def match_ancestors_batch_init(
     time_units=None,
     record_provenance=True,
 ):
+    """
+    match_ancestors_batch_init(work_dir, sample_data_path, ancestral_state,
+    ancestor_data_path, min_work_per_job, \\*, max_num_partitions=None,
+    sample_mask=None, site_mask=None, recombination_rate=None, mismatch_ratio=None,
+    path_compression=True)
+
+    Initialise a batched ancestor matching job. This function is used to
+    prepare a working directory for running a batched ancestor matching job. The
+    job is split into groups of ancestors, with each group further split into
+    partitions of ancestors if necessary. `work_dir` is created and details
+    are written to `metadata.json` in `work_dir`. The job can then be run
+    using :meth:`match_ancestors_batch_groups` and
+    :meth:`match_ancestors_batch_group_partition` then finally
+    :meth:`match_ancestors_batch_group_finalise`. See
+    :ref:`large scale inference<sec_large_scale>` for more details about how these
+    methods work together. See :meth:`match_ancestors` for details on
+    ancestor matching.
+
+    :param str work_dir: The directory in which to store the working files.
+    :param str sample_data_path: The input dataset in
+        `VCF Zarr <https://github.com/sgkit-dev/vcf-zarr-spec>`_ format.
+        Path to the Zarr dataset saved on disk. See :class:`VariantData`.
+    :param Union(array, str) ancestral_state: A numpy array of strings specifying
+        the ancestral states (alleles) used in inference. This must be the same length
+        as the number of unmasked sites in the dataset. Alternatively, a single string
+        can be provided, giving the name of an array in the input dataset which contains
+        the ancestral states. Unknown ancestral states can be specified using "N".
+        Any ancestral states which do not match any of the known alleles at that site,
+        will be tallied, and a warning issued summarizing the unknown ancestral states.
+    :param str ancestor_data_path: The path to the file containing the ancestors
+        generated by :meth:`generate_ancestors`.
+    :param int min_work_per_job: The minimum amount of work (as a count of genotypes) to
+        allocate to a single parallel job. If the amount of work in a group of ancestors
+        exceeds this level it will be broken up into parallel partitions, subject to
+        the constraint of `max_num_partitions`.
+    :param int max_num_partitions: The maximum number of partitions to split a group of
+        ancestors into. Useful for limiting the number of jobs in a workflow to
+        avoid job overhead. Defaults to 1000.
+    :param Union(array, str) sample_mask: A numpy array of booleans specifying which
+        samples to mask out (exclude) from the dataset. Alternatively, a string
+        can be provided, giving the name of an array in the input dataset which contains
+        the sample mask. If ``None`` (default), all samples are included.
+    :param Union(array, str) site_mask: A numpy array of booleans specifying which
+        sites to mask out (exclude) from the dataset. Alternatively, a string
+        can be provided, giving the name of an array in the input dataset which contains
+        the site mask. If ``None`` (default), all sites are included.
+    :param recombination_rate: Either a floating point value giving a constant rate
+        :math:`\\rho` per unit length of genome, or an :class:`msprime.RateMap`
+        object. This is used to calculate the probability of recombination between
+        adjacent sites. If ``None``, all matching conflicts are resolved by
+        recombination and all inference sites will have a single mutation
+        (equivalent to mismatch_ratio near zero)
+    :type recombination_rate: float, msprime.RateMap
+    :param float mismatch_ratio: The probability of a mismatch relative to the median
+        probability of recombination between adjacent sites: can only be used if a
+        recombination rate has been set (default: ``None`` treated as 1 if
+        ``recombination_rate`` is set).
+    :param bool path_compression: Whether to merge edges that share identical
+        paths (essentially taking advantage of shared recombination breakpoints).
+    :return: A dictionary of the job metadata, as written to `metadata.json`
+        in `work_dir`. `ancestor_grouping` in this dict contains the grouping
+        of ancestors into groups and should be used to guide calling
+        :meth:`match_ancestors_batch_groups` and
+        :meth:`match_ancestors_batch_group_partition`.
+    :rtype: dict
+    """
+
     if max_num_partitions is None:
         max_num_partitions = 1000
 
-    working_dir = pathlib.Path(working_dir)
-    working_dir.mkdir(parents=True, exist_ok=True)
+    work_dir = pathlib.Path(work_dir)
+    work_dir.mkdir(parents=True, exist_ok=True)
 
     ancestors = formats.AncestorData.load(ancestor_data_path)
     sample_data = formats.VariantData(
@@ -663,7 +730,7 @@ def match_ancestors_batch_init(
                     current_partition_work += ancestor_lengths[ancestor]
             partitions.append(current_partition)
         if len(partitions) > 1:
-            group_dir = working_dir / f"group_{group_index}"
+            group_dir = work_dir / f"group_{group_index}"
             group_dir.mkdir()
         # TODO: Should be a dataclass
         group = {
@@ -690,7 +757,7 @@ def match_ancestors_batch_init(
         "record_provenance": record_provenance,
         "ancestor_grouping": ancestor_grouping,
     }
-    metadata_path = working_dir / "metadata.json"
+    metadata_path = work_dir / "metadata.json"
     metadata_path.write_text(json.dumps(metadata))
     return metadata
 
@@ -725,6 +792,28 @@ def initialize_ancestor_matcher(metadata, ancestors_ts=None, **kwargs):
 def match_ancestors_batch_groups(
     work_dir, group_index_start, group_index_end, num_threads=0
 ):
+    """
+    match_ancestors_batch_groups(work_dir, group_index_start,
+    group_index_end, num_threads=0)
+
+    Match a set of ancestor groups from `group_index_start` (inclusive) to
+    `group_index_end` (exclusive) in a batched ancestor matching job. See
+    :ref:`large scale inference<sec_large_scale>` for more details.
+
+    A tree sequence file for `group_index_start - 1` must exist in `work_dir`, unless
+    `group_index_start` is 0. After matching the tree sequence for `group_index_end - 1`
+    is written to `work_dir`.
+
+    :param str work_dir: The working directory for the batch job, as written by
+        :meth:`match_ancestors_batch_init`.
+    :param int group_index_start: The first group index to match.
+    :param int group_index_end: The group index to stop matching at.
+    :param int num_threads: The number of worker threads to use. If this is <= 1 then
+        match sequentially.
+    :return: The tree sequence representing the inferred ancestors for the last group
+        matched
+    :rtype: tskit.TreeSequence
+    """
     metadata_path = os.path.join(work_dir, "metadata.json")
     with open(metadata_path) as f:
         metadata = json.load(f)
@@ -756,6 +845,24 @@ def match_ancestors_batch_groups(
 
 
 def match_ancestors_batch_group_partition(work_dir, group_index, partition_index):
+    """
+    match_ancestors_batch_group_partition(work_dir, group_index, partition_index)
+
+    Match a single partition of ancestors from a group in a batched ancestor matching
+    job. See :ref:`large scale inference<sec_large_scale>` for more details. The
+    tree sequence for the group before must exist in `work_dir`. After matching the
+    results for the partition are written to `work_dir`. Once all partitions for a
+    group have been matched, the group can be finalised using
+    :meth:`match_ancestors_batch_group_finalise`. The number of partitions in a
+    group is recorded in `metadata.json` in the work dir under the
+    `ancestor_grouping` key. This method uses a single thread.
+
+    :param str work_dir: The working directory for the batch job, as written by
+        :meth:`match_ancestors_batch_init`.
+    :param int group_index: The group index that contains the partition to match.
+    :param int partition_index: The partition index to match. Must be less than the
+        number of partitions in the batch job metadata for this group.
+    """
     metadata_path = os.path.join(work_dir, "metadata.json")
     with open(metadata_path) as f:
         metadata = json.load(f)
@@ -781,6 +888,20 @@ def match_ancestors_batch_group_partition(work_dir, group_index, partition_index
 
 
 def match_ancestors_batch_group_finalise(work_dir, group_index):
+    """
+    match_ancestors_batch_group_finalise(work_dir, group_index)
+
+    Finalise a group of partitioned ancestors in a batched ancestor matching job. See
+    :ref:`large scale inference<sec_large_scale>` for more details. The tree sequence
+    for the group before must exist in `work_dir`, along with the results for all
+    partitions in this group. Writes the tree sequence for the group to `work_dir`.
+
+    :param str work_dir: The working directory for the batch job, as written by
+        :meth:`match_ancestors_batch_init`.
+    :param int group_index: The group index to finalise.
+    :return: The tree sequence representing the inferred ancestors for the group
+    :rtype: tskit.TreeSequence
+    """
     metadata_path = os.path.join(work_dir, "metadata.json")
     with open(metadata_path) as f:
         metadata = json.load(f)
@@ -805,6 +926,19 @@ def match_ancestors_batch_group_finalise(work_dir, group_index):
 
 
 def match_ancestors_batch_finalise(work_dir):
+    """
+    match_ancestors_batch_finalise(work_dir)
+
+    Finalise a batched ancestor matching job. This method should be called after all
+    groups have been matched, either by :meth:`match_ancestors_batch_groups` or
+    :meth:`match_ancestors_batch_group_finalise`. Returns the final ancestors
+    tree sequence for the batch job. `work_dir` is retained and not deleted.
+
+    :param str work_dir: The working directory for the batch job, as written by
+        :meth:`match_ancestors_batch_init`.
+    :return: The tree sequence representing the inferred ancestors for the batch job
+    :rtype: tskit.TreeSequence
+    """
     metadata_path = os.path.join(work_dir, "metadata.json")
     with open(metadata_path) as f:
         metadata = json.load(f)
@@ -1023,6 +1157,79 @@ def match_samples_batch_init(
     record_provenance=True,
     map_additional_sites=None,
 ):
+    """
+    match_samples_batch_init(work_dir, sample_data_path, ancestral_state,
+    ancestor_ts_path, min_work_per_job, \\*, max_num_partitions=None,
+    sample_mask=None, site_mask=None, recombination_rate=None, mismatch_ratio=None,
+    path_compression=True, indexes=None, post_process=None, force_sample_times=False)
+
+    Initialise a batched sample matching job. Creates `work_dir` and writes job
+    details to `metadata.json`. The job can then be run using parallel calls to
+    :meth:`match_samples_batch_partition` and once those are complete
+    finally :meth:`match_samples_batch_finalise`.
+
+    The `num_partitions` key in the metadata dict contains the number of partitions
+    that need to be processed.
+
+    :param str work_dir: The directory in which to store the working files.
+    :param str sample_data_path: The input dataset in
+        `VCF Zarr <https://github.com/sgkit-dev/vcf-zarr-spec>`_ format.
+        Path to the Zarr dataset saved on disk. See :class:`VariantData`.
+    :param Union(array, str) ancestral_state: A numpy array of strings specifying
+        the ancestral states (alleles) used in inference. This must be the same
+        length as the number of unmasked sites in the dataset. Alternatively, a
+        single string can be provided, giving the name of an array in the input
+        dataset which contains the ancestral states. Unknown ancestral states can
+        be specified using "N". Any ancestral states which do not match any of the
+        known alleles at that site, will be tallied, and a warning issued
+        summarizing the unknown ancestral states.
+    :param str ancestor_ts_path: The path to the tree sequence file containing the
+        ancestors generated by :meth:`match_ancestors_batch_finalise`, or
+        :meth:`match_ancestors`.
+    :param int min_work_per_job: The minimum amount of work (as a count of
+        genotypes) to allocate to a single parallel job. If the amount of work in
+        a group of samples exceeds this level it will be broken up into parallel
+        partitions, subject to the constraint of `max_num_partitions`.
+    :param int max_num_partitions: The maximum number of partitions to split a
+        group of samples into. Useful for limiting the number of jobs in a
+        workflow to avoid job overhead. Defaults to 1000.
+    :param Union(array, str) sample_mask: A numpy array of booleans specifying
+        which samples to mask out (exclude) from the dataset. Alternatively, a
+        string can be provided, giving the name of an array in the input dataset
+        which contains the sample mask. If ``None`` (default), all samples are
+        included.
+    :param Union(array, str) site_mask: A numpy array of booleans specifying which
+        sites to mask out (exclude) from the dataset. Alternatively, a string can
+        be provided, giving the name of an array in the input dataset which
+        contains the site mask. If ``None`` (default), all sites are included.
+    :param recombination_rate: Either a floating point value giving a constant
+        rate :math:`\\rho` per unit length of genome, or an
+        :class:`msprime.RateMap` object. This is used to calculate the
+        probability of recombination between adjacent sites. If ``None``, all
+        matching conflicts are resolved by recombination and all inference sites
+        will have a single mutation (equivalent to mismatch_ratio near zero)
+    :type recombination_rate: float, msprime.RateMap
+    :param float mismatch_ratio: The probability of a mismatch relative to the
+        median probability of recombination between adjacent sites: can only be
+        used if a recombination rate has been set (default: ``None`` treated as 1
+        if ``recombination_rate`` is set).
+    :param bool path_compression: Whether to merge edges that share identical paths
+        (essentially taking advantage of shared recombination breakpoints).
+    :param indexes: The sample indexes to match. If ``None`` (default), all
+        samples are matched.
+    :type indexes: arraylike
+    :param bool post_process: Whether to run the :func:`post_process` method on
+        the tree sequence which, among other things, removes ancestral
+        material that does not end up in the current samples (if not specified,
+        defaults to ``True``)
+    :param bool force_sample_times: After matching, should an attempt be made to
+        adjust the time of "historical samples" (those associated with an
+        individual having a non-zero time) such that the sample nodes in the tree
+        sequence appear at the time of the individual with which they are
+        associated.
+    :return: A dictionary of the job metadata, as written to `metadata.json` in
+        `work_dir`.
+    """
     if max_num_partitions is None:
         max_num_partitions = 1000
 
@@ -1079,13 +1286,26 @@ def match_samples_batch_init(
         num_samples_per_partition = 1
     wd.num_samples_per_partition = num_samples_per_partition
     wd.num_partitions = math.ceil(len(sample_indexes) / num_samples_per_partition)
-    wd_path = work_dir / "wd.json"
+    wd_path = work_dir / "metadata.json"
     wd.save(wd_path)
     return wd
 
 
 def match_samples_batch_partition(work_dir, partition_index):
-    wd_path = pathlib.Path(work_dir) / "wd.json"
+    """
+    match_samples_batch_partition(work_dir, partition_index)
+
+    Match a single partition of samples in a batched sample matching job. See
+    :ref:`large scale inference<sec_large_scale>` for more details. Match data
+    for the partition is written to `work_dir`. Uses a single thread to perform
+    matching.
+
+    :param str work_dir: The working directory for the batch job, as written by
+        :meth:`match_samples_batch_init`.
+    :param int partition_index: The partition index to match. Must be less than
+        the number of partitions in the batch job metadata key `num_partitions`.
+    """
+    wd_path = pathlib.Path(work_dir) / "metadata.json"
     wd = SampleBatchWorkDescriptor.load(wd_path)
     if partition_index >= wd.num_partitions or partition_index < 0:
         raise ValueError(f"Partition {partition_index} is out of range")
@@ -1113,7 +1333,19 @@ def match_samples_batch_partition(work_dir, partition_index):
 
 
 def match_samples_batch_finalise(work_dir):
-    wd_path = os.path.join(work_dir, "wd.json")
+    """
+    match_samples_batch_finalise(work_dir)
+
+    Finalise a batched sample matching job. This method should be called after all
+    partitions have been matched by :meth:`match_samples_batch_partition`. Returns
+    the final tree sequence for the batch job. `work_dir` is retained and not deleted.
+
+    :param str work_dir: The working directory for the batch job, as written by
+        :meth:`match_samples_batch_init`.
+    :return: The tree sequence representing the inferred history of the samples.
+    :rtype: tskit.TreeSequence
+    """
+    wd_path = os.path.join(work_dir, "metadata.json")
     wd = SampleBatchWorkDescriptor.load(wd_path)
     variant_data, ancestor_ts, matcher = load_variant_data_and_ancestors_ts(wd)
     results = []

From d03946128c0427981a3c6f8d65f12029ab177c2c Mon Sep 17 00:00:00 2001
From: Ben Jeffery <ben.jeffery@bdi.ox.ac.uk>
Date: Thu, 6 Feb 2025 13:47:15 +0000
Subject: [PATCH 02/10] Add example flow

---
 docs/_static/example_flow.svg |  1 +
 docs/large_scale.md           | 40 +++++++++++++++++++++++++++++++++++
 tsinfer/inference.py          | 11 ----------
 3 files changed, 41 insertions(+), 11 deletions(-)
 create mode 100644 docs/_static/example_flow.svg

diff --git a/docs/_static/example_flow.svg b/docs/_static/example_flow.svg
new file mode 100644
index 00000000..90fdca29
--- /dev/null
+++ b/docs/_static/example_flow.svg
@@ -0,0 +1 @@
+<svg aria-roledescription="flowchart-v2" role="graphics-document document" viewBox="0 0 710.328125 875" style="max-width: 710.328125px;" class="flowchart" xmlns="http://www.w3.org/2000/svg" width="100%" id="export-svg"><style xmlns="http://www.w3.org/1999/xhtml">@import url("https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.2.0/css/all.min.css"); p {margin: 0;}</style><style>#export-svg{font-family:arial,sans-serif;font-size:14px;fill:#333;}@keyframes edge-animation-frame{from{stroke-dashoffset:0;}}@keyframes dash{to{stroke-dashoffset:0;}}#export-svg .edge-animation-slow{stroke-dasharray:9,5!important;stroke-dashoffset:900;animation:dash 50s linear infinite;stroke-linecap:round;}#export-svg .edge-animation-fast{stroke-dasharray:9,5!important;stroke-dashoffset:900;animation:dash 20s linear infinite;stroke-linecap:round;}#export-svg .error-icon{fill:#552222;}#export-svg .error-text{fill:#552222;stroke:#552222;}#export-svg .edge-thickness-normal{stroke-width:1px;}#export-svg .edge-thickness-thick{stroke-width:3.5px;}#export-svg .edge-pattern-solid{stroke-dasharray:0;}#export-svg .edge-thickness-invisible{stroke-width:0;fill:none;}#export-svg .edge-pattern-dashed{stroke-dasharray:3;}#export-svg .edge-pattern-dotted{stroke-dasharray:2;}#export-svg .marker{fill:#333333;stroke:#333333;}#export-svg .marker.cross{stroke:#333333;}#export-svg svg{font-family:arial,sans-serif;font-size:14px;}#export-svg p{margin:0;}#export-svg .label{font-family:arial,sans-serif;color:#333;}#export-svg .cluster-label text{fill:#333;}#export-svg .cluster-label span{color:#333;}#export-svg .cluster-label span p{background-color:transparent;}#export-svg .label text,#export-svg span{fill:#333;color:#333;}#export-svg .node rect,#export-svg .node circle,#export-svg .node ellipse,#export-svg .node polygon,#export-svg .node path{fill:#ECECFF;stroke:#B8B8FF;stroke-width:1px;}#export-svg .rough-node .label text,#export-svg .node .label text,#export-svg .image-shape .label,#export-svg .icon-shape 
.label{text-anchor:middle;}#export-svg .node .katex path{fill:#000;stroke:#000;stroke-width:1px;}#export-svg .rough-node .label,#export-svg .node .label,#export-svg .image-shape .label,#export-svg .icon-shape .label{text-align:center;}#export-svg .node.clickable{cursor:pointer;}#export-svg .root .anchor path{fill:#333333!important;stroke-width:0;stroke:#333333;}#export-svg .arrowheadPath{fill:#333333;}#export-svg .edgePath .path{stroke:#333333;stroke-width:1px;}#export-svg .flowchart-link{stroke:#333333;fill:none;}#export-svg .edgeLabel{background-color:rgba(232,232,232, 0.8);text-align:center;}#export-svg .edgeLabel p{background-color:rgba(232,232,232, 0.8);}#export-svg .edgeLabel rect{opacity:0.5;background-color:rgba(232,232,232, 0.8);fill:rgba(232,232,232, 0.8);}#export-svg .labelBkg{background-color:rgba(232, 232, 232, 0.5);}#export-svg .cluster rect{fill:#ffffde;stroke:#aaaa33;stroke-width:1px;}#export-svg .cluster text{fill:#333;}#export-svg .cluster span{color:#333;}#export-svg div.mermaidTooltip{position:absolute;text-align:center;max-width:200px;padding:2px;font-family:arial,sans-serif;font-size:12px;background:hsl(80, 100%, 96.2745098039%);border:1px solid #aaaa33;border-radius:2px;pointer-events:none;z-index:100;}#export-svg .flowchartTitleText{text-anchor:middle;font-size:18px;fill:#333;}#export-svg rect.text{fill:none;stroke-width:0;}#export-svg .icon-shape,#export-svg .image-shape{background-color:rgba(232,232,232, 0.8);text-align:center;}#export-svg .icon-shape p,#export-svg .image-shape p{background-color:rgba(232,232,232, 0.8);padding:2px;}#export-svg .icon-shape rect,#export-svg .image-shape rect{opacity:0.5;background-color:rgba(232,232,232, 0.8);fill:rgba(232,232,232, 0.8);}#export-svg .node .neo-node{stroke:#B8B8FF;}#export-svg [data-look="neo"].node rect,#export-svg [data-look="neo"].cluster rect,#export-svg [data-look="neo"].node polygon{stroke:#B8B8FF;filter:drop-shadow( 1px 2px 2px rgba(185,185,185,1));}#export-svg [data-look="neo"].node 
path{stroke:#B8B8FF;}#export-svg [data-look="neo"].node .outer-path{filter:drop-shadow( 1px 2px 2px rgba(185,185,185,1));}#export-svg [data-look="neo"].node .neo-line path{stroke:hsl(240, 60%, 86.2745098039%);filter:none;}#export-svg [data-look="neo"].node circle{stroke:#B8B8FF;filter:drop-shadow( 1px 2px 2px rgba(185,185,185,1));}#export-svg [data-look="neo"].node circle .state-start{fill:#000000;}#export-svg [data-look="neo"].statediagram-cluster rect{fill:#ECECFF;stroke:#B8B8FF;stroke-width:1px;}#export-svg [data-look="neo"].icon-shape .icon{fill:#B8B8FF;filter:drop-shadow( 1px 2px 2px rgba(185,185,185,1));}#export-svg [data-look="neo"].icon-shape .icon-neo path{stroke:#B8B8FF;filter:drop-shadow( 1px 2px 2px rgba(185,185,185,1));}#export-svg :root{--mermaid-font-family:"trebuchet ms",verdana,arial,sans-serif;}</style><g><marker orient="auto" markerHeight="14" markerWidth="10.5" markerUnits="userSpaceOnUse" refY="7" refX="7.75" viewBox="0 0 11.5 14" class="marker flowchart-v2" id="export-svg_flowchart-v2-pointEnd"><path style="stroke-width: 0; stroke-dasharray: 1, 0;" class="arrowMarkerPath" d="M 0 0 L 11.5 7 L 0 14 z"/></marker><marker orient="auto" markerHeight="14" markerWidth="11.5" markerUnits="userSpaceOnUse" refY="7" refX="4" viewBox="0 0 11.5 14" class="marker flowchart-v2" id="export-svg_flowchart-v2-pointStart"><polygon style="stroke-width: 0; stroke-dasharray: 1, 0;" class="arrowMarkerPath" points="0,7 11.5,14 11.5,0"/></marker><marker orient="auto" markerHeight="14" markerWidth="10.5" markerUnits="userSpaceOnUse" refY="7" refX="11.5" viewBox="0 0 11.5 14" class="marker flowchart-v2" id="export-svg_flowchart-v2-pointEnd-margin"><path style="stroke-width: 0; stroke-dasharray: 1, 0;" class="arrowMarkerPath" d="M 0 0 L 11.5 7 L 0 14 z"/></marker><marker orient="auto" markerHeight="14" markerWidth="11.5" markerUnits="userSpaceOnUse" refY="7" refX="1" viewBox="0 0 11.5 14" class="marker flowchart-v2" id="export-svg_flowchart-v2-pointStart-margin"><polygon 
style="stroke-width: 0; stroke-dasharray: 1, 0;" class="arrowMarkerPath" points="0,7 11.5,14 11.5,0"/></marker><marker orient="auto" markerHeight="14" markerWidth="14" markerUnits="userSpaceOnUse" refX="10.75" refY="5" viewBox="0 0 10 10" class="marker flowchart-v2" id="export-svg_flowchart-v2-circleEnd"><circle style="stroke-width: 0; stroke-dasharray: 1, 0;" class="arrowMarkerPath" r="5" cy="5" cx="5"/></marker><marker orient="auto" markerHeight="14" markerWidth="14" markerUnits="userSpaceOnUse" refY="5" refX="0" viewBox="0 0 10 10" class="marker flowchart-v2" id="export-svg_flowchart-v2-circleStart"><circle style="stroke-width: 0; stroke-dasharray: 1, 0;" class="arrowMarkerPath" r="5" cy="5" cx="5"/></marker><marker orient="auto" markerHeight="14" markerWidth="14" markerUnits="userSpaceOnUse" refX="12.25" refY="5" viewBox="0 0 10 10" class="marker flowchart-v2" id="export-svg_flowchart-v2-circleEnd-margin"><circle style="stroke-width: 0; stroke-dasharray: 1, 0;" class="arrowMarkerPath" r="5" cy="5" cx="5"/></marker><marker orient="auto" markerHeight="14" markerWidth="14" markerUnits="userSpaceOnUse" refY="5" refX="-2" viewBox="0 0 10 10" class="marker flowchart-v2" id="export-svg_flowchart-v2-circleStart-margin"><circle style="stroke-width: 0; stroke-dasharray: 1, 0;" class="arrowMarkerPath" r="5" cy="5" cx="5"/></marker><marker orient="auto" markerHeight="12" markerWidth="12" markerUnits="userSpaceOnUse" refY="7.5" refX="17.7" viewBox="0 0 15 15" class="marker cross flowchart-v2" id="export-svg_flowchart-v2-crossEnd"><path style="stroke-width: 2.5;" class="arrowMarkerPath" d="M 1,1 L 14,14 M 1,14 L 14,1"/></marker><marker orient="auto" markerHeight="12" markerWidth="12" markerUnits="userSpaceOnUse" refY="7.5" refX="-3.5" viewBox="0 0 15 15" class="marker cross flowchart-v2" id="export-svg_flowchart-v2-crossStart"><path style="stroke-width: 2.5; stroke-dasharray: 1, 0;" class="arrowMarkerPath" d="M 1,1 L 14,14 M 1,14 L 14,1"/></marker><marker orient="auto" 
markerHeight="12" markerWidth="12" markerUnits="userSpaceOnUse" refY="7.5" refX="17.7" viewBox="0 0 15 15" class="marker cross flowchart-v2" id="export-svg_flowchart-v2-crossEnd-margin"><path style="stroke-width: 2.5;" class="arrowMarkerPath" d="M 1,1 L 14,14 M 1,14 L 14,1"/></marker><marker orient="auto" markerHeight="12" markerWidth="12" markerUnits="userSpaceOnUse" refY="7.5" refX="-3.5" viewBox="0 0 15 15" class="marker cross flowchart-v2" id="export-svg_flowchart-v2-crossStart-margin"><path style="stroke-width: 2.5; stroke-dasharray: 1, 0;" class="arrowMarkerPath" d="M 1,1 L 14,14 M 1,14 L 14,1"/></marker><g class="root"><g class="clusters"/><g class="edgePaths"><path marker-end="url(#export-svg_flowchart-v2-pointEnd)" data-points="W3sieCI6MzU1LjE2NDA2MjUsInkiOjU5fSx7IngiOjM1NS4xNjQwNjI1LCJ5Ijo4NH0seyJ4IjozNTUuMTY0MDYyNSwieSI6MTA5fV0=" data-id="L_match_ancestors_init_s1_0" data-et="edge" data-edge="true" style=";" class="edge-thickness-normal edge-pattern-solid edge-thickness-normal edge-pattern-solid flowchart-link" id="L_match_ancestors_init_s1_0" d="M355.1640625,59L355.1640625,84L355.1640625,105"/><path marker-end="url(#export-svg_flowchart-v2-pointEnd)" data-points="W3sieCI6MjkyLjUyMjU4NjYzMzY2MzM3LCJ5IjoxNjB9LHsieCI6MjMxLjEwOTM3NSwieSI6MTg1fSx7IngiOjIzMS4xMDkzNzUsInkiOjIxMH1d" data-id="L_s1_group_7_1_0" data-et="edge" data-edge="true" style=";" class="edge-thickness-normal edge-pattern-solid edge-thickness-normal edge-pattern-solid flowchart-link" id="L_s1_group_7_1_0" d="M292.52258663366337,160L239.40707235860893,181.62218554530838Q231.109375,185 231.109375,193.9588733636205L231.109375,206"/><path marker-end="url(#export-svg_flowchart-v2-pointEnd)" data-points="W3sieCI6NDE3LjgwNTUzODM2NjMzNjYzLCJ5IjoxNjB9LHsieCI6NDc5LjIxODc1LCJ5IjoxODV9LHsieCI6NDc5LjIxODc1LCJ5IjoyMTB9XQ==" data-id="L_s1_group_7_2_0" data-et="edge" data-edge="true" style=";" class="edge-thickness-normal edge-pattern-solid edge-thickness-normal edge-pattern-solid flowchart-link" 
id="L_s1_group_7_2_0" d="M417.80553836633663,160L470.9210526413911,181.62218554530838Q479.21875,185 479.21875,193.9588733636205L479.21875,206"/><path marker-end="url(#export-svg_flowchart-v2-pointEnd)" data-points="W3sieCI6MjMxLjEwOTM3NSwieSI6MjYxfSx7IngiOjIzMS4xMDkzNzUsInkiOjI4Nn0seyJ4IjoyOTIuNTIyNTg2NjMzNjYzMzcsInkiOjMxMX1d" data-id="L_group_7_1_finalise_7_0" data-et="edge" data-edge="true" style=";" class="edge-thickness-normal edge-pattern-solid edge-thickness-normal edge-pattern-solid flowchart-link" id="L_group_7_1_finalise_7_0" d="M231.109375,261L231.109375,277.0411266363795Q231.109375,286 239.40707235860893,289.3778144546916L288.8177916123146,309.49185748359474"/><path marker-end="url(#export-svg_flowchart-v2-pointEnd)" data-points="W3sieCI6NDc5LjIxODc1LCJ5IjoyNjF9LHsieCI6NDc5LjIxODc1LCJ5IjoyODZ9LHsieCI6NDE3LjgwNTUzODM2NjMzNjYzLCJ5IjozMTF9XQ==" data-id="L_group_7_2_finalise_7_0" data-et="edge" data-edge="true" style=";" class="edge-thickness-normal edge-pattern-solid edge-thickness-normal edge-pattern-solid flowchart-link" id="L_group_7_2_finalise_7_0" d="M479.21875,261L479.21875,277.0411266363795Q479.21875,286 470.9210526413911,289.3778144546916L421.5103333876854,309.49185748359474"/><path marker-end="url(#export-svg_flowchart-v2-pointEnd)" data-points="W3sieCI6MzU1LjE2NDA2MjUsInkiOjM2Mn0seyJ4IjozNTUuMTY0MDYyNSwieSI6Mzg3fSx7IngiOjM1NS4xNjQwNjI1LCJ5Ijo0MTJ9XQ==" data-id="L_finalise_7_group_85_0" data-et="edge" data-edge="true" style=";" class="edge-thickness-normal edge-pattern-solid edge-thickness-normal edge-pattern-solid flowchart-link" id="L_finalise_7_group_85_0" d="M355.1640625,362L355.1640625,387L355.1640625,408"/><path marker-end="url(#export-svg_flowchart-v2-pointEnd)" data-points="W3sieCI6MzU1LjE2NDA2MjUsInkiOjQ2M30seyJ4IjozNTUuMTY0MDYyNSwieSI6NDg4fSx7IngiOjM1NS4xNjQwNjI1LCJ5Ijo1MTN9XQ==" data-id="L_group_85_group_8_0" data-et="edge" data-edge="true" style=";" class="edge-thickness-normal edge-pattern-solid edge-thickness-normal edge-pattern-solid 
flowchart-link" id="L_group_85_group_8_0" d="M355.1640625,463L355.1640625,488L355.1640625,509"/><path marker-end="url(#export-svg_flowchart-v2-pointEnd)" data-points="W3sieCI6MjgxLjM5ODQzNzUsInkiOjU1My41MTQyMDExNDYxNjc5fSx7IngiOjEwNy4wNTQ2ODc1LCJ5Ijo1ODl9LHsieCI6MTA3LjA1NDY4NzUsInkiOjYxNH1d" data-id="L_group_8_group_70_1_0" data-et="edge" data-edge="true" style=";" class="edge-thickness-normal edge-pattern-solid edge-thickness-normal edge-pattern-solid flowchart-link" id="L_group_8_group_70_1_0" d="M281.3984375,553.5142011461679L114.79887811434716,587.4237531289394Q107.0546875,589 107.0546875,596.9029768106626L107.0546875,610"/><path marker-end="url(#export-svg_flowchart-v2-pointEnd)" data-points="W3sieCI6MzU1LjE2NDA2MjUsInkiOjU2NH0seyJ4IjozNTUuMTY0MDYyNSwieSI6NTg5fSx7IngiOjM1NS4xNjQwNjI1LCJ5Ijo2MTR9XQ==" data-id="L_group_8_group_70_2_0" data-et="edge" data-edge="true" style=";" class="edge-thickness-normal edge-pattern-solid edge-thickness-normal edge-pattern-solid flowchart-link" id="L_group_8_group_70_2_0" d="M355.1640625,564L355.1640625,589L355.1640625,610"/><path marker-end="url(#export-svg_flowchart-v2-pointEnd)" data-points="W3sieCI6NDI4LjkyOTY4NzUsInkiOjU1My41MTQyMDExNDYxNjc5fSx7IngiOjYwMy4yNzM0Mzc1LCJ5Ijo1ODl9LHsieCI6NjAzLjI3MzQzNzUsInkiOjYxNH1d" data-id="L_group_8_group_70_3_0" data-et="edge" data-edge="true" style=";" class="edge-thickness-normal edge-pattern-solid edge-thickness-normal edge-pattern-solid flowchart-link" id="L_group_8_group_70_3_0" d="M428.9296875,553.5142011461679L595.5292468856528,587.4237531289394Q603.2734375,589 603.2734375,596.9029768106626L603.2734375,610"/><path marker-end="url(#export-svg_flowchart-v2-pointEnd)" data-points="W3sieCI6MTA3LjA1NDY4NzUsInkiOjY2NX0seyJ4IjoxMDcuMDU0Njg3NSwieSI6NjkwfSx7IngiOjI2NywieSI6NzIyLjU1NTE1MTQ1NzkwMDR9XQ==" data-id="L_group_70_1_finalise_8_0" data-et="edge" data-edge="true" style=";" class="edge-thickness-normal edge-pattern-solid edge-thickness-normal edge-pattern-solid flowchart-link" 
id="L_group_70_1_finalise_8_0" d="M107.0546875,665L107.0546875,682.0970231893374Q107.0546875,690 114.79887811434716,691.5762468710606L263.08036779057795,721.7573523961929"/><path marker-end="url(#export-svg_flowchart-v2-pointEnd)" data-points="W3sieCI6MzU1LjE2NDA2MjUsInkiOjY2NX0seyJ4IjozNTUuMTY0MDYyNSwieSI6NjkwfSx7IngiOjM1NS4xNjQwNjI1LCJ5Ijo3MTV9XQ==" data-id="L_group_70_2_finalise_8_0" data-et="edge" data-edge="true" style=";" class="edge-thickness-normal edge-pattern-solid edge-thickness-normal edge-pattern-solid flowchart-link" id="L_group_70_2_finalise_8_0" d="M355.1640625,665L355.1640625,690L355.1640625,711"/><path marker-end="url(#export-svg_flowchart-v2-pointEnd)" data-points="W3sieCI6NjAzLjI3MzQzNzUsInkiOjY2NX0seyJ4Ijo2MDMuMjczNDM3NSwieSI6NjkwfSx7IngiOjQ0My4zMjgxMjUsInkiOjcyMi41NTUxNTE0NTc5MDA0fV0=" data-id="L_group_70_3_finalise_8_0" data-et="edge" data-edge="true" style=";" class="edge-thickness-normal edge-pattern-solid edge-thickness-normal edge-pattern-solid flowchart-link" id="L_group_70_3_finalise_8_0" d="M603.2734375,665L603.2734375,682.0970231893374Q603.2734375,690 595.5292468856528,691.5762468710606L447.24775720942205,721.7573523961929"/><path marker-end="url(#export-svg_flowchart-v2-pointEnd)" data-points="W3sieCI6MzU1LjE2NDA2MjUsInkiOjc2Nn0seyJ4IjozNTUuMTY0MDYyNSwieSI6NzkxfSx7IngiOjM1NS4xNjQwNjI1LCJ5Ijo4MTZ9XQ==" data-id="L_finalise_8_batch_finalise_0" data-et="edge" data-edge="true" style=";" class="edge-thickness-normal edge-pattern-solid edge-thickness-normal edge-pattern-solid flowchart-link" id="L_finalise_8_batch_finalise_0" d="M355.1640625,766L355.1640625,791L355.1640625,812"/></g><g class="edgeLabels"><g class="edgeLabel"><g transform="translate(0, 0)" data-id="L_match_ancestors_init_s1_0" class="label"><foreignObject height="0" width="0"><div class="labelBkg" xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span 
class="edgeLabel"></span></div></foreignObject></g></g><g class="edgeLabel"><g transform="translate(0, 0)" data-id="L_s1_group_7_1_0" class="label"><foreignObject height="0" width="0"><div class="labelBkg" xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="edgeLabel"></span></div></foreignObject></g></g><g class="edgeLabel"><g transform="translate(0, 0)" data-id="L_s1_group_7_2_0" class="label"><foreignObject height="0" width="0"><div class="labelBkg" xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="edgeLabel"></span></div></foreignObject></g></g><g class="edgeLabel"><g transform="translate(0, 0)" data-id="L_group_7_1_finalise_7_0" class="label"><foreignObject height="0" width="0"><div class="labelBkg" xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="edgeLabel"></span></div></foreignObject></g></g><g class="edgeLabel"><g transform="translate(0, 0)" data-id="L_group_7_2_finalise_7_0" class="label"><foreignObject height="0" width="0"><div class="labelBkg" xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="edgeLabel"></span></div></foreignObject></g></g><g class="edgeLabel"><g transform="translate(0, 0)" data-id="L_finalise_7_group_85_0" class="label"><foreignObject height="0" width="0"><div class="labelBkg" xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="edgeLabel"></span></div></foreignObject></g></g><g class="edgeLabel"><g transform="translate(0, 0)" data-id="L_group_85_group_8_0" class="label"><foreignObject height="0" width="0"><div 
class="labelBkg" xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="edgeLabel"></span></div></foreignObject></g></g><g class="edgeLabel"><g transform="translate(0, 0)" data-id="L_group_8_group_70_1_0" class="label"><foreignObject height="0" width="0"><div class="labelBkg" xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="edgeLabel"></span></div></foreignObject></g></g><g class="edgeLabel"><g transform="translate(0, 0)" data-id="L_group_8_group_70_2_0" class="label"><foreignObject height="0" width="0"><div class="labelBkg" xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="edgeLabel"></span></div></foreignObject></g></g><g class="edgeLabel"><g transform="translate(0, 0)" data-id="L_group_8_group_70_3_0" class="label"><foreignObject height="0" width="0"><div class="labelBkg" xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="edgeLabel"></span></div></foreignObject></g></g><g class="edgeLabel"><g transform="translate(0, 0)" data-id="L_group_70_1_finalise_8_0" class="label"><foreignObject height="0" width="0"><div class="labelBkg" xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="edgeLabel"></span></div></foreignObject></g></g><g class="edgeLabel"><g transform="translate(0, 0)" data-id="L_group_70_2_finalise_8_0" class="label"><foreignObject height="0" width="0"><div class="labelBkg" xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span 
class="edgeLabel"></span></div></foreignObject></g></g><g class="edgeLabel"><g transform="translate(0, 0)" data-id="L_group_70_3_finalise_8_0" class="label"><foreignObject height="0" width="0"><div class="labelBkg" xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="edgeLabel"></span></div></foreignObject></g></g><g class="edgeLabel"><g transform="translate(0, 0)" data-id="L_finalise_8_batch_finalise_0" class="label"><foreignObject height="0" width="0"><div class="labelBkg" xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="edgeLabel"></span></div></foreignObject></g></g></g><g class="nodes"><g transform="translate(355.1640625, 33.5)" data-look="classic" data-et="node" data-node="true" data-id="match_ancestors_init" id="flowchart-match_ancestors_init-0" class="node default"><rect stroke="url(#gradient)" height="51" width="99.265625" y="-25.5" x="-49.6328125" ry="5" data-id="match_ancestors_init" rx="5" style="" class="basic label-container"/><g transform="translate(-34.6328125, -10.5)" style="" class="label"><rect/><foreignObject height="21" width="69.265625"><div xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="nodeLabel"><p>batch_init()</p></span></div></foreignObject></g></g><g transform="translate(355.1640625, 134.5)" data-look="classic" data-et="node" data-node="true" data-id="s1" id="flowchart-s1-1" class="node default"><rect stroke="url(#gradient)" height="51" width="147.53125" y="-25.5" x="-73.765625" ry="5" data-id="s1" rx="5" style="" class="basic label-container"/><g transform="translate(-58.765625, -10.5)" style="" class="label"><rect/><foreignObject height="21" width="117.53125"><div xmlns="http://www.w3.org/1999/xhtml" style="display: 
table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="nodeLabel"><p>batch_groups(0, 5)</p></span></div></foreignObject></g></g><g transform="translate(231.109375, 235.5)" data-look="classic" data-et="node" data-node="true" data-id="group_7_1" id="flowchart-group_7_1-3" class="node default"><rect stroke="url(#gradient)" height="51" width="198.109375" y="-25.5" x="-99.0546875" ry="5" data-id="group_7_1" rx="5" style="" class="basic label-container"/><g transform="translate(-84.0546875, -10.5)" style="" class="label"><rect/><foreignObject height="21" width="168.109375"><div xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="nodeLabel"><p>batch_group_partition(5, 0)</p></span></div></foreignObject></g></g><g transform="translate(479.21875, 235.5)" data-look="classic" data-et="node" data-node="true" data-id="group_7_2" id="flowchart-group_7_2-5" class="node default"><rect stroke="url(#gradient)" height="51" width="198.109375" y="-25.5" x="-99.0546875" ry="5" data-id="group_7_2" rx="5" style="" class="basic label-container"/><g transform="translate(-84.0546875, -10.5)" style="" class="label"><rect/><foreignObject height="21" width="168.109375"><div xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="nodeLabel"><p>batch_group_partition(5, 1)</p></span></div></foreignObject></g></g><g transform="translate(355.1640625, 336.5)" data-look="classic" data-et="node" data-node="true" data-id="finalise_7" id="flowchart-finalise_7-7" class="node default"><rect stroke="url(#gradient)" height="51" width="176.328125" y="-25.5" x="-88.1640625" ry="5" data-id="finalise_7" rx="5" style="" class="basic label-container"/><g transform="translate(-73.1640625, -10.5)" style="" class="label"><rect/><foreignObject height="21" 
width="146.328125"><div xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="nodeLabel"><p>batch_group_finalise(5)</p></span></div></foreignObject></g></g><g transform="translate(355.1640625, 437.5)" data-look="classic" data-et="node" data-node="true" data-id="group_85" id="flowchart-group_85-11" class="node default"><rect stroke="url(#gradient)" height="51" width="147.53125" y="-25.5" x="-73.765625" ry="5" data-id="group_85" rx="5" style="" class="basic label-container"/><g transform="translate(-58.765625, -10.5)" style="" class="label"><rect/><foreignObject height="21" width="117.53125"><div xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="nodeLabel"><p>batch_groups(6, 7)</p></span></div></foreignObject></g></g><g transform="translate(355.1640625, 538.5)" data-look="classic" data-et="node" data-node="true" data-id="group_8" id="flowchart-group_8-13" class="node default"><rect stroke="url(#gradient)" height="51" width="147.53125" y="-25.5" x="-73.765625" ry="5" data-id="group_8" rx="5" style="" class="basic label-container"/><g transform="translate(-58.765625, -10.5)" style="" class="label"><rect/><foreignObject height="21" width="117.53125"><div xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="nodeLabel"><p>batch_groups(7, 8)</p></span></div></foreignObject></g></g><g transform="translate(107.0546875, 639.5)" data-look="classic" data-et="node" data-node="true" data-id="group_70_1" id="flowchart-group_70_1-15" class="node default"><rect stroke="url(#gradient)" height="51" width="198.109375" y="-25.5" x="-99.0546875" ry="5" data-id="group_70_1" rx="5" style="" class="basic label-container"/><g transform="translate(-84.0546875, -10.5)" style="" 
class="label"><rect/><foreignObject height="21" width="168.109375"><div xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="nodeLabel"><p>batch_group_partition(8, 0)</p></span></div></foreignObject></g></g><g transform="translate(355.1640625, 639.5)" data-look="classic" data-et="node" data-node="true" data-id="group_70_2" id="flowchart-group_70_2-17" class="node default"><rect stroke="url(#gradient)" height="51" width="198.109375" y="-25.5" x="-99.0546875" ry="5" data-id="group_70_2" rx="5" style="" class="basic label-container"/><g transform="translate(-84.0546875, -10.5)" style="" class="label"><rect/><foreignObject height="21" width="168.109375"><div xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="nodeLabel"><p>batch_group_partition(8, 1)</p></span></div></foreignObject></g></g><g transform="translate(603.2734375, 639.5)" data-look="classic" data-et="node" data-node="true" data-id="group_70_3" id="flowchart-group_70_3-19" class="node default"><rect stroke="url(#gradient)" height="51" width="198.109375" y="-25.5" x="-99.0546875" ry="5" data-id="group_70_3" rx="5" style="" class="basic label-container"/><g transform="translate(-84.0546875, -10.5)" style="" class="label"><rect/><foreignObject height="21" width="168.109375"><div xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="nodeLabel"><p>batch_group_partition(8, 2)</p></span></div></foreignObject></g></g><g transform="translate(355.1640625, 740.5)" data-look="classic" data-et="node" data-node="true" data-id="finalise_8" id="flowchart-finalise_8-21" class="node default"><rect stroke="url(#gradient)" height="51" width="176.328125" y="-25.5" x="-88.1640625" ry="5" data-id="finalise_8" rx="5" style="" 
class="basic label-container"/><g transform="translate(-73.1640625, -10.5)" style="" class="label"><rect/><foreignObject height="21" width="146.328125"><div xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="nodeLabel"><p>batch_group_finalise(8)</p></span></div></foreignObject></g></g><g transform="translate(355.1640625, 841.5)" data-look="classic" data-et="node" data-node="true" data-id="batch_finalise" id="flowchart-batch_finalise-27" class="node default"><rect stroke="url(#gradient)" height="51" width="124.9375" y="-25.5" x="-62.46875" ry="5" data-id="batch_finalise" rx="5" style="" class="basic label-container"/><g transform="translate(-47.46875, -10.5)" style="" class="label"><rect/><foreignObject height="21" width="94.9375"><div xmlns="http://www.w3.org/1999/xhtml" style="display: table-cell; white-space: normal; line-height: 1.5; max-width: 200px; text-align: center;"><span class="nodeLabel"><p>batch_finalise()</p></span></div></foreignObject></g></g></g></g></g><defs><filter width="130%" height="130%" id="drop-shadow"><feDropShadow flood-color="#FFFFFF" flood-opacity="0.06" stdDeviation="0" dy="4" dx="4"/></filter></defs><defs><filter width="150%" height="150%" id="drop-shadow-small"><feDropShadow flood-color="#FFFFFF" flood-opacity="0.06" stdDeviation="0" dy="2" dx="2"/></filter></defs></svg>
\ No newline at end of file
diff --git a/docs/large_scale.md b/docs/large_scale.md
index ee343d8c..16c76d52 100644
--- a/docs/large_scale.md
+++ b/docs/large_scale.md
@@ -119,6 +119,46 @@ Calls to {meth}`match_ancestors_batch_group_partition` will only use a single co
 Therefore this value and cluster resources requested should be scaled with the number of ancestors,
 which can be read from the metadata dictionary.
 
+As an example of how the API methods can be used together, suppose the metadata dictionary
+created by {meth}`match_ancestors_batch_init` contains the following:
+
+```python
+{
+    "ancestor_grouping": [
+        {"ancestors": [0, ... 9], "partitions": None},
+        {
+          "ancestors": [10, ... 15],
+          "partitions": [[10, 11, 12], [13, 14, 15]]
+        },
+        {"ancestors": [16, ... 19], "partitions": None},
+        {"ancestors": [20, ... 25], "partitions": None},
+        {"ancestors": [26, ... 30], "partitions": None},
+        {
+          "ancestors": [31, ... 41],
+          "partitions": [[31, 32, 33, 34, 35, 36], [37, 38, 39, 40, 41]]
+        },
+        {"ancestors": [42, ... 45], "partitions": None},
+        {"ancestors": [46, ... 50], "partitions": None},
+        {
+          "ancestors": [51, ... 65],
+          "partitions": [
+            [51, 52, 53, 54],
+            [55, 56, 57, 58],
+            [59, 60, 61, 62, 63, 64, 65]
+          ]
+        },
+    ]
+}
+```
+Then the flow could look like the following diagram (calls on the same horizontal line can be
+done in parallel; note that method names are shortened):
+
+```{figure} _static/example_flow.svg
+:width: 80%
+```
+
+Note that groups 1, 5 and 8 can be partitioned, but only groups 5 and 8 are actually partitioned in this example; as stated above, partitioning for groups is optional. Groups 0-4 are matched in one call, groups 6 and 7 are matched in two calls, but
+could have been matched in one. By splitting 6 and 7 the flow makes an additional resume point in the case of job failure at the cost of job start up and queueing time.
 
 
 ## Sample matching 
diff --git a/tsinfer/inference.py b/tsinfer/inference.py
index b948d060..4d5e5864 100644
--- a/tsinfer/inference.py
+++ b/tsinfer/inference.py
@@ -1899,17 +1899,6 @@ def run(self):
         return self.ancestor_data
 
 
-@dataclasses.dataclass
-class StoredMatchData:
-    """
-    A class to store the results of a matching run to disk, for later use.
-    """
-
-    group_id: str
-    num_sites: int
-    results: dict
-
-
 class Matcher:
     """
     A matching instance, used in both ``tsinfer.match_ancestors`` and

From 6c02df19f502be6ff5b9833ceb6f17a8b2818789 Mon Sep 17 00:00:00 2001
From: Ben Jeffery <ben.jeffery@bdi.ox.ac.uk>
Date: Thu, 6 Feb 2025 13:53:48 +0000
Subject: [PATCH 03/10] Nits

---
 docs/large_scale.md  | 11 ++++++-----
 tsinfer/inference.py |  4 ++--
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/docs/large_scale.md b/docs/large_scale.md
index 16c76d52..eb4dca59 100644
--- a/docs/large_scale.md
+++ b/docs/large_scale.md
@@ -45,7 +45,8 @@ Note the `genotype_encoding` argument, setting this to
 {class}`tsinfer.GenotypeEncoding.ONE_BIT` reduces the memory footprint of
 the genotype array by a factor of 8, for a surprisingly small increase in
 runtime. With this encoding, the RAM needed is roughly 
-`num_sites * num_samples * ploidy / 8 bytes.`
+`num_sites * num_samples * ploidy / 8` bytes. However, this encoding
+only supports biallelic sites, with no missingness.
 
 ## Ancestor matching
 
@@ -57,7 +58,7 @@ of a sample must be matched in an earlier group. For a typical human data set
 the number of samples per group varies from single digits up to approximately
 the number of samples.
 The plot below shows the number of ancestors matched in each group for a typical
-human data set:
+human data set, earlier groups are older ancestors:
 
 ```{figure} _static/ancestor_grouping.png
 :width: 80%
@@ -103,9 +104,9 @@ the `working_dir`. Once all are complete a single call to
 {meth}`match_ancestors_batch_group_finalise` will then insert the matches and
 output the tree sequence to `work_dir`.
 
-At anypoint the process can be resumed from the last successfully completed call to 
-{meth}`match_ancestors_batch_groups`. As the tree sequences in `work_dir` checkpoint the
-progress.
+Each call to {meth}`match_ancestors_batch_groups` and {meth}`match_ancestors_batch_group_finalise` results in a tree sequence being written to `work_dir`.
+These tree sequences are essentially checkpoints from which the batch matching workflow
+can be resumed on job failure.
 
 Finally after the final group, call {meth}`match_ancestors_batch_finalise` to
 combine the groups into a single tree sequence.
diff --git a/tsinfer/inference.py b/tsinfer/inference.py
index 4d5e5864..858a91fd 100644
--- a/tsinfer/inference.py
+++ b/tsinfer/inference.py
@@ -647,7 +647,7 @@ def match_ancestors_batch_init(
     :param int min_work_per_job: The minimum amount of work (as a count of genotypes) to
         allocate to a single parallel job. If the amount of work in a group of ancestors
         exceeds this level it will be broken up into parallel partitions, subject to
-        the constriant of `max_num_partitions`.
+        the constraint of `max_num_partitions`.
     :param int max_num_partitions: The maximum number of partitions to split a group of
         ancestors into. Useful for limiting the number of jobs in a workflow to
         avoid job overhead. Defaults to 1000.
@@ -1189,7 +1189,7 @@ def match_samples_batch_init(
     :param int min_work_per_job: The minimum amount of work (as a count of
         genotypes) to allocate to a single parallel job. If the amount of work in
         a group of samples exceeds this level it will be broken up into parallel
-        partitions, subject to the constriant of `max_num_partitions`.
+        partitions, subject to the constraint of `max_num_partitions`.
     :param int max_num_partitions: The maximum number of partitions to split a
         group of samples into. Useful for limiting the number of jobs in a
         workflow to avoid job overhead. Defaults to 1000.

From edcf3ff680c7f4fe7fae608e8a12555861e1470f Mon Sep 17 00:00:00 2001
From: Ben Jeffery <ben.jeffery@bdi.ox.ac.uk>
Date: Tue, 11 Feb 2025 15:07:00 +0000
Subject: [PATCH 04/10] Add link

---
 docs/large_scale.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/large_scale.md b/docs/large_scale.md
index eb4dca59..8428c2d6 100644
--- a/docs/large_scale.md
+++ b/docs/large_scale.md
@@ -21,7 +21,7 @@ kernelspec:
 tsinfer scales well and has been successfully used with datasets up to half a
 million samples. Here we detail considerations and tips for each step of the
 inference process to help you scale up your analysis. A snakemake pipeline
-which implements this parallelisation scheme is available at https://github.com/benjeffery/tsinfer-snakemake.
+which implements this parallelisation scheme is available as [tsinfer-snakemake](https://github.com/benjeffery/tsinfer-snakemake).
 
 (sec_large_scale_ancestor_generation)=
 

From abc170fea87a4677220b710cc9205d04e417effd Mon Sep 17 00:00:00 2001
From: Ben Jeffery <ben.jeffery@bdi.ox.ac.uk>
Date: Tue, 11 Feb 2025 15:07:13 +0000
Subject: [PATCH 05/10] Add tut todo

---
 docs/large_scale.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/large_scale.md b/docs/large_scale.md
index 8428c2d6..2f58addb 100644
--- a/docs/large_scale.md
+++ b/docs/large_scale.md
@@ -32,6 +32,10 @@ format, read by the {class}`VariantData` class. [bio2zarr](https://github.com/sg
 is recommended for conversion from VCF. [sgkit](https://github.com/sgkit-dev/sgkit) can then
 be used to perform initial filtering.
 
+:::{todo}
+An upcoming tutorial will detail conversion from VCF to a VCF Zarr suitable for tsinfer.
+:::
+
 
 ## Ancestor generation
 

From c6b5e1e53a95a260a33a767ae0bae65f215815cd Mon Sep 17 00:00:00 2001
From: Ben Jeffery <ben.jeffery@bdi.ox.ac.uk>
Date: Tue, 11 Feb 2025 15:14:43 +0000
Subject: [PATCH 06/10] Add more context

---
 docs/large_scale.md | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/docs/large_scale.md b/docs/large_scale.md
index 2f58addb..5586b310 100644
--- a/docs/large_scale.md
+++ b/docs/large_scale.md
@@ -18,10 +18,16 @@ kernelspec:
 
 # Large Scale Inference
 
-tsinfer scales well and has been successfully used with datasets up to half a
-million samples. Here we detail considerations and tips for each step of the
+Generally, for up to a few thousand samples a single multi-core machine
+can infer a tree sequence in a few days. However, tsinfer has been
+successfully used with datasets up to half a million samples, where
+ancestor and sample matching can take several CPU-years.
+At this scale inference must be scaled across many machines.
+tsinfer provides specific APIs to enable this.
+Here we detail considerations and tips for each step of the
 inference process to help you scale up your analysis. A snakemake pipeline
-which implements this parallelisation scheme is available as [tsinfer-snakemake](https://github.com/benjeffery/tsinfer-snakemake).
+which implements this parallelisation scheme is available as
+[tsinfer-snakemake](https://github.com/benjeffery/tsinfer-snakemake).
 
 (sec_large_scale_ancestor_generation)=
 

From 587c110705e1a9ce060baef9d5d7a3e376b17bf8 Mon Sep 17 00:00:00 2001
From: Ben Jeffery <ben.jeffery@bdi.ox.ac.uk>
Date: Tue, 11 Feb 2025 15:15:00 +0000
Subject: [PATCH 07/10] Explain out-of-core

---
 docs/large_scale.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/large_scale.md b/docs/large_scale.md
index 5586b310..4b89958a 100644
--- a/docs/large_scale.md
+++ b/docs/large_scale.md
@@ -46,7 +46,8 @@ An upcoming tutorial will detail conversion from VCF to a VCF Zarr suitable for
 ## Ancestor generation
 
 Ancestor generation is generally the fastest step in inference and is not yet
-parallelised out-of-core in tsinfer. However it scales well on machines with
+parallelised out-of-core in tsinfer and must be performed on a single machine.
+However it scales well on machines with
 many cores and hyperthreading via the `num_threads` argument to
 {meth}`generate_ancestors`. The limiting factor is often that the
 entire genotype array for the contig being inferred needs to fit in RAM.

From 61292ce096cf02ad0a1741b287aa796d0d1a232e Mon Sep 17 00:00:00 2001
From: Ben Jeffery <ben.jeffery@bdi.ox.ac.uk>
Date: Tue, 11 Feb 2025 15:17:16 +0000
Subject: [PATCH 08/10] Full stop

---
 docs/large_scale.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/large_scale.md b/docs/large_scale.md
index 4b89958a..05ce4837 100644
--- a/docs/large_scale.md
+++ b/docs/large_scale.md
@@ -127,7 +127,7 @@ not matching to a large tree sequence it is often faster to not partition the fi
 groups, depending on job set up and queueing time on your cluster.
 
 Calls to {meth}`match_ancestors_batch_group_partition` will only use a single core, but 
-{meth}`match_ancestors_batch_groups` will use as many cores as `num_threads` is set to
+{meth}`match_ancestors_batch_groups` will use as many cores as `num_threads` is set to.
 Therefore this value and cluster resources requested should be scaled with the number of ancestors,
 which can be read from the metadata dictionary.
 

From 6a65530d6ed34c539a53bcc5c57ad63995039199 Mon Sep 17 00:00:00 2001
From: Ben Jeffery <ben.jeffery@bdi.ox.ac.uk>
Date: Tue, 11 Feb 2025 15:19:03 +0000
Subject: [PATCH 09/10] List methods

---
 docs/large_scale.md | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/docs/large_scale.md b/docs/large_scale.md
index 05ce4837..204304b3 100644
--- a/docs/large_scale.md
+++ b/docs/large_scale.md
@@ -78,6 +78,14 @@ human data set, earlier groups are older ancestors:
 There are five tsinfer API methods that can be used to parallelise ancestor
 matching. 
 
+The five methods are:
+
+1. {meth}`match_ancestors_batch_init`
+2. {meth}`match_ancestors_batch_groups`
+3. {meth}`match_ancestors_batch_group_partition`
+4. {meth}`match_ancestors_batch_group_finalise`
+5. {meth}`match_ancestors_batch_finalise`
+
 Initially {meth}`match_ancestors_batch_init` should be called to 
 set up the batch matching and to determine the groupings of ancestors.
 This method writes a file `metadata.json` to the `work_dir` that contains
@@ -177,6 +185,11 @@ could have been matched in one. By splitting 6 and 7 the flow makes an additiona
 
 Sample matching is far simpler than ancestor matching as it is essentially the same as a single group
 of ancestors. There are three API methods that work together to enable distributed sample matching.
+
+1. {meth}`match_samples_batch_init`
+2. {meth}`match_samples_batch_partition`
+3. {meth}`match_samples_batch_finalise`
+
 {meth}`match_samples_batch_init` should be called to set up the batch matching and to determine the
 groupings of samples. Similar to {meth}`match_ancestors_batch_init` is has a `min_work_per_job` and
 `max_num_partitions` arguments to control the level of parallelism. The method writes a file

From d6725ce63d1b612389cc266946be6cd6e6942f46 Mon Sep 17 00:00:00 2001
From: Ben Jeffery <ben.jeffery@bdi.ox.ac.uk>
Date: Tue, 11 Feb 2025 15:24:24 +0000
Subject: [PATCH 10/10] Reduce confusion?

---
 docs/large_scale.md | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/docs/large_scale.md b/docs/large_scale.md
index 204304b3..1ae68d71 100644
--- a/docs/large_scale.md
+++ b/docs/large_scale.md
@@ -95,13 +95,17 @@ list of ancestors in that group (key:`ancestors`) and a proposed partioning of
 those ancestors into sets that can be matched in parallel (key:`partitions`).
 The dictionary is also returned by the method.
 The partitioning is controlled by the `min_work_per_job` and `max_num_partitions`
-arguments. Ancestors are placed in a partition until the sum of their lengths exceeds
-`min_work_per_job`, when a new partition is started. However, the number of partitions
-is not allowed to exceed `max_num_partitions`. It is suggested to set `max_num_partitions`
-to around 3-4x the number of worker nodes available, and `min_work_per_job` to around
-2,000,000 for a typical human data set.
+arguments. For each group, ancestors are placed in a partition until the sum of their
+lengths exceeds `min_work_per_job`, when a new partition is started. However, the
+number of partitions is not allowed to exceed `max_num_partitions`. It is suggested
+to set `max_num_partitions` to around 3-4x the number of worker nodes available,
+and `min_work_per_job` to around 2,000,000 for a typical human data set.
 
-Each group is then matched in turn, either by calling {meth}`match_ancestors_batch_groups`
+Groups vs partitions is a common point of confusion. Note that groups of ancestors
+are matched serially, and each group is split into partitions that can be
+matched in parallel.
+
+Each group is matched in turn, either by calling {meth}`match_ancestors_batch_groups`
 to match without partitioning, or by calling {meth}`match_ancestors_batch_group_partition`
 many times in parallel followed by a single call to {meth}`match_ancestors_batch_group_finalise`.
 Each call to {meth}`match_ancestors_batch_groups` or {meth}`match_ancestors_batch_group_finalise`