From 2a9d3bf13585c5378e5b1bddb77106ee86719d29 Mon Sep 17 00:00:00 2001 From: NunoSempere Date: Sat, 18 Nov 2023 19:43:10 +0000 Subject: [PATCH] factorize paralellization in C code out - Conceptually clearer - Allows for composing multiple mixtures together - Considering incorporating it into squiggle.c --- .../makefile | 0 .../notes.md | 0 .../out/xorshift-pointer | Bin .../out/xorshift-struct | Bin .../xorshift-pointer.c | 0 .../xorshift-struct.c | 0 .../makefile | 0 .../04-factor-out-paralellization/out/samples | Bin 0 -> 22560 bytes .../samples.c | 95 +++++------------- 9 files changed, 23 insertions(+), 72 deletions(-) rename C/alt/{xorshift-scratchpad => 03-xorshift-scratchpad}/makefile (100%) rename C/alt/{xorshift-scratchpad => 03-xorshift-scratchpad}/notes.md (100%) rename C/alt/{xorshift-scratchpad => 03-xorshift-scratchpad}/out/xorshift-pointer (100%) rename C/alt/{xorshift-scratchpad => 03-xorshift-scratchpad}/out/xorshift-struct (100%) rename C/alt/{xorshift-scratchpad => 03-xorshift-scratchpad}/xorshift-pointer.c (100%) rename C/alt/{xorshift-scratchpad => 03-xorshift-scratchpad}/xorshift-struct.c (100%) rename C/alt/{03-factor-out-parallelization => 04-factor-out-paralellization}/makefile (100%) create mode 100755 C/alt/04-factor-out-paralellization/out/samples rename C/alt/{03-factor-out-parallelization => 04-factor-out-paralellization}/samples.c (68%) diff --git a/C/alt/xorshift-scratchpad/makefile b/C/alt/03-xorshift-scratchpad/makefile similarity index 100% rename from C/alt/xorshift-scratchpad/makefile rename to C/alt/03-xorshift-scratchpad/makefile diff --git a/C/alt/xorshift-scratchpad/notes.md b/C/alt/03-xorshift-scratchpad/notes.md similarity index 100% rename from C/alt/xorshift-scratchpad/notes.md rename to C/alt/03-xorshift-scratchpad/notes.md diff --git a/C/alt/xorshift-scratchpad/out/xorshift-pointer b/C/alt/03-xorshift-scratchpad/out/xorshift-pointer similarity index 100% rename from C/alt/xorshift-scratchpad/out/xorshift-pointer rename to C/alt/03-xorshift-scratchpad/out/xorshift-pointer diff --git a/C/alt/xorshift-scratchpad/out/xorshift-struct b/C/alt/03-xorshift-scratchpad/out/xorshift-struct similarity index 100% rename from C/alt/xorshift-scratchpad/out/xorshift-struct rename to C/alt/03-xorshift-scratchpad/out/xorshift-struct diff --git a/C/alt/xorshift-scratchpad/xorshift-pointer.c b/C/alt/03-xorshift-scratchpad/xorshift-pointer.c similarity index 100% rename from C/alt/xorshift-scratchpad/xorshift-pointer.c rename to C/alt/03-xorshift-scratchpad/xorshift-pointer.c diff --git a/C/alt/xorshift-scratchpad/xorshift-struct.c b/C/alt/03-xorshift-scratchpad/xorshift-struct.c similarity index 100% rename from C/alt/xorshift-scratchpad/xorshift-struct.c rename to C/alt/03-xorshift-scratchpad/xorshift-struct.c diff --git a/C/alt/03-factor-out-parallelization/makefile b/C/alt/04-factor-out-paralellization/makefile similarity index 100% rename from C/alt/03-factor-out-parallelization/makefile rename to C/alt/04-factor-out-paralellization/makefile diff --git a/C/alt/04-factor-out-paralellization/out/samples b/C/alt/04-factor-out-paralellization/out/samples new file mode 100755 index 0000000000000000000000000000000000000000..7157dc0bada9100dee55a618703db13977e02f3a GIT binary patch literal 22560 zcmeHPe{|HvwV(YFSS0Lssfmq0TK3tcja5Px(PkAS3mf>ZT_8$$Mn#vHY)IPt>SkAh z&lhP^DBp%feCM>qK6|w9^}N#7_lmDSeQGtD*JKvej zcQ>21&*S^6nR9mU+&lN)nYnZ4&U|P0I}elvmYPi_CXu0@Jx6-U+0rhmZ$0~*2`H2EF=RUNobU#qT+?Tm=!Nihl9c{aRH?qp(Ix-nKEW>`)j?_1@kq_bsjpnfZ4q|NgjO;* zQVQxQy9IK>)PnMtfgI*rMST;EL%*=6gra&jx1s)?>*vpHsF~AH-xO_|(^hi*oa^To zwKf;c;}uiisSfg~Wh+)QsOch_Fe4v}Jb_dBAKv)X`^RTL`tbT6bUuGNoV@wT*FSnu z>YzG_hZ4z)CUKnd7vn=bdjG%9=|rP65PGgr&t?C<1cG8Jr!l+RThBaK-W^4b4rVimH1WLM#|u*VxiZfBf+4~nw7Dkc^zy0S~yb6kgH{(ww7AfTHi#~tZQy;p_<$bnc%umB-mKh7L3$| zLsd1cDVmN4o1%?u*~;bR!Ir9URYOClA;o}_ln}I5i^d0A!u3s&+F*6vSJ7HDwEC-* z57t)IH?Wpyq`Iyu%o?Gpxfor!MbcXY z-BwOgW9x*(kF3<`6pLi6Bg&wGev!_?zvBjau7N&mpwBeW&l>15#!+#dpwk!#lW?gP z1Kk)m+YEGyvodWo(D{Bq6l^ij^*E~+Y&X!UZkcu%=+bsn_LzY#wjx|)r-4o}R;FDB zItDP6b{pvS6cvk%fvyu?zb*rxlY!69 zz(13L=V#zEGVq)X{5*~WjO}?Z1Aiw2Kc0cVl7YWq#Jv@(;R1yiWQ0>H1rHd8j*Y>E|zW6B^UON%c-c(}?S78w2uVO3bKHxs2 za?(cl#Xg^lZ%4ESS1$54k`8DtSG*P4o+iiHO2LOzn7MxV$0| z%Xt%9A2l|knS+Hks7&S`Tfo@q?P&Ql*g}ovmMyhX})Y+NX0CAHO#&+ zOj%Vs9?4ZZ7v2jbzTtc_8LvST`*IqAo9=>as(zABtVf|}58<`SsV<{_STU#W3f^Y! zzutfJuErRzQninKo$GSrJ5hnQH+~C&Zl5;z@cZXI1IpGMOk3sW`=RZSX}}lDwfQ^; zl-M>5Pv;WEhZ`UEzJCsUv7dnA^?;+&q!hm7RaOj?X|I%NFMGS+F?&1SIH$d!RuA~Q zk6Qz=yg!v~I;w1Y5E9Ef%^hdW%H~ew16t3QWAh7@!ryOK9A>qn2SZ-DUnv|^`UY1c zdyumEz`BxzQK9M?iT=^29V_#^9=Ter9vSs2mAShDvH8EE(ed=RU((TI9$lhT28yj- z&mY^T>Ump7w+Z3{{*E4-C!uU@gyp>G*0v`hrS0`kU_CeG@ng_J3>tOFqw$$Q(oa^LYDXVUwrtpYvrG99DEl z?}5XniY`#I9}J=WovR#b?8}Z?rRjv$kItQ_RoYJuD^IvRJ<%0+VlH^^^4=}|0V9;i zq307X?>n+8tANXIjFvlxXEvNTh^*t8e9dUK5%X7YIl{n z&xraOQ}w;)@95+8m9gcX3(+?+Dx|6&c$y-Z95ipRSGWDM6wA0in%~|E#IB3aAsEN5 zqw@m4c5Zd8sx);!O`#P7pr(cH5_cEE*(ipCnEH|b6NGA&h91I?C|4oGQmzlAUc#L5 zUw?o>>&$a0g*kT?P&nF)7&7A;WR;mr>!$$IJvhVNqxE;kXL2x@<+Wg@)x;%GFTLM!6d(m%E#`1B4c<+Nt=zo<+Z4>yojkPHY$q zSVpDD&h4<9_7qqJk`;RpEM}s3s9Tv;%G_N;34(kT8G1Fo73vHg%hv-~sGxHyI&pwH z0_FKD&`7PP``8S3KfsBZ930CbK%MHMk{)dUee3B?%yd7a4RpVj<9>$9p3y!%s@wrB zw?Bh+1YEYb2UU*13T`N+N=8tu6dqBE_KzyJ90k)FSMK_yyiYc=){B?WLWiYoEwxoC zq=*ZBcc4#D(@o;U_G`{one*R(FhxP)q4%*aoWs}9FUF=M`N#PFH9_VR>)-9DlKOWG zl>ROK`ygNRZzBXJ?%%7Ntup5&K;FM6&Y*u+Wa;0gGwpTW)xKC$(u>7>_HqcU#-?PI z`zU&D)8~oy5?K*jk+D4mne+!NegbM$?TFgZYw~;cMh~Md+tai|)Ms7dizpwnCof4{ zLbTNU+jK_RwiN=grAfYoX~=6Ms&^@;uyN_8_(}$d2p@7{*>shx_IH2?m(0bNG z*?eFJXg%v!3cHD@ditaN+Hl9~KkNChRC%<|G>oeB^@IFv(;;QskH`kYv=aEW3$(rPYR}UK0lN&$ zHU#b5O^4d=^Ln25#oA1M?L4*#&o8e|x{vz1-!f}2z__};fR?0zLHv!o-%BR_TG?68 z(ESOo=jHaR0@c_ws?`uVj;2fgQ*j$-w7yD13dRS9$bAekl*fgmLtO-oog-Y2rxZa~ z!L6>ukYZP)#v1ksKf*o-YHe}rX?TriD0=En1RX@7#ONsPxwt%fSm-{G7IU#(rZvcY zNQ<~^wENW-xUf}hhabruaQy2~$)kWHV;7r)_80cX2T%i#SDjP!xtktt`?aaP`u>*3 zvaYQ2n@2AVqkntOSFqXb75YSwnOKAbJP_cb-MwS;A$xqW6YD1vKXcP&~BH;N4>Uxcv@|Dr@#Jf`y>6M_-4=?12X5PvK3$Y6R~&g%RN^0(K&&4txS@ORg*a>btsW4r8I8 zz%-@oy>_u_E0b9w~VtpNa4S6hoXx35w z6nxC86!xm75wf%5aENt~?5FfSL+c(D-XzJnoYHPvdL}jJ@tNLo>(@YDXWsaJn6vMn~XRwC%n3?SS@vGH;Z(Gncl& zJKhls18sR7av{^>4H*|!&DKNH$L?XrIg|1rJr$z;%BBs-P%lz_C>DK4d`T}k>c)Py zHxa@^iS8f#`45Y_T8(ED+KSQn5H0+wO6ymd;*Q^K5vQ4avv4@xqvnj&`$cax4g-`KhuUs^}G^2fkxZLT5R0oQ2eHoTM@@8 z#*33&YT;y>w7{eVCM_^&fk_KYT42%wlNR`Uw*dW*u|5)NymNNVUCxEh*|p-&d#j?2 zg{>_O^^sszI9#o4SchRZ)KfEdLH&3ICE!zI=TT7WM;wPSG4+a*ktF0v&Dx=h8y4-Rj{ik;pkR5+c zCh32WTLzQKwSYSS+W^~MOD6O1GqZXunREg#c^wZVfGXgR04o9MtaIn_Wb!Ov!CQDt z!0!f?@no_Euq2U8ZUrpBHhU-F4!}16)gkZ$ZUM~2j}toqoq%=N!bb=Pd<@Wr$BW+r z76SGIRsy~ONY8I)0qgKUmWSt=oq&aaTkuHj1FXZt!dk-d*z!#($GO#hz#}*iET#V7 zJaY^E2Lg%yGfv5L->uBl<}h8JJJm*gBszZ5Wlq>&E;0C~;%tU77e4gQ)yh|s$zdYc z9ZT&6H!IWE+cvUAm)-EiYh70ondH~t9>J#o^g%$Gh-WK4kD)&Ldj%z; zJ%-OCpvnI#E&%>Ze0G4g7&c@}?T&5c#rA@jWwG74)#|kuZqD)Ai#w)Rj+-vB7kceZ z6fU+q7Tawq{+FRb6p6AzSdF-JtZJP zHCR&Gywm9+tQ0y9zL8AM7rM5ZZ?zX}wydzPHNSX~y}-*WUOJuOr!7Wj8H4bZ!^kt$ z?lkvYWVe;+Wm{18qc_Kwm4KCetq#6*AIaTpcf`yv`Bn=wY_|IBg&jFd?Zpv$;nH5a z)7yv69z>&gU>$q2eKEqQ5evP~jBZWN<-Lyrha zq-`=yT42%wlNOk?z@!EKt}P(no09KI(GpK76V~bE6#XfIUJJePSRe<&z)o*fmSm+{SY=kamEE6}o|E!(gL-Dcdz8|@)9F*0oCoQ2 z8C>6?^K?39y&NAyfd#YQnU!v5jx6J7VG46*r9%n+k&~G$ za0VGyjAa&LE;ds1yajKX%lGk(r52nLY;49cK*$&{a6fJR}M=*9T6BPD(& z0Z7lE+4L+0o%s1n`pCLE6QZ_!-&c^?A!M?(f1-Z9#`WYemwZqciYC%S#o6?q;&S=p{Q(^`VRgv#hiSay zF3>?AAb88U9%jXgM#i93no8H0n6bD^p6OB zhiJcCZ>OQ1WS?Q7hx~)mO5!({=&`~Nv$coT zMW>K8f-i9XOPO4cW!Ze98{#&OXn0xzI``+xbzGhymJ9l0Vgts{L%|Z{be-4u9Txm+ z#k>fK!f#B#e=+C|#H)ggIQ(6qr%P;P4+}lN7j~2P3dwQ&H^D#b)EW8N9I}rK`gSqy zav%7ls5f(7^a_4CnfQ4b^w2&a+x$8v_>1WW2c#o9V8fu3ofj1A1Zn3{$uHW&&p^Rq z0R!3J;newG)&Zm60kYLQTkt<7HuC%o88WvC{f;Yj1^oOGS-0Tt7xO|sFWdn7O!N5o zu$0rs`~Mvi(3?Sb>Q$zFYoKRq{|BO-PT_Cz9Q%iyA8|#b&Ex1agH_QsRt>EUp-8BvsHFIMcXlSaYNTHL;1z0$gg4-Vk*dZ} zuqN8rxB(&tEC^2d+nCPUTwB}Fj2lu!(}KaJw|bYC1bsQ%mjzZX_6CA0mo8mZRuQc5E)JBDR(>H$b!(KjoL`4hT57!h zL|mwnPBa>v-=OlBZj_;$VGQD5V!=>NRiuiAtD0(p)Z++?hJ#JbVcgl0y8C79a+uOm zc`J*tR=QNCA=I=kQpfc)HwL3k^|jD}E~Q@yGgkHdly<{)G-K)wz9M`pSgbd)I2dUT zy6K*qF=g{Ss7Z`i@ZKu;NT{iXUxj1fL0>a(oH3N*av69UOrc*$ljg?!`kJapDAi=c zWjJF^9&15uw5eKeLgocN!J5|QU>$5tm;3lvqOO|yreL%+RKxTiW~-x(m^8A>jrDDj zXgH)_?UUKql(UfUiaU1FdGtw%`+k}>$bpG8vo_43y87D4we!&9dsG9dZy1tniF-f0}`)$$|mnB6x8B(3lFkyhraLzc5I4kY+A!JW5R~!x#o5;tC{5 zBdbax-IneD_tfeOi>$Jj~b2F5T!Sf6}4_?M8$xSuny~F4)+{|!Y!<* zsW}oVTGtdUY6&;Dgu;;x2GTvzdfYKqUn5A~#r`>ws&$OB*HyLFv7(v{O{hYLk+5EJ zZz$Ya-`tdj1W^_aHB^y+$h9;?SP>uWB4mr!H3OwgYp9wPMM7;rDdK^P@RZz zbv4i?Fe{Gbn6!H?fB%^>>?$VMUm{DGyV@T*sFM>(DGNnD=CNVrSLQ~Rl%vi*Y0V#tt9 zrMx`1k+4fJ%KD|e?Em+K{9?f;&vzu0=bj`_GMVx}1dVJW^<%53CjrGaN)X47|05_d z$~%Regu^MxG_|Q}ew-mM&y`Z}ugLGpkay111tc8&EAs!7A)ne*ip}K<8RsLSdnv6? zXUNO*ED7a)SPEp$pS?m}j(_Uht5}!VA_WD^tpC{zd3io2p*6$dGwVN^AurF-B;1i9 zApJ)+>o_uarHA`A(d4;~yw5U|U*ZzJ3mL^AFVE#l7Lv9xf7P)uT*{wE0r{>7d#Upp zc|WN`7?$QAB`GKE_%qPRL{eU!^OY|mZpL=$AoVTfC7c3bqkQUlyi^yF^+*L$PU5)` zCYr2YJ~ui_b&gDas>_&Wiu#LPU(B0frtc=iwvUBjXR9 lgUGh)68d-B%{rw2kr|V$SJG&@W|J>nsdHSGA&`Nx{{?iVj!FOk literal 0 HcmV?d00001 diff --git a/C/alt/03-factor-out-parallelization/samples.c b/C/alt/04-factor-out-paralellization/samples.c similarity index 68% rename from C/alt/03-factor-out-parallelization/samples.c rename to C/alt/04-factor-out-paralellization/samples.c index a130045e..f07468fb 100644 --- a/C/alt/03-factor-out-parallelization/samples.c +++ b/C/alt/04-factor-out-paralellization/samples.c @@ -152,7 +152,7 @@ float random_to(float low, float high, uint32_t* seed) // Mixture function -float mixture_one_thread(float (*samplers[])(uint32_t*), float* weights, int n_dists, uint32_t* seed) +float mixture(float (*samplers[])(uint32_t*), float* weights, int n_dists, uint32_t* seed) { // You can see a slightly simpler version of this function in the git history @@ -167,68 +167,19 @@ float mixture_one_thread(float (*samplers[])(uint32_t*), float* weights, int n_d //create var holders float p1, result; int sample_index, i, own_length; - p1 = random_uniform(0, 1); + p1 = random_uniform(0, 1, seed); for (int i = 0; i < n_dists; i++) { - if (p1 < cummulative_weights[i]) { - result = samplers[i](); + if (p1 < cumsummed_normalized_weights[i]) { + result = samplers[i](seed); break; } } - free(normalized_weights); - free(cummulative_weights); + free(cumsummed_normalized_weights); return result; } -// mixture paralellized -void mixture_paralell(float (*samplers[])(uint32_t*), float* weights, int n_dists, float** results, int n_threads) -{ - // You can see a simpler version of this function in the git history - // or in alt/C-02-better-algorithm-one-thread/ - float sum_weights = array_sum(weights, n_dists); - float* cumsummed_normalized_weights = malloc(n_dists * sizeof(float)); - cumsummed_normalized_weights[0] = weights[0] / sum_weights; - for (int i = 1; i < n_dists; i++) { - cumsummed_normalized_weights[i] = cumsummed_normalized_weights[i - 1] + weights[i] / sum_weights; - } - - //create var holders - float p1; - int sample_index, i, split_array_length; - - // uint32_t* seeds[n_threads]; - uint32_t** seeds = malloc(n_threads * sizeof(uint32_t*)); - for (uint32_t i = 0; i < n_threads; i++) { - seeds[i] = malloc(sizeof(uint32_t)); - *seeds[i] = i + 1; // xorshift can't start with 0 - } - -#pragma omp parallel private(i, p1, sample_index, split_array_length) - { -#pragma omp for - for (i = 0; i < n_threads; i++) { - split_array_length = split_array_get_length(i, N, n_threads); - for (int j = 0; j < split_array_length; j++) { - p1 = random_uniform(0, 1, seeds[i]); - for (int k = 0; k < n_dists; k++) { - if (p1 < cumsummed_normalized_weights[k]) { - results[i][j] = samplers[k](seeds[i]); - break; - } - } - } - } - } - // free(normalized_weights); - // free(cummulative_weights); - free(cumsummed_normalized_weights); - for (uint32_t i = 0; i < n_threads; i++) { - free(seeds[i]); - } - free(seeds); -} - // Parallization function -void paralellize(float *sampler(uint32_t* seed), float** results, int n_threads){ +void paralellize(float (*sampler)(uint32_t* seed), float** results, int n_threads){ int sample_index, i, split_array_length; uint32_t** seeds = malloc(n_threads * sizeof(uint32_t*)); @@ -237,18 +188,17 @@ void paralellize(float *sampler(uint32_t* seed), float** results, int n_threads) *seeds[i] = i + 1; // xorshift can't start with 0 } - #pragma omp parallel private(i, p1, sample_index, split_array_length) + #pragma omp parallel private(i, sample_index, split_array_length) { #pragma omp for for (i = 0; i < n_threads; i++) { split_array_length = split_array_get_length(i, N, n_threads); for (int j = 0; j < split_array_length; j++) { results[i][j] = sampler(seeds[i]); - break; } } } - free(cumsummed_normalized_weights); + for (uint32_t i = 0; i < n_threads; i++) { free(seeds[i]); } @@ -278,17 +228,8 @@ float sample_many(uint32_t* seed) return random_to(2, 10, seed); } -int main() -{ - - // Toy example - // Declare variables in play +float sample_mixture(uint32_t* seed){ float p_a, p_b, p_c; - int n_threads = omp_get_max_threads(); - // printf("Max threads: %d\n", n_threads); - // omp_set_num_threads(n_threads); - float** dist_mixture = malloc(n_threads * sizeof(float*)); - split_array_allocate(dist_mixture, N, n_threads); // Initialize variables p_a = 0.8; @@ -300,10 +241,20 @@ int main() float weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 }; float (*samplers[])(uint32_t*) = { sample_0, sample_1, sample_few, sample_many }; - mixture(samplers, weights, n_dists, dist_mixture, n_threads); - printf("Sum(dist_mixture, N)/N = %f\n", split_array_sum(dist_mixture, N, n_threads) / N); - // array_print(dist_mixture[0], N); - split_array_free(dist_mixture, n_threads); + return mixture(samplers, weights, n_dists, seed); +} +int main() +{ + int n_threads = omp_get_max_threads(); + // printf("Max threads: %d\n", n_threads); + // omp_set_num_threads(n_threads); + float** split_array_results = malloc(n_threads * sizeof(float*)); + split_array_allocate(split_array_results, N, n_threads); + + paralellize(sample_mixture, split_array_results, n_threads); + printf("Sum(split_array_results, N)/N = %f\n", split_array_sum(split_array_results, N, n_threads) / N); + + split_array_free(split_array_results, n_threads); return 0; }