From 15e65534e4d9d81999c7f8ee44f6df5c620139a6 Mon Sep 17 00:00:00 2001 From: NunoSempere Date: Sat, 3 Jun 2023 00:41:09 -0600 Subject: [PATCH] normalize & cumsum array in one for loop. --- C/makefile | 8 ++++---- C/out/samples | Bin 22392 -> 22216 bytes C/samples.c | 24 ++++++++++++++++++------ 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/C/makefile b/C/makefile index 94fcb5f2..5e5029b4 100644 --- a/C/makefile +++ b/C/makefile @@ -33,14 +33,12 @@ FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT) ## make build build: $(SRC) $(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(OPENMP) $(MATH) -o $(OUTPUT) - $(CC) $(OPTIMIZED) $(DEBUG) $(SRC_ONE_THREAD) $(OPENMP) $(MATH) -o $(OUTPUT_ONE_THREAD) format: $(SRC) $(FORMATTER) $(SRC) run: $(SRC) $(OUTPUT) OMP_NUM_THREADS=1 ./$(OUTPUT) && echo - ./$(OUTPUT_ONE_THREAD) multi: OMP_NUM_THREADS=1 ./$(OUTPUT) && echo @@ -48,7 +46,6 @@ multi: OMP_NUM_THREADS=4 ./$(OUTPUT) && echo OMP_NUM_THREADS=8 ./$(OUTPUT) && echo OMP_NUM_THREADS=16 ./$(OUTPUT) && echo - ./$(OUTPUT_ONE_THREAD) && echo time-linux: @echo "Requires /bin/time, found on GNU/Linux systems" && echo @@ -68,6 +65,10 @@ time-linux: @echo "Running 100x and taking avg time: OMP_NUM_THREADS=16 $(OUTPUT)" @t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do OMP_NUM_THREADS=16 $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 16 threads: |" | sed 's|$$|ms|' && echo +time-linux-fastest: + @echo "Running 100x and taking avg time: OMP_NUM_THREADS=16 $(OUTPUT)" + @t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do OMP_NUM_THREADS=16 $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 16 threads: |" | sed 's|$$|ms|' && echo + time-linux-simple: @echo "Requires /bin/time, found on GNU/Linux systems" && echo OMP_NUM_THREADS=1 /bin/time -f "Time: %es" ./$(OUTPUT) && echo @@ -75,7 +76,6 @@ time-linux-simple: OMP_NUM_THREADS=4 /bin/time -f "Time: %es" ./$(OUTPUT) && echo OMP_NUM_THREADS=8 /bin/time -f "Time: %es" ./$(OUTPUT) && echo OMP_NUM_THREADS=16 /bin/time -f "Time: %es" ./$(OUTPUT) && echo - /bin/time -f "Time: %es" ./$(OUTPUT_ONE_THREAD) && echo debian-install-dependencies: sudo apt-get install libomp-dev diff --git a/C/out/samples b/C/out/samples index 7ce57e22e57885644e56e7ac13ad49d2fca5c559..66adf472300bad5ef9b101618024db12fa7091cd 100755 GIT binary patch delta 5278 zcmZ`-4RjRM6@Ifn$z}~ZyZPJQKz;}dgn-G0-^3c&i5r~N013$gf=DEgLW>wAQI4e~ zG=Q$K7p6{;9`PKj!B9nuvM~gaQ%?g2EfKW=5jKDj2tQE?)9=l`u_4%Z&dh!H zyZ7CD-+g!P%x>SC!lAzjjSd@$C+hG-2r+FN^P0IYCc^c_Z2KbH?6*ZYzm3KT*>ydJ z2&++Ijqn0U6fSo`gm=%lop$LWe3(;h9b?%#4`1iu>ojWR`kd1J!jYJL5mRIz-JzQ` z{Qr6E;43?5jDAtV%l~*G`I*cW{_fW#*LlzCZPWgoxreURXC|@?Xu%V*UNs$0-_ws4 z9Q35#E}W#2!A`9PH=Sp2&^1Pf(5)g(2DcE!G`1VvR0iC`V4+PFHX3E)4VcQHyLAcl zvT<+Hisg$dvsXW!J((<8RQ<&Al{Hn1DwqA5{yS?u0~x zns6s1M%55-uLQ@k?#9(IVieS3ND8*VFprDz1~Q6vMyJ$8lWdL$w*3KFjw1JP{u!0m zlOphJ2=M(LC_hb7<6Mp^Fp(0jpRj!S5Q5kq=6r23lj~IbY`u2eCdOcNv9u=Q%H!v$ z37X>RbEZPUOg}Z*ov}=nkDghP2`dI0?Kjzlc)BpgPRGT>4yoocmIoaT0(}^0{Q%TR zKLEM~XyX7hntlg#AJC=&Xbep;+lSo6{W8)_ZDu>2XO4AR2B4t=9;8ZK(OM((UN-L)A2DtM z(<{yog}=XrXf86{$D`@Y_!L7009qZN^__#LAlsrh^Il`V}R-1U?TM3+|c0xa#>2DGbC%yRNPokrmXjU>^kW@dltsx}j z9_{`YY*uB9X&YGZYIXT>Xk|E5NBHX>aEO6R@O%t2?jqDHS`!!8H(p=2O!9k!D;24{ zI5_e`8lY_f#Y&Ro8ZfDul*d$P5aHr%7TT`!_va;!+ir&rAoTr>7#p{H|YyQ z)9I45xw<1a=m%+|b-QlRGii(CcV36V{PVS!7NFK(qnlo*3(~W6(KlM2Nw3q5y*=+N z<-MZK>xvDhTvgMJooC^C$k-W2k0hrIyV$Fx=QL&m&!Z`hnun%XKI1S;&$c)^X=p~B zKaQv8f-1{A6v^m(uge6j)<}x3;HhW}{}CGJ+Xt3{G*_Ba6gVjPrNC!+(#zyPFQS|$ z2iCewa-i5{z3sV|b3+@Cid$RoyafxDhCZFRX?PvpK&|I=a-fI#m0M34WZ_Dc+Q~$# zRc;7b#mq0n%)`CnyyN1*!+tTd6`YuOSUh-C0oaDC71u{{en;)6-axpa>!x(QU0*X+ zuIvDApurMB%KuE<`Vrj9l}8&wIu(}k+t)?QFV!)o!B^R@W_B2wkuH4TiUQNtcwAoB zFf=1H2iUS%rJ(ulfVev*4s4%WER z(KOcJ=)Hj&msbv)7n8g$t2eO1%1;B=LwPKEN=W9A>zn!f|nviEj=PF>Q02ddL(k?4{C;X z0>JL|tZT;N?XBj>M!HStO_8Toi#>q7@VDRmc;8(K{(SZNd zqbS{TVV0FgZm2s8E%)}*azGioG4Bxy8YPvT=B?%<%GH)!KYMY*a6J*6neA+P@ib--{?_Bqo8S`BJFqbMQJ4$uos zeinA%$Qo9Za?lXyGEma3C~HB}K^t{sGU>wPGy~2liWQDJE+|R`DEUrN)_^vH?gXvC zuG3Ly9iYo-Ztm#RW`vdlMdFQCKfGL+PQM=|buaAg9kqStb#S|s)*#6AcN*tATg@neNAaUVe& z$D4#|x?}u=>CE^`knZ_bQC`QK4Tik|tOV&|F|)ximcq5b(A3pYg8MOG&z_<0Pna2% zi9}sJLoE|WMX{us&eD4(PS0Sw+6i_UQkl(`$(Hn`7Lyv*KH&A;^nnQ(bu4)H1Hynz zH-nPt7VGo6LW^UozQ~duG!$7fw-_ahdsCET$!mzd-_j`PKNQTCJg>zqSuzVP>Ci2- zSPLzt2ee)1;bdO;N>K=6WcIe`B#UE{-fOAQoike;_o>>*k(YgpFM)F21^WKPLem@U zL~+vevM0W!c|^X+rhfCnNE822CD2#q*r}($D!fNa3({=za0oH*hE<1~t}jRv^5`oC zIl?Y_1oJ9-xgf_@!oNG&2H7Z~xl_~PcJVK0_6^KNIGQe=IupxxPfZgn^vKj-3LYvI z#?z3_Kue}ow;Z455McMR#}4suDN!Fo@<8oEm~n=?$SxC|Eln4!bT@<|Ne8tj6TE~5 z4?VFFc{n{KO?SF;2MgJ+mPF!)YdeCG3KR8q;-Ra+mC!)pQ#LC*TZ}5cofLYq(C%aP zrbE5Bd9G9y+_E@cF$ibB4w=4|TTQ==uDZ|eTMSk}6VQHaXwm>*rJan1|6bKc)cnVZ zhVNmq32KDSHdQBqT+oV}E2%ov?RC^0QIW(uyq;25A`648pXd66z1~vscv9_FCqLb6 zs7Qf0h*l4pARCpZBeC>jugy6cI&sL0hu4;oD@{>va2&cW zUU-(47TJZ(OxyP|P%CQG${ySS<5h0OgHLld)W;N*{TN^oRC9cnDn$=t)||Oas$=ED zVNQ;z@%5;81C^%Rox@Qd%uWwKd98_MK{_3Gi~dUbZ{g8%ULE z4>V4Kjit<2ZXKj7e)pjGf5%rf@>=;Te6X>dt*U!fv&obEbVNwr;&?iKBC?_OX3%#v zecl7Kmrw{e%j)17`=^TEdD2}GJnoK1d}S z5-_y%jR&s&KVw_{=wsC@mafjOB#$j$Q&aoI(w2k=o)hTbAKXn>6{pjk#XITz8TZlG zW-N%-6oi>qgqFTp6@soypz}+!ZM*oBr_n09qcq26;oVxJnY0sV9!E9WOr5iH%wdkA zvCp18*{1b-jh4_Iv+GPfkv$OFI@?aCl-Xjewg_USrDanb>73JO*Fg1m%WQGTL}YMU zB+@U+vd!A#i7p>kzDd9zYvnmMt=DTbL_5oKVzln%rZ)eS7_H;x(Nb_)$Meu_{wY7b o$4c-A2^-C{)2|2>v~$jsm?mEYn@OE>rds$xM?xA1V}cmcAd-ejOoeW!)&Ri}jPe)cXNW7DfMj$% zLWgaFl+Z5tU?({1oH#R%;vSti8F%rFGjt?`fa9LPsE4!eto$f;^CKY=2s@}JRp|^#7Zf8>#(MF{X$dUcUuiKBkR1N>o}xGh10ff zuA-YBGA)|$k1WRd$~d*9tjd1qcdnB2?SQBE$D8O{^3 zLsOkg-%Gh$D5d99T*9|hGP|h5Tq?Y$%__}qAb5_%W%Z`DWNYKbb@b`f zM}+zGzf-F=5EFg`Q8N<@0%gKAO{1|qn1~gB8?J=dF=IPPA!`6xbSv@ioAd)SzEkA4}^EqAF|8K}j8mZ;@^BSH)Rsv^(+35k~StMIMjWz;L z8o7LSW8+ws*k+D5=P|xX)6do$<-t5>qb3`d9Q?gJ{|3X6O*5@ko-s^_kN6^e!pbIt zZnV0}*UlP?$@3pzcrW>XK+vyIQi*B%`)Au3UktFM&QOG)zWMlL^WP5Gt9)9a- zyuPG0Ih{9bwU(D06Rmj<5Hg@nzKc$AT%Nfh1Cj#?K~>d#gW~wVY!Eg4#$OGbWosaD z-oUZ&tqOYBxh#y?N4NpSf&d|GCGg{;@ZXQZ-yMa&H41-i6#mL6{9qCuW(K241ctYc z!ncjWo0D;AiBGMQBUk0M@*eZ#BJ5LE0)=J1SbRI8AuA{1O9A6PhO8Nql{<3RS1Iv- z0jga7L*@J`5%MQON_V^iSrdyahw}-Ml(-zV{2Ij~N5{4Ste3@2y>j%qLWite5{u1+ zjL-#{4{;C4(Ht{*nCi2X~}|#LGiIz z{4(;Pe9dJ0(&^`!`4%5b&`Vjlb+?c}*bZx;ycc9Or|O4lj+Rl{kL4qgL=yVAU8RNnfw(le>2&7Yx@SNE)d&NE5=dJ7IY5LCB&AC^dq}!H>_08;-32A$uX}}4(fh&<|G>7!sH{f5 zHT~ExDKUA?DY2+m>WZ7>Xmyl`Jts-1>zp|x^lAo0r%jG@Ija3#&EF_pk@LgCDSz+Q zB6-aR%yKJdSoPmQZI1aY4#K*T)m8HU}7ekXupK%Z8 zvjX5dxOPj+1rk36|3@wBa_H*({BeH(Sx?q)bpcj@5OCy7&7m*n=V!de$t+nDa_I5= ziT?9w9NI1TzZ$QoS6}j2pwJ4bEdTS($ZRHpyKV%(5_G7!)0dPz*_|?-h&s#Gbyo!LV45g!G9yKx4>d{VB$k{e8`U z3Mlc&xnaqFT--ShmgF^kV5QS&jUf3y+3rHnBQYcaB*<(}liS!lQmx!Zh!?cQ_uWK| zz%2xCK^qQ5ahC!ua8E2c?Tf)VvF8kt!|+fnAfm&ZBCh;E5}jryc$g-XAFMjJAl&u7MVJNKhz+ECm z>;9^psWJ*nyN4yQ-n>(@vN!II>2@wU3YdFJ3pkA(yA3`59s(KiT^1U8c9z$&~Egz zM2=Kv50Qiwt-z3vHw}h0H(1YH$Cl|y(Q}g6hqc;9*)A{oPO=>~w_}$tqqI&0yWt37 z8`U`tnh`Z zt_5@eJ_FctNmY9RE5B0JZx}xgJ-Crt0G9*SB=GP6NG_}DF99n7kC;dWX~Lur1n(79 z9cBzb2acWouT`}Yu;aR_9$+|N46q$RtAs-iWMeli_T4?+eL+>HK{E-YY0v$InBIqf z$N#ad&`rLH!W(qI&*f>y9v#d#KA7}g{M%@kFUxZPYVc5`jcQzfi~l~*o}kx!6&XSH zsLvkY%uvul{nPI;1-_)Sr%$w%fw&yRvvmLT8R1f3ZNSDM9;YxT^D-QJO;Uz?S4tqm z8!^wzC^h}up5X;W%5YR=SVLp9x0OF)%@M3Q%+_Eq-_C)y1o32>VFPV}u-LYMR>BB! zhId_tRdXvHp=!TO&Ln#fI@$arzrv<7u|mGlOwWtUNfGgKLayu_5P~%&H~d zn9-(rW)Is%1J5Kca5C4B5Nhe>d9M8R42N7lKLfPm2--Irau#+VYuXq5g3Io^OE+&E z8@~tL!R|bE9wd!ZT)v?|C%(gZpBoy5VdjH{o#a~7LIs#PO&($$rD-ao!%xC9Y37>d zfX=*c;!(Sg(`C9tb_tKMt7jvZaZJ*g>$uD|#tn}F*Fo}#dz8ifd{qbQdl>KQfE?p{ zGQK`)Jq5g140#?K?f3-!b*|qrN`I#=pGg|{NuV8#Sa3vh{R~|U*FWJ(8@ym(yawZJ ze5tv9i*uWJ9-h^i#DwT*LE))4`qdcFS#U?Y;?n4Uf1BL%59qA1dI_POCMQBHg1*_-Es)fTi+{{Ag18NI?PFrrQ>!;NJ-P0wk~L&w<1<^Ex2#*WzVkPs7lhp8 zFN~Fy8s;l0rE?zojUAsISO4>>^^H$HNv}RM!OXrq(*6hYsJr@*lYN1Ncy*|1?yBh} z(@SR3j_QDIL^mC*Uf%i0f*S&jnJk_1q6WcKWTJ1@lsNnOZEnCYP1H{e+ z@GN%?#|&sZh-l(zwa!eVS`0YZNvG77*o|`m?c?#FOo3Y{n-`FMSo5ag-L(^Gjxpw8}Ep6)<{fah)o}Xz|inHu9iOJ473o&axj`sAG8o JePYr0{|DkYqLTms diff --git a/C/samples.c b/C/samples.c index 0a16ee9b..08020ebf 100644 --- a/C/samples.c +++ b/C/samples.c @@ -128,19 +128,29 @@ void mixture(float (*samplers[])(unsigned int*), float* weights, int n_dists, fl // You can see a simpler version of this function in the git history // or in C-02-better-algorithm-one-thread/ float sum_weights = array_sum(weights, n_dists); - float* normalized_weights = malloc(n_dists * sizeof(float)); + /*float* normalized_weights = malloc(n_dists * sizeof(float)); + // float normalized_weights[n_dists]; for (int i = 0; i < n_dists; i++) { normalized_weights[i] = weights[i] / sum_weights; } float* cummulative_weights = malloc(n_dists * sizeof(float)); + // float cummulative_weights[n_dists]; array_cumsum(normalized_weights, cummulative_weights, n_dists); + */ + float* cumsummed_normalized_weights = malloc(n_dists * sizeof(float)); + cumsummed_normalized_weights[0] = weights[0]/sum_weights; + for (int i = 1; i < n_dists; i++) { + cumsummed_normalized_weights[i] = cumsummed_normalized_weights[i - 1] + weights[i]/sum_weights; + } //create var holders float p1; int sample_index, i, own_length; - unsigned int* seeds[n_threads]; - for (unsigned int i = 0; i < n_threads; i++) { + + // unsigned int* seeds[n_threads]; + unsigned int** seeds = malloc(n_threads * sizeof(unsigned int*)); + for (unsigned int i = 0; i < n_threads; i++) { seeds[i] = malloc(sizeof(unsigned int)); *seeds[i] = i; } @@ -153,7 +163,7 @@ void mixture(float (*samplers[])(unsigned int*), float* weights, int n_dists, fl for (int j = 0; j < own_length; j++) { p1 = random_uniform(0, 1, seeds[i]); for (int k = 0; k < n_dists; k++) { - if (p1 < cummulative_weights[k]) { + if (p1 < cumsummed_normalized_weights[k]) { results[i][j] = samplers[k](seeds[i]); break; } @@ -161,11 +171,13 @@ void mixture(float (*samplers[])(unsigned int*), float* weights, int n_dists, fl } } } - free(normalized_weights); - free(cummulative_weights); + // free(normalized_weights); + // free(cummulative_weights); + free(cumsummed_normalized_weights); for (unsigned int i = 0; i < n_threads; i++) { free(seeds[i]); } + free(seeds); } // Functions used for the BOTEC.