From 03421f953b6aac6086c4a6531ce87fdc0d78faad Mon Sep 17 00:00:00 2001 From: NunoSempere Date: Mon, 29 May 2023 19:55:57 -0400 Subject: [PATCH] add one-threaded C example --- C-optimized/makefile | 13 +- C-optimized/out/samples-one-thread | Bin 0 -> 18008 bytes C-optimized/samples-one-thread.c | 183 +++++++++++++++++++++++++++++ 3 files changed, 189 insertions(+), 7 deletions(-) create mode 100755 C-optimized/out/samples-one-thread create mode 100644 C-optimized/samples-one-thread.c diff --git a/C-optimized/makefile b/C-optimized/makefile index 38136a5b..75336782 100644 --- a/C-optimized/makefile +++ b/C-optimized/makefile @@ -12,6 +12,9 @@ CC=gcc SRC=samples.c OUTPUT=out/samples +SRC_ONE_THREAD=./samples-one-thread.c +OUTPUT_ONE_THREAD=out/samples-one-thread + ## Dependencies # Has no dependencies MATH=-lm @@ -30,24 +33,20 @@ FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT) ## make build build: $(SRC) $(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(OPENMP) $(MATH) -o $(OUTPUT) - -#fast: Has been removed, compilation of "build" is very fast and it outputs optimized code by default + $(CC) $(OPTIMIZED) $(DEBUG) $(SRC_ONE_THREAD) $(OPENMP) $(MATH) -o $(OUTPUT_ONE_THREAD) format: $(SRC) $(FORMATTER) $(SRC) run: $(SRC) $(OUTPUT) OMP_NUM_THREADS=1 ./$(OUTPUT) - -multi: $(SRC) $(OUTPUT) - OMP_NUM_THREADS=1 ./$(OUTPUT) && echo - OMP_NUM_THREADS=2 ./$(OUTPUT) && echo - OMP_NUM_THREADS=4 ./$(OUTPUT) + ./$(OUTPUT_ONE_THREAD) time: OMP_NUM_THREADS=1 /bin/time -f "Time: %es" ./$(OUTPUT) && echo OMP_NUM_THREADS=2 /bin/time -f "Time: %es" ./$(OUTPUT) && echo OMP_NUM_THREADS=4 /bin/time -f "Time: %es" ./$(OUTPUT) && echo + /bin/time -f "Time: %es" ./$(OUTPUT_ONE_THREAD) && echo linux-install: sudo apt-get install libomp-dev diff --git a/C-optimized/out/samples-one-thread b/C-optimized/out/samples-one-thread new file mode 100755 index 0000000000000000000000000000000000000000..b7e78ead09e288d9ec110400f3ec4d1923a9d203 GIT binary patch literal 18008 zcmeHPdvugVmaonOnpb~~5{<8+#oz{%CgEi=yFhnH;ENkH@=);AG)af#n3tVy0oP?S z6N0wTE*@uAXMGIgdXBn#<}AC;>@c%COF|?IjLZo->tXJ+TQwSf zxj(5-=u@Wru~HYxD7oaf5LUf4O3z9wl{{0bgVM_5q540hzFMW%>5^uImA+!Yf+^Q` z7xbuKe%Hjqyh7ENZ5*ueX;T%I0xHkB=ywP2pRqs=`>N|0Yv z2gTH~J5~z}OAleH8|7F`Q9P9&`t;OI`yRe9|G5d%%Rjih=r3M6{@EvNgX$z1N~F&r zK|JNj4#^np{|0gfM)XMF=Pg9DrLj=FB17SS0n3C>0Pe~l@5>_pLKZxn1@F&-*Ji=* z&4PcK1@Fm%KamCh6W}gI1>RhNg~CAh>GzlIFJ7z$*Xx=78||Hng<0`Z_}XV94h)L^BFDw`~v| z4+ldH0=Whe*wo%2I+|Lknhk9&?Nn2#K*qNr5b}i@g8_fNuQl9~f{<5mhgf#!@><_| ze=yh-2$F$1<)^Pb*wh+o@YOXwBszlr)_S-H&*~l`#Mj_&Y8LI`P+g-xC|dl@&24ok zuiNDJH8i#Qo0}e`){r&TnifCYYX}AcMt!C360xkNx?+iMj(d)K{&2p0ID1)(wTRa`LXMemwFOJT zmAn1Sz zS3_s_DP_3in;oGpaj-4n`54(MHP+WTbFlk;Z+ zP9&$_0-{Y7Bu~#hA`$=ZWHPx^^0Xl*;zv?EZN!OqZ;Gb}g+%f}FAXrF#8a+DD-Il zc?j##N&f@Vplhwi0DXnLHY2Zy_+FH~iU6T}tU+sjQ=3s)iP?YGA z6MFQqBBvhd(<8m!J)R0p#P%Q_^vIwdeTJ%R&`$R>xV5QUA&zRCcD<{|sTKcHD?V~Y zTQjKjA6cyx?*pe59MSp@CIR;0vk#wMy?kHzupXU1ZW8>bM(&}nGungse+J}?c5hGY zXP1*neLGrPug^8w9!3psdP;ll>x(Typ{eev9vL5{r>Y}I9@HZr>RlfV)vl_E<}W}* zqTPEDsCsnaYtTv-tiM%=FLr{BAcIc&CXw=Up#n_BMU=->9G5(%P3?qBqEAuCua)xi zR}h(TS&t0qy`L@Cd(Y+SwtoG{<s<>Iw;-!W zj)x}d+ZUb%6B~CanT*xL#s2(L;B5~;H&s9B$2Zcb8-5W_BVe|FMswy5V$P8sm9IqT zIAbrvY@{#tJO~sHk`e?9-hkj4ZRP=dFf3~i;e(->TvKG1-T&!DLp-6a&EE~t1ja-w z9@N}Wxa%kc?=PCTaTyF%lET{jMrjV?c9X29_<&K*y_H21VWOARNvukWt;zR-Vl*uUfJ!09Af{^U}%4X|F*Gq&fIsCl#~# zAC+Qj@*|*z|6Ld0|7CoQ^uPP;mPYUD%4loSi(s5S2|BCw$fcE~M``6Z;v@1NHK-$0wAEukEAAzbd>k0|alZ6%jp5_FTc)v(K;Yw*PcTBQ-G*h(M(fw9K2vjL zykb-fA%c*P&5{E_Gi|03_t-iS|0u#R7WOrm;yVYTi0=!(PM@DwN{tNjbE2G|HAMO@onEYSI$o;tj#}wwOcB7Nkvn9+i8BvSSA~Nu{jVp*-zu=T~!^qxO!#e3>xN= z4Ws7O6kQ*?d=5VEkG)EyH!}F0uP&7z(RP&3cJq%1Y=@!~o#ltLr~^EWM$A?nz4cqV zZQt@}{kQTsKCyBX#=Z>;(lQni0N2O1O2CO_$k;~^erpS+O6Tv#=s%c$wail|LvK}) zJ+_aW=!^X;h_N;pJ^l_JW~61OLTQ!D|EWYV%_kb-N2&h?02=*wKywdbeLEmHBA8{O=p z=^K9lMKbOYOr>wf`>`{{ek{eE`8SNL@aINRO%Yef3+ixX7tX56s>?ssx?hCBlTVuh zRF3C8-n+f4t0MbpVBcCz%WJV7d928;*PYiR7xdnbbGnaePtC?tLiEp*)se5dex0Lj zyAy2WJ#2UA$AhU^n^5ycb7CFraiYZswBiA+e}LxgA<7nj>!DpATg#dQG-=<73}Rs^ zAJn$KE+b>$j8|JT@mX41-q}*9=Bj*GS4E$ zFp85azb(kG_q6D}DvGi0Sp?wa71B#5Ee(cRw}{!~NI0uRb>u%_Nkms(POT&ee&cxw zeNx#iU=i!5v7!uwPgZrG+S1{TRwb85PE~U>&?;~-2d#K2soYGGfkE!nJ&s$!8BK&uH zx*|2YU=#y@FKnAQLX1c30yHswQoRtLiuf<> zi9Z8ziX>#u(*w^t7P+nqHm&<0gyNq`$y9$~J-VkRy71&wA-s22N8a_Wu8w@^UFF3R zrAKG|3h>IB8JB2hjeR!(mwGSdhNhMtRdHGqNz_C>tBic?O-}iM_Ef$6{qQHWrw+H) zyr9#3p*Jz2WRoO~H#6ErM$>2y{Bb=%f5&JF1zPT#UjKk=p=)}>c(F3vQe2NeA^BRG zHig2$z^$%3X3V}r6&}^9JwnLS@{*(4LC@*O&M%tslf8?ty!iKvR(*FOohmQh`Q!?Z zC<;}ZJnP~7-+t$s#fRUjS#=i=JOR2JkKX49$GYI5vkN5tTZHY=6~ea3X}fOXSo==cB%FSuV25uw zArYc-tUP`%LUtZLJ)aqE5f0~4N8#<-gpKx2vH04r&zxH{jl@a+ete!o+s)_pgop6i z3492Y6UorC>=URDZ<8_+whN!F!0_5ClMCNJz^4-!QBmb^cI8w$Tsbd|cR0NcJKlg? zfT_Q)NB?D$DMR^1eD*-*QBZNa!x_y%Q+DL)4%fE4N=I>5{!&Lt$Wgqs&*Ad+I|?h_ zayTmvJM8jrU1jK_i_oRN2U8+nYk)OktX`Lhfvp3Uhw+-HuqmJ+U?sqCR)PmOhbsV^ zE@XEA*J+HAA8yx^sdkkB-VK>IA+sM;EOj_{pzVd*a=i{$SKd;GC%4)*9^N_%z3|mx zzkPxZv5z3T4l(f~j6Lc6bxwt&@TuGihbx*_Hpc5H>l$0>SeH9FXS|~f zkk?TR5r|euTU8TWP^Y+h6JtM(>X&_A?WoP2fLfhP9Cq)7O28CzyAg8%qdoA4_W=K1 zihmzPk9Cx+w9$Z5I6vb~RJeR#L!O`0U%SC5xJH%p@3gK{JdRXlx>iyCy%xQ5P~!40 zFDKiG-=*GH_*t+=WpI`!(*adZ2l$khs|MijATkvyTA(Nsr6zRsyCHf+rqrnP>3N+J zPDN$9TG7{(gN(;ihW+kQa{MmC^?yy_EQdcr$o8Q+3E@me@@2{%4s|5Ye&W?p@(xvw z$K$keW&Zz+@b9y&221FL2g##ot)lA`ZCA8Y(VdF!QgpYXJ&Fz}dQ8zFMTPotm(%D%rx#Fi~t@^x47>h6SSq?vi=#(isN++s)E19t(Lu zOycyLO8$Mi2^P8d$G<;M$MeKv8SUh!;yqn{jNmv=$Hxku59zpFaGa#$;{?Z7IzC=- z9H-+G1jlbsao_hgk5ID z$mik|rZXcxMHJtdNxmTUyHay$uDB}oyV7+0YLRK437^D&-(&cnQHKDGx))iEDdHE@5L3!#heR^jKO}VvMvU)K;5IRG zd_Pk1yst?XUth`0$aou%j>4)jA|5Vc!^sgN=aCEYSD@c^seP39y%LFYZNs!83w{r9 z*RWVB_YmrJiIH*Gk;Tr|Eco-lov>q#+n1%hQw*pLQ=WC9_U|hkT_MvhgNPqxu|JdW zoRRbWohO>Xw<-IzYF(PG@M(}IJJxt} zD?2V#xjdhQ&fF~iEC){gP~tKSb3fE(k@qY6$J7I)JTr!Bi^AL0gRDGb1s+y-t!fuP z7jFSRIcMa2c~asd{_#42L>qZ0A_hI02-B9yOo)e?&eT5%W@yyTfA1mCY+Qt2! z06s0vvccRV$0b8Shrgx0InXf+S94|=_h;O7qAnQf2!-)y@;WhmfyWnW@zv24o(^1i zsc-XbXl`5YZ}!!P+JYTEe|VFq!#$klKqye}E-P73npuc0%{2LN2gkqJ7ibLyH{+^| zza`+S54W^zhKh;vK{Avk+Sbs}+~%)m*L=REE4<6Ad{uW;(jB67d0gVDTziLidG(U? z5_#na5H7g+s&r*RuUsK~%WCed@YeY5T)K2+)hgdAZ$(WNS(R6N>N>*Gb9u4H<1t_8 zQCEY~!QXWEiS96&q;dJjbT0`7#L=xGU!dL}@(a3<4dZjl(N5L5zDN8R&Hye!8H3c&)Z8rZUYW#i<8CqTZ_$k> zQz9UaPVdAG}Fx-T}YMmM)=tLbjq zFh=*ij9!#C%!K*_CXWJ55!{MP#VM~xIGwPy%X~V6zHdVLm z2RX}q4S|iQ(|mU=-C5!8*xZ7uK|?`752!|wKATYDfF+H8W> zhnsLiwW(e~-iqp3A^!#;#T)${jlx~OxfN9yG!!&S9ti|Hn%Y{^7$3@lfo4A`s9bw< zNVw&Yxsi2mXah@`jzFDohXR|x(&7T?HaT(Jfkw5QG}gl!bA}CLT`@E`kE;AFO)zGt zW7vfoYg!A|sSFF}?+4MF03K!V%xENj4`s^dC#qP=a-Qf-_|VY->+}06Qy0q2iO(q* zqxS-{{u=BPl$aJ7Z0ei$RR*e&G3)dDFH;u_Dr(ig2RQ!hO^*jM@wpIFuAk3^tp59< zMQ1{+&+pSr={b=S`AAvT=kuE;VAN#R=l5==eC`AhnUpzZ#ly(anGWmo`#IB@N|OC& zIi_1sPG>~S^Lsqg-AbSQCqLPL1&b&$)TgY^@B2)9lpxp7`rQ6!m41bi<8uL~d~Qbi zq+`|p9x&d^KvOT6hw&?ToD${O0u(as2*}WEhJ_ljS&zUT5?Z5p>pT}Rgl%%4aie>A^ zGqnt{6!a*ACF+HpUMmS=e|Y_&V)k7ss;~BPBjd0H9M{XTG~F}lZ&_$aTx(IVu;PCK Dr8Xqg literal 0 HcmV?d00001 diff --git a/C-optimized/samples-one-thread.c b/C-optimized/samples-one-thread.c new file mode 100644 index 00000000..2ae84321 --- /dev/null +++ b/C-optimized/samples-one-thread.c @@ -0,0 +1,183 @@ +#include +#include +#include +#include + +const float PI = 3.14159265358979323846; + +#define N 1000000 + +//Array helpers + +void array_print(float* array, int length) +{ + for (int i = 0; i < length; i++) { + printf("item[%d] = %f\n", i, array[i]); + } + printf("\n"); +} + +void array_fill(float* array, int length, float item) +{ + int i; + { + for (i = 0; i < length; i++) { + array[i] = item; + } + } +} + +float array_sum(float* array, int length) +{ + float output = 0.0; + for (int i = 0; i < length; i++) { + output += array[i]; + } + return output; +} + +void array_cumsum(float* array_to_sum, float* array_cumsummed, int length) +{ + array_cumsummed[0] = array_to_sum[0]; + for (int i = 1; i < length; i++) { + array_cumsummed[i] = array_cumsummed[i - 1] + array_to_sum[i]; + } +} + +float rand_float(float to) +{ + return ((float)rand() / (float)RAND_MAX) * to; +} + +float ur_normal() +{ + float u1 = rand_float(1.0); + float u2 = rand_float(1.0); + float z = sqrtf(-2.0 * log(u1)) * sin(2 * PI * u2); + return z; +} + +inline float random_uniform(float from, float to) +{ + return ((float)rand() / (float)RAND_MAX) * (to - from) + from; +} + +inline float random_normal(float mean, float sigma) +{ + return (mean + sigma * ur_normal()); +} + +inline float random_lognormal(float logmean, float logsigma) +{ + return expf(random_normal(logmean, logsigma)); +} + +inline float random_to(float low, float high) +{ + const float NORMAL95CONFIDENCE = 1.6448536269514722; + float loglow = logf(low); + float loghigh = logf(high); + float logmean = (loglow + loghigh) / 2; + float logsigma = (loghigh - loglow) / (2.0 * NORMAL95CONFIDENCE); + return random_lognormal(logmean, logsigma); +} + +void array_random_to(float* array, int length, float low, float high) +{ + int i; + #pragma omp private(i) + { + #pragma omp for + for (i = 0; i < length; i++) { + array[i] = random_to(low, high); + } + } +} + +void mixture(float (*samplers[])(void), float* weights, int n_dists, float* results, int results_length) +{ + float sum_weights = array_sum(weights, n_dists); + float* normalized_weights = malloc(n_dists * sizeof(float)); + for (int i = 0; i < n_dists; i++) { + normalized_weights[i] = weights[i] / sum_weights; + } + + float* cummulative_weights = malloc(n_dists * sizeof(float)); + array_cumsum(normalized_weights, cummulative_weights, n_dists); + + //create var holders + float p1; + int sample_index, i, own_length; + + { + for (int i = 0; i < results_length; i++) { + p1 = random_uniform(0, 1); + for (int j = 0; j < n_dists; j++) { + if (p1 < cummulative_weights[j]) { + results[i] = samplers[j](); + break; + } + } + } + } + free(normalized_weights); + free(cummulative_weights); +} + +float sample_0() +{ + return 0; +} + +float sample_1() +{ + return 1; +} + +float sample_few() +{ + return random_to(1, 3); +} + +float sample_many() +{ + return random_to(2, 10); +} + +int main() +{ + //initialize randomness + srand(1); + + // clock_t start, end; + // start = clock(); + + // Toy example + // Declare variables in play + float p_a, p_b, p_c; + // printf("Max threads: %d\n", n_threads); + // omp_set_num_threads(n_threads); + + // Initialize variables + p_a = 0.8; + p_b = 0.5; + p_c = p_a * p_b; + + // Generate mixture + int n_dists = 4; + float weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 }; + float (*samplers[])(void) = { sample_0, sample_1, sample_few, sample_many }; + + float* results = malloc(N * sizeof(float)); + mixture(samplers, weights, n_dists, results, N); + printf("Sum(dist_mixture, N)/N = %f\n", array_sum(results, N) / N); + // array_print(dist_mixture[0], N); + + // end = clock(); + // printf("Time (ms): %f\n", ((double)(end - start)) / (CLOCKS_PER_SEC * 10) * 1000); + // ^ Will only measure how long it takes the inner main to run, not the whole program, + // including e.g., loading the program into memory or smth. + // Also CLOCKS_PER_SEC in POSIX is a constant equal to 1000000. + // See: https://stackoverflow.com/questions/10455905/why-is-clocks-per-sec-not-the-actual-number-of-clocks-per-second + return 0; +}