From 2acf129ef2ee2e065afd2c5d23936c67da2b18f6 Mon Sep 17 00:00:00 2001 From: NunoSempere Date: Sun, 16 Jul 2023 13:02:11 +0200 Subject: [PATCH] add comparison with previous sampler. New approach turns out to be ~50x slower. Though it is much more general. --- scratchpad/makefile | 2 +- scratchpad/scratchpad | Bin 17144 -> 17448 bytes scratchpad/scratchpad.c | 47 ++++++++++++++++++++++++++++++---------- 3 files changed, 36 insertions(+), 13 deletions(-) diff --git a/scratchpad/makefile b/scratchpad/makefile index cc2fe53..cf6d6b5 100644 --- a/scratchpad/makefile +++ b/scratchpad/makefile @@ -10,7 +10,7 @@ CC=gcc # required for nested functions # Main file SRC=scratchpad.c -OUTPUT=scratchpad +OUTPUT=./scratchpad ## Dependencies MATH=-lm diff --git a/scratchpad/scratchpad b/scratchpad/scratchpad index 79ca1c2495f43680c09afbcd5b86d52c251a03dd..1516d8fc829a7f59f5ae97e2f8b23ee4122eadb8 100755 GIT binary patch literal 17448 zcmeHP3vgW3c|Ouwwrs536|sVWfG;jC2I7@$jIjaNlI*pwK*o--z);|Ny$@@LeTdz= zMkYMO$V%7>1v7oblc61vS83{*xRarAW^h%A`NZ!D;)WY7q*V#JZ&n$Q zRCIX3l>Z{LZW0>E$vqYFcm>d zlkDm!t9UQ1n{VkcWw~9RX32$z?pMJub=pze>{w>m?O5xkhgG|j6lFVN$*x=3bt^lj zg7p>U_M|ZAZ?$SqFV!HUq{&|)ymmX3otJJ<`b?<|O1Hmf#{MhiZB=$+vE|20_bHeu zm-iLeQM>%MiMx55Dz92UJg3@`DfKs{=2UXms+G;D*s@eIQ|MjRyL#2KRVzdJY-oj) zQ(ROAjj2tyby2sJ2vf}{`(lCON&d6m#jE{&&zt@Tz+x{*JjPL)hRrHrbZp;j#NxU5 z?quGK=eBQNpUP(9+atSDabXy{)7gxXHzPUIFf7$+6;s*WBL6_nOo(_cA@a!#6%e%w zi1#K<(Ni$B%KCC}UG+NBeqXMQEj+TM@c`D!+y*NF&sTf3xIoof^SD;53%`dZ+=v3p|b67W^$p zCL3tR`l-SSmn7$s_6hKmNWTV@$9eJTKf&;?6TeWHrC)37^Zd&5TAvyOJJtNjc&oze zTj2J8+sqIF%f@x|K@N2?vf2x6?@n z?zhpI%uEZ+w7^UY%(TEv3(T~@|3(X(30(fE-haAYAFTVQW+C)}l380hsrSEJe@qr% zx&9Ww-`((fk8_~^e>DwX|`rv=OS=o>aq3#akXqc%?qm+{fRvUystjE_ER z^R!SIAAQ>9X@N36`nb*0!eo5(0h^}<$@pl*=4nATKKd1#r-j(~XdCm0zClL6LCE;Q z4}_>TQ~VqB; z6>6vtHN1$5>qBdY(O#9N3t@70KiDbleJzozKhuk2`q2;9=|?Agdd;i)8=sjA;NTVIpuRGaknP6y)c<{JNh}K2cIo|V zzfD=a_?|gmA6)x=Fy+0URVw8eI^)&4KLuZNFKq4d$$s=f2r1JAyEB2t@MI$J>r$XO zJh^8!*p1C#iLx)}Wdeg>tIKKRGv zG7-p>Dt`wLL*zKnd?N78QsA#k#erF9&cOjPcWDNiWdy8M53T+qq$|v({v*T+bG}s| zUC^g)R?@;3QHF|c2O_p2J$0F~DJ&o{m3Agzi7DTILZA$@2KXe3cu$6{e2hxd$4KVFz;ty-jruLU~H(9;MSbut4D? z4}>KAP$MZi0CTYepvpe9&;u0a9F}ceCo5GPSO{7 z$Udb&zXT)ex{=vE7jjW{j~Rq0wb$q>a;Z>!r*4)WLQ?CYW(Q-|p3=MyoxIiOvW*MD z$B$4G!{55VH=Y6MKOzO@HDqB8^TvTIp<;C)^@_X8PTg*ggNI2t)NqfS>Y9##xW~a) zKV>)6P@gvr^uf&&G5xyv^71q5%2&YBak&;BDeHsb@}MY&CkMlmM@Jf#TbXjB%8b+@ zBXznai$P1-HEGoA#YZN9Phgamzk~V}OPFBX1yH{S0bsnO@YM9@pt z1ruzz6Vo}3H%B!6r(59cO}$t+35O@i;UCHleza5!pC0Tw4fIXS056fG_#YVa#qgM3 z?4shw^uewPmoE*zWbH`%R@={6D3=LZZ9@|@<((~G|L@i`_X^s3JYY?2wXBSRfoI6WLPRMlXiE7M`$j@z>~9fc+wHf z!ZJz(mYzuHvaQH^0?P^c_`Te0+0;qhp|w9j@a45uBxv!jHxbFNpo~a*Wh5t6B*a;f z43RGsFUs%y8%MH6Me++3NyLhTj69KK>`1-`YibjzJG3@Wk^BfhXp<2{Qbr_c8A-Tr zFg$`V7NhI>tQh9nF&K6X&s+Y&@2VJnNyf171n>b?4pviKNv5g$EPIME#0kxvLG6rOL7}819@=$IE|%drBgJD{LSIS-;dkFr=u0 z`TO!)AIoYSg3TcW^PC;b58#=DISlDx4rZ1e%!{yMg{TnLwc+IDB>(Au9} zAw=699mRiW>*^?e+_t@~IN7BSEqej*?VVSjqcwVY%UP6t^qkLJwEQi#e(NlbcNRa~ zQ2bR}Wx?Be|FIf<%`Xe@)B64Q?`^xM?cTQg+Kgj~M95hlt?a2>x$d`D`ZAoEpP3ez zX@UPoEkN&CDCMKMh#BqfiNr!t;yW^UMYSiA(xTaniFZQD%x=x>j%#>9wL5NVkQCxt zCTnVmY#|fVk{K;g$VAO#HiP`0crG6|qOru6Nw6fZEnO13n!I8zi$`+NZmlPmjmGnN zEtZT)FDL~@yW`T}xAF8=praK97==tSkn%jK}7o zS~4~*n;>b#)RrW)Wd25wd4k;KA1Fj}@tAZ+d5{LvzSS7~EexpD5RBeZU;WvC?zcw^0>|gq1-3EWl*JkVf)%|le z`CBvo)ouQkHviIgzt-+w*zOOu`|IU<-Lvr113%99l;m3l-m_C=+ESy48+EE$cewWVFV3K_GYtqH9RwJcj+klgY|SG0s$R)&^eZQ;MsCBEjYvA^$kYSXu&{C&C` z_X+;~+>Or?4^PuEK8#`>&u;zMg2%BNpCfp@yYYI#>0CLhh#B zHa^iLcs_DUeBz6O=OH(Kk(ll}@QH=?yy4cztk>#9th`TLB7)Q6JS$Gw(DXA)d{mi2 ztQ^g^wdjYNM9@$OyXLVYL9_!SE0`JC|^kg0LRL*-H0h`*X?+93^`%I)*`-z)Xc zZ_fk3r;EQ@y}l>?ED%r90}@Y~&d-mePSbh)^)m1paejZjuJpbAJ|gk+$Ilt*=luT7 zVQPsb2Vz8c69^YEK3*Sg(~_#SNd-${UN3Q zb`|~4l>TdKe9C8Bu<@fkseQcH+YSF*iT^k0NLi0gD=tHTMh3zQ!|@7AouLkh1^?bfF71Hj3T zcbt4z>36FIihOnko4)`~^*XFs4!B+irM`0NAV-v+r&PI}O7=<>{eL9?XlHLc?*P{v zemQ#%_+n-p^kUp!sb3n@cn-5Hnltle0dEkaV(RRQVWtfnDapj?s7Wkq>`rBO;W2s4 z%;xe&q|hs(S$ZyxNA983EvuGKuS6$Ml13z#i|jSAf&$bMd9qZi^dDaFHPF@*dU3t(qY!k+&&Mob2 zoyL}p8*dM9H@3I6cZSKUJS7v&7i8RY{HC>)&&SZIn9n=jLx*xa+VNP#j9BM#+!Dv@ z6BOz`$1|n5(mtd!g;5l_NQT~Cm|46J!C4`PaE+MeeZqNXe;fwz=6xlC&PLSMQ6U89 zKC@)R@>!!B<>Ne)_tlKiu?1GKWX34u<1y<5le;KK`%-7Cvr2BAUJS49WWu~|1q$Dl z&#OS>nWe2AbQ;R7uPQ9)V3b>87&v~#Z{{r8 z%{d&UisyPn2rr}Jq1~B62yef7;yH7#1KL$c;;dmZrl7X=j%8+Kw~*@Hk$krZ#r9@U zghkDqC83vDc;)6|49IfvRD=vvt|w)RkZjcuvZ38su$0NiqatL+d%@DR2<@yKm7#dI zy860f@W!0w!@3MD3(ljcNID5;mN~jcgfIo9aar>`^53L1&EwXDHG`EFs{H^{-tQ{0 z)a5+U-@za4G1;E?A51mKoQe09jM2G2r~M99KGQ2%QBj`f8Se1d^L~b@#)^u1?e7GR z*JoLdpZ7gXxqP*qwfFt(Hi8W8;n|+|M@$b;<04TVDa-Ze{dN)|P?8(M!|128ebV~PY0vvwrVXs9sJH!I zQTE*bLQ{%NZGnYz|0yl;Ix~i_MkLUlVfE$LcKT2%J{rH!_ zsQ$M7I?}>70cyd0z+&xRwr4sU#!h?Ds#IGn%H^;Fwqt%ij0xlNdA%G|_FjD|%b6Ng z`AXduvHi6+VrAIgNj+UG66~S>kq9Y3`DkvpcRr>b!0wTBdLZDCt#GB@eycgauRfkSOWRRwl_I z6sE#u@}yTnRmI!s#%oMDrre(G&uz8~8{Mw_XKK|WSB7d$y`j~)biazr?<>lB#FE~y z(i>KKOa;p;%JHO_(63SXW2bJkQE|)vLfG{-DLp%FQSwZw4=VSc*F*o4{x&GRF}E}$ z?DT*M3#Q!P??O)vt&;d`!oz%@>TkAj=vQ%MO5;tXt~at{)rz{_P;GA{n%q;nr*T#7 zsukWu%)4B+lV8*a<vp)?i|Eq4uRy8{~~a= zDU$QQA6!29ZL{DH&w{T4?#9nvb_2M{4ib;?>7(ITB?9f8_XI-Wcz9@V^y8~T44~y`gh%Wkqy}hw6 z*z4L840K1L!QRNDWS(@C`MzKzD%N$hH?IvW_uk=Mk*zQHu7txhk~F#wP>)d!h9n7@mDtSr6P3hCPRXmv14i3S? zDa}>&3PArz#@(&`>L>oeDhJqZEjsQWTv>+daXOA;30^o^V^@im-8YfGw6 zO#8K4)6?36CBH|;FOsXN5&y`9`(Qfx9USqSWTGSWZ)PAVns940(`a^*JVI&JkbY>Z zZ;Nm1DEuF&P=3ItcTsOhXUgdiNHcRS`94^61o(xwQ29DlPE)=$yx*QWz0IFG=O6gx zz*WVyLR=aGdkZT5!IGPZ+<95ALaP7E?WiNwuQA58 zrTfvF7*!PVA5r7r$|)kZT=AzS{UaZ&@sC_~_={fgpT43mfPo(>17*fILW?|Ree!Sr zYHAkA+cx?KR)2%4{?vKB+<$oWAA!ky^oe0)LU8ewl0ooATcMkyKd~E)${(_)c}`Ja zwYrhr<&=zP%PFv1M@Wy_De1JjQ@bnBSn(1XA-QX=<{2679)d0g>=ZH5Pm(uJkP91PLFnXdpUW~ADv%roJtjkA%p&wte3R1rH0PB zWvitHS4kLD_D5#@VBq6FK%lnPeAhnou<{q+wYp2Y`m@Ja zjbnV7j_y@7Wxn~boWCGHrD2R*rr73D68JL`_^L$!2a5=~LN(PBtvLDQ_+bEc4WQ$G z+3}`d%1D*(9w3L0(RoT9BmXro#Q~wzo-C&9J@^S(po4hFJdV>7?*Wn53ry0f-yn}X z&{#5G8g^BbN(vv#O?w`5pl~=C8ka$%-&;fJ4-QyM%>u11qj^S~v}aBlXS6yfawFVW zfO-cF%Ik2k>A?Hi0lJ*Yp>b6W1tAwfuR`VBi2<2&>Usd=Cl_9{oZ@mEad_I356N-dNdHOsEmQy8#M)f_8$Lc< ziVMcMiiQ)Zv5`!rB;zFwW2sWiKQ!MQE~T_HWyaLCg*eE_D@v*qF(|nX-qZ0xe(q<> zlXM0Tby9OD{sL3I#d&i+^nImcaP?o+;6nL8d+O)DjqRxqeVxA4C%8?heID@r9k+Z+ zb702!HR3e#sY74d@V1(7J5o~}sSjFG|L!vu{HuRpw8($wFOu&em}TZ=%eT!Juuh5O zFQpdGt0YN$p%fxeh(I9%g$NWP@c%gi^!}+U)E!7hBi*rhU!XqFAljq5!tq4d9qV>4 zOSpr&ds(+TlDG?Go|H{Ik_^VfAt_6`Q78s`CEFU0$Kvit)SU?S_4kJ3?(Sr?OOM2& z90RtE&d7K)rn|d>y%IYK{tR7e`px{LHqv`_CSX~Uk6Qtj)RKJhEb1Yd_U+OdVGLF z?|!KiJ$j!g+T$u(TwYp6Paz1WZyFlWcO40|l+vp|f!h+X0>7aT&?f;-SDUl?ceMGt z%l3;ki|)Ghj+&bYCjBk=9fO=yBoKvg9lxW%n?Sjc%v1OsLw`#YNZ2rbF9W+xVWj`p z_)$!j6Xtii!p<^rJF0XKU541Kp=0woT?54}&U(iKMf04lR%e-ie)03DwK>b$u-sVz zkx%fezGN6brr5MPU5AQ&&gucjI%kvPi%_a=ce;GevgY}HsJF>_%Y5=_1>}z+);FL| zg)l#`zXK*?rLZl)&H}rGgisps>j!q}eZ!ap6)-wj-0rM?%Heam56$s8Jp(1=@^k3j zv)1W`JIzj4v$M=MzXjrd4xLl57e<|Q|A2I#cCaiA2(5Op^C zob|BT>~uG?A?YjS)FkZSDMXT==ysNh zopzeAC|6Y8fTE6G2>&( zG4?yHS+;jOQ2NJ_5ZvE}vN`W4=W zTTJ}Fx9vdIR_6(f1J4}z`*f=n2flOV@6U7bIpV8%?UZEmGFN`C;C#%*O9kg`E?y>h zJah4D1m{66K2LDI<>K?Rd6|pjEX^}64xFv-ytq^J=fyR#KQHbQ>AZNQ;CUd|AAGUx zwjyTTAwHM=9^cyPz|)0IH(C*vugcCV)>enOPVjt^i+^6^%V&qE&Yu6d@|eAvtcY26 zh(*Gc7w6e+Mu+CjIpU%*gP1jZz@ zcwdtuRacGUKLalkSC8X)CC}Fx?tz}Si9C5V9|w-v;i_@J0=$^6YPNj75%{$Lo74qO zO~0Z+;@sN|Z3gb1VY9XGOZ@7*+6LSu7G9NCVc^tn|8=Gb`AiGd+Xyed`uuuC$!}K| zoI2I>K8a`D$X1?H@@bVnx2YC-CPn_(^Z!NQ?yUAq74SfZdb} zN$z{VDQ=T0&dXUQ(G?HsT|NE5khe=j%qQysJT>=r#i9wlKOXD%){CyVp3sx{(xywy zY-9=OeSt38%96l7lTa+Mvp2Q_kJ&?dES?AilY2y0jGnRMxw*HoepN$$A=;}F2?XQu z;NCzus>k<=?s%{-90(=*`u0M_!UZ6y=ZFRZZTI=^X$`dA+d`XXas@&n(DLBDzI)o& z=C;VaHh{3BDbVUy7W^&uiNLy!_07JH!1}hf`&&B$oxbLdRm%#Gc7Vo0qlF?fuapjd3QPGZePl^V0)7zyotSidTfIRwOO-^pOL+D09&(LU$xL}2rkbTJ*NM_)oYp?9 z&-c?z-DtBGUhgnQ`|Ygyo7DX^Q+gj|Eqr}otQ{4rKHr}+b+e$NcKxpa$JaM8_Mg|6 zOu2ux%**aqWwSw4Xl=>*e810hQVEie6lXquby*M*V06f=&+7uFN6}_2WW!n>0Yhs; z*5~yDQ(m{S|18J!G3e7;lX+feFx{X!;~2C5?86``6jRpc^$63n66F3_pO60;rQfXN zcsARI4Q$BuW)oxz=sZF2POHA3F+4PUv z^!FN9O+cte(A2Kat!^(iYe_Ail8ca<)*>xoE)3(bq&-5a6HH$v4 zH=nHqh{AFvH^VdjDHeSdYj51z;4vtiF1FHLPxQx1-HkSfA-! zC|mVKlaj7CDfh!RSdaN~C=D%S0FKek({VVZo6YKLjgd14jN^L_dMVpjC ww+bAsHz*|R56?gJT#kL0it69?FPW7WZN|A@maRl3pZ>zTOo>G{1sf~=3-x3AR{#J2 diff --git a/scratchpad/scratchpad.c b/scratchpad/scratchpad.c index c6bf9e2..58e5d41 100644 --- a/scratchpad/scratchpad.c +++ b/scratchpad/scratchpad.c @@ -4,6 +4,7 @@ #include // FLT_MAX, FLT_MIN #include #include // erf, sqrt +#include #define EXIT_ON_ERROR 0 @@ -41,9 +42,9 @@ float cdf_squared_0_1(float x) float cdf_normal_0_1(float x) { - float mean = 0; - float std = 1; - return 0.5 * (1 + erf((x - mean) / (std * sqrt(2)))); // erf from math.h + // float mean = 0; + // float std = 1; + return 0.5 * (1 + erf((x - 0) / (1 * sqrt(2)))); // erf from math.h } // Inverse cdf @@ -94,13 +95,9 @@ struct box inverse_cdf(float cdf(float), float p) while (!convergence_condition && (count < (INT_MAX / 2))) { float mid = (high + low) / 2; int mid_not_new = (mid == low) || (mid == high); - if (0) { - printf("while loop\n"); - printf("low: %f, high: %f\n", low, high); - printf("mid: %f\n", mid); - } - + // float width = high - low; if (mid_not_new) { + // if ((width < 1e-8) || mid_not_new){ convergence_condition = 1; } else { float mid_sign = cdf(mid) - p; @@ -166,6 +163,16 @@ struct box sampler(float cdf(float), uint32_t* seed) return result; } +// For comparison, raw sampler +const float PI = 3.14159265358979323846; +float sampler_normal_0_1(uint32_t* seed) +{ + float u1 = rand_0_to_1(seed); + float u2 = rand_0_to_1(seed); + float z = sqrtf(-2.0 * log(u1)) * sin(2 * PI * u2); + return z; +} + // to do: add beta. // for the cdf, use this incomplete beta function implementation, based on continuous fractions: // @@ -208,16 +215,32 @@ int main() // set randomness seed uint32_t* seed = malloc(sizeof(uint32_t)); *seed = 1000; // xorshift can't start with 0 + int n = 1000000; printf("\n\nGetting some samples from %s:\n", name_3); - int n = 100; - for (int i = 0; i < n; i++) { + clock_t begin = clock(); + for (int i = 0; i < n; i++) { struct box sample = sampler(cdf_normal_0_1, seed); if (sample.empty) { printf("Error in sampler function"); } else { - printf("%f\n", sample.content); + // printf("%f\n", sample.content); } } + clock_t end = clock(); + double time_spent = (double)(end - begin) / CLOCKS_PER_SEC; + printf("Time spent: %f", time_spent); + + // Get some normal samples using the previous method. + clock_t begin_2 = clock(); + printf("\n\nGetting some samples from sampler_normal_0_1\n"); + for (int i = 0; i < n; i++) { + float normal_sample = sampler_normal_0_1(seed); + // printf("%f\n", normal_sample); + } + clock_t end_2 = clock(); + double time_spent_2 = (double)(end_2 - begin_2) / CLOCKS_PER_SEC; + printf("Time spent: %f", time_spent_2); + return 0; }