move squiggle.c to its own repo, fix readme typo.

This commit is contained in:
NunoSempere 2023-08-17 14:19:32 +02:00
parent 3b29ad7e45
commit 4e079e9015
12 changed files with 26 additions and 334 deletions


@@ -1,50 +0,0 @@
#include "../../squiggle.h"
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

// Estimate functions
float sample_0(uint32_t* seed)
{
    return 0;
}

float sample_1(uint32_t* seed)
{
    return 1;
}

float sample_few(uint32_t* seed)
{
    return random_to(1, 3, seed);
}

float sample_many(uint32_t* seed)
{
    return random_to(2, 10, seed);
}

int main()
{
    // set randomness seed
    uint32_t* seed = malloc(sizeof(uint32_t));
    *seed = 1000; // xorshift can't start with 0

    float p_a = 0.8;
    float p_b = 0.5;
    float p_c = p_a * p_b;
    int n_dists = 4;
    float weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
    float (*samplers[])(uint32_t*) = { sample_0, sample_1, sample_few, sample_many };

    float result_one = mixture(samplers, weights, n_dists, seed);
    printf("result_one: %f\n", result_one);

    free(seed);
    return 0;
}

/*
Aggregation mechanisms:
- Quantiles (requires a sort)
- Sum
- Average
- Std
*/


@@ -1,53 +0,0 @@
# Interface:
#   make
#   make build
#   make format
#   make run

# Compiler
CC=gcc
# CC=tcc # <= faster compilation

# Main file
SRC=example.c
OUTPUT=example

## Dependencies
MATH=-lm

## Flags
DEBUG= #'-g'
STANDARD=-std=c99
WARNINGS=-Wall
OPTIMIZED=-O3 #-Ofast
# OPENMP=-fopenmp

## Formatter
STYLE_BLUEPRINT=webkit
FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)

## make build
build: $(SRC)
	$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT)

format: $(SRC)
	$(FORMATTER) $(SRC)

run: $(SRC) $(OUTPUT)
	OMP_NUM_THREADS=1 ./$(OUTPUT) && echo

time-linux:
	@echo "Requires /bin/time, found on GNU/Linux systems" && echo
	@echo "Running 100x and taking avg time $(OUTPUT)"
	@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo

## Profiling
profile-linux:
	echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
	echo "Must be run as sudo"
	$(CC) $(SRC) $(MATH) -o $(OUTPUT)
	sudo perf record $(OUTPUT)
	sudo perf report
	rm perf.data


@@ -1,59 +0,0 @@
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include "../../squiggle.h"

// Estimate functions
float sample_0(uint32_t* seed)
{
    return 0;
}

float sample_1(uint32_t* seed)
{
    return 1;
}

float sample_few(uint32_t* seed)
{
    return random_to(1, 3, seed);
}

float sample_many(uint32_t* seed)
{
    return random_to(2, 10, seed);
}

int main()
{
    // set randomness seed
    uint32_t* seed = malloc(sizeof(uint32_t));
    *seed = 1000; // xorshift can't start with 0

    float p_a = 0.8;
    float p_b = 0.5;
    float p_c = p_a * p_b;
    int n_dists = 4;
    float weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
    float (*samplers[])(uint32_t*) = { sample_0, sample_1, sample_few, sample_many };

    int n_samples = 1000000;
    float* result_many = (float*) malloc(n_samples * sizeof(float));
    for (int i = 0; i < n_samples; i++) {
        result_many[i] = mixture(samplers, weights, n_dists, seed);
    }

    printf("result_many: [");
    for (int i = 0; i < 100; i++) {
        printf("%.2f, ", result_many[i]);
    }
    printf("]\n");

    free(result_many);
    free(seed);
    return 0;
}
/*
Aggregation mechanisms:
- Quantiles (requires a sort)
- Sum
- Average
- Std
*/
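// A minimal sketch (not part of the original file) of the Sum, Average
// and Std mechanisms listed above; summarize is a hypothetical helper,
// and assumes <math.h> and <stdio.h> are included.
void summarize(float* xs, int n)
{
    float sum = 0.0;
    for (int i = 0; i < n; i++) {
        sum += xs[i];
    }
    float mean = sum / n;

    float var = 0.0;
    for (int i = 0; i < n; i++) {
        var += (xs[i] - mean) * (xs[i] - mean);
    }
    float std = sqrtf(var / n);

    printf("sum: %f, mean: %f, std: %f\n", sum, mean, std);
}
// e.g. summarize(result_many, n_samples) after the sampling loop in main
// would print the aggregates instead of the first 100 raw samples.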


@@ -1,53 +0,0 @@
# Interface:
#   make
#   make build
#   make format
#   make run

# Compiler
CC=gcc
# CC=tcc # <= faster compilation

# Main file
SRC=example.c
OUTPUT=example

## Dependencies
MATH=-lm

## Flags
DEBUG= #'-g'
STANDARD=-std=c99
WARNINGS=-Wall
OPTIMIZED=-O3 #-Ofast
# OPENMP=-fopenmp

## Formatter
STYLE_BLUEPRINT=webkit
FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)

## make build
build: $(SRC)
	$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT)

format: $(SRC)
	$(FORMATTER) $(SRC)

run: $(SRC) $(OUTPUT)
	OMP_NUM_THREADS=1 ./$(OUTPUT) && echo

time-linux:
	@echo "Requires /bin/time, found on GNU/Linux systems" && echo
	@echo "Running 100x and taking avg time $(OUTPUT)"
	@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo

## Profiling
profile-linux:
	echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
	echo "Must be run as sudo"
	$(CC) $(SRC) $(MATH) -o $(OUTPUT)
	sudo perf record $(OUTPUT)
	sudo perf report
	rm perf.data


@@ -1,109 +0,0 @@
#include <math.h>
#include <stdint.h>
#include <stdlib.h>

const float PI = 3.14159265358979323846;

// Pseudo Random number generator
uint32_t xorshift32(uint32_t* seed)
{
    // Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
    // See <https://stackoverflow.com/questions/53886131/how-does-xorshift32-works>
    // https://en.wikipedia.org/wiki/Xorshift
    // Also some drama: <https://www.pcg-random.org/posts/on-vignas-pcg-critique.html>, <https://prng.di.unimi.it/>
    uint32_t x = *seed;
    x ^= x << 13;
    x ^= x >> 17;
    x ^= x << 5;
    return *seed = x;
}

// Distribution & sampling functions
float rand_0_to_1(uint32_t* seed)
{
    // xorshift32 never yields 0 from a nonzero seed, so this is in (0, 1]
    return ((float) xorshift32(seed)) / ((float) UINT32_MAX);
}

float rand_float(float max, uint32_t* seed)
{
    return rand_0_to_1(seed) * max;
}

float ur_normal(uint32_t* seed)
{
    // Box-Muller transform
    float u1 = rand_0_to_1(seed);
    float u2 = rand_0_to_1(seed);
    float z = sqrtf(-2.0 * log(u1)) * sin(2 * PI * u2);
    return z;
}

float random_uniform(float from, float to, uint32_t* seed)
{
    return rand_0_to_1(seed) * (to - from) + from;
}

float random_normal(float mean, float sigma, uint32_t* seed)
{
    return (mean + sigma * ur_normal(seed));
}

float random_lognormal(float logmean, float logsigma, uint32_t* seed)
{
    return expf(random_normal(logmean, logsigma, seed));
}

float random_to(float low, float high, uint32_t* seed)
{
    // Interprets [low, high] as the 90% confidence interval
    // of a lognormal distribution
    const float NORMAL95CONFIDENCE = 1.6448536269514722;
    float loglow = logf(low);
    float loghigh = logf(high);
    float logmean = (loglow + loghigh) / 2;
    float logsigma = (loghigh - loglow) / (2.0 * NORMAL95CONFIDENCE);
    return random_lognormal(logmean, logsigma, seed);
}
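// Worked example: random_to(1, 3, seed) treats [1, 3] as the 90%
// confidence interval of a lognormal, giving
//   logmean  = (ln 1 + ln 3) / 2 ≈ 0.5493
//   logsigma = (ln 3 - ln 1) / (2 * 1.6448...) ≈ 0.3340
// so roughly 90% of samples land between 1 and 3.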
// Array helpers
float array_sum(float* array, int length)
{
    float output = 0.0;
    for (int i = 0; i < length; i++) {
        output += array[i];
    }
    return output;
}

void array_cumsum(float* array_to_sum, float* array_cumsummed, int length)
{
    array_cumsummed[0] = array_to_sum[0];
    for (int i = 1; i < length; i++) {
        array_cumsummed[i] = array_cumsummed[i - 1] + array_to_sum[i];
    }
}
// Mixture function
float mixture(float (*samplers[])(uint32_t*), float* weights, int n_dists, uint32_t* seed)
{
    // You can see a simpler version of this function in the git history
    // or in C-02-better-algorithm-one-thread/
    float sum_weights = array_sum(weights, n_dists);
    float* cumsummed_normalized_weights = (float*) malloc(n_dists * sizeof(float));
    cumsummed_normalized_weights[0] = weights[0] / sum_weights;
    for (int i = 1; i < n_dists; i++) {
        cumsummed_normalized_weights[i] = cumsummed_normalized_weights[i - 1] + weights[i] / sum_weights;
    }

    float p = random_uniform(0, 1, seed);
    // Falling through to the last sampler guards against p == 1.0 and
    // against the cumulative weights rounding to slightly less than 1.0,
    // which would otherwise leave result uninitialized
    int k = 0;
    while (k < n_dists - 1 && p >= cumsummed_normalized_weights[k]) {
        k++;
    }
    float result = samplers[k](seed);

    free(cumsummed_normalized_weights);
    return result;
}
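// Worked example: with p_a = 0.8 and p_b = 0.5 as in the examples,
// p_c = 0.4 and weights = { 0.6, 0.2, 0.1, 0.1 }, so the cumulative
// normalized weights are { 0.6, 0.8, 0.9, 1.0 }. A uniform draw of
// p = 0.85 first falls below 0.9, so sampler index 2 (sample_few) runs.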


@@ -1,9 +0,0 @@
- [ ] Add example for only one sample
- [ ] Add example for many samples
- [ ] Use gcc extension to define functions nested inside main.
- [ ] Use OpenMP for acceleration
- [ ] Chain various mixture functions
- [ ] Have some more complicated & realistic example
- [ ] Add summarization functions, like mean, std, 90% ci (or all c.i.?)
- [ ] Add beta distribution (see the sketch below)
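A minimal sketch of what the beta-distribution item might look like, using Jöhnk's rejection algorithm on top of `rand_0_to_1` from squiggle.c; `random_beta` is a hypothetical name, not something the repo defines, and the method is only practical for small shape parameters:

#include <math.h>
#include <stdint.h>

// Hypothetical sketch, via Jöhnk's algorithm;
// assumes rand_0_to_1 from squiggle.c is in scope.
float random_beta(float a, float b, uint32_t* seed)
{
    while (1) {
        float x = powf(rand_0_to_1(seed), 1.0f / a);
        float y = powf(rand_0_to_1(seed), 1.0f / b);
        if (x + y <= 1.0f) {
            return x / (x + y); // conditional on x + y <= 1, this is Beta(a, b)
        }
    }
}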


@@ -16,7 +16,7 @@ result = mixture(dists, weights) # should be 1M samples
mean(result)
```
I don't particularly care about the speed of this particular example, but rather think that the speed in this simple example would be indicative of the speed when considering 100x or 1000x more complicated models. As of now, it may also be useful for checking the validity of simple estimations.
The title of this repository is a pun on two meanings of "time to": "how much time does it take to do x", and "let's do x". "BOTEC" stands for "back of the envelope calculation".

python/beta/beta.py Normal file

@@ -0,0 +1,6 @@
import numpy as np

# draw 1M beta(1, 2) samples one at a time
for _ in range(1000 * 1000):
    x = np.random.beta(1, 2)


@@ -0,0 +1,9 @@
import numpy as np

n = 1000 * 1000

def sample_beta_1_2():
    return np.random.beta(1, 2)

a = np.array([sample_beta_1_2() for _ in range(n)])
print(np.mean(a))  # the mean of a Beta(1, 2) is 1 / (1 + 2) ≈ 0.333


@@ -0,0 +1,10 @@
import squigglepy as sq
import numpy as np

a = sq.to(1, 3)  # 90% confidence interval from 1 to 3
b = a / 2
c = b / a

c_samples = sq.sample(c, 10)
print(c_samples)