/* time-to-botec/C/C-01-simple/samples.c */
#include <gsl/gsl_randist.h>
#include <gsl/gsl_rng.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define N 1000000
/*
 * For very high values of N you will want to increase the maximum stack size,
 * otherwise you will get a segmentation fault.
 * In Ubuntu/bash you can do this with $ ulimit -Ss 256000 ## ~256 MB
 * and confirm it with $ ulimit -a
 * (A heap-allocation alternative is sketched right below this comment.)
 */
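/*
 * A minimal sketch of that alternative, not used by main() below: heap-allocating
 * the sample arrays avoids the stack-size limit entirely. The helper name
 * alloc_samples is hypothetical, not part of the original program.
 */
double* alloc_samples(void)
{
    double* xs = malloc(N * sizeof(double)); /* lives on the heap, not the stack */
    if (xs == NULL) {
        fprintf(stderr, "Could not allocate %d doubles\n", N);
        exit(EXIT_FAILURE);
    }
    return xs; /* the caller is responsible for calling free(xs) */
}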
/* Helpers */
void print(double* ys)
{
    for (int i = 0; i < N; i++) {
        printf("%f\n", ys[i]);
    }
    printf("\n");
}
void fill(double* ys, double f)
{
    for (int i = 0; i < N; i++) {
        ys[i] = f;
    }
}
double sum(double* ps, int n)
{
    double result = 0;
    for (int i = 0; i < n; i++) {
        result += ps[i];
    }
    return result;
}
void cumsum(double* ps, double* rs, int n)
{
    double counter = 0;
    for (int i = 0; i < n; i++) {
        counter += ps[i];
        rs[i] = counter;
    }
}
/* Distributions */
void normal(gsl_rng* r, double* ys, double mean, double std)
{
    for (int i = 0; i < N; i++) {
        ys[i] = mean + gsl_ran_gaussian(r, std);
    }
}
void lognormal(gsl_rng* r, double* ys, double zeta, double sigma)
{
    for (int i = 0; i < N; i++) {
        ys[i] = gsl_ran_lognormal(r, zeta, sigma);
    }
}
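/*
 * to(r, ys, low, high) treats [low, high] as the 90% confidence interval of a
 * lognormal: log(low) and log(high) are taken as the 5th and 95th percentiles
 * of the underlying normal, so zeta is their midpoint and sigma is their
 * half-width divided by the one-sided 95% z-value (~1.6449).
 */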
void to(gsl_rng* r, double* ys, double low, double high)
{
    double normal95confidencePoint = 1.6448536269514722;
    double log_low = log(low);
    double log_high = log(high);
    double zeta = (log_low + log_high) / 2;
    double sigma = (log_high - log_low) / (2.0 * normal95confidencePoint);
    lognormal(r, ys, zeta, sigma);
}
/* Mixture of distributions */
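/*
 * mixture() draws N samples from a weighted mixture of the n distributions in
 * dists. The weights are normalized and turned into a cumulative distribution;
 * for each output sample, p_1 selects a distribution by inverse-CDF lookup and
 * p_2 selects one of that distribution's N precomputed samples.
 */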
void mixture(gsl_rng* r, double* dists[], double* weights, int n, double* results)
{
    /* Get cumulative, normalized weights */
    double sum_weights = sum(weights, n);
    double normalized_weights[n];
    for (int i = 0; i < n; i++) {
        normalized_weights[i] = weights[i] / sum_weights;
    }
    double cumulative_weights[n];
    cumsum(normalized_weights, cumulative_weights, n);
    /* Get N samples, drawn from the different distributions in proportion to their weights. */
    for (int i = 0; i < N; i++) {
        double p_1 = gsl_rng_uniform(r);
        double p_2 = gsl_rng_uniform(r);
        int index_found = 0;
        int index_counter = 0;
        while ((index_found == 0) && (index_counter < n)) {
            if (p_1 < cumulative_weights[index_counter]) {
                index_found = 1;
            } else {
                index_counter++;
            }
        }
        if (index_found == 0) {
            /* Should not normally happen, since the last cumulative weight is ~1;
               if it does, results[i] is left unset. */
            printf("\nThis shouldn't be able to happen\n");
            // gsl_rng_free(r);
            // abort(); // this shouldn't have happened.
        } else {
            int sample_index = (int)floor(p_2 * N);
            results[i] = dists[index_counter][sample_index];
        }
    }
}
/* Main */
int main(void)
{
    // Start clock
    clock_t start, end;
    start = clock();
    /* Initialize GNU Scientific Library (GSL) stuff */
    const gsl_rng_type* T;
    gsl_rng* r;
    // gsl_rng_env_setup();
    T = gsl_rng_default;
    r = gsl_rng_alloc(T);
    /* Toy example */
    /* Declare variables in play */
    double p_a, p_b, p_c;
    double dist_none[N], dist_one[N], dist_few[N], dist_many[N], dist_mixture[N];
    /* Initialize variables */
    p_a = 0.8;
    p_b = 0.5;
    p_c = p_a * p_b;
    fill(dist_none, 0);
    fill(dist_one, 1);
    to(r, dist_few, 1, 3);
    to(r, dist_many, 2, 10);
    /* Generate mixture */
    int n = 4;
    double weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
    double* dists[] = { dist_none, dist_one, dist_few, dist_many };
    mixture(r, dists, weights, n, dist_mixture);
    printf("%f\n", sum(dist_mixture, N) / N);
    /* Clean up GSL */
    gsl_rng_free(r);
    // End clock
    end = clock();
    printf("Total time (ms): %f\n", ((double)(end - start)) / CLOCKS_PER_SEC * 1000);
    /* Return success */
    return EXIT_SUCCESS;
}