158 lines
3.9 KiB
C
158 lines
3.9 KiB
C
#include <gsl/gsl_randist.h>
|
|
#include <gsl/gsl_rng.h>
|
|
#include <math.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <time.h>
|
|
|
|
#define N 1000000
|
|
/*
|
|
* For very high values of N, you will want to increase the maximum stack trace, otherwise you will suffer a segmentation fault
|
|
* In Ubuntu/bash you can do this with $ ulimit -Ss 256000 ## ~256Mbs
|
|
* And confirm it with $ ulimit -a
|
|
*/
|
|
|
|
/* Helpers */
|
|
void print(double* ys)
|
|
{
|
|
for (int i = 0; i < N; i++) {
|
|
printf("%f\n", ys[i]);
|
|
}
|
|
printf("\n");
|
|
}
|
|
|
|
void fill(double* ys, float f)
|
|
{
|
|
for (int i = 0; i < N; i++) {
|
|
ys[i] = f;
|
|
}
|
|
}
|
|
|
|
double sum(double* ps, int n)
|
|
{
|
|
double result = 0;
|
|
for (int i = 0; i < n; i++) {
|
|
result += ps[i];
|
|
}
|
|
return (result);
|
|
}
|
|
|
|
void cumsum(double* ps, double* rs, int n)
|
|
{
|
|
double counter = 0;
|
|
for (int i = 0; i < n; i++) {
|
|
counter += ps[i];
|
|
rs[i] = counter;
|
|
}
|
|
}
|
|
|
|
/* Distributions*/
|
|
void normal(gsl_rng* r, double* ys, double mean, double std)
|
|
{
|
|
for (int i = 0; i < N; i++) {
|
|
ys[i] = mean + gsl_ran_gaussian(r, std);
|
|
}
|
|
}
|
|
|
|
void lognormal(gsl_rng* r, double* ys, double zeta, double sigma)
|
|
{
|
|
for (int i = 0; i < N; i++) {
|
|
ys[i] = gsl_ran_lognormal(r, zeta, sigma);
|
|
}
|
|
}
|
|
|
|
void to(gsl_rng* r, double* ys, double low, double high)
|
|
{
|
|
double normal95confidencePoint = 1.6448536269514722;
|
|
double log_low = log(low);
|
|
double log_high = log(high);
|
|
double zeta = (log_low + log_high) / 2;
|
|
double sigma = (log_high - log_low) / (2.0 * normal95confidencePoint);
|
|
lognormal(r, ys, zeta, sigma);
|
|
}
|
|
|
|
/* Mixture of distributions */
|
|
void mixture(gsl_rng* r, double* dists[], double* weights, int n, double* results)
|
|
{
|
|
/* Get cummulative, normalized weights */
|
|
double sum_weights = sum(weights, n);
|
|
double normalized_weights[n];
|
|
for (int i = 0; i < n; i++) {
|
|
normalized_weights[i] = weights[i] / sum_weights;
|
|
}
|
|
double cummulative_weights[n];
|
|
cumsum(normalized_weights, cummulative_weights, n);
|
|
|
|
/* Get N samples, drawn from the different distributions in proportion to their weights. */
|
|
for (int i = 0; i < N; i++) {
|
|
double p_1 = gsl_rng_uniform(r);
|
|
double p_2 = gsl_rng_uniform(r);
|
|
|
|
int index_found = 0;
|
|
int index_counter = 0;
|
|
while ((index_found == 0) && (index_counter < n)) {
|
|
if (p_1 < cummulative_weights[index_counter]) {
|
|
index_found = 1;
|
|
} else {
|
|
index_counter++;
|
|
}
|
|
}
|
|
if (index_found == 0) {
|
|
printf("\nThis shouldn't be able to happen");
|
|
// gsl_rng_free (r);
|
|
// abort(); // this shouldn't have happened.
|
|
|
|
} else {
|
|
int sample_index = (int)floor(p_2 * N);
|
|
results[i] = dists[index_counter][sample_index];
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Main */
|
|
int main(void)
|
|
{
|
|
// Start clock
|
|
clock_t start, end;
|
|
start = clock();
|
|
|
|
/* Initialize GNU Statistical Library (GSL) stuff */
|
|
const gsl_rng_type* T;
|
|
gsl_rng* r;
|
|
// gsl_rng_env_setup();
|
|
T = gsl_rng_default;
|
|
r = gsl_rng_alloc(T);
|
|
|
|
/* Toy example */
|
|
/* Declare variables in play */
|
|
double p_a, p_b, p_c;
|
|
double dist_none[N], dist_one[N], dist_few[N], dist_many[N], dist_mixture[N];
|
|
|
|
/* Initialize variables */
|
|
p_a = 0.8;
|
|
p_b = 0.5;
|
|
p_c = p_a * p_b;
|
|
|
|
fill(dist_none, 0);
|
|
fill(dist_one, 1);
|
|
to(r, dist_few, 1, 3);
|
|
to(r, dist_many, 2, 10);
|
|
|
|
/* Generate mixture */
|
|
int n = 4;
|
|
double weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
|
|
double* dists[] = { dist_none, dist_one, dist_few, dist_many };
|
|
|
|
mixture(r, dists, weights, n, dist_mixture);
|
|
printf("%f\n", sum(dist_mixture, N) / N);
|
|
|
|
/* Clean up GSL */
|
|
gsl_rng_free(r);
|
|
|
|
// End clock
|
|
end = clock();
|
|
printf("Total time (ms): %f\n", ((double)(end - start)) / CLOCKS_PER_SEC * 1000);
|
|
/* Return success*/
|
|
return EXIT_SUCCESS;
|
|
}
|