forked from personal/squiggle.c
		
	add many more comments, start adding to header file.
This commit is contained in:
		
							parent
							
								
									79dfcf79db
								
							
						
					
					
						commit
						56ab018469
					
				| 
						 | 
					@ -344,7 +344,9 @@ It emits one warning about something I already took care of, so by default I've
 | 
				
			||||||
- [x] Add a few functions for doing simple algebra on normals, and lognormals
 | 
					- [x] Add a few functions for doing simple algebra on normals, and lognormals
 | 
				
			||||||
  - [x] Add prototypes
 | 
					  - [x] Add prototypes
 | 
				
			||||||
  - [x] Use named structs
 | 
					  - [x] Use named structs
 | 
				
			||||||
 | 
					  - [x] Add to header file
 | 
				
			||||||
  - [ ] Test results
 | 
					  - [ ] Test results
 | 
				
			||||||
  - [ ] Provide example
 | 
					  - [ ] Provide example
 | 
				
			||||||
  - [ ] Add to headers
 | 
					  - [ ] Add conversion between 90%ci and parameters.
 | 
				
			||||||
  - [ ] Move to own file
 | 
					  - [ ] Move to own file? Or signpost in file?
 | 
				
			||||||
 | 
					- [ ] Disambiguate sample_laplace--successes vs failures || successes vs total trials as two distinct and differently named functions
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										85
									
								
								squiggle.c
									
									
									
									
									
								
							
							
						
						
									
										85
									
								
								squiggle.c
									
									
									
									
									
								
							| 
						 | 
					@ -7,15 +7,22 @@
 | 
				
			||||||
#include <sys/types.h>
 | 
					#include <sys/types.h>
 | 
				
			||||||
#include <time.h>
 | 
					#include <time.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Some error niceties; these won't be used until later
 | 
				
			||||||
#define MAX_ERROR_LENGTH 500
 | 
					#define MAX_ERROR_LENGTH 500
 | 
				
			||||||
#define EXIT_ON_ERROR 0
 | 
					#define EXIT_ON_ERROR 0
 | 
				
			||||||
#define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
 | 
					#define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
const double PI = 3.14159265358979323846; // M_PI in gcc gnu99
 | 
					const double PI = 3.14159265358979323846; // M_PI in gcc gnu99
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// # Key functionality
 | 
				
			||||||
 | 
					// Define the minimum number of functions needed to do simple estimation
 | 
				
			||||||
 | 
					// Starts here, ends until the end of the mixture function
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Pseudo Random number generator
 | 
					// Pseudo Random number generator
 | 
				
			||||||
uint64_t xorshift32(uint32_t* seed)
 | 
					uint64_t xorshift32(uint32_t* seed)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					    // The reader isn't expected to understand this code immediately;
 | 
				
			||||||
 | 
					    // read the linked Wikipedia page!
 | 
				
			||||||
    // Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
 | 
					    // Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
 | 
				
			||||||
    // See <https://stackoverflow.com/questions/53886131/how-does-xorshift64-works>
 | 
					    // See <https://stackoverflow.com/questions/53886131/how-does-xorshift64-works>
 | 
				
			||||||
    // https://en.wikipedia.org/wiki/Xorshift
 | 
					    // https://en.wikipedia.org/wiki/Xorshift
 | 
				
			||||||
| 
						 | 
					@ -71,17 +78,40 @@ double sample_lognormal(double logmean, double logstd, uint64_t* seed)
 | 
				
			||||||
    return exp(sample_normal(logmean, logstd, seed));
 | 
					    return exp(sample_normal(logmean, logstd, seed));
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline double sample_normal_from_95_confidence_interval(double low, double high, uint64_t* seed){
 | 
				
			||||||
 | 
					    // Explanation of key idea:
 | 
				
			||||||
 | 
					    // 1. We know that the 90% confidence interval of the unit normal is 
 | 
				
			||||||
 | 
					    // [-1.6448536269514722, 1.6448536269514722]
 | 
				
			||||||
 | 
					    // see e.g.: https://stackoverflow.com/questions/20626994/how-to-calculate-the-inverse-of-the-normal-cumulative-distribution-function-in-p
 | 
				
			||||||
 | 
					    // 2. So if we take a unit normal and multiply it by
 | 
				
			||||||
 | 
					    // L / 1.6448536269514722, its new 90% confidence interval will be 
 | 
				
			||||||
 | 
					    // [-L, L], i.e., length 2 * L
 | 
				
			||||||
 | 
					    // 3. Instead, if we want to get a confidence interval of length L, 
 | 
				
			||||||
 | 
					    // we should multiply the unit normal by 
 | 
				
			||||||
 | 
					    // L / (2 * 1.6448536269514722)
 | 
				
			||||||
 | 
					    // Meaning that its standard deviation should be multiplied by that amount
 | 
				
			||||||
 | 
					    // see: https://en.wikipedia.org/wiki/Normal_distribution?lang=en#Operations_on_a_single_normal_variable
 | 
				
			||||||
 | 
					    // 4. So we have learnt that Normal(0, L / (2 * 1.6448536269514722))
 | 
				
			||||||
 | 
					    // has a 90% confidence interval of length L
 | 
				
			||||||
 | 
					    // 5. If we want a 90% confidence interval from high to low, 
 | 
				
			||||||
 | 
					    // we can set mean = (high + low)/2; the midpoint, and L = high-low, 
 | 
				
			||||||
 | 
					    // Normal([high + low]/2, [high - low]/(2 * 1.6448536269514722))
 | 
				
			||||||
 | 
					    const double NORMAL95CONFIDENCE = 1.6448536269514722;
 | 
				
			||||||
 | 
					    double mean = (high + low) / 2.0;
 | 
				
			||||||
 | 
					    double std = (high - low) / (2.0 * NORMAL95CONFIDENCE );
 | 
				
			||||||
 | 
					    return sample_normal(mean, std);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
double sample_to(double low, double high, uint64_t* seed)
 | 
					double sample_to(double low, double high, uint64_t* seed)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    // Given a (positive) 90% confidence interval,
 | 
					    // Given a (positive) 90% confidence interval,
 | 
				
			||||||
    // returns a sample from a lognormal
 | 
					    // returns a sample from a lognorma with a matching 90% c.i.
 | 
				
			||||||
    // with a matching 90% c.i.
 | 
					    // Key idea: If we want a lognormal with 90% confidence interval [a, b]
 | 
				
			||||||
    const double NORMAL95CONFIDENCE = 1.6448536269514722;
 | 
					    // we need but get a normal with 90% confidence interval [log(a), log(b)].
 | 
				
			||||||
 | 
					    // Then see code for sample_normal_from_95_confidence_interval
 | 
				
			||||||
    double loglow = logf(low);
 | 
					    double loglow = logf(low);
 | 
				
			||||||
    double loghigh = logf(high);
 | 
					    double loghigh = logf(high);
 | 
				
			||||||
    double logmean = (loglow + loghigh) / 2;
 | 
					    return exp(sample_normal_from_95_confidence_interval(loglow, loghigh));
 | 
				
			||||||
    double logstd = (loghigh - loglow) / (2.0 * NORMAL95CONFIDENCE);
 | 
					 | 
				
			||||||
    return sample_lognormal(logmean, logstd, seed);
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
double sample_gamma(double alpha, uint64_t* seed)
 | 
					double sample_gamma(double alpha, uint64_t* seed)
 | 
				
			||||||
| 
						 | 
					@ -129,13 +159,14 @@ double sample_gamma(double alpha, uint64_t* seed)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
double sample_beta(double a, double b, uint64_t* seed)
 | 
					double sample_beta(double a, double b, uint64_t* seed)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					    // See: https://en.wikipedia.org/wiki/Gamma_distribution#Related_distributions
 | 
				
			||||||
    double gamma_a = sample_gamma(a, seed);
 | 
					    double gamma_a = sample_gamma(a, seed);
 | 
				
			||||||
    double gamma_b = sample_gamma(b, seed);
 | 
					    double gamma_b = sample_gamma(b, seed);
 | 
				
			||||||
    return gamma_a / (gamma_a + gamma_b);
 | 
					    return gamma_a / (gamma_a + gamma_b);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
double sample_laplace(double successes, double failures, uint64_t* seed){
 | 
					double sample_laplace(double successes, double failures, uint64_t* seed){
 | 
				
			||||||
	// see <https://wikiless.esmailelbob.xyz/wiki/Beta_distribution?lang=en#Rule_of_succession>
 | 
						// see <https://en.wikipedia.org/wiki/Beta_distribution?lang=en#Rule_of_succession>
 | 
				
			||||||
	return sample_beta(successes + 1, failures + 1, seed);
 | 
						return sample_beta(successes + 1, failures + 1, seed);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -177,8 +208,7 @@ double array_std(double* array, int length)
 | 
				
			||||||
// Mixture function
 | 
					// Mixture function
 | 
				
			||||||
double sample_mixture(double (*samplers[])(uint64_t*), double* weights, int n_dists, uint64_t* seed)
 | 
					double sample_mixture(double (*samplers[])(uint64_t*), double* weights, int n_dists, uint64_t* seed)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    // You can see a simpler version of this function in the git history
 | 
					    // Sample from samples with frequency proportional to their weights.
 | 
				
			||||||
    // or in C-02-better-algorithm-one-thread/
 | 
					 | 
				
			||||||
    double sum_weights = array_sum(weights, n_dists);
 | 
					    double sum_weights = array_sum(weights, n_dists);
 | 
				
			||||||
    double* cumsummed_normalized_weights = (double*)malloc(n_dists * sizeof(double));
 | 
					    double* cumsummed_normalized_weights = (double*)malloc(n_dists * sizeof(double));
 | 
				
			||||||
    cumsummed_normalized_weights[0] = weights[0] / sum_weights;
 | 
					    cumsummed_normalized_weights[0] = weights[0] / sum_weights;
 | 
				
			||||||
| 
						 | 
					@ -203,7 +233,11 @@ double sample_mixture(double (*samplers[])(uint64_t*), double* weights, int n_di
 | 
				
			||||||
    return result;
 | 
					    return result;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Sample from an arbitrary cdf
 | 
					// # More cool stuff
 | 
				
			||||||
 | 
					// This is no longer necessary to do basic estimation,
 | 
				
			||||||
 | 
					// but is still cool
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// ## Sample from an arbitrary cdf
 | 
				
			||||||
struct box {
 | 
					struct box {
 | 
				
			||||||
    int empty;
 | 
					    int empty;
 | 
				
			||||||
    double content;
 | 
					    double content;
 | 
				
			||||||
| 
						 | 
					@ -436,45 +470,52 @@ struct c_i get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* se
 | 
				
			||||||
    return result;
 | 
					    return result;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// # Small algebra system
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Do algebra over lognormals and normals
 | 
					// Do algebra over lognormals and normals
 | 
				
			||||||
typedef struct {
 | 
					// here I discover named structs, 
 | 
				
			||||||
 | 
					// which mean that I don't have to be typing
 | 
				
			||||||
 | 
					// struct blah all the time.
 | 
				
			||||||
 | 
					typedef struct normal_params_t {
 | 
				
			||||||
    double mean;
 | 
					    double mean;
 | 
				
			||||||
    double std;
 | 
					    double std;
 | 
				
			||||||
} normal_parameters;
 | 
					} normal_params;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct {
 | 
					typedef struct lognormal_params_t {
 | 
				
			||||||
    double logmean;
 | 
					    double logmean;
 | 
				
			||||||
    double logstd;
 | 
					    double logstd;
 | 
				
			||||||
} lognormal_parameters;
 | 
					} lognormal_params;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
normal_parameters algebra_sum_normals(normal_parameters a, normal_parameters b)
 | 
					normal_params algebra_sum_normals(normal_params a, normal_params b)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    normal_parameters result = {
 | 
					    normal_params result = {
 | 
				
			||||||
        .mean = a.mean + b.mean,
 | 
					        .mean = a.mean + b.mean,
 | 
				
			||||||
        .std = sqrt((a.std * a.std) + (b.std * b.std)),
 | 
					        .std = sqrt((a.std * a.std) + (b.std * b.std)),
 | 
				
			||||||
    };
 | 
					    };
 | 
				
			||||||
    return result;
 | 
					    return result;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
normal_parameters algebra_shift_normal(normal_parameters a, double shift)
 | 
					normal_params algebra_shift_normal(normal_params a, double shift)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    normal_parameters result = {
 | 
					    normal_params result = {
 | 
				
			||||||
        .mean = a.mean + shift,
 | 
					        .mean = a.mean + shift,
 | 
				
			||||||
        .std = a.std,
 | 
					        .std = a.std,
 | 
				
			||||||
    };
 | 
					    };
 | 
				
			||||||
    return result;
 | 
					    return result;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
lognormal_parameters algebra_product_lognormals(lognormal_parameters a, lognormal_parameters b)
 | 
					// Also add stretching
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params b)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    lognormal_parameters result = {
 | 
					    lognormal_params result = {
 | 
				
			||||||
        .logmean = a.logmean + b.logmean,
 | 
					        .logmean = a.logmean + b.logmean,
 | 
				
			||||||
        .logstd = sqrt((a.logstd * a.logstd) + (b.logstd * b.logstd)),
 | 
					        .logstd = sqrt((a.logstd * a.logstd) + (b.logstd * b.logstd)),
 | 
				
			||||||
    };
 | 
					    };
 | 
				
			||||||
    return result;
 | 
					    return result;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
lognormal_parameters algebra_scale_lognormal(lognormal_parameters a, double k)
 | 
					lognormal_params algebra_scale_lognormal(lognormal_params a, double k)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    lognormal_parameters result = {
 | 
					    lognormal_params result = {
 | 
				
			||||||
        .logmean = a.logmean + k,
 | 
					        .logmean = a.logmean + k,
 | 
				
			||||||
        .logstd = a.logstd,
 | 
					        .logstd = a.logstd,
 | 
				
			||||||
    };
 | 
					    };
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										20
									
								
								squiggle.h
									
									
									
									
									
								
							
							
						
						
									
										20
									
								
								squiggle.h
									
									
									
									
									
								
							| 
						 | 
					@ -58,4 +58,24 @@ struct c_i {
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
struct c_i get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed);
 | 
					struct c_i get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// small algebra system
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct normal_params_t {
 | 
				
			||||||
 | 
					    double mean;
 | 
				
			||||||
 | 
					    double std;
 | 
				
			||||||
 | 
					} normal_params;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct lognormal_params_t {
 | 
				
			||||||
 | 
					    double logmean;
 | 
				
			||||||
 | 
					    double logstd;
 | 
				
			||||||
 | 
					} lognormal_params;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					normal_params algebra_sum_normals(normal_params a, normal_params b);
 | 
				
			||||||
 | 
					normal_params algebra_shift_normal(normal_params a, double shift);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params b);
 | 
				
			||||||
 | 
					lognormal_params algebra_scale_lognormal(lognormal_params a, double k);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user