2023-11-29 23:21:24 +00:00
# include "squiggle.h"
2023-11-18 20:25:12 +00:00
# include <float.h>
2023-11-29 23:17:41 +00:00
# include <limits.h>
2023-11-29 23:21:24 +00:00
# include <math.h>
2023-11-18 23:54:31 +00:00
# include <omp.h>
2023-11-18 20:25:12 +00:00
# include <stdint.h>
# include <stdio.h>
# include <stdlib.h>
2023-12-14 13:02:27 +00:00
# include <string.h> // memcpy
2023-11-18 21:00:02 +00:00
2024-01-12 18:02:41 +00:00
/* Cache optimizations */
2024-01-11 23:11:01 +00:00
# define CACHE_LINE_SIZE 64
2024-01-12 18:02:41 +00:00
// getconf LEVEL1_DCACHE_LINESIZE
// <https://stackoverflow.com/questions/794632/programmatically-get-the-cache-line-size>
2024-01-11 23:23:01 +00:00
typedef struct seed_cache_box_t {
2024-01-12 15:52:56 +00:00
uint64_t seed ;
2024-01-12 18:02:41 +00:00
char padding [ CACHE_LINE_SIZE - sizeof ( uint64_t ) ] ;
// Cache line size is 64 *bytes*, uint64_t is 64 *bits* (8 bytes). Different units!
2024-01-11 23:23:01 +00:00
} seed_cache_box ;
2024-01-11 23:33:46 +00:00
// This avoids "false sharing", i.e., different threads competing for the same cache line
2024-01-12 18:02:41 +00:00
// Dealing with this shaves 4ms from a 12ms process, or a third of runtime
// <http://www.nic.uoregon.edu/~khuck/ts/acumem-report/manual_html/ch06s07.html>
2024-01-11 23:11:01 +00:00
2024-01-12 18:02:41 +00:00
/* Parallel sampler */
2023-11-29 23:21:24 +00:00
void sampler_parallel ( double ( * sampler ) ( uint64_t * seed ) , double * results , int n_threads , int n_samples )
{
2023-11-29 23:32:18 +00:00
2023-12-03 18:25:35 +00:00
// Terms of the division:
// a = b * quotient + reminder
// a = b * (a/b) + (a%b)
2023-11-29 23:32:18 +00:00
// dividend: a
// divisor: b
2023-12-03 18:25:35 +00:00
// quotient = a/b
// reminder = a%b
// "divisor's multiple" := b*(a/b)
2023-11-29 23:32:18 +00:00
// now, we have n_samples and n_threads
// to make our life easy, each thread will have a number of samples of: a/b (quotient)
// and we'll compute the remainder of samples separately
// to possibly do by Jorge: improve so that the remainder is included in the threads
int quotient = n_samples / n_threads ;
int divisor_multiple = quotient * n_threads ;
2024-01-11 23:11:01 +00:00
// uint64_t** seeds = malloc((size_t)n_threads * sizeof(uint64_t*));
2024-01-12 22:55:09 +00:00
seed_cache_box * cache_box = ( seed_cache_box * ) malloc ( sizeof ( seed_cache_box ) * ( size_t ) n_threads ) ;
2024-01-12 18:08:12 +00:00
// seed_cache_box cache_box[n_threads]; // we could use the C stack. On normal linux machines, it's 8MB ($ ulimit -s). However, it doesn't quite feel right.
2023-11-29 23:32:18 +00:00
srand ( 1 ) ;
2023-12-09 17:31:32 +00:00
for ( int i = 0 ; i < n_threads ; i + + ) {
2023-11-29 23:32:18 +00:00
// Constraints:
// - xorshift can't start with 0
// - the seeds should be reasonably separated and not correlated
2024-01-12 15:52:56 +00:00
cache_box [ i ] . seed = ( uint64_t ) rand ( ) * ( UINT64_MAX / RAND_MAX ) ;
2023-11-29 23:32:18 +00:00
// Other initializations tried:
// *seeds[i] = 1 + i;
// *seeds[i] = (i + 0.5)*(UINT64_MAX/n_threads);
// *seeds[i] = (i + 0.5)*(UINT64_MAX/n_threads) + constant * i;
2023-11-29 23:21:24 +00:00
}
2023-11-29 22:24:42 +00:00
2023-11-29 23:21:24 +00:00
int i ;
2024-01-12 23:50:27 +00:00
# pragma omp parallel private(i)
2023-11-29 23:21:24 +00:00
{
2024-01-13 00:05:44 +00:00
# pragma omp for
2023-11-29 23:21:24 +00:00
for ( i = 0 ; i < n_threads ; i + + ) {
2024-01-28 16:48:51 +00:00
// It's possible I don't need the for, and could instead call omp
// in some different way and get the thread number with omp_get_thread_num()
2023-11-29 23:32:18 +00:00
int lower_bound_inclusive = i * quotient ;
int upper_bound_not_inclusive = ( ( i + 1 ) * quotient ) ; // note the < in the for loop below,
2024-01-12 23:50:27 +00:00
2023-11-29 23:32:18 +00:00
for ( int j = lower_bound_inclusive ; j < upper_bound_not_inclusive ; j + + ) {
2024-01-12 15:52:56 +00:00
results [ j ] = sampler ( & ( cache_box [ i ] . seed ) ) ;
2024-01-13 11:47:14 +00:00
/*
t starts at 0 and ends at T
at t = 0 ,
thread i accesses : results [ i * quotient + 0 ] ,
thread i + 1 acccesses : results [ ( i + 1 ) * quotient + 0 ]
at t = T
thread i accesses : results [ ( i + 1 ) * quotient - 1 ]
thread i + 1 acccesses : results [ ( i + 2 ) * quotient - 1 ]
The results [ j ] that are directly adjacent are
results [ ( i + 1 ) * quotient - 1 ] ( accessed by thread i at time T )
results [ ( i + 1 ) * quotient + 0 ] ( accessed by thread i + 1 at time 0 )
and these are themselves adjacent to
results [ ( i + 1 ) * quotient - 2 ] ( accessed by thread i at time T - 1 )
results [ ( i + 1 ) * quotient + 1 ] ( accessed by thread i + 1 at time 2 )
If T is large enough , which it is , two threads won ' t access the same
cache line at the same time .
Pictorially :
at t = 0 . . . . i . . . . . . . . . I . . . . . . . . .
at t = T . . . . . . . . . . . . . i . . . . . . . . . I
and the two never overlap
Note that results [ j ] is a double , a double has 8 bytes ( 64 bits )
8 doubles fill a cache line of 64 bytes .
So we specifically won ' t get problems as long as n_samples / n_threads > 8
n_threads is normally 16 , so n_samples > 128
Note also that this is only a problem in terms of speed , if n_samples < 128
the results are still computed , it ' ll just be slower
*/
2024-01-12 22:55:09 +00:00
}
2023-11-29 23:21:24 +00:00
}
}
2023-11-29 23:32:18 +00:00
for ( int j = divisor_multiple ; j < n_samples ; j + + ) {
2024-01-12 15:52:56 +00:00
results [ j ] = sampler ( & ( cache_box [ 0 ] . seed ) ) ;
2024-01-13 11:47:56 +00:00
// we can just reuse a seed,
2024-01-13 11:47:14 +00:00
// this isn't problematic because we;ve now stopped doing multithreading
2023-11-29 23:32:18 +00:00
}
2024-01-12 18:02:41 +00:00
2024-01-11 23:23:01 +00:00
free ( cache_box ) ;
2023-11-29 23:21:24 +00:00
}
2023-11-18 20:25:12 +00:00
2023-11-19 15:50:21 +00:00
/* Get confidence intervals, given a sampler */
2023-11-29 23:21:24 +00:00
2023-11-18 21:00:02 +00:00
typedef struct ci_t {
2023-11-29 23:21:24 +00:00
double low ;
double high ;
2023-11-18 21:00:02 +00:00
} ci ;
2023-11-29 23:21:24 +00:00
2024-01-12 23:50:27 +00:00
inline static void swp ( int i , int j , double xs [ ] )
2023-11-29 23:10:22 +00:00
{
2023-11-29 23:21:24 +00:00
double tmp = xs [ i ] ;
xs [ i ] = xs [ j ] ;
xs [ j ] = tmp ;
2023-11-29 22:51:58 +00:00
}
2023-11-29 23:21:24 +00:00
static int partition ( int low , int high , double xs [ ] , int length )
2023-11-29 23:10:22 +00:00
{
2023-12-09 19:00:43 +00:00
if ( low > high | | high > = length ) {
2023-12-09 17:59:41 +00:00
printf ( " Invariant violated for function partition in %s (%d) " , __FILE__ , __LINE__ ) ;
exit ( 1 ) ;
}
2023-12-03 18:25:35 +00:00
// Note: the scratchpad/ folder in commit 578bfa27 has printfs sprinkled throughout
2023-12-09 19:00:43 +00:00
int pivot = low + ( int ) floor ( ( high - low ) / 2 ) ;
2023-11-29 23:21:24 +00:00
double pivot_value = xs [ pivot ] ;
swp ( pivot , high , xs ) ;
int gt = low ; /* This pointer will iterate until finding an element which is greater than the pivot. Then it will move elements that are smaller before it--more specifically, it will move elements to its position and then increment. As a result all elements between gt and i will be greater than the pivot. */
for ( int i = low ; i < high ; i + + ) {
if ( xs [ i ] < pivot_value ) {
swp ( gt , i , xs ) ;
gt + + ;
}
2023-11-29 22:51:58 +00:00
}
2023-11-29 23:21:24 +00:00
swp ( high , gt , xs ) ;
return gt ;
}
2023-11-29 22:24:42 +00:00
2023-11-29 23:21:24 +00:00
static double quickselect ( int k , double xs [ ] , int n )
{
// https://en.wikipedia.org/wiki/Quickselect
2023-12-14 13:02:27 +00:00
2024-01-12 22:55:09 +00:00
double * ys = malloc ( ( size_t ) n * sizeof ( double ) ) ;
2023-12-14 13:02:27 +00:00
memcpy ( ys , xs , ( size_t ) n * sizeof ( double ) ) ;
2024-01-11 23:11:01 +00:00
// ^: don't rearrange item order in the original array
2023-12-14 13:02:27 +00:00
2023-11-29 23:21:24 +00:00
int low = 0 ;
int high = n - 1 ;
for ( ; ; ) {
if ( low = = high ) {
2023-12-14 13:02:27 +00:00
double result = ys [ low ] ;
free ( ys ) ;
return result ;
2023-11-29 23:21:24 +00:00
}
2023-12-14 13:02:27 +00:00
int pivot = partition ( low , high , ys , n ) ;
2023-11-29 23:21:24 +00:00
if ( pivot = = k ) {
2023-12-14 13:02:27 +00:00
double result = ys [ pivot ] ;
free ( ys ) ;
return result ;
2023-11-29 23:21:24 +00:00
} else if ( k < pivot ) {
high = pivot - 1 ;
} else {
low = pivot + 1 ;
}
}
}
ci array_get_ci ( ci interval , double * xs , int n )
{
2023-12-09 19:00:43 +00:00
int low_k = ( int ) floor ( interval . low * n ) ;
int high_k = ( int ) ceil ( interval . high * n ) ;
2023-11-29 23:17:41 +00:00
ci result = {
2023-11-29 23:21:24 +00:00
. low = quickselect ( low_k , xs , n ) ,
. high = quickselect ( high_k , xs , n ) ,
} ;
return result ;
}
ci array_get_90_ci ( double xs [ ] , int n )
{
return array_get_ci ( ( ci ) { . low = 0.05 , . high = 0.95 } , xs , n ) ;
}
2024-01-30 12:27:54 +00:00
double array_get_median ( double xs [ ] , int n ) {
int median_k = ( int ) floor ( 0.5 * n ) ;
2024-01-30 12:28:17 +00:00
return quickselect ( median_k , xs , n ) ;
2024-01-30 12:27:54 +00:00
}
void array_print_stats ( double xs [ ] , int n ) {
ci ci_90 = array_get_ci ( ( ci ) { . low = 0.05 , . high = 0.95 } , xs , n ) ;
ci ci_80 = array_get_ci ( ( ci ) { . low = 0.1 , . high = 0.9 } , xs , n ) ;
ci ci_50 = array_get_ci ( ( ci ) { . low = 0.25 , . high = 0.75 } , xs , n ) ;
double median = array_get_median ( xs , n ) ;
double mean = array_mean ( xs , n ) ;
double std = array_std ( xs , n ) ;
printf ( " Mean: %lf \n "
" Std: %lf \n "
" 5%%: %lf \n "
" 10%%: %lf \n "
" 25%%: %lf \n "
" 50%%: %lf \n "
" 75%%: %lf \n "
" 90%%: %lf \n "
" 95%%: %lf \n " ,
mean , std , ci_90 . low , ci_80 . low , ci_50 . low , median , ci_50 . high , ci_80 . high , ci_90 . high ) ;
}
2024-01-30 15:23:51 +00:00
2024-01-30 15:24:56 +00:00
void array_print_histogram ( double * xs , int n_samples , int n_bins ) {
2024-01-30 15:23:51 +00:00
// Generated with the help of an llm; there might be subtle off-by-one errors
// interface inspired by <https://github.com/red-data-tools/YouPlot>
if ( n_bins < = 0 ) {
fprintf ( stderr , " Number of bins must be a positive integer. \n " ) ;
return ;
} else if ( n_samples < = 0 ) {
fprintf ( stderr , " Number of samples must be a positive integer. \n " ) ;
return ;
}
2024-01-30 15:27:17 +00:00
int * bins = ( int * ) calloc ( ( size_t ) n_bins , sizeof ( int ) ) ;
2024-01-30 15:23:51 +00:00
if ( bins = = NULL ) {
fprintf ( stderr , " Memory allocation for bins failed. \n " ) ;
return ;
}
double min_value = xs [ 0 ] , max_value = xs [ 0 ] ;
// Find the minimum and maximum values from the samples
for ( int i = 0 ; i < n_samples ; i + + ) {
if ( xs [ i ] < min_value ) {
min_value = xs [ i ] ;
}
if ( xs [ i ] > max_value ) {
max_value = xs [ i ] ;
}
}
// Avoid division by zero for a single unique value
if ( min_value = = max_value ) {
max_value + + ;
}
// Calculate bin width
double range = max_value - min_value ;
double bin_width = range / n_bins ;
// Fill the bins with sample counts
for ( int i = 0 ; i < n_samples ; i + + ) {
int bin_index = ( int ) ( ( xs [ i ] - min_value ) / bin_width ) ;
if ( bin_index = = n_bins ) {
bin_index - - ; // Last bin includes max_value
}
bins [ bin_index ] + + ;
}
// Calculate the scaling factor based on the maximum bin count
int max_bin_count = 0 ;
for ( int i = 0 ; i < n_bins ; i + + ) {
if ( bins [ i ] > max_bin_count ) {
max_bin_count = bins [ i ] ;
}
}
const int MAX_WIDTH = 50 ; // Adjust this to your terminal width
double scale = max_bin_count > MAX_WIDTH ? ( double ) MAX_WIDTH / max_bin_count : 1.0 ;
// Print the histogram
for ( int i = 0 ; i < n_bins ; i + + ) {
double bin_start = min_value + i * bin_width ;
double bin_end = bin_start + bin_width ;
2024-01-30 17:27:00 +00:00
if ( bin_width < 0.01 ) {
printf ( " [%4.3f, %4.3f): " , bin_start , bin_end ) ;
} else if ( bin_width < 0.1 ) {
printf ( " [%4.2f, %4.2f): " , bin_start , bin_end ) ;
} else if ( bin_width < 1 ) {
printf ( " [%4.1f, %4.1f): " , bin_start , bin_end ) ;
} else if ( bin_width < 10 ) {
printf ( " [%4.0f, %4.0f): " , bin_start , bin_end ) ;
} else {
printf ( " [%4f, %4f): " , bin_start , bin_end ) ;
}
// number of decimals could depend on the number of bins
// or on the size of the smallest bucket
2024-01-30 15:23:51 +00:00
int marks = ( int ) ( bins [ i ] * scale ) ;
for ( int j = 0 ; j < marks ; j + + ) {
printf ( " █ " ) ;
}
printf ( " %d \n " , bins [ i ] ) ;
}
// Free the allocated memory for bins
free ( bins ) ;
}
2024-01-30 12:27:54 +00:00
// Replicate some of the above functions over samplers
// However, in the future I'll delete this
// There should be a clear boundary between working with samplers and working with an array of samples
2023-11-29 23:21:24 +00:00
ci sampler_get_ci ( ci interval , double ( * sampler ) ( uint64_t * ) , int n , uint64_t * seed )
{
2023-12-09 17:59:41 +00:00
UNUSED ( seed ) ; // don't want to use it right now, but want to preserve ability to do so (e.g., remove parallelism from internals). Also nicer for consistency.
2023-12-09 19:00:43 +00:00
double * xs = malloc ( ( size_t ) n * sizeof ( double ) ) ;
2023-11-29 23:28:31 +00:00
sampler_parallel ( sampler , xs , 16 , n ) ;
2023-11-29 23:21:24 +00:00
ci result = array_get_ci ( interval , xs , n ) ;
free ( xs ) ;
return result ;
}
ci sampler_get_90_ci ( double ( * sampler ) ( uint64_t * ) , int n , uint64_t * seed )
{
return sampler_get_ci ( ( ci ) { . low = 0.05 , . high = 0.95 } , sampler , n , seed ) ;
}
/* Algebra manipulations */
# define NORMAL90CONFIDENCE 1.6448536269514727
typedef struct normal_params_t {
double mean ;
double std ;
} normal_params ;
normal_params algebra_sum_normals ( normal_params a , normal_params b )
{
normal_params result = {
. mean = a . mean + b . mean ,
. std = sqrt ( ( a . std * a . std ) + ( b . std * b . std ) ) ,
2023-11-29 22:24:42 +00:00
} ;
2023-11-29 23:21:24 +00:00
return result ;
}
2023-11-29 22:24:42 +00:00
2023-11-29 23:21:24 +00:00
typedef struct lognormal_params_t {
double logmean ;
double logstd ;
} lognormal_params ;
lognormal_params algebra_product_lognormals ( lognormal_params a , lognormal_params b )
{
lognormal_params result = {
. logmean = a . logmean + b . logmean ,
. logstd = sqrt ( ( a . logstd * a . logstd ) + ( b . logstd * b . logstd ) ) ,
} ;
return result ;
}
lognormal_params convert_ci_to_lognormal_params ( ci x )
{
2023-12-09 18:49:20 +00:00
double loghigh = log ( x . high ) ;
double loglow = log ( x . low ) ;
2023-11-29 23:21:24 +00:00
double logmean = ( loghigh + loglow ) / 2.0 ;
double logstd = ( loghigh - loglow ) / ( 2.0 * NORMAL90CONFIDENCE ) ;
lognormal_params result = { . logmean = logmean , . logstd = logstd } ;
return result ;
}
ci convert_lognormal_params_to_ci ( lognormal_params y )
{
double h = y . logstd * NORMAL90CONFIDENCE ;
double loghigh = y . logmean + h ;
double loglow = y . logmean - h ;
ci result = { . low = exp ( loglow ) , . high = exp ( loghigh ) } ;
2023-11-18 21:00:02 +00:00
return result ;
}
2023-11-19 15:50:21 +00:00
/* Scaffolding to handle errors */
2023-11-29 23:21:24 +00:00
// We will sample from an arbitrary cdf
2023-11-19 15:50:21 +00:00
// and that operation might fail
// so we build some scaffolding here
2023-11-29 23:21:24 +00:00
# define MAX_ERROR_LENGTH 500
# define EXIT_ON_ERROR 0
# define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
2023-12-03 18:25:35 +00:00
typedef struct box_t {
2023-11-18 20:25:12 +00:00
int empty ;
double content ;
char * error_msg ;
2023-12-03 18:25:35 +00:00
} box ;
2023-11-18 20:25:12 +00:00
2023-12-03 18:25:35 +00:00
box process_error ( const char * error_msg , int should_exit , char * file , int line )
2023-11-18 20:25:12 +00:00
{
if ( should_exit ) {
2023-12-09 17:59:41 +00:00
printf ( " %s, @, in %s (%d) " , error_msg , file , line ) ;
2023-11-18 20:25:12 +00:00
exit ( 1 ) ;
} else {
char error_msg [ MAX_ERROR_LENGTH ] ;
snprintf ( error_msg , MAX_ERROR_LENGTH , " @, in %s (%d) " , file , line ) ; // NOLINT: We are being carefull here by considering MAX_ERROR_LENGTH explicitly.
2023-12-03 18:25:35 +00:00
box error = { . empty = 1 , . error_msg = error_msg } ;
2023-11-18 20:25:12 +00:00
return error ;
}
}
2023-11-19 15:50:21 +00:00
/* Invert an arbitrary cdf at a point */
// Version #1:
// - input: (cdf: double => double, p)
// - output: Box(number|error)
2023-12-03 18:25:35 +00:00
box inverse_cdf_double ( double cdf ( double ) , double p )
2023-11-18 20:25:12 +00:00
{
// given a cdf: [-Inf, Inf] => [0,1]
// returns a box with either
// x such that cdf(x) = p
// or an error
// if EXIT_ON_ERROR is set to 1, it exits instead of providing an error
double low = - 1.0 ;
double high = 1.0 ;
// 1. Make sure that cdf(low) < p < cdf(high)
int interval_found = 0 ;
2023-12-09 18:18:07 +00:00
while ( ( ! interval_found ) & & ( low > - DBL_MAX / 4 ) & & ( high < DBL_MAX / 4 ) ) {
// for floats, use FLT_MAX instead
// Note that this approach is overkill
2023-11-18 20:25:12 +00:00
// but it's also the *correct* thing to do.
int low_condition = ( cdf ( low ) < p ) ;
int high_condition = ( p < cdf ( high ) ) ;
if ( low_condition & & high_condition ) {
interval_found = 1 ;
} else if ( ! low_condition ) {
low = low * 2 ;
} else if ( ! high_condition ) {
high = high * 2 ;
}
}
if ( ! interval_found ) {
return PROCESS_ERROR ( " Interval containing the target value not found, in function inverse_cdf " ) ;
} else {
int convergence_condition = 0 ;
int count = 0 ;
while ( ! convergence_condition & & ( count < ( INT_MAX / 2 ) ) ) {
double mid = ( high + low ) / 2 ;
int mid_not_new = ( mid = = low ) | | ( mid = = high ) ;
// double width = high - low;
// if ((width < 1e-8) || mid_not_new){
if ( mid_not_new ) {
convergence_condition = 1 ;
} else {
double mid_sign = cdf ( mid ) - p ;
if ( mid_sign < 0 ) {
low = mid ;
} else if ( mid_sign > 0 ) {
high = mid ;
} else if ( mid_sign = = 0 ) {
low = mid ;
high = mid ;
}
}
}
if ( convergence_condition ) {
2023-12-03 18:25:35 +00:00
box result = { . empty = 0 , . content = low } ;
2023-11-18 20:25:12 +00:00
return result ;
} else {
return PROCESS_ERROR ( " Search process did not converge, in function inverse_cdf " ) ;
}
}
}
2023-11-29 23:21:24 +00:00
// Version #2:
2023-11-19 15:50:21 +00:00
// - input: (cdf: double => Box(number|error), p)
// - output: Box(number|error)
2023-12-03 18:25:35 +00:00
box inverse_cdf_box ( box cdf_box ( double ) , double p )
2023-11-18 20:25:12 +00:00
{
// given a cdf: [-Inf, Inf] => Box([0,1])
// returns a box with either
// x such that cdf(x) = p
// or an error
// if EXIT_ON_ERROR is set to 1, it exits instead of providing an error
double low = - 1.0 ;
double high = 1.0 ;
// 1. Make sure that cdf(low) < p < cdf(high)
int interval_found = 0 ;
2023-12-09 18:18:07 +00:00
while ( ( ! interval_found ) & & ( low > - DBL_MAX / 4 ) & & ( high < DBL_MAX / 4 ) ) {
// for floats, use FLT_MAX instead
// Note that this approach is overkill
2023-11-18 20:25:12 +00:00
// but it's also the *correct* thing to do.
2023-12-03 18:25:35 +00:00
box cdf_low = cdf_box ( low ) ;
2023-11-18 20:25:12 +00:00
if ( cdf_low . empty ) {
return PROCESS_ERROR ( cdf_low . error_msg ) ;
}
2023-12-03 18:25:35 +00:00
box cdf_high = cdf_box ( high ) ;
2023-11-18 20:25:12 +00:00
if ( cdf_high . empty ) {
return PROCESS_ERROR ( cdf_low . error_msg ) ;
}
int low_condition = ( cdf_low . content < p ) ;
int high_condition = ( p < cdf_high . content ) ;
if ( low_condition & & high_condition ) {
interval_found = 1 ;
} else if ( ! low_condition ) {
low = low * 2 ;
} else if ( ! high_condition ) {
high = high * 2 ;
}
}
if ( ! interval_found ) {
return PROCESS_ERROR ( " Interval containing the target value not found, in function inverse_cdf " ) ;
} else {
int convergence_condition = 0 ;
int count = 0 ;
while ( ! convergence_condition & & ( count < ( INT_MAX / 2 ) ) ) {
double mid = ( high + low ) / 2 ;
int mid_not_new = ( mid = = low ) | | ( mid = = high ) ;
// double width = high - low;
if ( mid_not_new ) {
// if ((width < 1e-8) || mid_not_new){
convergence_condition = 1 ;
} else {
2023-12-03 18:25:35 +00:00
box cdf_mid = cdf_box ( mid ) ;
2023-11-18 20:25:12 +00:00
if ( cdf_mid . empty ) {
return PROCESS_ERROR ( cdf_mid . error_msg ) ;
}
double mid_sign = cdf_mid . content - p ;
if ( mid_sign < 0 ) {
low = mid ;
} else if ( mid_sign > 0 ) {
high = mid ;
} else if ( mid_sign = = 0 ) {
low = mid ;
high = mid ;
}
}
}
if ( convergence_condition ) {
2023-12-03 18:25:35 +00:00
box result = { . empty = 0 , . content = low } ;
2023-11-18 20:25:12 +00:00
return result ;
} else {
return PROCESS_ERROR ( " Search process did not converge, in function inverse_cdf " ) ;
}
}
}
2023-11-19 15:50:21 +00:00
/* Sample from an arbitrary cdf */
// Before: invert an arbitrary cdf at a point
// Now: from an arbitrary cdf, get a sample
2023-12-03 18:25:35 +00:00
box sampler_cdf_box ( box cdf ( double ) , uint64_t * seed )
2023-11-18 20:25:12 +00:00
{
double p = sample_unit_uniform ( seed ) ;
2023-12-03 18:25:35 +00:00
box result = inverse_cdf_box ( cdf , p ) ;
2023-11-18 20:25:12 +00:00
return result ;
}
2023-12-03 18:25:35 +00:00
box sampler_cdf_double ( double cdf ( double ) , uint64_t * seed )
2023-11-18 20:25:12 +00:00
{
double p = sample_unit_uniform ( seed ) ;
2023-12-03 18:25:35 +00:00
box result = inverse_cdf_double ( cdf , p ) ;
2023-11-18 20:25:12 +00:00
return result ;
}
2023-12-03 18:25:35 +00:00
double sampler_cdf_danger ( box cdf ( double ) , uint64_t * seed )
2023-11-18 20:25:12 +00:00
{
double p = sample_unit_uniform ( seed ) ;
2023-12-03 18:25:35 +00:00
box result = inverse_cdf_box ( cdf , p ) ;
2023-11-29 23:21:24 +00:00
if ( result . empty ) {
exit ( 1 ) ;
} else {
return result . content ;
}
2023-11-29 23:17:41 +00:00
}
2023-11-29 23:21:24 +00:00
/* array print: potentially useful for debugging */
void array_print ( double xs [ ] , int n )
2023-11-29 23:17:41 +00:00
{
2023-11-29 23:21:24 +00:00
printf ( " [ " ) ;
for ( int i = 0 ; i < n - 1 ; i + + ) {
printf ( " %f, " , xs [ i ] ) ;
2023-11-18 23:54:31 +00:00
}
2023-11-29 23:21:24 +00:00
printf ( " %f " , xs [ n - 1 ] ) ;
printf ( " ] \n " ) ;
2023-11-18 23:54:31 +00:00
}