2023-11-29 23:21:24 +00:00
# include "squiggle.h"
2023-11-18 20:25:12 +00:00
# include <float.h>
2023-11-29 23:17:41 +00:00
# include <limits.h>
2023-11-29 23:21:24 +00:00
# include <math.h>
2023-11-18 23:54:31 +00:00
# include <omp.h>
2023-11-18 20:25:12 +00:00
# include <stdint.h>
# include <stdio.h>
# include <stdlib.h>
2023-12-14 13:02:27 +00:00
# include <string.h> // memcpy
2023-11-18 21:00:02 +00:00
2024-01-12 18:02:41 +00:00
/* Cache optimizations */
2024-01-11 23:11:01 +00:00
# define CACHE_LINE_SIZE 64
2024-01-12 18:02:41 +00:00
// getconf LEVEL1_DCACHE_LINESIZE
// <https://stackoverflow.com/questions/794632/programmatically-get-the-cache-line-size>
2024-01-11 23:23:01 +00:00
typedef struct seed_cache_box_t {
2024-01-12 15:52:56 +00:00
uint64_t seed ;
2024-01-12 18:02:41 +00:00
char padding [ CACHE_LINE_SIZE - sizeof ( uint64_t ) ] ;
// Cache line size is 64 *bytes*, uint64_t is 64 *bits* (8 bytes). Different units!
2024-01-11 23:23:01 +00:00
} seed_cache_box ;
2024-01-11 23:33:46 +00:00
// This avoids "false sharing", i.e., different threads competing for the same cache line
2024-01-12 18:02:41 +00:00
// Dealing with this shaves 4ms from a 12ms process, or a third of runtime
// <http://www.nic.uoregon.edu/~khuck/ts/acumem-report/manual_html/ch06s07.html>
2024-01-11 23:11:01 +00:00
2024-01-12 18:02:41 +00:00
/* Parallel sampler */
2023-11-29 23:21:24 +00:00
void sampler_parallel ( double ( * sampler ) ( uint64_t * seed ) , double * results , int n_threads , int n_samples )
{
2023-11-29 23:32:18 +00:00
2023-12-03 18:25:35 +00:00
// Terms of the division:
// a = b * quotient + reminder
// a = b * (a/b) + (a%b)
2023-11-29 23:32:18 +00:00
// dividend: a
// divisor: b
2023-12-03 18:25:35 +00:00
// quotient = a/b
// reminder = a%b
// "divisor's multiple" := b*(a/b)
2023-11-29 23:32:18 +00:00
// now, we have n_samples and n_threads
// to make our life easy, each thread will have a number of samples of: a/b (quotient)
// and we'll compute the remainder of samples separately
// to possibly do by Jorge: improve so that the remainder is included in the threads
int quotient = n_samples / n_threads ;
int divisor_multiple = quotient * n_threads ;
2024-01-11 23:11:01 +00:00
// uint64_t** seeds = malloc((size_t)n_threads * sizeof(uint64_t*));
2024-01-12 22:55:09 +00:00
seed_cache_box * cache_box = ( seed_cache_box * ) malloc ( sizeof ( seed_cache_box ) * ( size_t ) n_threads ) ;
2024-01-12 18:08:12 +00:00
// seed_cache_box cache_box[n_threads]; // we could use the C stack. On normal linux machines, it's 8MB ($ ulimit -s). However, it doesn't quite feel right.
2023-11-29 23:32:18 +00:00
srand ( 1 ) ;
2023-12-09 17:31:32 +00:00
for ( int i = 0 ; i < n_threads ; i + + ) {
2023-11-29 23:32:18 +00:00
// Constraints:
// - xorshift can't start with 0
// - the seeds should be reasonably separated and not correlated
2024-01-12 15:52:56 +00:00
cache_box [ i ] . seed = ( uint64_t ) rand ( ) * ( UINT64_MAX / RAND_MAX ) ;
2023-11-29 23:32:18 +00:00
// Other initializations tried:
// *seeds[i] = 1 + i;
// *seeds[i] = (i + 0.5)*(UINT64_MAX/n_threads);
// *seeds[i] = (i + 0.5)*(UINT64_MAX/n_threads) + constant * i;
2023-11-29 23:21:24 +00:00
}
2023-11-29 22:24:42 +00:00
2023-11-29 23:21:24 +00:00
int i ;
2024-01-12 23:50:27 +00:00
# pragma omp parallel private(i)
2023-11-29 23:21:24 +00:00
{
2024-01-13 00:05:44 +00:00
# pragma omp for
2023-11-29 23:21:24 +00:00
for ( i = 0 ; i < n_threads ; i + + ) {
2024-01-28 16:48:51 +00:00
// It's possible I don't need the for, and could instead call omp
// in some different way and get the thread number with omp_get_thread_num()
2023-11-29 23:32:18 +00:00
int lower_bound_inclusive = i * quotient ;
int upper_bound_not_inclusive = ( ( i + 1 ) * quotient ) ; // note the < in the for loop below,
2024-01-12 23:50:27 +00:00
2023-11-29 23:32:18 +00:00
for ( int j = lower_bound_inclusive ; j < upper_bound_not_inclusive ; j + + ) {
2024-01-12 15:52:56 +00:00
results [ j ] = sampler ( & ( cache_box [ i ] . seed ) ) ;
2024-01-13 11:47:14 +00:00
/*
t starts at 0 and ends at T
at t = 0 ,
thread i accesses : results [ i * quotient + 0 ] ,
thread i + 1 acccesses : results [ ( i + 1 ) * quotient + 0 ]
at t = T
thread i accesses : results [ ( i + 1 ) * quotient - 1 ]
thread i + 1 acccesses : results [ ( i + 2 ) * quotient - 1 ]
The results [ j ] that are directly adjacent are
results [ ( i + 1 ) * quotient - 1 ] ( accessed by thread i at time T )
results [ ( i + 1 ) * quotient + 0 ] ( accessed by thread i + 1 at time 0 )
and these are themselves adjacent to
results [ ( i + 1 ) * quotient - 2 ] ( accessed by thread i at time T - 1 )
results [ ( i + 1 ) * quotient + 1 ] ( accessed by thread i + 1 at time 2 )
If T is large enough , which it is , two threads won ' t access the same
cache line at the same time .
Pictorially :
at t = 0 . . . . i . . . . . . . . . I . . . . . . . . .
at t = T . . . . . . . . . . . . . i . . . . . . . . . I
and the two never overlap
Note that results [ j ] is a double , a double has 8 bytes ( 64 bits )
8 doubles fill a cache line of 64 bytes .
So we specifically won ' t get problems as long as n_samples / n_threads > 8
n_threads is normally 16 , so n_samples > 128
Note also that this is only a problem in terms of speed , if n_samples < 128
the results are still computed , it ' ll just be slower
*/
2024-01-12 22:55:09 +00:00
}
2023-11-29 23:21:24 +00:00
}
}
2023-11-29 23:32:18 +00:00
for ( int j = divisor_multiple ; j < n_samples ; j + + ) {
2024-01-12 15:52:56 +00:00
results [ j ] = sampler ( & ( cache_box [ 0 ] . seed ) ) ;
2024-01-13 11:47:56 +00:00
// we can just reuse a seed,
2024-01-13 11:47:14 +00:00
// this isn't problematic because we;ve now stopped doing multithreading
2023-11-29 23:32:18 +00:00
}
2024-01-12 18:02:41 +00:00
2024-01-11 23:23:01 +00:00
free ( cache_box ) ;
2023-11-29 23:21:24 +00:00
}
2023-11-18 20:25:12 +00:00
2023-11-19 15:50:21 +00:00
/* Get confidence intervals, given a sampler */
2023-11-29 23:21:24 +00:00
2023-11-18 21:00:02 +00:00
typedef struct ci_t {
2023-11-29 23:21:24 +00:00
double low ;
double high ;
2023-11-18 21:00:02 +00:00
} ci ;
2023-11-29 23:21:24 +00:00
2024-01-12 23:50:27 +00:00
inline static void swp ( int i , int j , double xs [ ] )
2023-11-29 23:10:22 +00:00
{
2023-11-29 23:21:24 +00:00
double tmp = xs [ i ] ;
xs [ i ] = xs [ j ] ;
xs [ j ] = tmp ;
2023-11-29 22:51:58 +00:00
}
2023-11-29 23:21:24 +00:00
static int partition ( int low , int high , double xs [ ] , int length )
2023-11-29 23:10:22 +00:00
{
2023-12-09 19:00:43 +00:00
if ( low > high | | high > = length ) {
2023-12-09 17:59:41 +00:00
printf ( " Invariant violated for function partition in %s (%d) " , __FILE__ , __LINE__ ) ;
exit ( 1 ) ;
}
2023-12-03 18:25:35 +00:00
// Note: the scratchpad/ folder in commit 578bfa27 has printfs sprinkled throughout
2023-12-09 19:00:43 +00:00
int pivot = low + ( int ) floor ( ( high - low ) / 2 ) ;
2023-11-29 23:21:24 +00:00
double pivot_value = xs [ pivot ] ;
swp ( pivot , high , xs ) ;
int gt = low ; /* This pointer will iterate until finding an element which is greater than the pivot. Then it will move elements that are smaller before it--more specifically, it will move elements to its position and then increment. As a result all elements between gt and i will be greater than the pivot. */
for ( int i = low ; i < high ; i + + ) {
if ( xs [ i ] < pivot_value ) {
swp ( gt , i , xs ) ;
gt + + ;
}
2023-11-29 22:51:58 +00:00
}
2023-11-29 23:21:24 +00:00
swp ( high , gt , xs ) ;
return gt ;
}
2023-11-29 22:24:42 +00:00
2023-11-29 23:21:24 +00:00
static double quickselect ( int k , double xs [ ] , int n )
{
// https://en.wikipedia.org/wiki/Quickselect
2023-12-14 13:02:27 +00:00
2024-01-12 22:55:09 +00:00
double * ys = malloc ( ( size_t ) n * sizeof ( double ) ) ;
2023-12-14 13:02:27 +00:00
memcpy ( ys , xs , ( size_t ) n * sizeof ( double ) ) ;
2024-01-11 23:11:01 +00:00
// ^: don't rearrange item order in the original array
2023-12-14 13:02:27 +00:00
2023-11-29 23:21:24 +00:00
int low = 0 ;
int high = n - 1 ;
for ( ; ; ) {
if ( low = = high ) {
2023-12-14 13:02:27 +00:00
double result = ys [ low ] ;
free ( ys ) ;
return result ;
2023-11-29 23:21:24 +00:00
}
2023-12-14 13:02:27 +00:00
int pivot = partition ( low , high , ys , n ) ;
2023-11-29 23:21:24 +00:00
if ( pivot = = k ) {
2023-12-14 13:02:27 +00:00
double result = ys [ pivot ] ;
free ( ys ) ;
return result ;
2023-11-29 23:21:24 +00:00
} else if ( k < pivot ) {
high = pivot - 1 ;
} else {
low = pivot + 1 ;
}
}
}
ci array_get_ci ( ci interval , double * xs , int n )
{
2023-12-09 19:00:43 +00:00
int low_k = ( int ) floor ( interval . low * n ) ;
int high_k = ( int ) ceil ( interval . high * n ) ;
2023-11-29 23:17:41 +00:00
ci result = {
2023-11-29 23:21:24 +00:00
. low = quickselect ( low_k , xs , n ) ,
. high = quickselect ( high_k , xs , n ) ,
} ;
return result ;
}
ci array_get_90_ci ( double xs [ ] , int n )
{
return array_get_ci ( ( ci ) { . low = 0.05 , . high = 0.95 } , xs , n ) ;
}
2024-01-30 12:27:54 +00:00
double array_get_median ( double xs [ ] , int n ) {
int median_k = ( int ) floor ( 0.5 * n ) ;
2024-01-30 12:28:17 +00:00
return quickselect ( median_k , xs , n ) ;
2024-01-30 12:27:54 +00:00
}
2024-02-01 19:26:37 +00:00
/* array print: potentially useful for debugging */
void array_print ( double xs [ ] , int n )
{
printf ( " [ " ) ;
for ( int i = 0 ; i < n - 1 ; i + + ) {
printf ( " %f, " , xs [ i ] ) ;
}
printf ( " %f " , xs [ n - 1 ] ) ;
printf ( " ] \n " ) ;
}
2024-01-30 12:27:54 +00:00
void array_print_stats ( double xs [ ] , int n ) {
ci ci_90 = array_get_ci ( ( ci ) { . low = 0.05 , . high = 0.95 } , xs , n ) ;
ci ci_80 = array_get_ci ( ( ci ) { . low = 0.1 , . high = 0.9 } , xs , n ) ;
ci ci_50 = array_get_ci ( ( ci ) { . low = 0.25 , . high = 0.75 } , xs , n ) ;
double median = array_get_median ( xs , n ) ;
double mean = array_mean ( xs , n ) ;
double std = array_std ( xs , n ) ;
2024-02-01 19:54:52 +00:00
printf ( " Avg: %lf \n "
" Std: %lf \n "
" 5%%: %lf \n "
" 10%%: %lf \n "
" 25%%: %lf \n "
" 50%%: %lf \n "
" 75%%: %lf \n "
" 90%%: %lf \n "
" 95%%: %lf \n " ,
2024-01-30 12:27:54 +00:00
mean , std , ci_90 . low , ci_80 . low , ci_50 . low , median , ci_50 . high , ci_80 . high , ci_90 . high ) ;
}
2024-01-30 15:23:51 +00:00
2024-01-30 15:24:56 +00:00
void array_print_histogram ( double * xs , int n_samples , int n_bins ) {
2024-01-31 14:15:56 +00:00
// Interface inspired by <https://github.com/red-data-tools/YouPlot>
2024-01-30 15:23:51 +00:00
// Generated with the help of an llm; there might be subtle off-by-one errors
2024-01-31 08:49:21 +00:00
if ( n_bins < = 1 ) {
fprintf ( stderr , " Number of bins must be greater than 1. \n " ) ;
2024-01-30 15:23:51 +00:00
return ;
2024-01-31 08:49:21 +00:00
} else if ( n_samples < = 1 ) {
fprintf ( stderr , " Number of samples must be higher than 1. \n " ) ;
2024-01-30 15:23:51 +00:00
return ;
}
2024-01-30 15:27:17 +00:00
int * bins = ( int * ) calloc ( ( size_t ) n_bins , sizeof ( int ) ) ;
2024-01-30 15:23:51 +00:00
if ( bins = = NULL ) {
fprintf ( stderr , " Memory allocation for bins failed. \n " ) ;
return ;
}
// Find the minimum and maximum values from the samples
2024-02-02 17:03:56 +00:00
double min_value = xs [ 0 ] , max_value = xs [ 0 ] ;
2024-01-30 15:23:51 +00:00
for ( int i = 0 ; i < n_samples ; i + + ) {
if ( xs [ i ] < min_value ) {
min_value = xs [ i ] ;
}
if ( xs [ i ] > max_value ) {
max_value = xs [ i ] ;
}
}
// Avoid division by zero for a single unique value
if ( min_value = = max_value ) {
max_value + + ;
}
// Calculate bin width
2024-02-02 17:03:56 +00:00
double bin_width = ( max_value - min_value ) / n_bins ;
2024-01-30 15:23:51 +00:00
// Fill the bins with sample counts
for ( int i = 0 ; i < n_samples ; i + + ) {
int bin_index = ( int ) ( ( xs [ i ] - min_value ) / bin_width ) ;
if ( bin_index = = n_bins ) {
bin_index - - ; // Last bin includes max_value
}
bins [ bin_index ] + + ;
}
// Calculate the scaling factor based on the maximum bin count
int max_bin_count = 0 ;
for ( int i = 0 ; i < n_bins ; i + + ) {
if ( bins [ i ] > max_bin_count ) {
max_bin_count = bins [ i ] ;
}
}
const int MAX_WIDTH = 50 ; // Adjust this to your terminal width
double scale = max_bin_count > MAX_WIDTH ? ( double ) MAX_WIDTH / max_bin_count : 1.0 ;
// Print the histogram
for ( int i = 0 ; i < n_bins ; i + + ) {
double bin_start = min_value + i * bin_width ;
double bin_end = bin_start + bin_width ;
2024-01-31 08:49:21 +00:00
2024-01-31 14:17:55 +00:00
int decimalPlaces = 1 ;
if ( ( 0 < bin_width ) & & ( bin_width < 1 ) ) {
int magnitude = ( int ) floor ( log10 ( bin_width ) ) ;
decimalPlaces = - magnitude ;
decimalPlaces = decimalPlaces > 10 ? 10 : decimalPlaces ;
2024-01-31 08:49:21 +00:00
}
2024-02-01 19:54:52 +00:00
printf ( " [%*.*f, %*.*f " , 4 + decimalPlaces , decimalPlaces , bin_start , 4 + decimalPlaces , decimalPlaces , bin_end ) ;
2024-01-31 08:49:21 +00:00
char interval_delimiter = ' ) ' ;
if ( i = = ( n_bins - 1 ) ) {
interval_delimiter = ' ] ' ; // last bucket is inclusive
2024-01-30 17:27:00 +00:00
}
2024-01-31 08:49:21 +00:00
printf ( " %c: " , interval_delimiter ) ;
2024-01-30 15:23:51 +00:00
int marks = ( int ) ( bins [ i ] * scale ) ;
for ( int j = 0 ; j < marks ; j + + ) {
printf ( " █ " ) ;
}
printf ( " %d \n " , bins [ i ] ) ;
}
// Free the allocated memory for bins
free ( bins ) ;
}
2024-01-31 14:19:12 +00:00
void array_print_90_ci_histogram ( double * xs , int n_samples , int n_bins ) {
2024-01-31 14:15:56 +00:00
// Code duplicated from previous function
// I'll consider simplifying it at some future point
// Possible ideas:
// - having only one function that takes any confidence interval?
// - having a utility function that is called by both functions?
2024-01-31 14:19:12 +00:00
ci ci_90 = array_get_90_ci ( xs , n_samples ) ;
2024-01-31 14:15:56 +00:00
if ( n_bins < = 1 ) {
fprintf ( stderr , " Number of bins must be greater than 1. \n " ) ;
return ;
} else if ( n_samples < = 10 ) {
fprintf ( stderr , " Number of samples must be higher than 10. \n " ) ;
return ;
}
int * bins = ( int * ) calloc ( ( size_t ) n_bins , sizeof ( int ) ) ;
if ( bins = = NULL ) {
fprintf ( stderr , " Memory allocation for bins failed. \n " ) ;
return ;
}
double min_value = ci_90 . low , max_value = ci_90 . high ;
// Avoid division by zero for a single unique value
if ( min_value = = max_value ) {
max_value + + ;
}
2024-02-02 17:03:56 +00:00
double bin_width = ( max_value - min_value ) / n_bins ;
2024-01-31 14:15:56 +00:00
// Fill the bins with sample counts
2024-01-31 14:26:20 +00:00
int below_min = 0 , above_max = 0 ;
2024-01-31 14:15:56 +00:00
for ( int i = 0 ; i < n_samples ; i + + ) {
2024-01-31 14:27:03 +00:00
if ( xs [ i ] < min_value ) {
2024-01-31 14:26:20 +00:00
below_min + + ;
2024-01-31 14:27:03 +00:00
} else if ( xs [ i ] > max_value ) {
2024-01-31 14:26:20 +00:00
above_max + + ;
} else {
2024-01-31 14:15:56 +00:00
int bin_index = ( int ) ( ( xs [ i ] - min_value ) / bin_width ) ;
if ( bin_index = = n_bins ) {
bin_index - - ; // Last bin includes max_value
}
bins [ bin_index ] + + ;
}
}
// Calculate the scaling factor based on the maximum bin count
int max_bin_count = 0 ;
for ( int i = 0 ; i < n_bins ; i + + ) {
if ( bins [ i ] > max_bin_count ) {
max_bin_count = bins [ i ] ;
}
}
const int MAX_WIDTH = 50 ; // Adjust this to your terminal width
double scale = max_bin_count > MAX_WIDTH ? ( double ) MAX_WIDTH / max_bin_count : 1.0 ;
// Print the histogram
2024-01-31 14:26:20 +00:00
int decimalPlaces = 1 ;
if ( ( 0 < bin_width ) & & ( bin_width < 1 ) ) {
int magnitude = ( int ) floor ( log10 ( bin_width ) ) ;
decimalPlaces = - magnitude ;
decimalPlaces = decimalPlaces > 10 ? 10 : decimalPlaces ;
}
2024-02-01 19:54:52 +00:00
printf ( " (-∞, %*.*f): %d \n " , 4 + decimalPlaces , decimalPlaces , min_value , below_min ) ;
2024-01-31 14:15:56 +00:00
for ( int i = 0 ; i < n_bins ; i + + ) {
double bin_start = min_value + i * bin_width ;
double bin_end = bin_start + bin_width ;
2024-02-01 19:54:52 +00:00
printf ( " [%*.*f, %*.*f " , 4 + decimalPlaces , decimalPlaces , bin_start , 4 + decimalPlaces , decimalPlaces , bin_end ) ;
2024-01-31 14:15:56 +00:00
char interval_delimiter = ' ) ' ;
if ( i = = ( n_bins - 1 ) ) {
interval_delimiter = ' ] ' ; // last bucket is inclusive
}
printf ( " %c: " , interval_delimiter ) ;
int marks = ( int ) ( bins [ i ] * scale ) ;
for ( int j = 0 ; j < marks ; j + + ) {
printf ( " █ " ) ;
}
printf ( " %d \n " , bins [ i ] ) ;
}
2024-02-01 19:54:52 +00:00
printf ( " (%*.*f, +∞): %d \n " , 4 + decimalPlaces , decimalPlaces , max_value , above_max ) ;
2024-01-31 14:15:56 +00:00
// Free the allocated memory for bins
free ( bins ) ;
}
2023-11-29 23:21:24 +00:00
/* Algebra manipulations */
# define NORMAL90CONFIDENCE 1.6448536269514727
typedef struct normal_params_t {
double mean ;
double std ;
} normal_params ;
normal_params algebra_sum_normals ( normal_params a , normal_params b )
{
normal_params result = {
. mean = a . mean + b . mean ,
. std = sqrt ( ( a . std * a . std ) + ( b . std * b . std ) ) ,
2023-11-29 22:24:42 +00:00
} ;
2023-11-29 23:21:24 +00:00
return result ;
}
2023-11-29 22:24:42 +00:00
2023-11-29 23:21:24 +00:00
typedef struct lognormal_params_t {
double logmean ;
double logstd ;
} lognormal_params ;
lognormal_params algebra_product_lognormals ( lognormal_params a , lognormal_params b )
{
lognormal_params result = {
. logmean = a . logmean + b . logmean ,
. logstd = sqrt ( ( a . logstd * a . logstd ) + ( b . logstd * b . logstd ) ) ,
} ;
return result ;
}
lognormal_params convert_ci_to_lognormal_params ( ci x )
{
2023-12-09 18:49:20 +00:00
double loghigh = log ( x . high ) ;
double loglow = log ( x . low ) ;
2023-11-29 23:21:24 +00:00
double logmean = ( loghigh + loglow ) / 2.0 ;
double logstd = ( loghigh - loglow ) / ( 2.0 * NORMAL90CONFIDENCE ) ;
lognormal_params result = { . logmean = logmean , . logstd = logstd } ;
return result ;
}
ci convert_lognormal_params_to_ci ( lognormal_params y )
{
double h = y . logstd * NORMAL90CONFIDENCE ;
double loghigh = y . logmean + h ;
double loglow = y . logmean - h ;
ci result = { . low = exp ( loglow ) , . high = exp ( loghigh ) } ;
2023-11-18 21:00:02 +00:00
return result ;
}