From c676a22ba8243998edaa9e323962f717f87f4d00 Mon Sep 17 00:00:00 2001
From: NunoSempere
Date: Wed, 31 Jan 2024 15:15:56 +0100
Subject: [PATCH] readme tweaks; add 90% histogram function

---
Reviewer note: array_print_90_ci_histogram originally referenced the
undeclared identifiers n_bins, n_samples and x. This revision uses the
declared parameters (xs, n) and a fixed local bin count. The post-image blob
id on the squiggle_more.c index line predates this revision.

 README.md       |  2 ++
 squiggle_more.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 95 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 97af0c1..76046cc 100644
--- a/README.md
+++ b/README.md
@@ -401,6 +401,8 @@ Overall, I'd describe the error handling capabilities of this library as pretty
 
 ### To do
 
+- [ ] Come up with a better headline example; fermi paradox paper is too complicated
+- [ ] Post on suckless subreddit
 - [ ] Drive in a few more real-life applications
   - [ ] US election modelling?
 - [ ] Look into using size_t instead of int for sample numbers
diff --git a/squiggle_more.c b/squiggle_more.c
index 288251e..a67054a 100644
--- a/squiggle_more.c
+++ b/squiggle_more.c
@@ -215,8 +215,8 @@ void array_print_stats(double xs[], int n){
 
 
 void array_print_histogram(double* xs, int n_samples, int n_bins) {
+    // Interface inspired by
     // Generated with the help of an llm; there might be subtle off-by-one errors
-    // interface inspired by
     if (n_bins <= 1) {
         fprintf(stderr, "Number of bins must be greater than 1.\n");
         return;
@@ -305,6 +305,98 @@ void array_print_histogram(double* xs, int n_samples, int n_bins) {
     free(bins);
 }
 
+void array_print_90_ci_histogram(double* xs, int n){
+    // Prints a text histogram of the samples in xs that lie inside the
+    // interval returned by array_get_90_ci(xs, n).
+    //   xs: array of samples
+    //   n:  number of samples in xs; must be greater than 10
+    // On too few samples or allocation failure, writes to stderr and returns.
+    //
+    // Code duplicated from previous function
+    // I'll consider simplifying it at some future point
+    // Possible ideas:
+    // - having only one function that takes any confidence interval?
+    // - having a utility function that is called by both functions?
+
+    // The signature has no bin-count parameter, so a fixed default is used
+    const int n_bins = 20;
+
+    if (n <= 10) {
+        fprintf(stderr, "Number of samples must be higher than 10.\n");
+        return;
+    }
+
+    ci ci_90 = array_get_90_ci(xs, n);
+
+    int *bins = (int*) calloc((size_t)n_bins, sizeof(int));
+    if (bins == NULL) {
+        fprintf(stderr, "Memory allocation for bins failed.\n");
+        return;
+    }
+
+    double min_value = ci_90.low, max_value = ci_90.high;
+
+    // Avoid division by zero for a single unique value
+    if (min_value == max_value) {
+        max_value++;
+    }
+
+    // Calculate bin width
+    double range = max_value - min_value;
+    double bin_width = range / n_bins;
+
+    // Fill the bins with sample counts
+    // Both endpoints are included, matching the printed [low, ..., high] labels
+    for (int i = 0; i < n; i++) {
+        if((xs[i] >= min_value) && (xs[i] <= max_value)){
+            int bin_index = (int)((xs[i] - min_value) / bin_width);
+            if (bin_index >= n_bins) {
+                bin_index = n_bins - 1; // Last bin includes max_value
+            }
+            bins[bin_index]++;
+        }
+    }
+
+    // Calculate the scaling factor based on the maximum bin count
+    int max_bin_count = 0;
+    for (int i = 0; i < n_bins; i++) {
+        if (bins[i] > max_bin_count) {
+            max_bin_count = bins[i];
+        }
+    }
+    const int MAX_WIDTH = 50; // Adjust this to your terminal width
+    double scale = max_bin_count > MAX_WIDTH ? (double)MAX_WIDTH / max_bin_count : 1.0;
+
+    // Print the histogram
+    for (int i = 0; i < n_bins; i++) {
+        double bin_start = min_value + i * bin_width;
+        double bin_end = bin_start + bin_width;
+
+        int decimalPlaces = 1;
+        if((0 < bin_width) && (bin_width < 1)){
+            int magnitude = (int) floor(log10(bin_width));
+            decimalPlaces = -magnitude;
+            decimalPlaces = decimalPlaces > 10 ? 10 : decimalPlaces;
+        }
+        printf(" [%*.*f, %*.*f", 4+decimalPlaces, decimalPlaces, bin_start, 4+decimalPlaces, decimalPlaces, bin_end);
+        char interval_delimiter = ')';
+        if(i == (n_bins-1)){
+            interval_delimiter = ']'; // last bucket is inclusive
+        }
+        printf("%c: ", interval_delimiter);
+
+        int marks = (int)(bins[i] * scale);
+        for (int j = 0; j < marks; j++) {
+            printf("█");
+        }
+        printf(" %d\n", bins[i]);
+    }
+
+    // Free the allocated memory for bins
+    free(bins);
+
+}
+
 // Replicate some of the above functions over samplers
 // However, in the future I'll delete this
 // There should be a clear boundary between working with samplers and working with an array of samples