add histogram functionality; recompile

This commit is contained in:
NunoSempere 2024-01-30 16:23:51 +01:00
parent 7bc29b9e3d
commit d70296f230
27 changed files with 1000214 additions and 4 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,2 @@
https://github.com/red-data-tools/YouPlot
Design inspired by ^

1000000
scratchpad/plotting/c/data.dat Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1 @@
# Pipe the output of ./samples into `hist` (called with the argument 200)
# and keep only the first 20 lines of what it prints.
# NOTE(review): presumably `hist` is the histogram program from this commit
# and 200 is its number_of_bins argument — confirm.
./samples | hist 200 | head -n 20

BIN
scratchpad/plotting/c/histogram Executable file

Binary file not shown.

View File

@ -0,0 +1,86 @@
#include <stdio.h>
#include <stdlib.h>
#include <float.h>
#define MAX_SAMPLES 1000000
/*
 * Read whitespace-separated numbers from stdin and print a horizontal text
 * histogram of them to stdout.
 *
 * Usage: <program> number_of_bins
 *
 * At most MAX_SAMPLES values are read; anything beyond that is ignored.
 * Reading stops at end of input or at the first token that is not a number.
 * Each output line shows the half-open bin interval, a bar scaled so that the
 * fullest bin is at most MAX_WIDTH characters wide, and the raw count.
 *
 * Exits with EXIT_FAILURE on a bad argument, allocation failure, or when no
 * numeric sample could be read.
 */
int main(int argc, char *argv[]) {
    if (argc < 2) {
        fprintf(stderr, "Usage: %s number_of_bins\n", argv[0]);
        exit(EXIT_FAILURE);
    }

    int num_bins = atoi(argv[1]);
    if (num_bins <= 0) {
        fprintf(stderr, "Number of bins must be a positive integer.\n");
        exit(EXIT_FAILURE);
    }

    // calloc so that every bin counter starts at zero.
    int *bins = calloc((size_t)num_bins, sizeof *bins);
    double *samples = malloc(MAX_SAMPLES * sizeof *samples);
    if (bins == NULL || samples == NULL) {
        fprintf(stderr, "Memory allocation failed.\n");
        free(bins);
        free(samples);
        exit(EXIT_FAILURE);
    }

    double value = 0.0, min_value = DBL_MAX, max_value = -DBL_MAX;
    int sample_count = 0;

    // Read numbers from stdin and store them into the samples array.
    // scanf returns 0 (not EOF) on a token it cannot parse and leaves that
    // token in the stream, so only a return of exactly 1 counts as a sample;
    // comparing against EOF would re-store the previous value over and over.
    while (sample_count < MAX_SAMPLES && scanf("%lf", &value) == 1) {
        samples[sample_count++] = value;
        if (value < min_value) {
            min_value = value;
        }
        if (value > max_value) {
            max_value = value;
        }
    }

    if (sample_count < MAX_SAMPLES && !feof(stdin)) {
        fprintf(stderr, "Warning: stopped reading at non-numeric input.\n");
    }

    // Without samples min/max still hold their sentinels and no sensible
    // range exists.
    if (sample_count == 0) {
        fprintf(stderr, "No numeric samples read from stdin.\n");
        free(bins);
        free(samples);
        exit(EXIT_FAILURE);
    }

    // Avoid division by zero for a single unique value
    if (min_value == max_value) {
        max_value++;
    }

    // Calculate bin width
    double range = max_value - min_value;
    double bin_width = range / num_bins;

    // Fill the bins with sample counts
    for (int i = 0; i < sample_count; i++) {
        double pos = (samples[i] - min_value) / bin_width;
        if (pos != pos) {
            continue; // NaN position (non-finite input): cannot be binned
        }
        int bin_index;
        if (pos >= (double)num_bins) {
            bin_index = num_bins - 1; // Last bin includes max_value
        } else if (pos < 0.0) {
            bin_index = 0;
        } else {
            bin_index = (int)pos;
        }
        bins[bin_index]++;
    }

    // Calculate the scaling factor based on the maximum bin count
    int max_bin_count = 0;
    for (int i = 0; i < num_bins; i++) {
        if (bins[i] > max_bin_count) {
            max_bin_count = bins[i];
        }
    }
    const int MAX_WIDTH = 50; // Adjust this to your terminal width
    double scale = max_bin_count > MAX_WIDTH ? (double)MAX_WIDTH / max_bin_count : 1.0;

    // Print the histogram
    for (int i = 0; i < num_bins; i++) {
        double bin_start = min_value + i * bin_width;
        double bin_end = bin_start + bin_width;
        printf(" [%4.1f, %4.1f): ", bin_start, bin_end);
        int marks = (int)(bins[i] * scale);
        for (int j = 0; j < marks; j++) {
            // The bar glyph was an empty string here, which drew no bar.
            printf("█");
        }
        printf(" %d\n", bins[i]);
    }

    // Free the allocated memory
    free(bins);
    free(samples);
    return 0;
}

View File

@ -1,9 +1,11 @@
reset
stats 'data.dat' nooutput
# stats '-' nooutput
# show variables all
max=STATS_max
min=STATS_min
# max=STATS_max
# min=STATS_min
min=-1
max=25
n=1000
width=(max-min)/n
@ -23,4 +25,6 @@ set tics scale 0,0,0,0
set xlabel "x"
set terminal dumb size 100, 25
plot 'data.dat' u (hist($1,width)):(1.0) smooth freq w boxes notitle
plot '-' u (hist($1,width)):(1.0) smooth freq w boxes notitle

View File

@ -0,0 +1,5 @@
# Draw a line plot as ASCII art in the terminal.
reset
# "dumb" terminal: character-cell output, 100 columns by 25 rows.
set terminal dumb size 100, 25
set autoscale
# '<cat' makes gnuplot read the data from the output of the `cat` command.
# NOTE(review): `using 0` selects pseudo-column 0 (the record index), not the
# first data column; `using 1` may have been intended — confirm.
plot '<cat' using 0 with lines notitle

View File

@ -0,0 +1,30 @@
# Draw a histogram as ASCII art in the terminal, with a fixed x range and
# bin count.
reset
# stats '-' nooutput
# show variables all
# max=STATS_max
# min=STATS_min
# Hard-coded data range; the lines above that would derive it from the data
# are commented out.
min=-1
max=25
# Number of bins and the resulting bin width.
n=1000
width=(max-min)/n
# Map x to the centre of the width-sized bin that contains it.
hist(x,width)=width*floor(x/width)+width/2.0
set xrange [min:max]
set yrange [0:45000]
# Clear tics and border, then re-enable tics with explicit placement below.
unset xtics
unset ytics
unset border
set xtics out nomirror norotate offset 0,0
set ytics out nomirror norotate
# This later setting replaces the "out" ytics setting on the previous line.
set ytics in nomirror norotate offset 0,0
set tics scale 0,0,0,0
set xlabel "x"
# "dumb" terminal: character-cell output, 100 columns by 25 rows.
set terminal dumb size 100, 25
# '<cat' makes gnuplot read the data from the output of the `cat` command;
# "smooth freq" sums the y values (1.0 each) of rows that share an x value.
# NOTE(review): hist(0,width) passes the constant 0, so every row lands in the
# same bin; the sibling script uses hist($1,width) — confirm which is intended.
plot '<cat' u (hist(0,width)):(1.0) smooth freq w boxes notitle

View File

@ -0,0 +1,2 @@
# Feed data.dat to `uplot hist` twice: once with 100 bins, once with 10.
cat data.dat | uplot hist --nbins 100
cat data.dat | uplot hist --nbins 10

View File

@ -0,0 +1,4 @@
# Install the Ruby development package via apt, then the youplot gem.
sudo apt install ruby-dev
sudo gem install youplot

View File

@ -213,6 +213,81 @@ void array_print_stats(double xs[], int n){
mean, std, ci_90.low, ci_80.low, ci_50.low, median, ci_50.high, ci_80.high, ci_90.high);
}
/*
 * Print a horizontal text histogram of xs[0 .. n_samples-1] to stdout.
 *
 * The range [min(xs), max(xs)] is divided into n_bins equal-width bins; the
 * last bin also includes the maximum value. Each output line shows the bin
 * interval, a bar scaled so that the fullest bin is at most MAX_WIDTH
 * characters wide, and the raw count.
 *
 * xs:        samples to plot; not modified.
 * n_samples: number of elements in xs; must be positive.
 * n_bins:    number of bins; must be positive.
 *
 * On invalid arguments or allocation failure a message is written to stderr
 * and nothing is printed to stdout.
 */
void print_histogram(double* xs, int n_samples, int n_bins) {
    // Generated with the help of an llm; there might be subtle off-by-one errors
    // interface inspired by <https://github.com/red-data-tools/YouPlot>
    if (n_bins <= 0) {
        fprintf(stderr, "Number of bins must be a positive integer.\n");
        return;
    } else if (n_samples <= 0) {
        fprintf(stderr, "Number of samples must be a positive integer.\n");
        return;
    }

    // calloc, not malloc: the counters are incremented below and therefore
    // must start at zero.
    int *bins = calloc((size_t)n_bins, sizeof *bins);
    if (bins == NULL) {
        fprintf(stderr, "Memory allocation for bins failed.\n");
        return;
    }

    // Find the minimum and maximum values from the samples
    double min_value = xs[0], max_value = xs[0];
    for (int i = 1; i < n_samples; i++) {
        if (xs[i] < min_value) {
            min_value = xs[i];
        }
        if (xs[i] > max_value) {
            max_value = xs[i];
        }
    }

    // Avoid division by zero for a single unique value
    if (min_value == max_value) {
        max_value++;
    }

    // Calculate bin width
    double range = max_value - min_value;
    double bin_width = range / n_bins;

    // Fill the bins with sample counts
    for (int i = 0; i < n_samples; i++) {
        double pos = (xs[i] - min_value) / bin_width;
        if (pos != pos) {
            continue; // NaN position (non-finite input): cannot be binned
        }
        int bin_index;
        if (pos >= (double)n_bins) {
            bin_index = n_bins - 1; // Last bin includes max_value
        } else if (pos < 0.0) {
            bin_index = 0;
        } else {
            bin_index = (int)pos;
        }
        bins[bin_index]++;
    }

    // Calculate the scaling factor based on the maximum bin count
    int max_bin_count = 0;
    for (int i = 0; i < n_bins; i++) {
        if (bins[i] > max_bin_count) {
            max_bin_count = bins[i];
        }
    }
    const int MAX_WIDTH = 50; // Adjust this to your terminal width
    double scale = max_bin_count > MAX_WIDTH ? (double)MAX_WIDTH / max_bin_count : 1.0;

    // Print the histogram
    for (int i = 0; i < n_bins; i++) {
        double bin_start = min_value + i * bin_width;
        double bin_end = bin_start + bin_width;
        printf(" [%4.1f, %4.1f): ", bin_start, bin_end);
        int marks = (int)(bins[i] * scale);
        for (int j = 0; j < marks; j++) {
            // The bar glyph was an empty string here, which drew no bar.
            printf("█");
        }
        printf(" %d\n", bins[i]);
    }

    // Free the allocated memory for bins
    free(bins);
}
// Replicate some of the above functions over samplers
// However, in the future I'll delete this
// There should be a clear boundary between working with samplers and working with an array of samples

View File

@ -13,6 +13,7 @@ typedef struct ci_t {
ci array_get_ci(ci interval, double* xs, int n);
ci array_get_90_ci(double xs[], int n);
void array_print_stats(double xs[], int n);
void print_histogram(double* xs, int n_samples, int n_bins);
// Deprecated: get confidence intervals directly from samplers
ci sampler_get_ci(ci interval, double (*sampler)(uint64_t*), int n, uint64_t* seed);