add histogram functionality; recompile

master
NunoSempere 4 months ago
parent 7bc29b9e3d
commit d70296f230

Binary file not shown.

Binary file not shown.

@ -0,0 +1,2 @@
https://github.com/red-data-tools/YouPlot
Design inspired by ^

File diff suppressed because it is too large Load Diff

@ -0,0 +1 @@
./samples | hist 200 | head -n 20

Binary file not shown.

@ -0,0 +1,86 @@
#include <stdio.h>
#include <stdlib.h>
#include <float.h>
#define MAX_SAMPLES 1000000
// hist: read whitespace-separated numbers from stdin and print a text
// histogram with argv[1] equal-width bins to stdout.
//
// At most MAX_SAMPLES values are read. Reading stops at end of input or at
// the first token that is not a number (a warning is printed in that case).
// Exits with EXIT_FAILURE on a bad bin count, allocation failure, or when no
// sample could be read.
int main(int argc, char *argv[]) {
    if (argc < 2) {
        fprintf(stderr, "Usage: %s number_of_bins\n", argv[0]);
        exit(EXIT_FAILURE);
    }

    int num_bins = atoi(argv[1]);
    if (num_bins <= 0) {
        fprintf(stderr, "Number of bins must be a positive integer.\n");
        exit(EXIT_FAILURE);
    }

    // bins must start at zero (calloc); samples are fully written before use
    int *bins = calloc((size_t)num_bins, sizeof *bins);
    double *samples = malloc(MAX_SAMPLES * sizeof *samples);
    if (bins == NULL || samples == NULL) {
        fprintf(stderr, "Memory allocation failed.\n");
        free(bins);
        free(samples);
        exit(EXIT_FAILURE);
    }

    double value = 0.0, min_value = DBL_MAX, max_value = -DBL_MAX;
    int sample_count = 0;
    int status = 1;

    // Read numbers from stdin and store them into the samples array.
    // scanf returns 1 only when a number was actually converted; it returns 0
    // (without consuming input) on a non-numeric token, so comparing against
    // EOF alone would loop on the same token and store stale values.
    while (sample_count < MAX_SAMPLES && (status = scanf("%lf", &value)) == 1) {
        samples[sample_count++] = value;
        if (value < min_value) {
            min_value = value;
        }
        if (value > max_value) {
            max_value = value;
        }
    }
    if (status == 0) {
        fprintf(stderr, "Warning: stopped reading at non-numeric input after %d samples.\n", sample_count);
    }
    if (sample_count == 0) {
        fprintf(stderr, "No samples read from stdin.\n");
        free(bins);
        free(samples);
        exit(EXIT_FAILURE);
    }

    // Calculate bin width. A single unique value gets a range of 1 so the
    // division below is never by zero (also covers magnitudes so large that
    // adding 1 to max_value would not change it).
    double range = max_value - min_value;
    if (!(range > 0.0)) {
        range = 1.0;
    }
    double bin_width = range / num_bins;

    // Fill the bins with sample counts
    for (int i = 0; i < sample_count; i++) {
        int bin_index = (int)((samples[i] - min_value) / bin_width);
        if (bin_index >= num_bins) {
            bin_index = num_bins - 1; // Last bin includes max_value
        } else if (bin_index < 0) {
            bin_index = 0;
        }
        bins[bin_index]++;
    }

    // Calculate the scaling factor based on the maximum bin count
    int max_bin_count = 0;
    for (int i = 0; i < num_bins; i++) {
        if (bins[i] > max_bin_count) {
            max_bin_count = bins[i];
        }
    }
    const int MAX_WIDTH = 50; // Adjust this to your terminal width
    double scale = max_bin_count > MAX_WIDTH ? (double)MAX_WIDTH / max_bin_count : 1.0;

    // Print the histogram: one line per bin with its range, bar and count
    for (int i = 0; i < num_bins; i++) {
        double bin_start = min_value + i * bin_width;
        double bin_end = bin_start + bin_width;
        printf(" [%4.1f, %4.1f): ", bin_start, bin_end);
        int marks = (int)(bins[i] * scale);
        for (int j = 0; j < marks; j++) {
            printf("\xe2\x96\x88"); // U+2588 FULL BLOCK, UTF-8 encoded
        }
        printf(" %d\n", bins[i]);
    }

    // Free the allocated memory
    free(bins);
    free(samples);
    return 0;
}

@ -1,9 +1,11 @@
reset
stats 'data.dat' nooutput
# stats '-' nooutput
# show variables all
max=STATS_max
min=STATS_min
# max=STATS_max
# min=STATS_min
min=-1
max=25
n=1000
width=(max-min)/n
@ -23,4 +25,6 @@ set tics scale 0,0,0,0
set xlabel "x"
set terminal dumb size 100, 25
plot 'data.dat' u (hist($1,width)):(1.0) smooth freq w boxes notitle
plot '-' u (hist($1,width)):(1.0) smooth freq w boxes notitle

@ -0,0 +1,5 @@
# Draw the data piped on stdin as a line chart rendered in text.
reset
# Let gnuplot pick the axis ranges from the data
set autoscale
# Character-cell ("dumb") terminal, 100 columns by 25 rows
set term dumb size 100, 25
# '<cat' makes gnuplot read the data from the output of `cat`, i.e. stdin.
# NOTE(review): `u 0` selects pseudo-column 0 (the record index) rather than
# a data column -- confirm this is intended.
plot '<cat' u 0 w l notitle

@ -0,0 +1,30 @@
# Render a text histogram of the numbers piped on stdin (read through '<cat').
reset
# stats '-' nooutput
# show variables all
# max=STATS_max
# min=STATS_min
# Fixed plotting range, used instead of the STATS_* values commented out above
min=-1
max=25
n=1000
# Force floating-point division: min, max and n are integers, and gnuplot
# truncates integer division, so (max-min)/n would be 0 and hist() would
# then divide by zero.
width=1.0*(max-min)/n
# Map x to the centre of the width-wide bin that contains it
hist(x,width)=width*floor(x/width)+width/2.0
set xrange [min:max]
set yrange [0:45000]
unset xtics
unset ytics
unset border
set xtics out nomirror norotate offset 0,0
set ytics out nomirror norotate
set ytics in nomirror norotate offset 0,0
set tics scale 0,0,0,0
set xlabel "x"
# Character-cell ("dumb") terminal, 100 columns by 25 rows
set terminal dumb size 100, 25
# Bin the first data column ($1); `smooth freq` sums the 1.0 weights per bin.
# Passing the constant 0 instead of $1 would put every row in the same bin.
plot '<cat' u (hist($1,width)):(1.0) smooth freq w boxes notitle

@ -0,0 +1,2 @@
cat data.dat | uplot hist --nbins 100
cat data.dat | uplot hist --nbins 10

@ -0,0 +1,4 @@
sudo apt install ruby-dev
sudo gem install youplot

@ -213,6 +213,81 @@ void array_print_stats(double xs[], int n){
mean, std, ci_90.low, ci_80.low, ci_50.low, median, ci_50.high, ci_80.high, ci_90.high);
}
// Print a text histogram of xs[0..n_samples-1] to stdout.
//
// The samples are split into n_bins equal-width bins spanning
// [min(xs), max(xs)]; the last bin also includes the maximum. One line is
// printed per bin: its range, a bar, and its count. Bars are scaled so that
// the fullest bin is at most MAX_WIDTH characters wide.
//
// If n_bins or n_samples is not positive, or the bin counters cannot be
// allocated, a message is written to stderr and nothing is printed.
void print_histogram(double* xs, int n_samples, int n_bins) {
    // Generated with the help of an llm; there might be subtle off-by-one errors
    // interface inspired by <https://github.com/red-data-tools/YouPlot>
    if (n_bins <= 0) {
        fprintf(stderr, "Number of bins must be a positive integer.\n");
        return;
    } else if (n_samples <= 0) {
        fprintf(stderr, "Number of samples must be a positive integer.\n");
        return;
    }

    // The counters are incremented below, so they must start at zero:
    // calloc zero-initialises, malloc does not.
    int *bins = calloc((size_t)n_bins, sizeof *bins);
    if (bins == NULL) {
        fprintf(stderr, "Memory allocation for bins failed.\n");
        return;
    }

    // Find the minimum and maximum values from the samples
    double min_value = xs[0], max_value = xs[0];
    for (int i = 1; i < n_samples; i++) {
        if (xs[i] < min_value) {
            min_value = xs[i];
        }
        if (xs[i] > max_value) {
            max_value = xs[i];
        }
    }

    // Calculate bin width. A single unique value gets a range of 1 so the
    // division below is never by zero (also covers magnitudes so large that
    // adding 1 to max_value would not change it).
    double range = max_value - min_value;
    if (!(range > 0.0)) {
        range = 1.0;
    }
    double bin_width = range / n_bins;

    // Fill the bins with sample counts
    for (int i = 0; i < n_samples; i++) {
        int bin_index = (int)((xs[i] - min_value) / bin_width);
        if (bin_index >= n_bins) {
            bin_index = n_bins - 1; // Last bin includes max_value
        } else if (bin_index < 0) {
            bin_index = 0;
        }
        bins[bin_index]++;
    }

    // Calculate the scaling factor based on the maximum bin count
    int max_bin_count = 0;
    for (int i = 0; i < n_bins; i++) {
        if (bins[i] > max_bin_count) {
            max_bin_count = bins[i];
        }
    }
    const int MAX_WIDTH = 50; // Adjust this to your terminal width
    double scale = max_bin_count > MAX_WIDTH ? (double)MAX_WIDTH / max_bin_count : 1.0;

    // Print the histogram: one line per bin with its range, bar and count
    for (int i = 0; i < n_bins; i++) {
        double bin_start = min_value + i * bin_width;
        double bin_end = bin_start + bin_width;
        printf(" [%4.1f, %4.1f): ", bin_start, bin_end);
        int marks = (int)(bins[i] * scale);
        for (int j = 0; j < marks; j++) {
            printf("\xe2\x96\x88"); // U+2588 FULL BLOCK, UTF-8 encoded
        }
        printf(" %d\n", bins[i]);
    }

    // Free the allocated memory for bins
    free(bins);
}
// Replicate some of the above functions over samplers
// However, in the future I'll delete this
// There should be a clear boundary between working with samplers and working with an array of samples

@ -13,6 +13,7 @@ typedef struct ci_t {
ci array_get_ci(ci interval, double* xs, int n);
ci array_get_90_ci(double xs[], int n);
void array_print_stats(double xs[], int n);
// Print a text histogram of the n_samples values in xs to stdout, using
// n_bins equal-width bins between the smallest and largest sample (one line
// per bin with its range and count). Writes an error to stderr and returns
// without printing a histogram if n_bins or n_samples is not positive.
void print_histogram(double* xs, int n_samples, int n_bins);
// Deprecated: get confidence intervals directly from samplers
ci sampler_get_ci(ci interval, double (*sampler)(uint64_t*), int n, uint64_t* seed);

Loading…
Cancel
Save