forked from personal/squiggle.c
add histogram functionality; recompile
This commit is contained in:
parent
7bc29b9e3d
commit
d70296f230
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
2
scratchpad/plotting/c/attribution.md
Normal file
2
scratchpad/plotting/c/attribution.md
Normal file
|
@ -0,0 +1,2 @@
|
|||
https://github.com/red-data-tools/YouPlot
|
||||
Design inspired by ^
|
1000000
scratchpad/plotting/c/data.dat
Normal file
1000000
scratchpad/plotting/c/data.dat
Normal file
File diff suppressed because it is too large
Load Diff
1
scratchpad/plotting/c/example.md
Normal file
1
scratchpad/plotting/c/example.md
Normal file
|
@ -0,0 +1 @@
|
|||
./samples | hist 200 | head -n 20
|
BIN
scratchpad/plotting/c/histogram
Executable file
BIN
scratchpad/plotting/c/histogram
Executable file
Binary file not shown.
86
scratchpad/plotting/c/histogram.c
Normal file
86
scratchpad/plotting/c/histogram.c
Normal file
|
@ -0,0 +1,86 @@
|
|||
#include <errno.h>
#include <float.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
|
||||
|
||||
#define MAX_SAMPLES 1000000
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
if (argc < 2) {
|
||||
fprintf(stderr, "Usage: %s number_of_bins\n", argv[0]);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
int num_bins = atoi(argv[1]);
|
||||
if (num_bins <= 0) {
|
||||
fprintf(stderr, "Number of bins must be a positive integer.\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
int *bins = calloc(num_bins, sizeof(int));
|
||||
double *samples = malloc(MAX_SAMPLES * sizeof(double));
|
||||
if (bins == NULL || samples == NULL) {
|
||||
fprintf(stderr, "Memory allocation failed.\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
double value, min_value = DBL_MAX, max_value = -DBL_MAX;
|
||||
int sample_count = 0;
|
||||
|
||||
// Read numbers from stdin and store them into the samples array
|
||||
while (sample_count < MAX_SAMPLES && scanf("%lf", &value) != EOF) {
|
||||
samples[sample_count++] = value;
|
||||
if (value < min_value) {
|
||||
min_value = value;
|
||||
}
|
||||
if (value > max_value) {
|
||||
max_value = value;
|
||||
}
|
||||
}
|
||||
|
||||
// Avoid division by zero for a single unique value
|
||||
if (min_value == max_value) {
|
||||
max_value++;
|
||||
}
|
||||
|
||||
// Calculate bin width
|
||||
double range = max_value - min_value;
|
||||
double bin_width = range / num_bins;
|
||||
|
||||
// Fill the bins with sample counts
|
||||
for (int i = 0; i < sample_count; i++) {
|
||||
int bin_index = (int)((samples[i] - min_value) / bin_width);
|
||||
if (bin_index == num_bins) {
|
||||
bin_index--; // Last bin includes max_value
|
||||
}
|
||||
bins[bin_index]++;
|
||||
}
|
||||
|
||||
// Calculate the scaling factor based on the maximum bin count
|
||||
int max_bin_count = 0;
|
||||
for (int i = 0; i < num_bins; i++) {
|
||||
if (bins[i] > max_bin_count) {
|
||||
max_bin_count = bins[i];
|
||||
}
|
||||
}
|
||||
const int MAX_WIDTH = 50; // Adjust this to your terminal width
|
||||
double scale = max_bin_count > MAX_WIDTH ? (double)MAX_WIDTH / max_bin_count : 1.0;
|
||||
|
||||
// Print the histogram
|
||||
for (int i = 0; i < num_bins; i++) {
|
||||
double bin_start = min_value + i * bin_width;
|
||||
double bin_end = bin_start + bin_width;
|
||||
printf(" [%4.1f, %4.1f): ", bin_start, bin_end);
|
||||
|
||||
int marks = (int)(bins[i] * scale);
|
||||
for (int j = 0; j < marks; j++) {
|
||||
printf("▇");
|
||||
}
|
||||
printf(" %d\n", bins[i]);
|
||||
}
|
||||
|
||||
// Free the allocated memory
|
||||
free(bins);
|
||||
free(samples);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -1,9 +1,11 @@
|
|||
reset
|
||||
|
||||
stats 'data.dat' nooutput
|
||||
# stats '-' nooutput
|
||||
# show variables all
|
||||
max=STATS_max
|
||||
min=STATS_min
|
||||
# max=STATS_max
|
||||
# min=STATS_min
|
||||
min=-1
|
||||
max=25
|
||||
|
||||
n=1000
|
||||
width=(max-min)/n
|
||||
|
@ -23,4 +25,6 @@ set tics scale 0,0,0,0
|
|||
set xlabel "x"
|
||||
|
||||
set terminal dumb size 100, 25
|
||||
plot 'data.dat' u (hist($1,width)):(1.0) smooth freq w boxes notitle
|
||||
plot '-' u (hist($1,width)):(1.0) smooth freq w boxes notitle
|
||||
|
||||
|
||||
|
|
5
scratchpad/plotting/gnuplot/term5.gp
Normal file
5
scratchpad/plotting/gnuplot/term5.gp
Normal file
|
@ -0,0 +1,5 @@
|
|||
# Plot data arriving on stdin as an ASCII line chart in the terminal.
# Restore gnuplot's default settings before configuring anything.
reset
|
||||
# Use the text-only "dumb" terminal, 100 columns by 25 rows.
set terminal dumb size 100, 25
|
||||
# Let gnuplot pick the axis ranges from the data.
set autoscale
|
||||
# '<cat' makes gnuplot read its data from the output of `cat`.
# NOTE(review): `using 0` selects pseudo-column 0, not the first data
# column — confirm whether `using 1` was intended.
plot '<cat' using 0 with lines notitle
|
||||
|
30
scratchpad/plotting/gnuplot/term6.gp
Normal file
30
scratchpad/plotting/gnuplot/term6.gp
Normal file
|
@ -0,0 +1,30 @@
|
|||
# Render a histogram of the numbers piped in on stdin as ASCII art.
# The range and bin count are hard-coded below.
reset

# Deriving the range from the data is disabled; the range is fixed instead.
# stats '-' nooutput
# show variables all
# max=STATS_max
# min=STATS_min
min=-1
max=25

n=1000
# gnuplot truncates integer/integer division, so (max-min)/n would be 0 here
# and hist() would divide by zero. Multiply by 1.0 to force floating point.
width=1.0*(max-min)/n
# Map a value to the centre of the bin that contains it.
hist(x,width)=width*floor(x/width)+width/2.0

set xrange [min:max]
set yrange [0:45000]

unset xtics
unset ytics
unset border
set xtics out nomirror norotate offset 0,0
set ytics out nomirror norotate
set ytics in nomirror norotate offset 0,0
set tics scale 0,0,0,0

set xlabel "x"

set terminal dumb size 100, 25
# Bin the first data column ($1); `smooth freq` sums the 1.0 weights per bin.
plot '<cat' u (hist($1,width)):(1.0) smooth freq w boxes notitle
|
||||
|
||||
|
2
scratchpad/plotting/uplot/command.sh
Normal file
2
scratchpad/plotting/uplot/command.sh
Normal file
|
@ -0,0 +1,2 @@
|
|||
cat data.dat | uplot hist --nbins 100
|
||||
cat data.dat | uplot hist --nbins 10
|
4
scratchpad/plotting/uplot/howto.md
Normal file
4
scratchpad/plotting/uplot/howto.md
Normal file
|
@ -0,0 +1,4 @@
|
|||
sudo apt install ruby-dev
|
||||
|
||||
sudo gem install youplot
|
||||
|
|
@ -213,6 +213,81 @@ void array_print_stats(double xs[], int n){
|
|||
mean, std, ci_90.low, ci_80.low, ci_50.low, median, ci_50.high, ci_80.high, ci_90.high);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Print a text histogram of an array of samples to stdout.
 *
 * The range [min(xs), max(xs)] is split into n_bins equal-width bins. Each
 * output line shows the bin interval, a bar of block characters and the raw
 * count. Bars are scaled so that the fullest bin is at most 50 characters
 * wide. The last bin is closed on the right so that the maximum is counted.
 *
 * @param xs        samples to plot; must point to n_samples doubles
 * @param n_samples number of samples in xs; must be positive
 * @param n_bins    number of bins; must be positive
 *
 * On invalid arguments or allocation failure a message is written to stderr
 * and nothing is printed to stdout. Samples whose bin position is not a
 * number (e.g. NaN input) are not counted in any bin.
 */
void print_histogram(double* xs, int n_samples, int n_bins) {
    // Originally generated with the help of an llm
    // interface inspired by <https://github.com/red-data-tools/YouPlot>
    if (n_bins <= 0) {
        fprintf(stderr, "Number of bins must be a positive integer.\n");
        return;
    } else if (n_samples <= 0) {
        fprintf(stderr, "Number of samples must be a positive integer.\n");
        return;
    } else if (xs == NULL) {
        fprintf(stderr, "Samples array must not be NULL.\n");
        return;
    }

    // calloc rather than malloc: the counters are incremented below and
    // therefore must start at zero.
    int *bins = calloc((size_t)n_bins, sizeof *bins);
    if (bins == NULL) {
        fprintf(stderr, "Memory allocation for bins failed.\n");
        return;
    }

    // Find the minimum and maximum values from the samples
    double min_value = xs[0], max_value = xs[0];
    for (int i = 1; i < n_samples; i++) {
        if (xs[i] < min_value) {
            min_value = xs[i];
        }
        if (xs[i] > max_value) {
            max_value = xs[i];
        }
    }

    // Use a unit range when all samples are equal, so the bin width is never
    // zero (max_value + 1 would not change max_value for very large magnitudes).
    double range = max_value > min_value ? max_value - min_value : 1.0;
    double bin_width = range / n_bins;

    // Fill the bins with sample counts
    for (int i = 0; i < n_samples; i++) {
        double position = (xs[i] - min_value) / bin_width;
        if (position != position) {
            continue; // NaN: converting it to int would be undefined behaviour
        }
        int bin_index;
        if (position <= 0.0) {
            bin_index = 0;
        } else if (position >= n_bins) {
            bin_index = n_bins - 1; // last bin includes max_value
        } else {
            bin_index = (int)position;
        }
        bins[bin_index]++;
    }

    // Calculate the scaling factor based on the maximum bin count
    int max_bin_count = 0;
    for (int i = 0; i < n_bins; i++) {
        if (bins[i] > max_bin_count) {
            max_bin_count = bins[i];
        }
    }
    const int MAX_WIDTH = 50; // Adjust this to your terminal width
    double scale = max_bin_count > MAX_WIDTH ? (double)MAX_WIDTH / max_bin_count : 1.0;

    // Print the histogram
    for (int i = 0; i < n_bins; i++) {
        double bin_start = min_value + i * bin_width;
        double bin_end = bin_start + bin_width;
        printf("  [%4.1f, %4.1f): ", bin_start, bin_end);

        int marks = (int)(bins[i] * scale);
        for (int j = 0; j < marks; j++) {
            printf("█");
        }
        printf(" %d\n", bins[i]);
    }

    // Free the allocated memory for bins
    free(bins);
}
|
||||
|
||||
// Replicate some of the above functions over samplers
|
||||
// However, in the future I'll delete this
|
||||
// There should be a clear boundary between working with samplers and working with an array of samples
|
||||
|
|
|
@ -13,6 +13,7 @@ typedef struct ci_t {
|
|||
ci array_get_ci(ci interval, double* xs, int n);
|
||||
ci array_get_90_ci(double xs[], int n);
|
||||
void array_print_stats(double xs[], int n);
|
||||
void print_histogram(double* xs, int n_samples, int n_bins);
|
||||
|
||||
// Deprecated: get confidence intervals directly from samplers
|
||||
ci sampler_get_ci(ci interval, double (*sampler)(uint64_t*), int n, uint64_t* seed);
|
||||
|
|
Loading…
Reference in New Issue
Block a user