Compare commits

...

6 Commits

34 changed files with 2000351 additions and 0 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,2 @@
https://github.com/red-data-tools/YouPlot
Design inspired by the project linked above (YouPlot).

1000000
scratchpad/plotting/c/data.dat Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1 @@
# Pipe the output of ./samples through `hist` with argument 200 and keep only
# the first 20 lines of the result.
# NOTE(review): `hist` is presumably the histogram program in this directory,
# with 200 as its number of bins -- confirm the binary name.
./samples | hist 200 | head -n 20

BIN
scratchpad/plotting/c/histogram Executable file

Binary file not shown.

View File

@ -0,0 +1,86 @@
#include <errno.h>
#include <float.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#define MAX_SAMPLES 1000000

// Reads up to MAX_SAMPLES whitespace-separated numbers from stdin and prints a
// horizontal text histogram of them to stdout.
//
// argv[1]: number of equal-width bins; must be a positive decimal integer.
//
// The bins partition [min, max] of the samples read. Every bin is half-open
// except the last one, which also contains the maximum. Bars are scaled so the
// fullest bin is at most MAX_WIDTH glyphs wide; the exact count is printed
// after each bar.
//
// Exits with EXIT_FAILURE on a bad bin count, on allocation failure, or when
// no number could be read from stdin.
int main(int argc, char *argv[]) {
    if (argc < 2) {
        fprintf(stderr, "Usage: %s number_of_bins\n", argv[0]);
        exit(EXIT_FAILURE);
    }

    // strtol rather than atoi: rejects trailing junk ("12abc") and values
    // that do not fit in an int instead of silently misparsing them.
    errno = 0;
    char *end = NULL;
    long requested_bins = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || errno == ERANGE ||
        requested_bins <= 0 || requested_bins > INT_MAX) {
        fprintf(stderr, "Number of bins must be a positive integer.\n");
        exit(EXIT_FAILURE);
    }
    int num_bins = (int)requested_bins;

    int *bins = calloc((size_t)num_bins, sizeof *bins);
    double *samples = malloc(MAX_SAMPLES * sizeof *samples);
    if (bins == NULL || samples == NULL) {
        fprintf(stderr, "Memory allocation failed.\n");
        free(bins);
        free(samples);
        exit(EXIT_FAILURE);
    }

    double value = 0.0, min_value = DBL_MAX, max_value = -DBL_MAX;
    int sample_count = 0;

    // Read numbers from stdin and store them into the samples array.
    // scanf returns 0 (not EOF) on non-numeric input and consumes nothing, so
    // the loop must stop on anything other than one successful conversion.
    while (sample_count < MAX_SAMPLES && scanf("%lf", &value) == 1) {
        samples[sample_count++] = value;
        if (value < min_value) {
            min_value = value;
        }
        if (value > max_value) {
            max_value = value;
        }
    }

    // Without samples there is no range to bin.
    if (sample_count == 0) {
        fprintf(stderr, "No samples could be read from stdin.\n");
        free(bins);
        free(samples);
        exit(EXIT_FAILURE);
    }

    // Avoid division by zero for a single unique value
    if (min_value == max_value) {
        max_value++;
    }

    // Calculate bin width
    double range = max_value - min_value;
    double bin_width = range / num_bins;

    // Fill the bins with sample counts
    for (int i = 0; i < sample_count; i++) {
        double position = (samples[i] - min_value) / bin_width;
        int bin_index;
        // Clamp in floating point before converting: casting an out-of-range
        // or non-finite double to int is undefined behaviour.
        if (!(position > 0.0)) {
            bin_index = 0;
        } else if (position >= num_bins) {
            bin_index = num_bins - 1; // Last bin includes max_value
        } else {
            bin_index = (int)position;
        }
        bins[bin_index]++;
    }

    // Calculate the scaling factor based on the maximum bin count
    int max_bin_count = 0;
    for (int i = 0; i < num_bins; i++) {
        if (bins[i] > max_bin_count) {
            max_bin_count = bins[i];
        }
    }
    const int MAX_WIDTH = 50; // Adjust this to your terminal width
    double scale = max_bin_count > MAX_WIDTH ? (double)MAX_WIDTH / max_bin_count : 1.0;

    // Print the histogram
    for (int i = 0; i < num_bins; i++) {
        double bin_start = min_value + i * bin_width;
        double bin_end = bin_start + bin_width;
        printf(" [%4.1f, %4.1f): ", bin_start, bin_end);
        int marks = (int)(bins[i] * scale);
        for (int j = 0; j < marks; j++) {
            // One glyph per mark; with an empty literal here no bar is drawn.
            printf("█");
        }
        printf(" %d\n", bins[i]);
    }

    // Free the allocated memory
    free(bins);
    free(samples);
    return 0;
}

View File

@ -0,0 +1,22 @@
# Histogram of the first column of data.dat, rendered to histogram.png.
reset

# Binning parameters
nbins = 100                     # number of intervals
x_hi = 3.                       # upper end of the plotted range
x_lo = -3.                      # lower end of the plotted range
bin_w = (x_hi - x_lo) / nbins   # interval width

# Map a value to the centre of the w-wide interval that contains it
bin_center(v, w) = w * floor(v / w) + w / 2.0

# Output terminal and file
set terminal png
set output "histogram.png"

set xrange [x_lo:x_hi]
set yrange [0:]

# Leave an empty boundary around the data inside the autoscaled graph
set offset graph 0.05, 0.05, 0.05, 0.0

set xtics x_lo, (x_hi - x_lo) / 5, x_hi
set boxwidth bin_w * 0.9
set style fill solid 0.5
set tics out nomirror
set xlabel "x"
set ylabel "Frequency"

# Count the samples per interval and draw one box per interval
plot "data.dat" using (bin_center($1, bin_w)):(1.0) smooth frequency with boxes linecolor rgb "green" notitle

File diff suppressed because it is too large Load Diff

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.6 KiB

View File

@ -0,0 +1,21 @@
# ASCII histogram of the first column of data.dat on the dumb terminal.
reset
set terminal dumb size 80, 25
# The limits must be real-valued: gnuplot divides integers with truncation,
# so with integer limits (max-min)/n would be 6/100 = 0 and every bin would
# have zero width.
max=3.   #max value
min=-3.  #min value
n=100 #number of intervals
width=(max-min)/n #interval width
#function used to map a value to the centre of the interval containing it
hist(x,width)=width*floor(x/width)+width/2.0
set xrange [min:max]
set yrange [0:]
#to put an empty boundary around the
#data inside an autoscaled graph.
set offset graph 0.05,0.05,0.05,0.0
set xtics min,(max-min)/5,max
set boxwidth width*0.9
set style fill solid 0.5 #fillstyle
set tics out nomirror
set xlabel "x"
set ylabel "Frequency"
#count and plot
plot "data.dat" u (hist($1,width)):(1.0) smooth freq w boxes lc rgb"green" notitle

View File

@ -0,0 +1,21 @@
# ASCII histogram of the first column of data.dat over [-5, 5].
reset
set terminal dumb size 80, 25

# Binning parameters
nbins = 100                     # number of intervals
x_hi = 5.                       # upper end of the plotted range
x_lo = -5.                      # lower end of the plotted range
bin_w = (x_hi - x_lo) / nbins   # interval width

# Map a value to the centre of the w-wide interval that contains it
bin_center(v, w) = w * floor(v / w) + w / 2.0

set xrange [x_lo:x_hi]
set yrange [0:]

# Leave an empty boundary around the data inside the autoscaled graph
set offset graph 0.05, 0.05, 0.05, 0.0

set xtics x_lo, (x_hi - x_lo) / 5, x_hi
set boxwidth bin_w * 0.9
set style fill solid 0.5
set tics out nomirror
set xlabel "x"
set ylabel "Frequency"

# Count the samples per interval and draw one box per interval
plot "data.dat" using (bin_center($1, bin_w)):(1.0) smooth frequency with boxes linecolor rgb "green" notitle

View File

@ -0,0 +1,25 @@
# ASCII histogram of the first column of data.dat over [-5, 5], followed by a
# dump of summary statistics and of all gnuplot variables.
reset
set terminal dumb size 80, 25
n=100 #number of intervals
max=5. #max value (real-valued, so the width below is not an integer division)
min=-5. #min value
width=(max-min)/n #interval width
#function used to map a value to the centre of the interval containing it
hist(x,width)=width*floor(x/width)+width/2.0
set xrange [min:max]
set yrange [0:]
#to put an empty boundary around the
#data inside an autoscaled graph.
set offset graph 0.05,0.05,0.05,0.0
set xtics min,(max-min)/5,max
set boxwidth width*0.9
set style fill solid 0.5 #fillstyle
set tics out nomirror
set xlabel "x"
set ylabel "Frequency"
#count and plot: each sample adds 1.0 to the interval it falls in
plot "data.dat" u (hist($1,width)):(1.0) smooth freq w boxes lc rgb"green" notitle
#print summary statistics of data.dat, then list every variable
#(including the STATS_* ones that `stats` defines)
stats 'data.dat'
show variables all

View File

@ -0,0 +1,26 @@
# ASCII histogram of the first column of data.dat; the plotted range is taken
# from the data itself through `stats`.
reset
stats 'data.dat' nooutput
# show variables all

# Binning parameters
x_hi = STATS_max
x_lo = STATS_min
nbins = 1000
bin_w = (x_hi - x_lo) / nbins

# Map a value to the centre of the w-wide interval that contains it
bin_center(v, w) = w * floor(v / w) + w / 2.0

set xrange [x_lo:x_hi]
set yrange [0:]

# Start from a bare graph, then re-enable only the tics
unset xtics
unset ytics
unset border
set xtics out nomirror norotate offset 0, 0
set ytics out nomirror norotate
set ytics in nomirror norotate offset 0, 0
set tics scale 0, 0, 0, 0
set xlabel "x"

set terminal dumb size 100, 25
plot "data.dat" using (bin_center($1, bin_w)):(1.0) smooth frequency with boxes notitle

View File

@ -0,0 +1,30 @@
# ASCII histogram of data supplied inline ('-') over a fixed range.
reset
# stats '-' nooutput
# show variables all
# max=STATS_max
# min=STATS_min
# The limits must be real-valued: gnuplot divides integers with truncation,
# so with integer limits (max-min)/n would be 26/1000 = 0 and every bin would
# have zero width.
min=-1.
max=25.
n=1000
width=(max-min)/n
# map a value to the centre of the interval containing it
hist(x,width)=width*floor(x/width)+width/2.0
set xrange [min:max]
set yrange [0:]
unset xtics
unset ytics
unset border
set xtics out nomirror norotate offset 0,0
set ytics out nomirror norotate
set ytics in nomirror norotate offset 0,0
set tics scale 0,0,0,0
set xlabel "x"
set terminal dumb size 100, 25
plot '-' u (hist($1,width)):(1.0) smooth freq w boxes notitle

View File

@ -0,0 +1,5 @@
# Line plot on the dumb terminal of data read from standard input via `cat`.
reset
set terminal dumb size 100, 25
set autoscale
# NOTE(review): `using 0` selects the pseudo-column 0 rather than a data
# column; if the sample values themselves were meant, this is presumably
# `using 1` -- confirm.
plot '<cat' using 0 with lines notitle

View File

@ -0,0 +1,30 @@
# ASCII histogram of data read from standard input via `cat`, over a fixed
# x range and a fixed y range.
reset
# stats '-' nooutput
# show variables all
# max=STATS_max
# min=STATS_min
# The limits must be real-valued: gnuplot divides integers with truncation,
# so with integer limits (max-min)/n would be 26/1000 = 0 and every bin would
# have zero width.
min=-1.
max=25.
n=1000
width=(max-min)/n
# map a value to the centre of the interval containing it
hist(x,width)=width*floor(x/width)+width/2.0
set xrange [min:max]
set yrange [0:45000]
unset xtics
unset ytics
unset border
set xtics out nomirror norotate offset 0,0
set ytics out nomirror norotate
set ytics in nomirror norotate offset 0,0
set tics scale 0,0,0,0
set xlabel "x"
set terminal dumb size 100, 25
# Bin the first data column ($1); binning the literal 0 would put every
# sample into one single interval.
plot '<cat' u (hist($1,width)):(1.0) smooth freq w boxes notitle

View File

@ -0,0 +1,2 @@
# Terminal histograms of data.dat with YouPlot, at two bin counts
uplot hist --nbins 100 < data.dat
uplot hist --nbins 10 < data.dat

View File

@ -0,0 +1,4 @@
# Install the ruby-dev package with apt, then the youplot gem system-wide
sudo apt install ruby-dev
sudo gem install youplot

View File

@ -213,6 +213,81 @@ void array_print_stats(double xs[], int n){
mean, std, ci_90.low, ci_80.low, ci_50.low, median, ci_50.high, ci_80.high, ci_90.high);
}
// Prints a horizontal text histogram of xs to stdout, one line per bin.
//
// xs:        samples to bin; must hold at least n_samples values
// n_samples: number of samples in xs; must be positive
// n_bins:    number of equal-width bins; must be positive
//
// The bins partition [min(xs), max(xs)]. Every bin is half-open except the
// last one, which also contains the maximum. Bars are scaled so the fullest
// bin is at most MAX_WIDTH glyphs wide; the exact count follows each bar.
// On invalid arguments or allocation failure a message is written to stderr
// and nothing is printed to stdout.
void array_print_histogram(double* xs, int n_samples, int n_bins) {
    // Generated with the help of an llm; there might be subtle off-by-one errors
    // interface inspired by <https://github.com/red-data-tools/YouPlot>
    if (n_bins <= 0) {
        fprintf(stderr, "Number of bins must be a positive integer.\n");
        return;
    } else if (n_samples <= 0) {
        fprintf(stderr, "Number of samples must be a positive integer.\n");
        return;
    }

    // calloc, not malloc: the counts are incremented below and must start at 0
    int *bins = calloc((size_t)n_bins, sizeof *bins);
    if (bins == NULL) {
        fprintf(stderr, "Memory allocation for bins failed.\n");
        return;
    }

    // Find the minimum and maximum values from the samples
    double min_value = xs[0], max_value = xs[0];
    for (int i = 1; i < n_samples; i++) {
        if (xs[i] < min_value) {
            min_value = xs[i];
        }
        if (xs[i] > max_value) {
            max_value = xs[i];
        }
    }

    // Avoid division by zero for a single unique value
    if (min_value == max_value) {
        max_value++;
    }

    // Calculate bin width
    double range = max_value - min_value;
    double bin_width = range / n_bins;

    // Fill the bins with sample counts
    for (int i = 0; i < n_samples; i++) {
        double position = (xs[i] - min_value) / bin_width;
        int bin_index;
        // Clamp in floating point before converting: casting an out-of-range
        // or non-finite double to int is undefined behaviour.
        if (!(position > 0.0)) {
            bin_index = 0;
        } else if (position >= n_bins) {
            bin_index = n_bins - 1; // Last bin includes max_value
        } else {
            bin_index = (int)position;
        }
        bins[bin_index]++;
    }

    // Calculate the scaling factor based on the maximum bin count
    int max_bin_count = 0;
    for (int i = 0; i < n_bins; i++) {
        if (bins[i] > max_bin_count) {
            max_bin_count = bins[i];
        }
    }
    const int MAX_WIDTH = 50; // Adjust this to your terminal width
    double scale = max_bin_count > MAX_WIDTH ? (double)MAX_WIDTH / max_bin_count : 1.0;

    // Print the histogram
    for (int i = 0; i < n_bins; i++) {
        double bin_start = min_value + i * bin_width;
        double bin_end = bin_start + bin_width;
        printf(" [%4.1f, %4.1f): ", bin_start, bin_end);
        int marks = (int)(bins[i] * scale);
        for (int j = 0; j < marks; j++) {
            // One glyph per mark; with an empty literal here no bar is drawn.
            printf("█");
        }
        printf(" %d\n", bins[i]);
    }

    // Free the allocated memory for bins
    free(bins);
}
// Replicate some of the above functions over samplers
// However, in the future I'll delete this
// There should be a clear boundary between working with samplers and working with an array of samples

View File

@ -13,6 +13,7 @@ typedef struct ci_t {
ci array_get_ci(ci interval, double* xs, int n);
ci array_get_90_ci(double xs[], int n);
void array_print_stats(double xs[], int n);
// Prints a text histogram of the n_samples values in xs to stdout, using
// n_bins equal-width bins between the smallest and largest sample.
// Writes a message to stderr and returns if n_bins or n_samples is not positive.
void array_print_histogram(double* xs, int n_samples, int n_bins);
// Deprecated: get confidence intervals directly from samplers
ci sampler_get_ci(ci interval, double (*sampler)(uint64_t*), int n, uint64_t* seed);