// Source: fermi/fermi.go (555 lines, 15 KiB, Go), exported from the repository's blame view.

package main
import (
"bufio"
2024-06-09 13:15:53 +00:00
"errors"
2024-07-12 16:11:25 +00:00
"flag"
2024-05-10 18:05:03 +00:00
"fmt"
2024-06-19 02:44:24 +00:00
"git.nunosempere.com/NunoSempere/fermi/pretty"
2024-06-19 12:30:21 +00:00
"git.nunosempere.com/NunoSempere/fermi/sample"
2024-05-10 18:25:23 +00:00
"math"
2024-05-10 18:05:03 +00:00
"os"
2024-06-09 22:51:05 +00:00
"sort"
2024-05-10 18:05:03 +00:00
"strings"
)
2024-06-19 02:02:42 +00:00
/* Types and interfaces */
// Stack is the interpreter state that runRepl carries from one input line to the next.
type Stack struct {
// old_dist is the running result that each new input line is combined with.
old_dist Dist
// vars maps variable names to the distributions saved under them ("=:" and "=.").
vars map[string]Dist
}
2024-06-09 21:35:36 +00:00
// Dist is anything that can be turned into a slice of samples.
// Scalar, Lognormal, Beta and FilledSamples implement it.
type Dist interface {
// Samples returns the samples representing the distribution.
Samples() []float64
}
2024-06-19 02:02:42 +00:00
// Scalar is a constant value treated as a (degenerate) distribution.
type Scalar float64
2024-06-19 02:02:42 +00:00
// Lognormal is a distribution described by a low and a high bound.
// The parser only builds it from two positive floats with low < high.
type Lognormal struct {
// low is the lower bound of the interval.
low float64
// high is the upper bound of the interval.
high float64
}
2024-06-19 02:02:42 +00:00
// Beta is a beta distribution with parameters a and b, as entered with "beta a b".
type Beta struct {
// a is the first parameter passed to sample.Sample_beta.
a float64
// b is the second parameter passed to sample.Sample_beta.
b float64
}
2024-06-19 02:02:42 +00:00
// FilledSamples is a distribution that is stored directly as its samples,
// e.g. the result of combining two distributions sample by sample.
type FilledSamples struct {
// xs holds the already-drawn samples.
xs []float64
}
2024-06-19 02:44:24 +00:00
/* Dist interface functions */
// https://go.dev/tour/methods/9
// Samples returns N_SAMPLES copies of the scalar's value, so that a constant
// can be combined element-wise with the samples of other distributions.
func (p Scalar) Samples() []float64 {
	value := float64(p)
	draws := make([]float64, N_SAMPLES)
	for i := range draws {
		draws[i] = value
	}
	return draws
}
func (ln Lognormal) Samples() []float64 {
2024-12-24 14:31:21 +00:00
sampler := func(r sample.State) float64 { return sample.Sample_to(ln.low, ln.high, r) }
2024-06-19 02:44:24 +00:00
// Can't do parallel because then I'd have to await throughout the code
return sample.Sample_serially(sampler, N_SAMPLES)
}
func (beta Beta) Samples() []float64 {
2024-12-24 14:31:21 +00:00
sampler := func(r sample.State) float64 { return sample.Sample_beta(beta.a, beta.b, r) }
2024-06-19 02:44:24 +00:00
return sample.Sample_serially(sampler, N_SAMPLES)
}
// Samples returns the stored samples. The slice itself is returned, not a
// copy, so callers that need to reorder it should copy it first.
func (fs FilledSamples) Samples() []float64 {
return fs.xs
}
2024-06-19 02:02:42 +00:00
/* Constants */
2024-10-01 07:56:31 +00:00
// HELP_MSG is the text printed by the "help" command and the -h flag: a
// pseudo-grammar of the input language, examples, and the command-line flags.
const HELP_MSG = "1. Grammar:\n" +
	" Operation | Variable assignment | Special\n" +
	" Operation: operator operand\n" +
	" operator: (empty) | * | / | + | -\n" +
	" operand: scalar | lognormal | beta | variable | mixture\n" +
	" lognormal: low high\n" +
	" beta: beta alpha beta\n" +
	" mixture: mx variable weight variable weight ...\n" +
	" Variable assignment: =: variable_name\n" +
	" Variable assignment and clear stack: =. variable_name\n" +
	" Special commands: \n" +
	" Comment: # this is a comment\n" +
	" Summary stats: stats | s\n" +
	" Clear stack: clear | c | .\n" +
	" Print debug info: debug | d\n" +
	" Print help message: help | h\n" +
	" Start additional stack: operator (\n" +
	" Return from additional stack )\n" +
	" Exit: exit | e\n" +
	" Examples: \n" +
	" + 2\n" +
	" / 2.5\n" +
	" * 1 10 (interpreted as lognormal)\n" +
	" + 1 10\n" +
	" * beta 1 10\n" +
	" 1 10 (multiplication taken as default operation)\n" +
	" =: x\n" +
	" .\n" +
	" 1 100\n" +
	" + x\n" +
	" # this is a comment\n" +
	" * 1 12 # this is an operation followed by a comment\n" +
	" * (\n" +
	" 1 10\n" +
	" + beta 1 100\n" +
	" )\n" +
	" exit\n" +
	"\n" +
	"2. Command flags:\n" +
	" -echo\n" +
	// The period belongs before the newline; it used to start the next line.
	" Specifies whether inputs should be echoed back. Useful if reading from a file.\n" +
	" -f string\n" +
	" Specifies a file with a model to run\n" +
	" -n int\n" +
	" Specifies the number of samples to draw when using samples (default 100000)\n" +
	" -h Shows help message\n"
2024-07-07 14:30:35 +00:00
2024-06-19 02:02:42 +00:00
// NORMAL90CONFIDENCE is the factor multiplyLogDists uses to convert between a
// log-space standard deviation and the half-width of a low/high interval
// (the name suggests the z-score bounding a central 90% normal interval).
const NORMAL90CONFIDENCE = 1.6448536269514727
// INIT_DIST is the value every stack starts from and is reset to on "clear"
// and "=.": the scalar 1, which multiplyDists treats as the identity.
const INIT_DIST Scalar = Scalar(1)
2024-07-12 16:11:25 +00:00
2024-07-12 22:39:13 +00:00
// N_SAMPLES is the number of samples drawn whenever a distribution is turned
// into samples. main overwrites it with the value of the -n flag.
var N_SAMPLES = 100_000
2024-06-19 02:02:42 +00:00
2024-06-19 13:46:08 +00:00
/* Printers */
2024-06-19 02:44:24 +00:00
func prettyPrintDist(dist Dist) {
switch v := dist.(type) {
case Lognormal:
fmt.Printf("=> ")
pretty.PrettyPrint2Floats(v.low, v.high)
2024-06-19 13:46:08 +00:00
fmt.Println()
2024-06-19 02:44:24 +00:00
case Beta:
fmt.Printf("=> beta ")
pretty.PrettyPrint2Floats(v.a, v.b)
2024-06-19 13:46:08 +00:00
fmt.Println()
2024-06-19 02:44:24 +00:00
case Scalar:
fmt.Printf("=> scalar ")
w := float64(v)
pretty.PrettyPrintFloat(w)
fmt.Println()
2024-06-19 13:46:08 +00:00
case FilledSamples:
2024-09-15 21:24:40 +00:00
n := len(v.xs)
sorted_xs := make([]float64, n)
2024-06-19 13:46:08 +00:00
copy(sorted_xs, v.xs)
sort.Slice(sorted_xs, func(i, j int) bool {
return sorted_xs[i] < sorted_xs[j]
})
2024-06-09 13:15:53 +00:00
2024-09-15 21:24:40 +00:00
low := sorted_xs[int(math.Round(float64(n)*0.05))]
2024-10-01 07:44:45 +00:00
high := sorted_xs[int(math.Round(float64(n)*0.95))]
2024-06-19 13:46:08 +00:00
fmt.Printf("=> ")
pretty.PrettyPrint2Floats(low, high)
2024-06-09 13:15:53 +00:00
2024-06-19 13:46:08 +00:00
fmt.Printf(" (")
pretty.PrettyPrintInt(N_SAMPLES)
fmt.Printf(" samples)")
fmt.Println()
2024-06-09 13:15:53 +00:00
default:
2024-06-19 13:46:08 +00:00
fmt.Printf("%v\n", v)
2024-06-09 13:15:53 +00:00
}
2024-05-10 18:07:58 +00:00
}
2024-06-19 13:46:08 +00:00
// printAndReturnErr prints err_msg and a hint about the "help" command to
// stdout, then returns an error carrying err_msg.
func printAndReturnErr(err_msg string) error {
	fmt.Println(err_msg)
	fmt.Println("Type \"help\" (without quotes) to see a pseudogrammar and examples")
	return errors.New(err_msg)
}
2024-09-15 21:24:40 +00:00
func prettyPrintStats(dist Dist) {
xs := dist.Samples()
n := len(xs)
mean := 0.0
for i := 0; i < n; i++ {
mean += xs[i]
}
mean /= float64(n)
fmt.Printf("Mean: %f\n", mean)
stdev := 0.0
for i := 0; i < n; i++ {
stdev += math.Pow(xs[i]-mean, 2)
}
stdev = math.Sqrt(stdev / float64(n))
fmt.Printf("Stdev: %f\n", stdev)
sorted_xs := make([]float64, n)
copy(sorted_xs, xs)
sort.Slice(sorted_xs, func(i, j int) bool {
return sorted_xs[i] < sorted_xs[j]
})
2024-10-01 07:44:45 +00:00
print_ci := func(ci float64, prefix string) {
x := sorted_xs[int(math.Round(float64(n)*ci))]
fmt.Printf("%s%f\n", prefix, x)
}
print_ci(0.01, "ci 1%: ")
print_ci(0.05, "ci 5%: ")
print_ci(0.10, "ci 10%: ")
print_ci(0.25, "ci 25%: ")
print_ci(0.50, "ci 50%: ")
print_ci(0.75, "ci 75%: ")
print_ci(0.90, "ci 90%: ")
print_ci(0.95, "ci 95%: ")
print_ci(0.99, "ci 99%: ")
2024-09-15 21:24:40 +00:00
}
2024-06-19 13:46:08 +00:00
/* Operations */
// Generic operations with samples
2024-06-19 02:02:42 +00:00
func operateDistsAsSamples(dist1 Dist, dist2 Dist, op string) (Dist, error) {
xs := dist1.Samples()
ys := dist2.Samples()
2024-06-09 22:51:05 +00:00
zs := make([]float64, N_SAMPLES)
for i := 0; i < N_SAMPLES; i++ {
switch op {
case "*":
zs[i] = xs[i] * ys[i]
case "/":
if ys[0] != 0 {
zs[i] = xs[i] / ys[i]
} else {
fmt.Println("Error: When dividing as samples, division by zero")
return nil, errors.New("Division by zero")
}
case "+":
zs[i] = xs[i] + ys[i]
case "-":
zs[i] = xs[i] - ys[i]
2024-09-15 20:36:51 +00:00
default:
fmt.Println("Error: Operation not recognized")
return nil, errors.New("Operation not recognized")
}
2024-06-09 13:15:53 +00:00
}
2024-06-09 21:35:36 +00:00
return FilledSamples{xs: zs}, nil
}
2024-06-19 13:46:08 +00:00
// Multiplication
// multiplyLogDists multiplies two lognormals analytically. Each low/high pair
// is converted to a mean and standard deviation in log space (treating the
// pair as an interval NORMAL90CONFIDENCE deviations either side of the mean),
// the means are added, the deviations are combined in quadrature, and the
// result is converted back to a low/high pair.
func multiplyLogDists(l1 Lognormal, l2 Lognormal) Lognormal {
	// logParams returns the log-space mean and standard deviation of l.
	logParams := func(l Lognormal) (float64, float64) {
		mean := (math.Log(l.high) + math.Log(l.low)) / 2.0
		std := (math.Log(l.high) - math.Log(l.low)) / (2.0 * NORMAL90CONFIDENCE)
		return mean, std
	}
	mean1, std1 := logParams(l1)
	mean2, std2 := logParams(l2)

	mean := mean1 + mean2
	std := math.Sqrt(std1*std1 + std2*std2)
	halfWidth := std * NORMAL90CONFIDENCE

	return Lognormal{
		low:  math.Exp(mean - halfWidth),
		high: math.Exp(mean + halfWidth),
	}
}
// multiplyBetaDists combines two betas by adding their a parameters and their
// b parameters.
// NOTE(review): this looks like pooling of evidence rather than the
// distribution of the product of two beta variables — confirm this is the
// intended meaning of "*" between betas.
func multiplyBetaDists(beta1 Beta, beta2 Beta) Beta {
return Beta{a: beta1.a + beta2.a, b: beta1.b + beta2.b}
}
// multiplyLogDistAndScalar multiplies a lognormal by a scalar.
//
//   - s == 0 gives Scalar(0).
//   - s < 0 cannot be expressed as a Lognormal (whose bounds the parser
//     requires to be positive), so the product is computed sample by sample.
//   - s > 0 is handled analytically by treating s as a degenerate lognormal.
func multiplyLogDistAndScalar(l Lognormal, s Scalar) (Dist, error) {
	switch {
	case s == 0.0:
		return Scalar(0.0), nil
	case s < 0.0:
		// This is a multiplication; it previously fell back to "+".
		return operateDistsAsSamples(s, l, "*")
	default:
		return multiplyLogDists(l, Lognormal{low: float64(s), high: float64(s)}), nil
	}
}
// multiplyDists multiplies old_dist by new_dist, using a closed form where one
// is available and falling back to sample-wise multiplication otherwise.
//
// Closed forms: Lognormal*Lognormal, Lognormal*Scalar (either order),
// Scalar*Scalar, Beta*Beta. A Scalar of exactly 1 on the left returns new_dist
// unchanged, and a Scalar of exactly 0 on the left returns Scalar(0).
func multiplyDists(old_dist Dist, new_dist Dist) (Dist, error) {
	switch left := old_dist.(type) {
	case Lognormal:
		if right, ok := new_dist.(Lognormal); ok {
			return multiplyLogDists(left, right), nil
		}
		if right, ok := new_dist.(Scalar); ok {
			return multiplyLogDistAndScalar(left, right)
		}
	case Scalar:
		// Identity and annihilator shortcuts apply whatever new_dist is.
		if left == 1.0 {
			return new_dist, nil
		}
		if left == 0.0 {
			return Scalar(0.0), nil
		}
		if right, ok := new_dist.(Lognormal); ok {
			return multiplyLogDistAndScalar(right, left)
		}
		if right, ok := new_dist.(Scalar); ok {
			return Scalar(float64(left) * float64(right)), nil
		}
	case Beta:
		if right, ok := new_dist.(Beta); ok {
			return multiplyBetaDists(left, right), nil
		}
	}
	// No closed form for this pair of types.
	return operateDistsAsSamples(old_dist, new_dist, "*")
}
func divideDists(old_dist Dist, new_dist Dist) (Dist, error) {
switch o := old_dist.(type) {
// I miss you, OCaml switches
case Lognormal:
{
switch n := new_dist.(type) {
case Lognormal:
2024-06-19 13:46:08 +00:00
if n.high == 0 || n.low == 0 {
fmt.Println("Error: Can't divide by 0.0")
return nil, errors.New("Error: division by zero")
}
return multiplyLogDists(o, Lognormal{low: 1.0 / n.high, high: 1.0 / n.low}), nil
case Scalar:
2024-06-19 13:46:08 +00:00
if n == 0.0 {
fmt.Println("Error: Can't divide by 0.0")
return nil, errors.New("Error: division by zero scalar")
}
return multiplyLogDistAndScalar(o, Scalar(1.0/n))
}
}
case Scalar:
{
switch n := new_dist.(type) {
case Lognormal:
return multiplyLogDistAndScalar(Lognormal{low: 1.0 / n.high, high: 1.0 / n.low}, o)
case Scalar:
2024-06-19 13:46:08 +00:00
if n == 0.0 {
fmt.Println("Error: Can't divide by 0.0")
return nil, errors.New("Error: division by zero scalar")
}
return Scalar(float64(o) / float64(n)), nil
}
}
}
2024-09-15 20:34:30 +00:00
return operateDistsAsSamples(old_dist, new_dist, "/")
}
2024-06-19 13:46:08 +00:00
// Generic distribution operations
func operateDists(old_dist Dist, new_dist Dist, op string) (Dist, error) {
2024-06-19 02:02:42 +00:00
switch op {
case "*":
2024-06-19 13:46:08 +00:00
return multiplyDists(old_dist, new_dist)
2024-06-19 02:02:42 +00:00
case "/":
2024-06-19 13:46:08 +00:00
return divideDists(old_dist, new_dist)
2024-06-19 02:02:42 +00:00
case "+":
2024-06-19 13:46:08 +00:00
return operateDistsAsSamples(old_dist, new_dist, "+")
2024-06-19 02:02:42 +00:00
case "-":
2024-06-19 13:46:08 +00:00
return operateDistsAsSamples(old_dist, new_dist, "-")
default:
return nil, printAndReturnErr("Can't combine distributions in this way")
}
}
2024-12-24 14:31:21 +00:00
/* Mixtures */
// parseMixture parses "mx var weight var2 weight2 ..." into a mixture of
// previously assigned variables and returns it as FilledSamples.
//
// words must start with "mx" and be followed by one or more (variable, weight)
// pairs. Each variable must exist in vars and each weight must be accepted by
// pretty.ParseFloat. Every failure is printed via printAndReturnErr and
// returned as an error.
func parseMixture(words []string, vars map[string]Dist) (Dist, error) {
	const syntax_help = "\nMixture syntax: \nmx x 2.5 y 8 z 10\ni.e.: mx var weight var2 weight2 ... var_n weight_n"

	// "mx" plus at least one pair means an odd length of at least 3.
	if len(words) < 3 || len(words)%2 != 1 || words[0] != "mx" {
		return nil, printAndReturnErr("Not a mixture. " + syntax_help)
	}

	n_components := len(words) / 2
	fs := make([][]float64, 0, n_components)
	weights := make([]float64, 0, n_components)
	for i := 1; i < len(words); i += 2 {
		dist, exists := vars[words[i]]
		if !exists {
			return nil, printAndReturnErr("Expected mixture variable but didn't get a variable. " + syntax_help)
		}
		// Validate the weight before sampling so a bad pair costs nothing.
		weight, err := pretty.ParseFloat(words[i+1])
		if err != nil {
			return nil, printAndReturnErr("Expected mixture weight but didn't get a float. " + syntax_help)
		}
		fs = append(fs, dist.Samples())
		weights = append(weights, weight)
	}

	// Sample from mixture
	xs, err := sample.Sample_mixture_serially(fs, weights, N_SAMPLES)
	if err != nil {
		return nil, printAndReturnErr(err.Error())
	}
	return FilledSamples{xs: xs}, nil
}
2024-06-19 13:46:08 +00:00
/* Parser and repl */
2024-06-19 14:41:47 +00:00
// parseWordsErr prints err_msg through printAndReturnErr and returns the
// (op, dist, error) triple that parseWordsIntoOpAndDist uses to signal
// failure: an empty operator, a nil Dist and the error.
func parseWordsErr(err_msg string) (string, Dist, error) {
return "", nil, printAndReturnErr(err_msg)
}
func parseWordsIntoOpAndDist(words []string, vars map[string]Dist) (string, Dist, error) {
2024-06-19 13:46:08 +00:00
op := ""
var dist Dist
switch words[0] {
case "*", "/", "+", "-":
op = words[0]
words = words[1:]
default:
2024-12-24 14:31:21 +00:00
op = "*"
2024-06-19 13:46:08 +00:00
}
switch len(words) {
case 0:
2024-06-19 14:41:47 +00:00
return parseWordsErr("Operator must have operand; can't operate on nothing")
2024-06-19 13:46:08 +00:00
case 1:
var_word, var_word_exists := vars[words[0]]
single_float, err1 := pretty.ParseFloat(words[0]) // abstract this away to search for K/M/B/T/etc.
2024-06-19 13:46:08 +00:00
switch {
case var_word_exists:
dist = var_word
case err1 == nil:
dist = Scalar(single_float)
case err1 != nil && !var_word_exists:
2024-06-19 14:41:47 +00:00
return parseWordsErr("Trying to operate on a scalar, but scalar is neither a float nor an assigned variable")
2024-06-19 13:46:08 +00:00
}
case 2:
new_low, err1 := pretty.ParseFloat(words[0])
new_high, err2 := pretty.ParseFloat(words[1])
switch {
case err1 != nil || err2 != nil:
2024-06-19 14:41:47 +00:00
return parseWordsErr("Trying to operate by a distribution, but distribution is not specified as two floats")
case new_low <= 0.0 || new_high <= 0.0:
return parseWordsErr("Trying to parse two floats as a lognormal, but the two floats must be greater than 0")
case new_low == new_high:
return parseWordsErr("Trying to parse two floats as a lognormal, but the two floats must be different. Try a single scalar instead?")
case new_low > new_high:
return parseWordsErr("Trying to parse two floats as a lognormal, but the first number is larger than the second number")
2024-06-19 13:46:08 +00:00
}
dist = Lognormal{low: new_low, high: new_high}
case 3:
2024-12-24 14:31:21 +00:00
switch {
case words[0] == "beta" || words[0] == "b":
a, err1 := pretty.ParseFloat(words[1])
b, err2 := pretty.ParseFloat(words[2])
2024-06-19 13:46:08 +00:00
if err1 != nil || err2 != nil {
2024-06-19 14:41:47 +00:00
return parseWordsErr("Trying to specify a beta distribution? Try beta 1 2")
2024-06-19 13:46:08 +00:00
}
dist = Beta{a: a, b: b}
2024-12-24 14:31:21 +00:00
default:
2024-06-19 14:41:47 +00:00
return parseWordsErr("Input not understood or not implemented yet")
2024-06-19 02:02:42 +00:00
}
default:
2024-12-24 14:31:21 +00:00
switch words[0] {
case "mx":
tmp, err := parseMixture(words, vars)
if err != nil {
return parseWordsErr("Error parsing a mixture: " + err.Error())
}
dist = tmp
default:
return parseWordsErr("Input not understood or not implemented yet")
}
2024-06-19 02:02:42 +00:00
}
2024-06-19 13:46:08 +00:00
return op, dist, nil
}
2024-06-19 13:46:08 +00:00
/* Combine old dist and new line */
// We want this as a function (rather than just being in main)
// to be able to have parentheses/recursion, and possibly functions
2024-06-09 20:46:08 +00:00
func runRepl(stack Stack, reader *bufio.Reader, echo_flag *bool) Stack {
2024-06-19 12:30:21 +00:00
replForLoop:
2024-05-10 18:05:03 +00:00
for {
new_line, _ := reader.ReadString('\n')
if *echo_flag {
fmt.Print(new_line)
}
2024-07-07 14:06:15 +00:00
new_line_before_comments, _, _ := strings.Cut(new_line, "#")
new_line_trimmed := strings.TrimSpace(new_line_before_comments)
words := strings.Split(new_line_trimmed, " ")
switch {
2024-07-07 14:06:15 +00:00
case strings.TrimSpace(new_line_trimmed) == "": /* Empty line case */
continue replForLoop
/* Parenthesis */
case len(words) == 2 && (words[0] == "*" || words[0] == "+" || words[0] == "-" || words[0] == "/") && words[1] == "(":
new_stack := runRepl(Stack{old_dist: INIT_DIST, vars: stack.vars}, reader, echo_flag)
2024-06-19 13:46:08 +00:00
combined_dist, err := operateDists(stack.old_dist, new_stack.old_dist, words[0])
if err == nil {
stack.old_dist = combined_dist
}
case len(words) == 1 && words[0] == ")":
return stack
/* Special operations */
case words[0] == "exit" || words[0] == "e":
os.Exit(0)
case words[0] == "help" || words[0] == "h":
2024-06-19 13:46:08 +00:00
fmt.Println(HELP_MSG)
case words[0] == "debug" || words[0] == "d":
2024-06-19 13:46:08 +00:00
fmt.Printf("%v", stack)
case words[0] == "clear" || words[0] == "c" || words[0] == ".":
stack.old_dist = INIT_DIST
fmt.Println()
2024-09-15 21:24:40 +00:00
case words[0] == "stats" || words[0] == "s":
prettyPrintStats(stack.old_dist)
2024-06-19 12:30:21 +00:00
/* Variable assignment */
case words[0] == "=:" && len(words) == 2:
stack.vars[words[1]] = stack.old_dist
fmt.Printf("%s ", words[1])
case words[0] == "=." && len(words) == 2:
stack.vars[words[1]] = stack.old_dist
fmt.Printf("%s ", words[1])
prettyPrintDist(stack.old_dist)
stack.old_dist = INIT_DIST
default:
2024-06-19 14:41:47 +00:00
op, new_dist, err := parseWordsIntoOpAndDist(words, stack.vars)
if err != nil {
continue replForLoop
}
2024-06-19 13:46:08 +00:00
combined_dist, err := operateDists(stack.old_dist, new_dist, op)
if err == nil {
stack.old_dist = combined_dist
}
}
2024-06-19 13:46:08 +00:00
prettyPrintDist(stack.old_dist)
2024-05-10 18:05:03 +00:00
}
}
2024-06-19 02:02:42 +00:00
func main() {
2024-07-12 16:11:25 +00:00
num_samples_flag := flag.Int("n", N_SAMPLES, "Specifies the number of samples to draw when using samples")
filename := flag.String("f", "", "Specifies a file with a model to run. Sets the echo flag to true")
echo_flag := flag.Bool("echo", false, "Specifies whether inputs should be echoed back. Useful if reading from a file.")
2024-10-01 07:56:31 +00:00
help_flag := flag.Bool("h", false, "Shows help message")
2024-07-12 22:29:55 +00:00
flag.Parse()
2024-08-09 15:33:14 +00:00
N_SAMPLES = *num_samples_flag
2024-10-01 07:56:31 +00:00
if *help_flag {
fmt.Println(HELP_MSG)
}
2024-07-12 16:11:25 +00:00
2024-07-12 22:29:55 +00:00
var reader *bufio.Reader = nil
if *filename != "" {
file, err := os.Open(*filename)
if err == nil {
*echo_flag = true
2024-07-12 22:29:55 +00:00
reader = bufio.NewReader(file)
} else {
fmt.Printf("Error opening filename; reading from stdin instead\n")
}
}
if reader == nil {
reader = bufio.NewReader(os.Stdin)
}
2024-06-19 02:02:42 +00:00
stack := Stack{old_dist: INIT_DIST, vars: make(map[string]Dist)}
runRepl(stack, reader, echo_flag)
2024-07-12 22:29:55 +00:00
2024-06-19 02:02:42 +00:00
}