diff --git a/README.md b/README.md index 6f71584..8dee97e 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -# A minimalist calculator for fermi estimation +# A minimalist calculator for f estimation -This project is a minimalist, stack-based DSL for Fermi estimation. It can multiply and divide scalars, lognormals and beta distributions. +This project is a minimalist, stack-based DSL for f estimation. It can multiply and divide scalars, lognormals and beta distributions. ## Motivation @@ -11,7 +11,7 @@ Sometimes, [Squiggle](https://github.com/quantified-uncertainty/squiggle), [simp Here is an example ``` -$ go run fermi.go +$ go run f.go 5000000 12000000 => 5.0M 12.0M * beta 1 200 @@ -31,7 +31,7 @@ $ go run fermi.go Perhaps this example is more understandable with comments and better units: ``` -$ sed -u "s|#.*||" | sed -u 's|M|000000|g' | go run fermi.go +$ sed -u "s|#.*||" | sed -u 's|M|000000|g' | go run f.go 5M 12M # number of people living in Chicago => 5.0M 12.0M * beta 1 200 # fraction of people that have a piano @@ -74,12 +74,40 @@ x The difference between `=: x` and `=. y` is that `=.` clears the stack after the assignment. +If you type "help", you can see a small grammar: + +``` +help + Operation | Variable assignment | Special + Operation: operator operand + operator: (empty) | * | / | + | - + operand: scalar | lognormal | beta | variable + lognormal: low high + beta: beta alpha beta + Variable assignment: =: variable_name + Clear stack: . | c | clear + Variable assignment and clear stack: =. variable_name + Other special operations: help | debug | exit + Examples: + + 2 + / 2.5 + * 1 10 (interpreted as lognormal) + + 1 10 + * beta 1 10 + 1 10 (multiplication taken as default operation) + =: x + . + 1 100 + + x + exit +``` + ## Installation ``` make build sudo make install -f # rather than the previous go run fermi.go +f # rather than the previous go run f.go ``` Why use make instead of the built-in go commands? Because the point of make is to be able to share command-line recipes. @@ -93,8 +121,8 @@ sed -u "s|#.*||" | sed -u 's|M|000000|g' | f cat more/piano-tuners.f | f cat more/piano-tuners-commented.f | sed -u "s|#.*||" | sed -u 's|M|000000|g' | f -tee -a input.log | go run fermi.go | tee -a output.log -tee -a io.log | go run fermi.go | tee -a io.log +tee -a input.log | go run f.go | tee -a output.log +tee -a io.log | go run f.go | tee -a io.log function f(){ sed -u "s|#.*||" | @@ -114,8 +142,17 @@ Note that these sed commands are just hacks, and won't parse e.g., `3.5K` correc - Sums and divisions now also supported - For things between 0 and 1, consider using a beta distribution +## Different levels of complexity + +The top level f.go file (400 lines) has a bunch of complexity: variables, parenthesis, samples, beta distributions. In the simple/ folder: + +- f_simple.go (370 lines) strips variables and parenthesis, but keeps beta distributions, samples, and addition and substraction +- f_minimal.go (140 lines) strips everything that isn't lognormal and scalar multiplication and addition, plus a few debug options. + ## Roadmap +Done: + - [x] Write README - [x] Add division? - [x] Read from file? @@ -124,13 +161,10 @@ Note that these sed commands are just hacks, and won't parse e.g., `3.5K` correc - [x] Use a sed filter? - [x] Add show more info version - [x] Scalar multiplication and division -- [ ] Program into a small device, like a calculator? -- [-] Think of some way of calling bc - [x] Think how to integrate with squiggle.c to draw samples - [x] Copy the time to botec go code - [x] Define samplers - [x] Call those samplers when operating on distributions that can't be operted on algebraically -- [ ] Think about how to draw a histogram from samples - [x] Display output more nicely, with K/M/B/T - [x] Consider the following: make this into a stack-based DSL, with: - [x] Variables that can be saved to and then displayed @@ -140,10 +174,20 @@ Note that these sed commands are just hacks, and won't parse e.g., `3.5K` correc - Joint types - Enums - [x] Fix correlation problem, by spinning up a new randomness thing every time some serial computation is done. -- [ ] Maintain *both* a more complex thing that's more featureful *and* the more simple multiplication of lognormals thing. -- [ ] Clean up error code. Right now only needed for division +- [x] Clean up error code. Right now only needed for division +- [x] Maintain *both* a more complex thing that's more featureful *and* the more simple multiplication of lognormals thing. + +To (possibly) do: + +- [ ] Document parenthesis syntax +- [ ] Allow input with K/M/T +- [ ] Add functions. Now easier to do with an explicit representation of the stakc +- [ ] Think about how to draw a histogram from samples - [ ] Dump samples to file - [ ] Represent samples/statistics in some other way - [ ] Perhaps use qsort rather than full sorting +- [ ] Program into a small device, like a calculator? -Some possible syntax for a more expressive stack-based DSL (now implemented) +Discarded: + +- [ ] ~~Think of some way of calling bc~~ diff --git a/f.go b/f.go index 35d1e2b..c4f2b61 100644 --- a/f.go +++ b/f.go @@ -14,7 +14,6 @@ import ( ) /* Types and interfaces */ - type Stack struct { old_dist Dist vars map[string]Dist @@ -52,7 +51,6 @@ func (p Scalar) Samples() []float64 { func (ln Lognormal) Samples() []float64 { sampler := func(r sample.Src) float64 { return sample.Sample_to(ln.low, ln.high, r) } - // return sample.Sample_parallel(sampler, N_SAMPLES) // Can't do parallel because then I'd have to await throughout the code return sample.Sample_serially(sampler, N_SAMPLES) } @@ -162,7 +160,6 @@ func operateDistsAsSamples(dist1 Dist, dist2 Dist, op string) (Dist, error) { } } - // fmt.Printf("%v\n", zs) return FilledSamples{xs: zs}, nil } @@ -235,14 +232,12 @@ func divideDists(old_dist Dist, new_dist Dist) (Dist, error) { { switch n := new_dist.(type) { case Lognormal: - // to do: check division by zero if n.high == 0 || n.low == 0 { fmt.Println("Error: Can't divide by 0.0") return nil, errors.New("Error: division by zero") } return multiplyLogDists(o, Lognormal{low: 1.0 / n.high, high: 1.0 / n.low}), nil case Scalar: - // to do: check division by zero if n == 0.0 { fmt.Println("Error: Can't divide by 0.0") return nil, errors.New("Error: division by zero scalar") @@ -258,7 +253,6 @@ func divideDists(old_dist Dist, new_dist Dist) (Dist, error) { case Lognormal: return multiplyLogDists(Lognormal{low: float64(o), high: float64(o)}, Lognormal{low: 1.0 / n.high, high: 1.0 / n.low}), nil case Scalar: - // to do: check division by zero if n == 0.0 { fmt.Println("Error: Can't divide by 0.0") return nil, errors.New("Error: division by zero scalar") @@ -290,9 +284,11 @@ func operateDists(old_dist Dist, new_dist Dist, op string) (Dist, error) { } /* Parser and repl */ -func parseLineIntoOpAndDist(line string, vars map[string]Dist) (string, Dist, error) { +func parseWordsErr(err_msg string) (string, Dist, error) { + return "", nil, printAndReturnErr(err_msg) +} +func parseWordsIntoOpAndDist(words []string, vars map[string]Dist) (string, Dist, error) { - words := strings.Split(strings.TrimSpace(line), " ") op := "" var dist Dist @@ -304,13 +300,9 @@ func parseLineIntoOpAndDist(line string, vars map[string]Dist) (string, Dist, er op = "*" // later, change the below to } - parseLineErr := func(err_msg string) (string, Dist, error) { - return "", nil, printAndReturnErr(err_msg) - } - switch len(words) { case 0: - return parseLineErr("Operator must have operand; can't operate on nothing") + return parseWordsErr("Operator must have operand; can't operate on nothing") case 1: var_word, var_word_exists := vars[words[0]] single_float, err1 := strconv.ParseFloat(words[0], 64) // abstract this away to search for K/M/B/T/etc. @@ -320,13 +312,13 @@ func parseLineIntoOpAndDist(line string, vars map[string]Dist) (string, Dist, er case err1 == nil: dist = Scalar(single_float) case err1 != nil && !var_word_exists: - return parseLineErr("Trying to operate on a scalar, but scalar is neither a float nor an assigned variable") + return parseWordsErr("Trying to operate on a scalar, but scalar is neither a float nor an assigned variable") } case 2: new_low, err1 := strconv.ParseFloat(words[0], 64) new_high, err2 := strconv.ParseFloat(words[1], 64) if err1 != nil || err2 != nil { - return parseLineErr("Trying to operate by a distribution, but distribution is not specified as two floats") + return parseWordsErr("Trying to operate by a distribution, but distribution is not specified as two floats") } dist = Lognormal{low: new_low, high: new_high} case 3: @@ -334,17 +326,16 @@ func parseLineIntoOpAndDist(line string, vars map[string]Dist) (string, Dist, er a, err1 := strconv.ParseFloat(words[1], 64) b, err2 := strconv.ParseFloat(words[2], 64) if err1 != nil || err2 != nil { - return parseLineErr("Trying to specify a beta distribution? Try beta 1 2") + return parseWordsErr("Trying to specify a beta distribution? Try beta 1 2") } dist = Beta{a: a, b: b} } else { - return parseLineErr("Input not understood or not implemented yet") + return parseWordsErr("Input not understood or not implemented yet") } default: - return parseLineErr("Input not understood or not implemented yet") + return parseWordsErr("Input not understood or not implemented yet") } return op, dist, nil - } /* Combine old dist and new line */ @@ -389,7 +380,7 @@ replForLoop: // fmt.Println() // continue replForLoop default: - op, new_dist, err := parseLineIntoOpAndDist(new_line, stack.vars) + op, new_dist, err := parseWordsIntoOpAndDist(words, stack.vars) if err != nil { continue replForLoop } diff --git a/simple/f_minimal.go b/simple/f_minimal.go new file mode 100644 index 0000000..4a7f2ac --- /dev/null +++ b/simple/f_minimal.go @@ -0,0 +1,140 @@ +package main + +import ( + "bufio" + "errors" + "fmt" + "git.nunosempere.com/NunoSempere/fermi/pretty" + "math" + "os" + "strconv" + "strings" +) + +type Lognormal struct { + low float64 + high float64 +} + +const HELP_MSG = " Operation | Variable assignment | Special\n" + + " Operation: operator operand\n" + + " operator: (empty) | * | / | + | -\n" + + " operand: scalar | lognormal\n" + + " lognormal: low high\n" + + " Clear stack: . | c | clear\n" + + " Other special operations: help | debug | exit\n" + + " Examples: \n" + + " / 2.5\n" + + " * 1 10 (interpreted as lognormal)\n" + + " / 1 10\n" + + " 1 10 (multiplication taken as default operation)\n" + + " .\n" + + " exit\n" +const NORMAL90CONFIDENCE = 1.6448536269514727 +const N_SAMPLES = 100_000 + +func prettyPrintLognormal(l Lognormal) { + fmt.Printf("=> ") + pretty.PrettyPrint2Floats(l.low, l.high) + fmt.Println() +} + +func printAndReturnErr(err_msg string) error { + fmt.Println(err_msg) + fmt.Println(HELP_MSG) + return errors.New(err_msg) +} + +func multiplyLogDists(l1 Lognormal, l2 Lognormal) Lognormal { + logmean1 := (math.Log(l1.high) + math.Log(l1.low)) / 2.0 + logstd1 := (math.Log(l1.high) - math.Log(l1.low)) / (2.0 * NORMAL90CONFIDENCE) + + logmean2 := (math.Log(l2.high) + math.Log(l2.low)) / 2.0 + logstd2 := (math.Log(l2.high) - math.Log(l2.low)) / (2.0 * NORMAL90CONFIDENCE) + + logmean_product := logmean1 + logmean2 + logstd_product := math.Sqrt(logstd1*logstd1 + logstd2*logstd2) + + h := logstd_product * NORMAL90CONFIDENCE + loglow := logmean_product - h + loghigh := logmean_product + h + return Lognormal{low: math.Exp(loglow), high: math.Exp(loghigh)} +} + +func divideLogDists(l1 Lognormal, l2 Lognormal) (Lognormal, error) { + if l2.high == 0 || l2.low == 0 { + fmt.Println("Error: Can't divide by 0.0") + return Lognormal{}, errors.New("Error: division by zero") + } + return multiplyLogDists(l1, Lognormal{low: 1.0 / l2.high, high: 1.0 / l2.low}), nil +} + +func parseWordsErr(err_msg string) (string, Lognormal, error) { + return "", Lognormal{}, printAndReturnErr(err_msg) +} +func parseWordsIntoOpAndDist(words []string) (string, Lognormal, error) { + op := "" + var dist Lognormal + + switch words[0] { + case "*", "/": + op = words[0] + words = words[1:] + default: + op = "*" + } + + switch len(words) { + case 0: + return parseWordsErr("Operator must have operand; can't operate on nothing") + case 1: + single_float, err := strconv.ParseFloat(words[0], 64) // abstract this away to search for K/M/B/T/etc. + if err != nil { + return parseWordsErr("Trying to operate on a scalar, but scalar is neither a float nor an assigned variable") + } + dist = Lognormal{low: single_float, high: single_float} + case 2: + new_low, err1 := strconv.ParseFloat(words[0], 64) + new_high, err2 := strconv.ParseFloat(words[1], 64) + if err1 != nil || err2 != nil { + return parseWordsErr("Trying to operate by a distribution, but distribution is not specified as two floats") + } + dist = Lognormal{low: new_low, high: new_high} + default: + return parseWordsErr("Input not understood or not implemented yet") + } + return op, dist, nil +} + +func main() { + reader := bufio.NewReader(os.Stdin) + old_dist := Lognormal{low: 1, high: 1} + +replForLoop: + for { + + new_line, _ := reader.ReadString('\n') + words := strings.Split(strings.TrimSpace(new_line), " ") + + if strings.TrimSpace(new_line) == "" { + continue replForLoop + } + + op, new_dist, err := parseWordsIntoOpAndDist(words) + if err != nil { + continue replForLoop + } + switch op { + case "*": + old_dist = multiplyLogDists(old_dist, new_dist) + case "/": + result_dist, err := divideLogDists(old_dist, new_dist) + if err != nil { + continue replForLoop + } + old_dist = result_dist + + } + prettyPrintLognormal(old_dist) + } +} diff --git a/f_simple.go b/simple/f_simple.go similarity index 91% rename from f_simple.go rename to simple/f_simple.go index 3500ecc..85838db 100644 --- a/f_simple.go +++ b/simple/f_simple.go @@ -63,7 +63,7 @@ func (fs FilledSamples) Samples() []float64 { const HELP_MSG = " Operation | Variable assignment | Special\n" + " Operation: operator operand\n" + " operator: (empty) | * | / | + | -\n" + - " operand: scalar | lognormal | beta | variable\n" + + " operand: scalar | lognormal | beta\n" + " lognormal: low high\n" + " beta: beta alpha beta\n" + " Clear stack: . | c | clear\n" + @@ -279,9 +279,11 @@ func operateDists(old_dist Dist, new_dist Dist, op string) (Dist, error) { } /* Parser and repl */ -func parseLineIntoOpAndDist(line string) (string, Dist, error) { +func parseWordsErr(err_msg string) (string, Dist, error) { + return "", nil, printAndReturnErr(err_msg) +} +func parseWordsIntoOpAndDist(words []string) (string, Dist, error) { - words := strings.Split(strings.TrimSpace(line), " ") op := "" var dist Dist @@ -293,24 +295,20 @@ func parseLineIntoOpAndDist(line string) (string, Dist, error) { op = "*" // later, change the below to } - parseLineErr := func(err_msg string) (string, Dist, error) { - return "", nil, printAndReturnErr(err_msg) - } - switch len(words) { case 0: - return parseLineErr("Operator must have operand; can't operate on nothing") + return parseWordsErr("Operator must have operand; can't operate on nothing") case 1: single_float, err := strconv.ParseFloat(words[0], 64) // abstract this away to search for K/M/B/T/etc. if err != nil { - return parseLineErr("Trying to operate on a scalar, but scalar is neither a float nor an assigned variable") + return parseWordsErr("Trying to operate on a scalar, but scalar is neither a float nor an assigned variable") } dist = Scalar(single_float) case 2: new_low, err1 := strconv.ParseFloat(words[0], 64) new_high, err2 := strconv.ParseFloat(words[1], 64) if err1 != nil || err2 != nil { - return parseLineErr("Trying to operate by a distribution, but distribution is not specified as two floats") + return parseWordsErr("Trying to operate by a distribution, but distribution is not specified as two floats") } dist = Lognormal{low: new_low, high: new_high} case 3: @@ -318,14 +316,14 @@ func parseLineIntoOpAndDist(line string) (string, Dist, error) { a, err1 := strconv.ParseFloat(words[1], 64) b, err2 := strconv.ParseFloat(words[2], 64) if err1 != nil || err2 != nil { - return parseLineErr("Trying to specify a beta distribution? Try beta 1 2") + return parseWordsErr("Trying to specify a beta distribution? Try beta 1 2") } dist = Beta{a: a, b: b} } else { - return parseLineErr("Input not understood or not implemented yet") + return parseWordsErr("Input not understood or not implemented yet") } default: - return parseLineErr("Input not understood or not implemented yet") + return parseWordsErr("Input not understood or not implemented yet") } return op, dist, nil @@ -358,7 +356,7 @@ replForLoop: continue replForLoop } - op, new_dist, err := parseLineIntoOpAndDist(new_line) + op, new_dist, err := parseWordsIntoOpAndDist(words) if err != nil { continue replForLoop }