From a4263d0765b9746fc95422b06e88c704dbc92d1a Mon Sep 17 00:00:00 2001 From: NunoSempere Date: Mon, 10 Jun 2024 03:08:10 +0200 Subject: [PATCH] fix correlation problem by using global variable --- README.md | 15 ++-- f.go | 203 +++++++++++++++++++++++++---------------------- sample/sample.go | 6 +- 3 files changed, 122 insertions(+), 102 deletions(-) diff --git a/README.md b/README.md index 216dd1e..0ce928e 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # A minimalist calculator for fermi estimation -This project contains a minimalist command-line calculator for Fermi estimation. For now, it just multiplies lognormals. +This project is a minimalist, stack-based DSL for Fermi estimation. It can multiply and divide scalars, lognormals and beta distributions. ## Motivation @@ -105,17 +105,18 @@ Conceptually clearer to have all the multiplications first and then all the divi - [-] Think of some way of calling bc - [x] Think how to integrate with squiggle.c to draw samples - [x] Copy the time to botec go code - - [ ] Define samplers - - [ ] Call those samplers when operating on distributions that can't be operted on algebraically + - [x] Define samplers + - [x] Call those samplers when operating on distributions that can't be operted on algebraically - [ ] Think about how to draw a histogram from samples - [x] Display output more nicely, with K/M/B/T -- [ ] Consider the following: make this into a stack-based DSL, with: - - Variables that can be saved to and then displayed - - Other types of distributions, particularly beta distributions? => But then this requires moving to bags of samples. It could still be ~instantaneous though. -- Figure out syntax for +- [x] Consider the following: make this into a stack-based DSL, with: + - [x] Variables that can be saved to and then displayed + - [x] Other types of distributions, particularly beta distributions? => But then this requires moving to bags of samples. It could still be ~instantaneous though. +- [x] Figure out go syntax for - Maps - Joint types - Enums +- [ ] Fix correlation problem, by spinning up a new randomness thing every time some serial computation is done. Some possible syntax for a more expressive stack-based DSL diff --git a/f.go b/f.go index b918ce5..0ddb906 100644 --- a/f.go +++ b/f.go @@ -14,7 +14,7 @@ import ( const NORMAL90CONFIDENCE = 1.6448536269514727 const GENERAL_ERR_MSG = "Valid inputs: 2 || * 2 || / 2 || 2 20 || * 2 20 || / 2 20 || clean || =: var || op var || clean || help || debug || exit" -const N_SAMPLES = 10 // 1_000_000 +const N_SAMPLES = 1_000_000 // Distribution interface // https://go.dev/tour/methods/9 @@ -22,46 +22,37 @@ const N_SAMPLES = 10 // 1_000_000 type Dist interface { Samples() []float64 } - -type Scalar struct { - p float64 +type Scalar float64 +type Lognormal struct { + low float64 + high float64 +} +type Beta struct { + a float64 + b float64 +} +type FilledSamples struct { + xs []float64 } func (p Scalar) Samples() []float64 { xs := make([]float64, N_SAMPLES) for i := 0; i < N_SAMPLES; i++ { - xs[i] = p.p + xs[i] = float64(p) } return xs } - -type Lognormal struct { - low float64 - high float64 -} - func (ln Lognormal) Samples() []float64 { sampler := func(r sample.Src) float64 { return sample.Sample_to(ln.low, ln.high, r) } // return sample.Sample_parallel(sampler, N_SAMPLES) // Can't do parallel because then I'd have to await throughout the code return sample.Sample_serially(sampler, N_SAMPLES) } - -type Beta struct { - a float64 - b float64 -} - func (beta Beta) Samples() []float64 { sampler := func(r sample.Src) float64 { return sample.Sample_beta(beta.a, beta.b, r) } // return sample.Sample_parallel(sampler, N_SAMPLES) return sample.Sample_serially(sampler, N_SAMPLES) } - -type FilledSamples struct { - xs []float64 -} - func (fs FilledSamples) Samples() []float64 { return fs.xs } @@ -80,16 +71,9 @@ func parseLine(line string, vars map[string]Dist) (string, Dist, error) { var dist Dist switch words[0] { - case "*": - op = "*" + case "*", "/", "+", "-": + op = words[0] words = words[1:] - case "/": - op = "/" - words = words[1:] - case "+": - return parseLineErr("+ operation not implemented yet") - case "-": - return parseLineErr("- operation not implemented yet") default: op = "*" // later, change the below to } @@ -104,7 +88,7 @@ func parseLine(line string, vars map[string]Dist) (string, Dist, error) { case var_word_exists: dist = var_word case err1 == nil: - dist = Lognormal{low: single_float, high: single_float} + dist = Scalar(single_float) case err1 != nil && !var_word_exists: return parseLineErr("Trying to operate on a scalar, but scalar is neither a float nor an assigned variable") } @@ -133,9 +117,6 @@ func parseLine(line string, vars map[string]Dist) (string, Dist, error) { } -// Join distributions -// Multiply lognormals - func multiplyLogDists(l1 Lognormal, l2 Lognormal) Lognormal { logmean1 := (math.Log(l1.high) + math.Log(l1.low)) / 2.0 logstd1 := (math.Log(l1.high) - math.Log(l1.low)) / (2.0 * NORMAL90CONFIDENCE) @@ -157,10 +138,8 @@ func multiplyBetaDists(beta1 Beta, beta2 Beta) Beta { return Beta{a: beta1.a + beta2.a, b: beta1.b + beta2.b} } -func multiplyAsSamples(dist1 Dist, dist2 Dist) Dist { - // dist2 = Beta{a: 1, b: 2} - // fmt.Printf("dist1: %v\n", dist1) - // fmt.Printf("dist2: %v\n", dist2) +func operateAsSamples(dist1 Dist, dist2 Dist, op string) (Dist, error) { + xs := dist1.Samples() ys := dist2.Samples() // fmt.Printf("xs: %v\n", xs) @@ -168,11 +147,25 @@ func multiplyAsSamples(dist1 Dist, dist2 Dist) Dist { zs := make([]float64, N_SAMPLES) for i := 0; i < N_SAMPLES; i++ { - zs[i] = xs[i] * ys[i] + switch op { + case "*": + zs[i] = xs[i] * ys[i] + case "/": + if ys[0] != 0 { + zs[i] = xs[i] / ys[i] + } else { + fmt.Println("Error: When dividing as samples, division by zero") + return nil, errors.New("Division by zero") + } + case "+": + zs[i] = xs[i] + ys[i] + case "-": + zs[i] = xs[i] - ys[i] + } } // fmt.Printf("%v\n", zs) - return FilledSamples{xs: zs} + return FilledSamples{xs: zs}, nil } func multiplyDists(old_dist Dist, new_dist Dist) (Dist, error) { @@ -184,23 +177,23 @@ func multiplyDists(old_dist Dist, new_dist Dist) (Dist, error) { case Lognormal: return multiplyLogDists(o, n), nil case Scalar: - return multiplyLogDists(o, Lognormal{low: n.p, high: n.p}), nil + return multiplyLogDists(o, Lognormal{low: float64(n), high: float64(n)}), nil default: - return multiplyAsSamples(o, n), nil + return operateAsSamples(old_dist, new_dist, "*") } } case Scalar: { - if o.p == 1 { + if o == 1 { return new_dist, nil } switch n := new_dist.(type) { case Lognormal: - return multiplyLogDists(Lognormal{low: o.p, high: o.p}, n), nil + return multiplyLogDists(Lognormal{low: float64(o), high: float64(o)}, n), nil case Scalar: - return Scalar{p: o.p * n.p}, nil + return Scalar(float64(o) * float64(n)), nil default: - return multiplyAsSamples(o, n), nil + return operateAsSamples(old_dist, new_dist, "*") } } case Beta: @@ -208,11 +201,40 @@ func multiplyDists(old_dist Dist, new_dist Dist) (Dist, error) { case Beta: return multiplyBetaDists(o, n), nil default: - return multiplyAsSamples(o, n), nil + return operateAsSamples(old_dist, new_dist, "*") } default: - return multiplyAsSamples(old_dist, new_dist), nil - // return nil, errors.New("Can't multiply dists") + return operateAsSamples(old_dist, new_dist, "*") + } +} + +func divideDists(old_dist Dist, new_dist Dist) (Dist, error) { + + switch o := old_dist.(type) { + case Lognormal: + { + switch n := new_dist.(type) { + case Lognormal: + return multiplyLogDists(o, Lognormal{low: 1.0 / n.high, high: 1.0 / n.low}), nil + case Scalar: + return multiplyLogDists(o, Lognormal{low: 1.0 / float64(n), high: 1.0 / float64(n)}), nil + default: + return operateAsSamples(old_dist, new_dist, "/") + } + } + case Scalar: + { + switch n := new_dist.(type) { + case Lognormal: + return multiplyLogDists(Lognormal{low: float64(o), high: float64(o)}, Lognormal{low: 1.0 / n.high, high: 1.0 / n.low}), nil + case Scalar: + return Scalar(float64(o) / float64(n)), nil + default: + return operateAsSamples(old_dist, new_dist, "/") + } + } + default: + return operateAsSamples(old_dist, new_dist, "/") } } @@ -221,57 +243,47 @@ func joinDists(old_dist Dist, new_dist Dist, op string) (Dist, error) { switch op { case "*": return multiplyDists(old_dist, new_dist) + case "/": + return divideDists(old_dist, new_dist) + case "+": + return operateAsSamples(old_dist, new_dist, "+") + case "-": + return operateAsSamples(old_dist, new_dist, "-") default: return old_dist, errors.New("Can't combine distributions in this way") } - /* - switch { - case old_dist.Type == "Lognormal" && new_dist.Type == "Lognormal" && op == "*": - return Dist{Type: "Lognormal", Lognormal: multiplyLogDists(old_dist.Lognormal, new_dist.Lognormal), Samples: nil}, nil - case old_dist.Type == "Lognormal" && new_dist.Type == "Lognormal" && op == "/": - tmp_dist := Lognormal{low: 1.0 / new_dist.Lognormal.high, high: 1.0 / new_dist.Lognormal.low} - return Dist{Type: "Lognormal", Lognormal: multiplyLogDists(old_dist.Lognormal, tmp_dist), Samples: nil}, nil - default: - fmt.Printf("For now, can't do anything besides multiplying lognormals\n") - } - */ - // return old_dist, errors.New("Can't combine distributions in this way") } /* Pretty print distributions */ +func prettyPrintFloat(f float64) { + switch { + case math.Abs(f) >= 1_000_000_000_000: + fmt.Printf("%.1fT", f/1_000_000_000_000) + case math.Abs(f) >= 1_000_000_000: + fmt.Printf("%.1fB", f/1_000_000_000) + case math.Abs(f) >= 1_000_000: + fmt.Printf("%.1fM", f/1_000_000) + case math.Abs(f) >= 1_000: + fmt.Printf("%.1fK", f/1_000) + + case math.Abs(f) <= 0.0001: + fmt.Printf("%.5f", f) + case math.Abs(f) <= 0.001: + fmt.Printf("%.4f", f) + case math.Abs(f) <= 0.01: + fmt.Printf("%.3f", f) + case math.Abs(f) <= 0.1: + fmt.Printf("%.2f", f) + default: + fmt.Printf("%.1f", f) + } + +} func prettyPrint2Floats(low float64, high float64) { - // fmt.Printf("=> %.1f %.1f\n", low, high) - switch { - case math.Abs(low) >= 1_000_000_000_000: - fmt.Printf("%.1fT", low/1_000_000_000_000) - case math.Abs(low) >= 1_000_000_000: - fmt.Printf("%.1fB", low/1_000_000_000) - case math.Abs(low) >= 1_000_000: - fmt.Printf("%.1fM", low/1_000_000) - case math.Abs(low) >= 1_000: - fmt.Printf("%.1fK", low/1_000) - case math.Abs(low) >= 1_000: - fmt.Printf("%.1fK", low/1_000) - default: - fmt.Printf("%.1f", low) - } + prettyPrintFloat(low) fmt.Printf(" ") - switch { - case math.Abs(high) >= 1_000_000_000_000: - fmt.Printf("%.1fT", high/1_000_000_000_000) - case math.Abs(high) >= 1_000_000_000: - fmt.Printf("%.1fB", high/1_000_000_000) - case math.Abs(high) >= 1_000_000: - fmt.Printf("%.1fM", high/1_000_000) - case math.Abs(high) >= 1_000: - fmt.Printf("%.1fK", high/1_000) - case math.Abs(high) >= 1_000: - fmt.Printf("%.1fK", high/1_000) - default: - fmt.Printf("%.1f", high) - } + prettyPrintFloat(high) fmt.Printf("\n") - // fmt.Printf("=> %.1f %.1f\n", low, high) } func prettyPrintDist(dist Dist) { @@ -293,6 +305,11 @@ func prettyPrintDist(dist Dist) { case Beta: fmt.Printf("=> beta ") prettyPrint2Floats(v.a, v.b) + case Scalar: + fmt.Printf("=> scalar ") + w := float64(v) + prettyPrintFloat(w) + fmt.Println() default: fmt.Printf("%v", v) } @@ -303,7 +320,7 @@ func main() { reader := bufio.NewReader(os.Stdin) var init_dist Dist - init_dist = Scalar{p: 1} // Lognormal{low: 1, high: 1} + init_dist = Scalar(1) // Lognormal{low: 1, high: 1} old_dist := init_dist vars := make(map[string]Dist) // Could eventually be a more complex struct with: diff --git a/sample/sample.go b/sample/sample.go index 8e8844c..0061c06 100644 --- a/sample/sample.go +++ b/sample/sample.go @@ -9,6 +9,8 @@ import rand "math/rand/v2" type Src = *rand.Rand type func64 = func(Src) float64 +var global_r = rand.New(rand.NewPCG(uint64(1), uint64(2))) + func Sample_unit_uniform(r Src) float64 { return r.Float64() } @@ -138,10 +140,10 @@ func Sample_mixture(fs []func64, weights []float64, r Src) float64 { } func Sample_serially(f func64, n_samples int) []float64 { - var r = rand.New(rand.NewPCG(uint64(1), uint64(2))) xs := make([]float64, n_samples) + // var global_r = rand.New(rand.NewPCG(uint64(1), uint64(2))) for i := 0; i < n_samples; i++ { - xs[i] = f(r) + xs[i] = f(global_r) } return xs }