fix correlation problem by using global variable

2024-06-10 03:08:10 +02:00 · 2024-06-10 03:08:10 +02:00 · a4263d0765
commit a4263d0765
parent 5b52cf3297
3 changed files with 122 additions and 102 deletions
--- a/README.md
+++ b/README.md
@ -1,6 +1,6 @@
 # A minimalist calculator for fermi estimation

-This project contains a minimalist command-line calculator for Fermi estimation. For now, it just multiplies lognormals.
+This project is a minimalist, stack-based DSL for Fermi estimation. It can multiply and divide scalars, lognormals and beta distributions.

 ## Motivation

@ -105,17 +105,18 @@ Conceptually clearer to have all the multiplications first and then all the divi
 - [-] Think of some way of calling bc
 - [x] Think how to integrate with squiggle.c to draw samples
  - [x] Copy the time to botec go code
-  - [ ] Define samplers
-  - [ ] Call those samplers when operating on distributions that can't be operted on algebraically
+  - [x] Define samplers
+  - [x] Call those samplers when operating on distributions that can't be operted on algebraically
 - [ ] Think about how to draw a histogram from samples
 - [x] Display output more nicely, with K/M/B/T
- [ ] Consider the following: make this into a stack-based DSL, with:
-  - Variables that can be saved to and then displayed
-  - Other types of distributions, particularly beta distributions? => But then this requires moving to bags of samples. It could still be ~instantaneous though.
- Figure out syntax for
+- [x] Consider the following: make this into a stack-based DSL, with:
+  - [x] Variables that can be saved to and then displayed
+  - [x] Other types of distributions, particularly beta distributions? => But then this requires moving to bags of samples. It could still be ~instantaneous though.
+- [x] Figure out go syntax for
  - Maps
  - Joint types
  - Enums
+- [ ] Fix correlation problem, by spinning up a new randomness thing every time some serial computation is done.

 Some possible syntax for a more expressive stack-based DSL

--- a/f.go
+++ b/f.go
@ -14,7 +14,7 @@ import (

 const NORMAL90CONFIDENCE = 1.6448536269514727
 const GENERAL_ERR_MSG = "Valid inputs: 2 || * 2 || / 2 || 2 20 || * 2 20 || / 2 20 || clean || =: var || op var || clean || help || debug || exit"
-const N_SAMPLES = 10 // 1_000_000
+const N_SAMPLES = 1_000_000

 // Distribution interface
 // https://go.dev/tour/methods/9
@ -22,46 +22,37 @@ const N_SAMPLES = 10 // 1_000_000
 type Dist interface {
 	Samples() []float64
 }
-
-type Scalar struct {
-	p float64
+type Scalar float64
+type Lognormal struct {
+	low  float64
+	high float64
+}
+type Beta struct {
+	a float64
+	b float64
+}
+type FilledSamples struct {
+	xs []float64
 }

 func (p Scalar) Samples() []float64 {
 	xs := make([]float64, N_SAMPLES)
 	for i := 0; i < N_SAMPLES; i++ {
-		xs[i] = p.p
+		xs[i] = float64(p)
 	}
 	return xs
 }
-
-type Lognormal struct {
-	low  float64
-	high float64
-}
-
 func (ln Lognormal) Samples() []float64 {
 	sampler := func(r sample.Src) float64 { return sample.Sample_to(ln.low, ln.high, r) }
 	// return sample.Sample_parallel(sampler, N_SAMPLES)
 	// Can't do parallel because then I'd have to await throughout the code
 	return sample.Sample_serially(sampler, N_SAMPLES)
 }
-
-type Beta struct {
-	a float64
-	b float64
-}
-
 func (beta Beta) Samples() []float64 {
 	sampler := func(r sample.Src) float64 { return sample.Sample_beta(beta.a, beta.b, r) }
 	// return sample.Sample_parallel(sampler, N_SAMPLES)
 	return sample.Sample_serially(sampler, N_SAMPLES)
 }
-
-type FilledSamples struct {
-	xs []float64
-}
-
 func (fs FilledSamples) Samples() []float64 {
 	return fs.xs
 }
@ -80,16 +71,9 @@ func parseLine(line string, vars map[string]Dist) (string, Dist, error) {
 	var dist Dist

 	switch words[0] {
-	case "*":
-		op = "*"
+	case "*", "/", "+", "-":
+		op = words[0]
 		words = words[1:]
-	case "/":
-		op = "/"
-		words = words[1:]
-	case "+":
-		return parseLineErr("+ operation not implemented yet")
-	case "-":
-		return parseLineErr("- operation not implemented yet")
 	default:
 		op = "*" // later, change the below to
 	}
@ -104,7 +88,7 @@ func parseLine(line string, vars map[string]Dist) (string, Dist, error) {
 		case var_word_exists:
 			dist = var_word
 		case err1 == nil:
-			dist = Lognormal{low: single_float, high: single_float}
+			dist = Scalar(single_float)
 		case err1 != nil && !var_word_exists:
 			return parseLineErr("Trying to operate on a scalar, but scalar is neither a float nor an assigned variable")
 		}
@ -133,9 +117,6 @@ func parseLine(line string, vars map[string]Dist) (string, Dist, error) {

 }

-// Join distributions
-// Multiply lognormals
-
 func multiplyLogDists(l1 Lognormal, l2 Lognormal) Lognormal {
 	logmean1 := (math.Log(l1.high) + math.Log(l1.low)) / 2.0
 	logstd1 := (math.Log(l1.high) - math.Log(l1.low)) / (2.0 * NORMAL90CONFIDENCE)
@ -157,10 +138,8 @@ func multiplyBetaDists(beta1 Beta, beta2 Beta) Beta {
 	return Beta{a: beta1.a + beta2.a, b: beta1.b + beta2.b}
 }

-func multiplyAsSamples(dist1 Dist, dist2 Dist) Dist {
-	// dist2 = Beta{a: 1, b: 2}
-	// fmt.Printf("dist1: %v\n", dist1)
-	// fmt.Printf("dist2: %v\n", dist2)
+func operateAsSamples(dist1 Dist, dist2 Dist, op string) (Dist, error) {
+
 	xs := dist1.Samples()
 	ys := dist2.Samples()
 	// fmt.Printf("xs: %v\n", xs)
@ -168,11 +147,25 @@ func multiplyAsSamples(dist1 Dist, dist2 Dist) Dist {
 	zs := make([]float64, N_SAMPLES)

 	for i := 0; i < N_SAMPLES; i++ {
+		switch op {
+		case "*":
 			zs[i] = xs[i] * ys[i]
+		case "/":
+			if ys[0] != 0 {
+				zs[i] = xs[i] / ys[i]
+			} else {
+				fmt.Println("Error: When dividing as samples, division by zero")
+				return nil, errors.New("Division by zero")
+			}
+		case "+":
+			zs[i] = xs[i] + ys[i]
+		case "-":
+			zs[i] = xs[i] - ys[i]
+		}
 	}

 	// fmt.Printf("%v\n", zs)
-	return FilledSamples{xs: zs}
+	return FilledSamples{xs: zs}, nil
 }

 func multiplyDists(old_dist Dist, new_dist Dist) (Dist, error) {
@ -184,23 +177,23 @@ func multiplyDists(old_dist Dist, new_dist Dist) (Dist, error) {
 			case Lognormal:
 				return multiplyLogDists(o, n), nil
 			case Scalar:
-				return multiplyLogDists(o, Lognormal{low: n.p, high: n.p}), nil
+				return multiplyLogDists(o, Lognormal{low: float64(n), high: float64(n)}), nil
 			default:
-				return multiplyAsSamples(o, n), nil
+				return operateAsSamples(old_dist, new_dist, "*")
 			}
 		}
 	case Scalar:
 		{
-			if o.p == 1 {
+			if o == 1 {
 				return new_dist, nil
 			}
 			switch n := new_dist.(type) {
 			case Lognormal:
-				return multiplyLogDists(Lognormal{low: o.p, high: o.p}, n), nil
+				return multiplyLogDists(Lognormal{low: float64(o), high: float64(o)}, n), nil
 			case Scalar:
-				return Scalar{p: o.p * n.p}, nil
+				return Scalar(float64(o) * float64(n)), nil
 			default:
-				return multiplyAsSamples(o, n), nil
+				return operateAsSamples(old_dist, new_dist, "*")
 			}
 		}
 	case Beta:
@ -208,11 +201,40 @@ func multiplyDists(old_dist Dist, new_dist Dist) (Dist, error) {
 		case Beta:
 			return multiplyBetaDists(o, n), nil
 		default:
-			return multiplyAsSamples(o, n), nil
+			return operateAsSamples(old_dist, new_dist, "*")
 		}
 	default:
-		return multiplyAsSamples(old_dist, new_dist), nil
-		// return nil, errors.New("Can't multiply dists")
+		return operateAsSamples(old_dist, new_dist, "*")
+	}
+}
+
+func divideDists(old_dist Dist, new_dist Dist) (Dist, error) {
+
+	switch o := old_dist.(type) {
+	case Lognormal:
+		{
+			switch n := new_dist.(type) {
+			case Lognormal:
+				return multiplyLogDists(o, Lognormal{low: 1.0 / n.high, high: 1.0 / n.low}), nil
+			case Scalar:
+				return multiplyLogDists(o, Lognormal{low: 1.0 / float64(n), high: 1.0 / float64(n)}), nil
+			default:
+				return operateAsSamples(old_dist, new_dist, "/")
+			}
+		}
+	case Scalar:
+		{
+			switch n := new_dist.(type) {
+			case Lognormal:
+				return multiplyLogDists(Lognormal{low: float64(o), high: float64(o)}, Lognormal{low: 1.0 / n.high, high: 1.0 / n.low}), nil
+			case Scalar:
+				return Scalar(float64(o) / float64(n)), nil
+			default:
+				return operateAsSamples(old_dist, new_dist, "/")
+			}
+		}
+	default:
+		return operateAsSamples(old_dist, new_dist, "/")
 	}
 }

@ -221,57 +243,47 @@ func joinDists(old_dist Dist, new_dist Dist, op string) (Dist, error) {
 	switch op {
 	case "*":
 		return multiplyDists(old_dist, new_dist)
+	case "/":
+		return divideDists(old_dist, new_dist)
+	case "+":
+		return operateAsSamples(old_dist, new_dist, "+")
+	case "-":
+		return operateAsSamples(old_dist, new_dist, "-")
 	default:
 		return old_dist, errors.New("Can't combine distributions in this way")
 	}
-	/*
-		switch {
-		case old_dist.Type == "Lognormal" && new_dist.Type == "Lognormal" && op == "*":
-			return Dist{Type: "Lognormal", Lognormal: multiplyLogDists(old_dist.Lognormal, new_dist.Lognormal), Samples: nil}, nil
-		case old_dist.Type == "Lognormal" && new_dist.Type == "Lognormal" && op == "/":
-			tmp_dist := Lognormal{low: 1.0 / new_dist.Lognormal.high, high: 1.0 / new_dist.Lognormal.low}
-			return Dist{Type: "Lognormal", Lognormal: multiplyLogDists(old_dist.Lognormal, tmp_dist), Samples: nil}, nil
-		default:
-			fmt.Printf("For now, can't do anything besides multiplying lognormals\n")
-		}
-	*/
-	// return old_dist, errors.New("Can't combine distributions in this way")
 }

 /* Pretty print distributions */
+func prettyPrintFloat(f float64) {
+	switch {
+	case math.Abs(f) >= 1_000_000_000_000:
+		fmt.Printf("%.1fT", f/1_000_000_000_000)
+	case math.Abs(f) >= 1_000_000_000:
+		fmt.Printf("%.1fB", f/1_000_000_000)
+	case math.Abs(f) >= 1_000_000:
+		fmt.Printf("%.1fM", f/1_000_000)
+	case math.Abs(f) >= 1_000:
+		fmt.Printf("%.1fK", f/1_000)
+
+	case math.Abs(f) <= 0.0001:
+		fmt.Printf("%.5f", f)
+	case math.Abs(f) <= 0.001:
+		fmt.Printf("%.4f", f)
+	case math.Abs(f) <= 0.01:
+		fmt.Printf("%.3f", f)
+	case math.Abs(f) <= 0.1:
+		fmt.Printf("%.2f", f)
+	default:
+		fmt.Printf("%.1f", f)
+	}
+
+}
 func prettyPrint2Floats(low float64, high float64) {
-	// fmt.Printf("=> %.1f %.1f\n", low, high)
-	switch {
-	case math.Abs(low) >= 1_000_000_000_000:
-		fmt.Printf("%.1fT", low/1_000_000_000_000)
-	case math.Abs(low) >= 1_000_000_000:
-		fmt.Printf("%.1fB", low/1_000_000_000)
-	case math.Abs(low) >= 1_000_000:
-		fmt.Printf("%.1fM", low/1_000_000)
-	case math.Abs(low) >= 1_000:
-		fmt.Printf("%.1fK", low/1_000)
-	case math.Abs(low) >= 1_000:
-		fmt.Printf("%.1fK", low/1_000)
-	default:
-		fmt.Printf("%.1f", low)
-	}
+	prettyPrintFloat(low)
 	fmt.Printf(" ")
-	switch {
-	case math.Abs(high) >= 1_000_000_000_000:
-		fmt.Printf("%.1fT", high/1_000_000_000_000)
-	case math.Abs(high) >= 1_000_000_000:
-		fmt.Printf("%.1fB", high/1_000_000_000)
-	case math.Abs(high) >= 1_000_000:
-		fmt.Printf("%.1fM", high/1_000_000)
-	case math.Abs(high) >= 1_000:
-		fmt.Printf("%.1fK", high/1_000)
-	case math.Abs(high) >= 1_000:
-		fmt.Printf("%.1fK", high/1_000)
-	default:
-		fmt.Printf("%.1f", high)
-	}
+	prettyPrintFloat(high)
 	fmt.Printf("\n")
-	// fmt.Printf("=> %.1f %.1f\n", low, high)
 }

 func prettyPrintDist(dist Dist) {
@ -293,6 +305,11 @@ func prettyPrintDist(dist Dist) {
 	case Beta:
 		fmt.Printf("=> beta ")
 		prettyPrint2Floats(v.a, v.b)
+	case Scalar:
+		fmt.Printf("=> scalar ")
+		w := float64(v)
+		prettyPrintFloat(w)
+		fmt.Println()
 	default:
 		fmt.Printf("%v", v)
 	}
@ -303,7 +320,7 @@ func main() {

 	reader := bufio.NewReader(os.Stdin)
 	var init_dist Dist
-	init_dist = Scalar{p: 1} // Lognormal{low: 1, high: 1}
+	init_dist = Scalar(1) // Lognormal{low: 1, high: 1}
 	old_dist := init_dist
 	vars := make(map[string]Dist)
 	// Could eventually be a more complex struct with:
--- a/sample/sample.go
+++ b/sample/sample.go
@ -9,6 +9,8 @@ import rand "math/rand/v2"
 type Src = *rand.Rand
 type func64 = func(Src) float64

+var global_r = rand.New(rand.NewPCG(uint64(1), uint64(2)))
+
 func Sample_unit_uniform(r Src) float64 {
 	return r.Float64()
 }
@ -138,10 +140,10 @@ func Sample_mixture(fs []func64, weights []float64, r Src) float64 {
 }

 func Sample_serially(f func64, n_samples int) []float64 {
-	var r = rand.New(rand.NewPCG(uint64(1), uint64(2)))
 	xs := make([]float64, n_samples)
+	// var global_r = rand.New(rand.NewPCG(uint64(1), uint64(2)))
 	for i := 0; i < n_samples; i++ {
-		xs[i] = f(r)
+		xs[i] = f(global_r)
 	}
 	return xs
 }