fix correlation problem by using global variable
This commit is contained in:
		
							parent
							
								
									5b52cf3297
								
							
						
					
					
						commit
						a4263d0765
					
				
							
								
								
									
										15
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										15
									
								
								README.md
									
									
									
									
									
								
							|  | @ -1,6 +1,6 @@ | |||
| # A minimalist calculator for fermi estimation | ||||
| 
 | ||||
| This project contains a minimalist command-line calculator for Fermi estimation. For now, it just multiplies lognormals. | ||||
| This project is a minimalist, stack-based DSL for Fermi estimation. It can multiply and divide scalars, lognormals and beta distributions. | ||||
| 
 | ||||
| ## Motivation | ||||
| 
 | ||||
|  | @ -105,17 +105,18 @@ Conceptually clearer to have all the multiplications first and then all the divi | |||
| - [-] Think of some way of calling bc | ||||
| - [x] Think how to integrate with squiggle.c to draw samples | ||||
|   - [x] Copy the time to botec go code | ||||
|   - [ ] Define samplers | ||||
|   - [ ] Call those samplers when operating on distributions that can't be operted on algebraically | ||||
|   - [x] Define samplers | ||||
|   - [x] Call those samplers when operating on distributions that can't be operted on algebraically | ||||
| - [ ] Think about how to draw a histogram from samples | ||||
| - [x] Display output more nicely, with K/M/B/T | ||||
| - [ ] Consider the following: make this into a stack-based DSL, with: | ||||
|   - Variables that can be saved to and then displayed | ||||
|   - Other types of distributions, particularly beta distributions? => But then this requires moving to bags of samples. It could still be ~instantaneous though. | ||||
| - Figure out syntax for | ||||
| - [x] Consider the following: make this into a stack-based DSL, with: | ||||
|   - [x] Variables that can be saved to and then displayed | ||||
|   - [x] Other types of distributions, particularly beta distributions? => But then this requires moving to bags of samples. It could still be ~instantaneous though. | ||||
| - [x] Figure out go syntax for | ||||
|   - Maps | ||||
|   - Joint types | ||||
|   - Enums | ||||
| - [ ] Fix correlation problem, by spinning up a new randomness thing every time some serial computation is done. | ||||
| 
 | ||||
| Some possible syntax for a more expressive stack-based DSL | ||||
| 
 | ||||
|  |  | |||
							
								
								
									
										201
									
								
								f.go
									
									
									
									
									
								
							
							
						
						
									
										201
									
								
								f.go
									
									
									
									
									
								
							|  | @ -14,7 +14,7 @@ import ( | |||
| 
 | ||||
| const NORMAL90CONFIDENCE = 1.6448536269514727 | ||||
| const GENERAL_ERR_MSG = "Valid inputs: 2 || * 2 || / 2 || 2 20 || * 2 20 || / 2 20 || clean || =: var || op var || clean || help || debug || exit" | ||||
| const N_SAMPLES = 10 // 1_000_000
 | ||||
| const N_SAMPLES = 1_000_000 | ||||
| 
 | ||||
| // Distribution interface
 | ||||
| // https://go.dev/tour/methods/9
 | ||||
|  | @ -22,46 +22,37 @@ const N_SAMPLES = 10 // 1_000_000 | |||
| type Dist interface { | ||||
| 	Samples() []float64 | ||||
| } | ||||
| 
 | ||||
| type Scalar struct { | ||||
| 	p float64 | ||||
| type Scalar float64 | ||||
| type Lognormal struct { | ||||
| 	low  float64 | ||||
| 	high float64 | ||||
| } | ||||
| type Beta struct { | ||||
| 	a float64 | ||||
| 	b float64 | ||||
| } | ||||
| type FilledSamples struct { | ||||
| 	xs []float64 | ||||
| } | ||||
| 
 | ||||
| func (p Scalar) Samples() []float64 { | ||||
| 	xs := make([]float64, N_SAMPLES) | ||||
| 	for i := 0; i < N_SAMPLES; i++ { | ||||
| 		xs[i] = p.p | ||||
| 		xs[i] = float64(p) | ||||
| 	} | ||||
| 	return xs | ||||
| } | ||||
| 
 | ||||
| type Lognormal struct { | ||||
| 	low  float64 | ||||
| 	high float64 | ||||
| } | ||||
| 
 | ||||
| func (ln Lognormal) Samples() []float64 { | ||||
| 	sampler := func(r sample.Src) float64 { return sample.Sample_to(ln.low, ln.high, r) } | ||||
| 	// return sample.Sample_parallel(sampler, N_SAMPLES)
 | ||||
| 	// Can't do parallel because then I'd have to await throughout the code
 | ||||
| 	return sample.Sample_serially(sampler, N_SAMPLES) | ||||
| } | ||||
| 
 | ||||
| type Beta struct { | ||||
| 	a float64 | ||||
| 	b float64 | ||||
| } | ||||
| 
 | ||||
| func (beta Beta) Samples() []float64 { | ||||
| 	sampler := func(r sample.Src) float64 { return sample.Sample_beta(beta.a, beta.b, r) } | ||||
| 	// return sample.Sample_parallel(sampler, N_SAMPLES)
 | ||||
| 	return sample.Sample_serially(sampler, N_SAMPLES) | ||||
| } | ||||
| 
 | ||||
| type FilledSamples struct { | ||||
| 	xs []float64 | ||||
| } | ||||
| 
 | ||||
| func (fs FilledSamples) Samples() []float64 { | ||||
| 	return fs.xs | ||||
| } | ||||
|  | @ -80,16 +71,9 @@ func parseLine(line string, vars map[string]Dist) (string, Dist, error) { | |||
| 	var dist Dist | ||||
| 
 | ||||
| 	switch words[0] { | ||||
| 	case "*": | ||||
| 		op = "*" | ||||
| 	case "*", "/", "+", "-": | ||||
| 		op = words[0] | ||||
| 		words = words[1:] | ||||
| 	case "/": | ||||
| 		op = "/" | ||||
| 		words = words[1:] | ||||
| 	case "+": | ||||
| 		return parseLineErr("+ operation not implemented yet") | ||||
| 	case "-": | ||||
| 		return parseLineErr("- operation not implemented yet") | ||||
| 	default: | ||||
| 		op = "*" // later, change the below to
 | ||||
| 	} | ||||
|  | @ -104,7 +88,7 @@ func parseLine(line string, vars map[string]Dist) (string, Dist, error) { | |||
| 		case var_word_exists: | ||||
| 			dist = var_word | ||||
| 		case err1 == nil: | ||||
| 			dist = Lognormal{low: single_float, high: single_float} | ||||
| 			dist = Scalar(single_float) | ||||
| 		case err1 != nil && !var_word_exists: | ||||
| 			return parseLineErr("Trying to operate on a scalar, but scalar is neither a float nor an assigned variable") | ||||
| 		} | ||||
|  | @ -133,9 +117,6 @@ func parseLine(line string, vars map[string]Dist) (string, Dist, error) { | |||
| 
 | ||||
| } | ||||
| 
 | ||||
| // Join distributions
 | ||||
| // Multiply lognormals
 | ||||
| 
 | ||||
| func multiplyLogDists(l1 Lognormal, l2 Lognormal) Lognormal { | ||||
| 	logmean1 := (math.Log(l1.high) + math.Log(l1.low)) / 2.0 | ||||
| 	logstd1 := (math.Log(l1.high) - math.Log(l1.low)) / (2.0 * NORMAL90CONFIDENCE) | ||||
|  | @ -157,10 +138,8 @@ func multiplyBetaDists(beta1 Beta, beta2 Beta) Beta { | |||
| 	return Beta{a: beta1.a + beta2.a, b: beta1.b + beta2.b} | ||||
| } | ||||
| 
 | ||||
| func multiplyAsSamples(dist1 Dist, dist2 Dist) Dist { | ||||
| 	// dist2 = Beta{a: 1, b: 2}
 | ||||
| 	// fmt.Printf("dist1: %v\n", dist1)
 | ||||
| 	// fmt.Printf("dist2: %v\n", dist2)
 | ||||
| func operateAsSamples(dist1 Dist, dist2 Dist, op string) (Dist, error) { | ||||
| 
 | ||||
| 	xs := dist1.Samples() | ||||
| 	ys := dist2.Samples() | ||||
| 	// fmt.Printf("xs: %v\n", xs)
 | ||||
|  | @ -168,11 +147,25 @@ func multiplyAsSamples(dist1 Dist, dist2 Dist) Dist { | |||
| 	zs := make([]float64, N_SAMPLES) | ||||
| 
 | ||||
| 	for i := 0; i < N_SAMPLES; i++ { | ||||
| 		switch op { | ||||
| 		case "*": | ||||
| 			zs[i] = xs[i] * ys[i] | ||||
| 		case "/": | ||||
| 			if ys[0] != 0 { | ||||
| 				zs[i] = xs[i] / ys[i] | ||||
| 			} else { | ||||
| 				fmt.Println("Error: When dividing as samples, division by zero") | ||||
| 				return nil, errors.New("Division by zero") | ||||
| 			} | ||||
| 		case "+": | ||||
| 			zs[i] = xs[i] + ys[i] | ||||
| 		case "-": | ||||
| 			zs[i] = xs[i] - ys[i] | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	// fmt.Printf("%v\n", zs)
 | ||||
| 	return FilledSamples{xs: zs} | ||||
| 	return FilledSamples{xs: zs}, nil | ||||
| } | ||||
| 
 | ||||
| func multiplyDists(old_dist Dist, new_dist Dist) (Dist, error) { | ||||
|  | @ -184,23 +177,23 @@ func multiplyDists(old_dist Dist, new_dist Dist) (Dist, error) { | |||
| 			case Lognormal: | ||||
| 				return multiplyLogDists(o, n), nil | ||||
| 			case Scalar: | ||||
| 				return multiplyLogDists(o, Lognormal{low: n.p, high: n.p}), nil | ||||
| 				return multiplyLogDists(o, Lognormal{low: float64(n), high: float64(n)}), nil | ||||
| 			default: | ||||
| 				return multiplyAsSamples(o, n), nil | ||||
| 				return operateAsSamples(old_dist, new_dist, "*") | ||||
| 			} | ||||
| 		} | ||||
| 	case Scalar: | ||||
| 		{ | ||||
| 			if o.p == 1 { | ||||
| 			if o == 1 { | ||||
| 				return new_dist, nil | ||||
| 			} | ||||
| 			switch n := new_dist.(type) { | ||||
| 			case Lognormal: | ||||
| 				return multiplyLogDists(Lognormal{low: o.p, high: o.p}, n), nil | ||||
| 				return multiplyLogDists(Lognormal{low: float64(o), high: float64(o)}, n), nil | ||||
| 			case Scalar: | ||||
| 				return Scalar{p: o.p * n.p}, nil | ||||
| 				return Scalar(float64(o) * float64(n)), nil | ||||
| 			default: | ||||
| 				return multiplyAsSamples(o, n), nil | ||||
| 				return operateAsSamples(old_dist, new_dist, "*") | ||||
| 			} | ||||
| 		} | ||||
| 	case Beta: | ||||
|  | @ -208,11 +201,40 @@ func multiplyDists(old_dist Dist, new_dist Dist) (Dist, error) { | |||
| 		case Beta: | ||||
| 			return multiplyBetaDists(o, n), nil | ||||
| 		default: | ||||
| 			return multiplyAsSamples(o, n), nil | ||||
| 			return operateAsSamples(old_dist, new_dist, "*") | ||||
| 		} | ||||
| 	default: | ||||
| 		return multiplyAsSamples(old_dist, new_dist), nil | ||||
| 		// return nil, errors.New("Can't multiply dists")
 | ||||
| 		return operateAsSamples(old_dist, new_dist, "*") | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func divideDists(old_dist Dist, new_dist Dist) (Dist, error) { | ||||
| 
 | ||||
| 	switch o := old_dist.(type) { | ||||
| 	case Lognormal: | ||||
| 		{ | ||||
| 			switch n := new_dist.(type) { | ||||
| 			case Lognormal: | ||||
| 				return multiplyLogDists(o, Lognormal{low: 1.0 / n.high, high: 1.0 / n.low}), nil | ||||
| 			case Scalar: | ||||
| 				return multiplyLogDists(o, Lognormal{low: 1.0 / float64(n), high: 1.0 / float64(n)}), nil | ||||
| 			default: | ||||
| 				return operateAsSamples(old_dist, new_dist, "/") | ||||
| 			} | ||||
| 		} | ||||
| 	case Scalar: | ||||
| 		{ | ||||
| 			switch n := new_dist.(type) { | ||||
| 			case Lognormal: | ||||
| 				return multiplyLogDists(Lognormal{low: float64(o), high: float64(o)}, Lognormal{low: 1.0 / n.high, high: 1.0 / n.low}), nil | ||||
| 			case Scalar: | ||||
| 				return Scalar(float64(o) / float64(n)), nil | ||||
| 			default: | ||||
| 				return operateAsSamples(old_dist, new_dist, "/") | ||||
| 			} | ||||
| 		} | ||||
| 	default: | ||||
| 		return operateAsSamples(old_dist, new_dist, "/") | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
|  | @ -221,57 +243,47 @@ func joinDists(old_dist Dist, new_dist Dist, op string) (Dist, error) { | |||
| 	switch op { | ||||
| 	case "*": | ||||
| 		return multiplyDists(old_dist, new_dist) | ||||
| 	case "/": | ||||
| 		return divideDists(old_dist, new_dist) | ||||
| 	case "+": | ||||
| 		return operateAsSamples(old_dist, new_dist, "+") | ||||
| 	case "-": | ||||
| 		return operateAsSamples(old_dist, new_dist, "-") | ||||
| 	default: | ||||
| 		return old_dist, errors.New("Can't combine distributions in this way") | ||||
| 	} | ||||
| 	/* | ||||
| 		switch { | ||||
| 		case old_dist.Type == "Lognormal" && new_dist.Type == "Lognormal" && op == "*": | ||||
| 			return Dist{Type: "Lognormal", Lognormal: multiplyLogDists(old_dist.Lognormal, new_dist.Lognormal), Samples: nil}, nil | ||||
| 		case old_dist.Type == "Lognormal" && new_dist.Type == "Lognormal" && op == "/": | ||||
| 			tmp_dist := Lognormal{low: 1.0 / new_dist.Lognormal.high, high: 1.0 / new_dist.Lognormal.low} | ||||
| 			return Dist{Type: "Lognormal", Lognormal: multiplyLogDists(old_dist.Lognormal, tmp_dist), Samples: nil}, nil | ||||
| 		default: | ||||
| 			fmt.Printf("For now, can't do anything besides multiplying lognormals\n") | ||||
| 		} | ||||
| 	*/ | ||||
| 	// return old_dist, errors.New("Can't combine distributions in this way")
 | ||||
| } | ||||
| 
 | ||||
| /* Pretty print distributions */ | ||||
| func prettyPrintFloat(f float64) { | ||||
| 	switch { | ||||
| 	case math.Abs(f) >= 1_000_000_000_000: | ||||
| 		fmt.Printf("%.1fT", f/1_000_000_000_000) | ||||
| 	case math.Abs(f) >= 1_000_000_000: | ||||
| 		fmt.Printf("%.1fB", f/1_000_000_000) | ||||
| 	case math.Abs(f) >= 1_000_000: | ||||
| 		fmt.Printf("%.1fM", f/1_000_000) | ||||
| 	case math.Abs(f) >= 1_000: | ||||
| 		fmt.Printf("%.1fK", f/1_000) | ||||
| 
 | ||||
| 	case math.Abs(f) <= 0.0001: | ||||
| 		fmt.Printf("%.5f", f) | ||||
| 	case math.Abs(f) <= 0.001: | ||||
| 		fmt.Printf("%.4f", f) | ||||
| 	case math.Abs(f) <= 0.01: | ||||
| 		fmt.Printf("%.3f", f) | ||||
| 	case math.Abs(f) <= 0.1: | ||||
| 		fmt.Printf("%.2f", f) | ||||
| 	default: | ||||
| 		fmt.Printf("%.1f", f) | ||||
| 	} | ||||
| 
 | ||||
| } | ||||
| func prettyPrint2Floats(low float64, high float64) { | ||||
| 	// fmt.Printf("=> %.1f %.1f\n", low, high)
 | ||||
| 	switch { | ||||
| 	case math.Abs(low) >= 1_000_000_000_000: | ||||
| 		fmt.Printf("%.1fT", low/1_000_000_000_000) | ||||
| 	case math.Abs(low) >= 1_000_000_000: | ||||
| 		fmt.Printf("%.1fB", low/1_000_000_000) | ||||
| 	case math.Abs(low) >= 1_000_000: | ||||
| 		fmt.Printf("%.1fM", low/1_000_000) | ||||
| 	case math.Abs(low) >= 1_000: | ||||
| 		fmt.Printf("%.1fK", low/1_000) | ||||
| 	case math.Abs(low) >= 1_000: | ||||
| 		fmt.Printf("%.1fK", low/1_000) | ||||
| 	default: | ||||
| 		fmt.Printf("%.1f", low) | ||||
| 	} | ||||
| 	prettyPrintFloat(low) | ||||
| 	fmt.Printf(" ") | ||||
| 	switch { | ||||
| 	case math.Abs(high) >= 1_000_000_000_000: | ||||
| 		fmt.Printf("%.1fT", high/1_000_000_000_000) | ||||
| 	case math.Abs(high) >= 1_000_000_000: | ||||
| 		fmt.Printf("%.1fB", high/1_000_000_000) | ||||
| 	case math.Abs(high) >= 1_000_000: | ||||
| 		fmt.Printf("%.1fM", high/1_000_000) | ||||
| 	case math.Abs(high) >= 1_000: | ||||
| 		fmt.Printf("%.1fK", high/1_000) | ||||
| 	case math.Abs(high) >= 1_000: | ||||
| 		fmt.Printf("%.1fK", high/1_000) | ||||
| 	default: | ||||
| 		fmt.Printf("%.1f", high) | ||||
| 	} | ||||
| 	prettyPrintFloat(high) | ||||
| 	fmt.Printf("\n") | ||||
| 	// fmt.Printf("=> %.1f %.1f\n", low, high)
 | ||||
| } | ||||
| 
 | ||||
| func prettyPrintDist(dist Dist) { | ||||
|  | @ -293,6 +305,11 @@ func prettyPrintDist(dist Dist) { | |||
| 	case Beta: | ||||
| 		fmt.Printf("=> beta ") | ||||
| 		prettyPrint2Floats(v.a, v.b) | ||||
| 	case Scalar: | ||||
| 		fmt.Printf("=> scalar ") | ||||
| 		w := float64(v) | ||||
| 		prettyPrintFloat(w) | ||||
| 		fmt.Println() | ||||
| 	default: | ||||
| 		fmt.Printf("%v", v) | ||||
| 	} | ||||
|  | @ -303,7 +320,7 @@ func main() { | |||
| 
 | ||||
| 	reader := bufio.NewReader(os.Stdin) | ||||
| 	var init_dist Dist | ||||
| 	init_dist = Scalar{p: 1} // Lognormal{low: 1, high: 1}
 | ||||
| 	init_dist = Scalar(1) // Lognormal{low: 1, high: 1}
 | ||||
| 	old_dist := init_dist | ||||
| 	vars := make(map[string]Dist) | ||||
| 	// Could eventually be a more complex struct with:
 | ||||
|  |  | |||
|  | @ -9,6 +9,8 @@ import rand "math/rand/v2" | |||
| type Src = *rand.Rand | ||||
| type func64 = func(Src) float64 | ||||
| 
 | ||||
| var global_r = rand.New(rand.NewPCG(uint64(1), uint64(2))) | ||||
| 
 | ||||
| func Sample_unit_uniform(r Src) float64 { | ||||
| 	return r.Float64() | ||||
| } | ||||
|  | @ -138,10 +140,10 @@ func Sample_mixture(fs []func64, weights []float64, r Src) float64 { | |||
| } | ||||
| 
 | ||||
| func Sample_serially(f func64, n_samples int) []float64 { | ||||
| 	var r = rand.New(rand.NewPCG(uint64(1), uint64(2))) | ||||
| 	xs := make([]float64, n_samples) | ||||
| 	// var global_r = rand.New(rand.NewPCG(uint64(1), uint64(2)))
 | ||||
| 	for i := 0; i < n_samples; i++ { | ||||
| 		xs[i] = f(r) | ||||
| 		xs[i] = f(global_r) | ||||
| 	} | ||||
| 	return xs | ||||
| } | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user