fix correlation problem by using global variable

fengshui
add code to multiply beta distributions
2024-06-10 03:08:10 +02:00 · 2024-06-10 01:13:17 +02:00 · 2024-06-10 01:12:02 +02:00 · 2024-06-10 00:51:05 +02:00 · 2024-06-10 00:24:06 +02:00 · 2024-06-10 00:05:23 +02:00
4 changed files with 585 additions and 190 deletions
--- a/README.md
+++ b/README.md
@ -1,6 +1,6 @@
 # A minimalist calculator for fermi estimation

-This project contains a minimalist command-line calculator for Fermi estimation. For now, it just multiplies lognormals.
+This project is a minimalist, stack-based DSL for Fermi estimation. It can multiply and divide scalars, lognormals and beta distributions.

 ## Motivation

@ -102,6 +102,34 @@ Conceptually clearer to have all the multiplications first and then all the divi
 - [x] Add show more info version
 - [x] Scalar multiplication and division
 - [ ] Program into a small device, like a calculator?
- [ ] Think of some way of calling bc
- [ ] Think how to integrate with squiggle.c to draw samples
+- [-] Think of some way of calling bc
+- [x] Think how to integrate with squiggle.c to draw samples
+  - [x] Copy the time to botec go code
+  - [x] Define samplers
+  - [x] Call those samplers when operating on distributions that can't be operted on algebraically
 - [ ] Think about how to draw a histogram from samples
+- [x] Display output more nicely, with K/M/B/T
+- [x] Consider the following: make this into a stack-based DSL, with:
+  - [x] Variables that can be saved to and then displayed
+  - [x] Other types of distributions, particularly beta distributions? => But then this requires moving to bags of samples. It could still be ~instantaneous though.
+- [x] Figure out go syntax for
+  - Maps
+  - Joint types
+  - Enums
+- [ ] Fix correlation problem, by spinning up a new randomness thing every time some serial computation is done.
+
+Some possible syntax for a more expressive stack-based DSL
+
+```
+1B to 20B
+* 1 to 100
+/ beta 1 2 # or b 1 2
+=: x # content of the stack at this point saved into x
+
+1 to 10
+10 to 100
+=: y # content of the stack at this point saved into y
+
+x # put x on the stack
+- y # substract y from the content of the stack. Requires interpreting x and y as list of samples
+```
--- a/f.go
+++ b/f.go
@ -2,229 +2,386 @@ package main

 import (
 	"bufio"
+	"errors"
 	"fmt"
+	"git.nunosempere.com/NunoSempere/fermi/sample"
 	"math"
 	"os"
+	"sort"
 	"strconv"
 	"strings"
 )

 const NORMAL90CONFIDENCE = 1.6448536269514727
+const GENERAL_ERR_MSG = "Valid inputs: 2 || * 2 || / 2 || 2 20 || * 2 20 || / 2 20 || clean || =: var || op var || clean || help || debug || exit"
+const N_SAMPLES = 1_000_000

-func boundsToLogParams(low float64, high float64) (float64, float64) {
-	loglow := math.Log(low)
-	loghigh := math.Log(high)
-	logmean := (loghigh + loglow) / 2.0
-	logstd := (loghigh - loglow) / (2.0 * NORMAL90CONFIDENCE)
-	return logmean, logstd
+// Distribution interface
+// https://go.dev/tour/methods/9

+type Dist interface {
+	Samples() []float64
+}
+type Scalar float64
+type Lognormal struct {
+	low  float64
+	high float64
+}
+type Beta struct {
+	a float64
+	b float64
+}
+type FilledSamples struct {
+	xs []float64
 }

-func multiplyLognormals(logmean1 float64, logstd1 float64, logmean2 float64, logstd2 float64) (float64, float64) {
-	return logmean1 + logmean2, math.Sqrt(logstd1*logstd1 + logstd2*logstd2)
-}
-
-func logParamsToBounds(logmean float64, logstd float64) (float64, float64) {
-	h := logstd * NORMAL90CONFIDENCE
-	loglow := logmean - h
-	loghigh := logmean + h
-	return math.Exp(loglow), math.Exp(loghigh)
-}
-
-func combineBounds(old_low, old_high, new_low, new_high float64) (float64, float64) {
-	logmean_old, logstd_old := boundsToLogParams(old_low, old_high)
-	logmean_new, logstd_new := boundsToLogParams(new_low, new_high)
-
-	logmean_product, logstd_product := multiplyLognormals(logmean_old, logstd_old, logmean_new, logstd_new)
-
-	return logParamsToBounds(logmean_product, logstd_product)
-}
-
-func prettyPrintDist(low float64, high float64) {
-	// fmt.Printf("=> %.1f %.1f\n", low, high)
-	fmt.Printf("=> ")
-	switch {
-	case math.Abs(low) >= 1_000_000_000_000:
-		fmt.Printf("%.1fT", low/1_000_000_000_000)
-	case math.Abs(low) >= 1_000_000_000:
-		fmt.Printf("%.1fB", low/1_000_000_000)
-	case math.Abs(low) >= 1_000_000:
-		fmt.Printf("%.1fM", low/1_000_000)
-	case math.Abs(low) >= 1_000:
-		fmt.Printf("%.1fK", low/1_000)
-	case math.Abs(low) >= 1_000:
-		fmt.Printf("%.1fK", low/1_000)
-	default:
-		fmt.Printf("%.1f", low)
+func (p Scalar) Samples() []float64 {
+	xs := make([]float64, N_SAMPLES)
+	for i := 0; i < N_SAMPLES; i++ {
+		xs[i] = float64(p)
 	}
-	fmt.Printf(" ")
-	switch {
-	case math.Abs(high) >= 1_000_000_000_000:
-		fmt.Printf("%.1fT", high/1_000_000_000_000)
-	case math.Abs(high) >= 1_000_000_000:
-		fmt.Printf("%.1fB", high/1_000_000_000)
-	case math.Abs(high) >= 1_000_000:
-		fmt.Printf("%.1fM", high/1_000_000)
-	case math.Abs(high) >= 1_000:
-		fmt.Printf("%.1fK", high/1_000)
-	case math.Abs(high) >= 1_000:
-		fmt.Printf("%.1fK", high/1_000)
-	default:
-		fmt.Printf("%.1f", high)
-	}
-	fmt.Printf("\n")
-	// fmt.Printf("=> %.1f %.1f\n", low, high)
+	return xs
+}
+func (ln Lognormal) Samples() []float64 {
+	sampler := func(r sample.Src) float64 { return sample.Sample_to(ln.low, ln.high, r) }
+	// return sample.Sample_parallel(sampler, N_SAMPLES)
+	// Can't do parallel because then I'd have to await throughout the code
+	return sample.Sample_serially(sampler, N_SAMPLES)
+}
+func (beta Beta) Samples() []float64 {
+	sampler := func(r sample.Src) float64 { return sample.Sample_beta(beta.a, beta.b, r) }
+	// return sample.Sample_parallel(sampler, N_SAMPLES)
+	return sample.Sample_serially(sampler, N_SAMPLES)
+}
+func (fs FilledSamples) Samples() []float64 {
+	return fs.xs
 }

-func main() {
-	reader := bufio.NewReader(os.Stdin)
+// Parse line into Distribution
+func parseLineErr(err_msg string) (string, Dist, error) {
+	fmt.Println(GENERAL_ERR_MSG)
+	fmt.Println(err_msg)
+	var errorDist Dist
+	return "", errorDist, errors.New(err_msg)
+}
+func parseLine(line string, vars map[string]Dist) (string, Dist, error) {

-	var old_low, old_high float64
-	var input string
-	var err1, err2 error
+	words := strings.Split(strings.TrimSpace(line), " ")
+	op := ""
+	var dist Dist

-InitialForLoop:
-	for {
-		input, _ = reader.ReadString('\n')
-		input = strings.TrimSpace(input)
-		words := strings.Split(input, " ")
+	switch words[0] {
+	case "*", "/", "+", "-":
+		op = words[0]
+		words = words[1:]
+	default:
+		op = "*" // later, change the below to
+	}

-		switch len(words) {
-		case 1:
-			single_float, err1 := strconv.ParseFloat(words[0], 64)
-			if err1 != nil {
-				fmt.Println("Trying to initialize with a scalar, but scalar is not a float")
-				continue InitialForLoop
-			}
-			old_low = single_float
-			old_high = single_float
-		case 2:
-			old_low, err1 = strconv.ParseFloat(words[0], 64)
-			old_high, err2 = strconv.ParseFloat(words[1], 64)
-			if err1 != nil || err2 != nil {
-				fmt.Println("Trying to initialize with a distribution, but distribution is not specified as two floats")
-				continue InitialForLoop
-			}
-		default:
-			fmt.Println("Please enter two floats separated by a space, like: 1 10")
-			continue InitialForLoop
+	switch len(words) {
+	case 0:
+		return parseLineErr("Operator must have operand; can't operate on nothing")
+	case 1:
+		var_word, var_word_exists := vars[words[0]]
+		single_float, err1 := strconv.ParseFloat(words[0], 64) // abstract this away to search for K/M/B/T/etc.
+		switch {
+		case var_word_exists:
+			dist = var_word
+		case err1 == nil:
+			dist = Scalar(single_float)
+		case err1 != nil && !var_word_exists:
+			return parseLineErr("Trying to operate on a scalar, but scalar is neither a float nor an assigned variable")
 		}
+	case 2:
+		new_low, err1 := strconv.ParseFloat(words[0], 64)
+		new_high, err2 := strconv.ParseFloat(words[1], 64)
 		if err1 != nil || err2 != nil {
-			fmt.Println("Please enter two floats separated by a space, like: 1 10")
-			continue
+			return parseLineErr("Trying to operate by a distribution, but distribution is not specified as two floats")
 		}
-		break
+		dist = Lognormal{low: new_low, high: new_high}
+	case 3:
+		if words[0] == "beta" || words[0] == "b" {
+			a, err1 := strconv.ParseFloat(words[1], 64)
+			b, err2 := strconv.ParseFloat(words[2], 64)
+			if err1 != nil || err2 != nil {
+				return parseLineErr("Trying to specify a beta distribution? Try beta 1 2")
+			}
+			dist = Beta{a: a, b: b}
+		} else {
+			return parseLineErr("Input not understood or not implemented yet")
+		}
+	default:
+		return parseLineErr("Input not understood or not implemented yet")
 	}
-	prettyPrintDist(old_low, old_high)
+	return op, dist, nil

-	error_msg_cont := "Valid inputs: 2 || * 2 || / 2 || 2 20 || * 2 20 || / 2 20 || i || e"
+}
+
+func multiplyLogDists(l1 Lognormal, l2 Lognormal) Lognormal {
+	logmean1 := (math.Log(l1.high) + math.Log(l1.low)) / 2.0
+	logstd1 := (math.Log(l1.high) - math.Log(l1.low)) / (2.0 * NORMAL90CONFIDENCE)
+
+	logmean2 := (math.Log(l2.high) + math.Log(l2.low)) / 2.0
+	logstd2 := (math.Log(l2.high) - math.Log(l2.low)) / (2.0 * NORMAL90CONFIDENCE)
+
+	logmean_product := logmean1 + logmean2
+	logstd_product := math.Sqrt(logstd1*logstd1 + logstd2*logstd2)
+
+	h := logstd_product * NORMAL90CONFIDENCE
+	loglow := logmean_product - h
+	loghigh := logmean_product + h
+	return Lognormal{low: math.Exp(loglow), high: math.Exp(loghigh)}
+
+}
+
+func multiplyBetaDists(beta1 Beta, beta2 Beta) Beta {
+	return Beta{a: beta1.a + beta2.a, b: beta1.b + beta2.b}
+}
+
+func operateAsSamples(dist1 Dist, dist2 Dist, op string) (Dist, error) {
+
+	xs := dist1.Samples()
+	ys := dist2.Samples()
+	// fmt.Printf("xs: %v\n", xs)
+	// fmt.Printf("ys: %v\n", ys)
+	zs := make([]float64, N_SAMPLES)
+
+	for i := 0; i < N_SAMPLES; i++ {
+		switch op {
+		case "*":
+			zs[i] = xs[i] * ys[i]
+		case "/":
+			if ys[0] != 0 {
+				zs[i] = xs[i] / ys[i]
+			} else {
+				fmt.Println("Error: When dividing as samples, division by zero")
+				return nil, errors.New("Division by zero")
+			}
+		case "+":
+			zs[i] = xs[i] + ys[i]
+		case "-":
+			zs[i] = xs[i] - ys[i]
+		}
+	}
+
+	// fmt.Printf("%v\n", zs)
+	return FilledSamples{xs: zs}, nil
+}
+
+func multiplyDists(old_dist Dist, new_dist Dist) (Dist, error) {
+
+	switch o := old_dist.(type) {
+	case Lognormal:
+		{
+			switch n := new_dist.(type) {
+			case Lognormal:
+				return multiplyLogDists(o, n), nil
+			case Scalar:
+				return multiplyLogDists(o, Lognormal{low: float64(n), high: float64(n)}), nil
+			default:
+				return operateAsSamples(old_dist, new_dist, "*")
+			}
+		}
+	case Scalar:
+		{
+			if o == 1 {
+				return new_dist, nil
+			}
+			switch n := new_dist.(type) {
+			case Lognormal:
+				return multiplyLogDists(Lognormal{low: float64(o), high: float64(o)}, n), nil
+			case Scalar:
+				return Scalar(float64(o) * float64(n)), nil
+			default:
+				return operateAsSamples(old_dist, new_dist, "*")
+			}
+		}
+	case Beta:
+		switch n := new_dist.(type) {
+		case Beta:
+			return multiplyBetaDists(o, n), nil
+		default:
+			return operateAsSamples(old_dist, new_dist, "*")
+		}
+	default:
+		return operateAsSamples(old_dist, new_dist, "*")
+	}
+}
+
+func divideDists(old_dist Dist, new_dist Dist) (Dist, error) {
+
+	switch o := old_dist.(type) {
+	case Lognormal:
+		{
+			switch n := new_dist.(type) {
+			case Lognormal:
+				return multiplyLogDists(o, Lognormal{low: 1.0 / n.high, high: 1.0 / n.low}), nil
+			case Scalar:
+				return multiplyLogDists(o, Lognormal{low: 1.0 / float64(n), high: 1.0 / float64(n)}), nil
+			default:
+				return operateAsSamples(old_dist, new_dist, "/")
+			}
+		}
+	case Scalar:
+		{
+			switch n := new_dist.(type) {
+			case Lognormal:
+				return multiplyLogDists(Lognormal{low: float64(o), high: float64(o)}, Lognormal{low: 1.0 / n.high, high: 1.0 / n.low}), nil
+			case Scalar:
+				return Scalar(float64(o) / float64(n)), nil
+			default:
+				return operateAsSamples(old_dist, new_dist, "/")
+			}
+		}
+	default:
+		return operateAsSamples(old_dist, new_dist, "/")
+	}
+}
+
+func joinDists(old_dist Dist, new_dist Dist, op string) (Dist, error) {
+
+	switch op {
+	case "*":
+		return multiplyDists(old_dist, new_dist)
+	case "/":
+		return divideDists(old_dist, new_dist)
+	case "+":
+		return operateAsSamples(old_dist, new_dist, "+")
+	case "-":
+		return operateAsSamples(old_dist, new_dist, "-")
+	default:
+		return old_dist, errors.New("Can't combine distributions in this way")
+	}
+}
+
+/* Pretty print distributions */
+func prettyPrintFloat(f float64) {
+	switch {
+	case math.Abs(f) >= 1_000_000_000_000:
+		fmt.Printf("%.1fT", f/1_000_000_000_000)
+	case math.Abs(f) >= 1_000_000_000:
+		fmt.Printf("%.1fB", f/1_000_000_000)
+	case math.Abs(f) >= 1_000_000:
+		fmt.Printf("%.1fM", f/1_000_000)
+	case math.Abs(f) >= 1_000:
+		fmt.Printf("%.1fK", f/1_000)
+
+	case math.Abs(f) <= 0.0001:
+		fmt.Printf("%.5f", f)
+	case math.Abs(f) <= 0.001:
+		fmt.Printf("%.4f", f)
+	case math.Abs(f) <= 0.01:
+		fmt.Printf("%.3f", f)
+	case math.Abs(f) <= 0.1:
+		fmt.Printf("%.2f", f)
+	default:
+		fmt.Printf("%.1f", f)
+	}
+
+}
+func prettyPrint2Floats(low float64, high float64) {
+	prettyPrintFloat(low)
+	fmt.Printf(" ")
+	prettyPrintFloat(high)
+	fmt.Printf("\n")
+}
+
+func prettyPrintDist(dist Dist) {
+	switch v := dist.(type) {
+	case Lognormal:
+		fmt.Printf("=> ")
+		prettyPrint2Floats(v.low, v.high)
+	case FilledSamples:
+		tmp_xs := make([]float64, N_SAMPLES)
+		copy(tmp_xs, v.xs)
+		sort.Slice(tmp_xs, func(i, j int) bool {
+			return tmp_xs[i] < tmp_xs[j]
+		})
+		low_int := N_SAMPLES / 20
+		low := tmp_xs[low_int]
+		high_int := N_SAMPLES * 19 / 20
+		high := tmp_xs[high_int]
+		prettyPrint2Floats(low, high)
+	case Beta:
+		fmt.Printf("=> beta ")
+		prettyPrint2Floats(v.a, v.b)
+	case Scalar:
+		fmt.Printf("=> scalar ")
+		w := float64(v)
+		prettyPrintFloat(w)
+		fmt.Println()
+	default:
+		fmt.Printf("%v", v)
+	}
+}
+
+/* Main event loop */
+func main() {
+
+	reader := bufio.NewReader(os.Stdin)
+	var init_dist Dist
+	init_dist = Scalar(1) // Lognormal{low: 1, high: 1}
+	old_dist := init_dist
+	vars := make(map[string]Dist)
+	// Could eventually be a more complex struct with:
+	// { Dist, VariableMaps, ConfigParams } or smth
 EventForLoop:
 	for {
-		input, _ = reader.ReadString('\n')
+		input, _ := reader.ReadString('\n')
 		if strings.TrimSpace(input) == "" {
 			continue EventForLoop
 		}
-		words := strings.Split(strings.TrimSpace(input), " ")

-		var new_low, new_high float64
-
-		switch words[0] {
-		case "*":
-			switch len(words) {
-			case 1:
-				fmt.Println("Can't multiply by nothing")
-				fmt.Println(error_msg_cont)
+		{
+			words := strings.Split(strings.TrimSpace(input), " ")
+			switch {
+			case words[0] == "exit" || words[0] == "e":
+				break EventForLoop
+			case words[0] == "help" || words[0] == "h":
+				fmt.Println(GENERAL_ERR_MSG)
 				continue EventForLoop
-			case 2:
-				single_float, err1 := strconv.ParseFloat(words[1], 64)
-				if err1 != nil {
-					fmt.Println("Trying to multiply by a scalar, but scalar is not a float")
-					fmt.Println(error_msg_cont)
-					continue EventForLoop
-				}
-				new_low = single_float
-				new_high = single_float
-			case 3:
-				new_low, err1 = strconv.ParseFloat(words[1], 64)
-				new_high, err2 = strconv.ParseFloat(words[2], 64)
-				if err1 != nil || err2 != nil {
-					fmt.Println(error_msg_cont)
-					fmt.Println("Trying to multiply by a distribution, but distribution is not specified as two floats")
-					continue EventForLoop
-				}
-			default:
-				fmt.Println("Trying to multiply by something, but this something is neither a scalar nor a distribution")
-				fmt.Println(error_msg_cont)
+			case words[0] == "debug" || words[0] == "d":
+				fmt.Printf("Old dist: %v\n", old_dist)
+				fmt.Printf("Vars: %v\n", vars)
 				continue EventForLoop
-			}
-		case "/":
-			switch len(words) {
-			case 1:
-				fmt.Println("Can't divide by nothing")
-				fmt.Println(error_msg_cont)
+			case words[0] == "=:" && len(words) == 2:
+				vars[words[1]] = old_dist
+				fmt.Printf("%s ", words[1])
+				prettyPrintDist(old_dist)
 				continue EventForLoop
-			case 2:
-				single_float, err1 := strconv.ParseFloat(words[1], 64)
-				if err1 != nil {
-					fmt.Println("Trying to divide by a scalar, but scalar is not a float")
-					fmt.Println(error_msg_cont)
-					continue EventForLoop
-				}
-				new_low = 1.0 / single_float
-				new_high = 1.0 / single_float
-			case 3:
-				new_low, err1 = strconv.ParseFloat(words[1], 64)
-				new_high, err2 = strconv.ParseFloat(words[2], 64)
-				if err1 != nil || err2 != nil {
-					fmt.Println("Trying to divide by a distribution, but distribution is not specified as two floats")
-					fmt.Println(error_msg_cont)
-					continue EventForLoop
-				}
-				tmp := new_low
-				new_low = 1.0 / new_high
-				new_high = 1.0 / tmp
-			default:
-				fmt.Println("Trying to divide by something, but this something is neither a scalar nor a distribution")
-			}
-		default:
-			switch len(words) {
-			case 0:
+			case words[0] == "." || words[0] == "clean" || words[0] == "c":
+				old_dist = init_dist
+				fmt.Println()
 				continue EventForLoop
-			case 1:
-				switch words[0] {
-				case "i":
-					fmt.Printf("=> %.1f %.1f\n", old_low, old_high)
-					logmean_old, logstd_old := boundsToLogParams(old_low, old_high)
-					fmt.Printf("=> Lognormal, with logmean: %.1f, logstd: %.1f\n", logmean_old, logstd_old)
-					continue EventForLoop
-				case "e":
-					break EventForLoop
-				default:
-					single_float, err1 := strconv.ParseFloat(words[0], 64)
-					if err1 != nil {
-						fmt.Println("Unrecognized command")
-						fmt.Println(error_msg_cont)
-						continue EventForLoop
-					}
-					new_low = single_float
-					new_high = single_float
-				}
-			case 2:
-				new_low, err1 = strconv.ParseFloat(words[0], 64)
-				new_high, err2 = strconv.ParseFloat(words[1], 64)
-				if err1 != nil || err2 != nil {
-					fmt.Println("Trying to multiply by a distribution, but distribution is not specified as two floats")
-					fmt.Println(error_msg_cont)
-					continue EventForLoop
-				}
-			default:
-				fmt.Println("No operation takes more than 3 words")
-				fmt.Println(error_msg_cont)
+			case words[0] == "=." && len(words) == 2:
+				vars[words[1]] = old_dist
+				fmt.Printf("%s ", words[1])
+				prettyPrintDist(old_dist)
+				old_dist = init_dist
+				fmt.Println()
 				continue EventForLoop
+				// Other possible cases:
+				// Save to file
+				// Sample n samples
+				// Save stack to a variable?
+				// clean stack
+				// Define a function? No, too much of a nerdsnipea
 			}
 		}

-		old_low, old_high = combineBounds(old_low, old_high, new_low, new_high)
-		prettyPrintDist(old_low, old_high)
+		op, new_dist, err := parseLine(input, vars)
+		if err != nil {
+			continue EventForLoop
+		}
+
+		joint_dist, err := joinDists(old_dist, new_dist, op)
+		if err != nil {
+			fmt.Printf("%v\n", err)
+			fmt.Printf("Dist on stack: ")
+			prettyPrintDist(old_dist)
+			continue EventForLoop
+		}
+		old_dist = joint_dist
+		prettyPrintDist(old_dist)
 	}
 }
--- a/go.mod
+++ b/go.mod
@ -0,0 +1,3 @@
+module git.nunosempere.com/NunoSempere/fermi
+
+go 1.22.1
--- a/sample/sample.go
+++ b/sample/sample.go
@ -0,0 +1,207 @@
+package sample
+
+import "math"
+import "sync"
+import rand "math/rand/v2"
+
+// https://pkg.go.dev/math/rand/v2
+
+type Src = *rand.Rand
+type func64 = func(Src) float64
+
+var global_r = rand.New(rand.NewPCG(uint64(1), uint64(2)))
+
+func Sample_unit_uniform(r Src) float64 {
+	return r.Float64()
+}
+
+func Sample_unit_normal(r Src) float64 {
+	return r.NormFloat64()
+}
+
+func Sample_uniform(start float64, end float64, r Src) float64 {
+	return Sample_unit_uniform(r)*(end-start) + start
+}
+
+func Sample_normal(mean float64, sigma float64, r Src) float64 {
+	return mean + Sample_unit_normal(r)*sigma
+}
+
+func Sample_lognormal(logmean float64, logstd float64, r Src) float64 {
+	return (math.Exp(Sample_normal(logmean, logstd, r)))
+}
+
+func Sample_normal_from_90_ci(low float64, high float64, r Src) float64 {
+	var normal90 float64 = 1.6448536269514727
+	var mean float64 = (high + low) / 2.0
+	var std float64 = (high - low) / (2.0 * normal90)
+	return Sample_normal(mean, std, r)
+}
+
+func Sample_to(low float64, high float64, r Src) float64 {
+	// Given a (positive) 90% confidence interval,
+	// returns a sample from a lognorma with a matching 90% c.i.
+	// Key idea: If we want a lognormal with 90% confidence interval [a, b]
+	// we need but get a normal with 90% confidence interval [log(a), log(b)].
+	// Then see code for Sample_normal_from_90_ci
+	var loglow float64 = math.Log(low)
+	var loghigh float64 = math.Log(high)
+	return math.Exp(Sample_normal_from_90_ci(loglow, loghigh, r))
+}
+
+func Sample_gamma(alpha float64, r Src) float64 {
+
+	// a simple method for generating gamma variables, marsaglia and wan tsang, 2001
+	// https://dl.acm.org/doi/pdf/10.1145/358407.358414
+	// see also the references/ folder
+	// note that the wikipedia page for the gamma distribution includes a scaling parameter
+	// k or beta
+	// https://en.wikipedia.org/wiki/gamma_distribution
+	// such that gamma_k(alpha, k) = k * gamma(alpha)
+	// or gamma_beta(alpha, beta) = gamma(alpha) / beta
+	// so far i have not needed to use this, and thus the second parameter is by default 1.
+
+	if alpha >= 1 {
+		var d, c, x, v, u float64
+		d = alpha - 1.0/3.0
+		c = 1.0 / math.Sqrt(9.0*d)
+
+		for {
+
+		InnerLoop:
+			for {
+				x = Sample_unit_normal(r)
+				v = 1.0 + c*x
+				if v > 0.0 {
+					break InnerLoop
+				}
+			}
+
+			v = v * v * v
+			u = Sample_unit_uniform(r)
+
+			if u < 1.0-0.0331*(x*x*x*x) { // Condition 1
+				// the 0.0331 doesn't inspire much confidence
+				// however, this isn't the whole story
+				// by knowing that Condition 1 implies condition 2
+				// we realize that this is just a way of making the algorithm faster
+				// i.e., of not using the logarithms
+				return d * v
+			}
+			if math.Log(u) < 0.5*(x*x)+d*(1.0-v+math.Log(v)) { // Condition 2
+				return d * v
+			}
+
+		}
+
+	} else {
+		return Sample_gamma(1.0+alpha, r) * math.Pow(Sample_unit_uniform(r), 1.0/alpha)
+	}
+}
+
+func Sample_beta(a float64, b float64, r Src) float64 {
+	gamma_a := Sample_gamma(a, r)
+	gamma_b := Sample_gamma(b, r)
+	return gamma_a / (gamma_a + gamma_b)
+}
+
+func Sample_mixture(fs []func64, weights []float64, r Src) float64 {
+
+	// fmt.Println("weights initially: ", weights)
+	var sum_weights float64 = 0
+	for _, weight := range weights {
+		sum_weights += weight
+	}
+
+	var total float64 = 0
+	var cumsummed_normalized_weights = append([]float64(nil), weights...)
+	for i, weight := range weights {
+		total += weight / sum_weights
+		cumsummed_normalized_weights[i] = total
+	}
+
+	var result float64
+	var flag int = 0
+	var p float64 = r.Float64()
+
+	for i, cnw := range cumsummed_normalized_weights {
+		if p < cnw {
+			result = fs[i](r)
+			flag = 1
+			break
+		}
+	}
+
+	if flag == 0 {
+		result = fs[len(fs)-1](r)
+	}
+	return result
+
+}
+
+func Sample_serially(f func64, n_samples int) []float64 {
+	xs := make([]float64, n_samples)
+	// var global_r = rand.New(rand.NewPCG(uint64(1), uint64(2)))
+	for i := 0; i < n_samples; i++ {
+		xs[i] = f(global_r)
+	}
+	return xs
+}
+
+func Sample_parallel(f func64, n_samples int) []float64 {
+	var num_threads = 16
+	var xs = make([]float64, n_samples)
+	var wg sync.WaitGroup
+	var h = n_samples / num_threads
+	wg.Add(num_threads)
+	for i := range num_threads {
+		var xs_i = xs[i*h : (i+1)*h]
+		go func(f func64) {
+			defer wg.Done()
+			var r = rand.New(rand.NewPCG(uint64(i), uint64(i+1)))
+			for i := range xs_i {
+				xs_i[i] = f(r)
+			}
+		}(f)
+	}
+
+	wg.Wait()
+	return xs
+
+}
+
+/*
+func main() {
+
+	var p_a float64 = 0.8
+	var p_b float64 = 0.5
+	var p_c float64 = p_a * p_b
+	ws := [4](float64){1 - p_c, p_c / 2, p_c / 4, p_c / 4}
+
+	Sample_0 := func(r Src) float64 { return 0 }
+	Sample_1 := func(r Src) float64 { return 1 }
+	Sample_few := func(r Src) float64 { return Sample_to(1, 3, r) }
+	Sample_many := func(r Src) float64 { return Sample_to(2, 10, r) }
+	fs := [4](func64){Sample_0, Sample_1, Sample_few, Sample_many}
+
+	model := func(r Src) float64 { return Sample_mixture(fs[0:], ws[0:], r) }
+	n_samples := 1_000_000
+	xs := Sample_parallel(model, n_samples)
+	var avg float64 = 0
+	for _, x := range xs {
+		avg += x
+	}
+	avg = avg / float64(n_samples)
+	fmt.Printf("Average: %v\n", avg)
+	/*
+		  // Without concurrency:
+			n_samples := 1_000_000
+			var r = rand.New(rand.NewPCG(uint64(1), uint64(2)))
+			var avg float64 = 0
+			for i := 0; i < n_samples; i++ {
+				avg += Sample_mixture(fs[0:], ws[0:], r)
+			}
+			avg = avg / float64(n_samples)
+			fmt.Printf("Average: %v\n", avg)
+}
+*/
Author	SHA1	Message	Date
NunoSempere	a4263d0765	fix correlation problem by using global variable	2024-06-10 03:08:10 +02:00
NunoSempere	5b52cf3297	fengshui	2024-06-10 01:13:17 +02:00
NunoSempere	b0f48286d5	add code to multiply beta distributions	2024-06-10 01:12:02 +02:00
NunoSempere	6cf0b3f9d5	continue filling up DSL code	2024-06-10 00:51:05 +02:00
NunoSempere	0e9ef33b8e	switch on type using new distribution interface	2024-06-10 00:24:06 +02:00
NunoSempere	8b1792f861	type wranging	2024-06-10 00:05:23 +02:00
NunoSempere	b0adde937f	savepoint	2024-06-09 23:35:36 +02:00
NunoSempere	318b8da414	add beta sampler and implement beta type conforming to distribution	2024-06-09 23:08:15 +02:00
NunoSempere	d57300e8ce	add implementation for gamma distribution	2024-06-09 23:01:49 +02:00
NunoSempere	5f1188c516	add type for filled samples & beta distribution	2024-06-09 22:48:43 +02:00
NunoSempere	6a68bfdc3b	fix types	2024-06-09 22:46:08 +02:00
NunoSempere	3f0bcf0e03	starting adding interfaces + use go mod to allow for imports	2024-06-09 22:27:49 +02:00
NunoSempere	313b744100	delete old parser	2024-06-09 15:25:50 +02:00
NunoSempere	110d6b6dc6	f2 to f, f to f0	2024-06-09 15:17:14 +02:00
NunoSempere	ae6ad02b56	fengshui tweaks	2024-06-09 15:14:44 +02:00
NunoSempere	84a195a29f	add variables!	2024-06-09 14:48:53 +02:00
NunoSempere	ca8841f604	rename to sample.go	2024-06-09 14:21:30 +02:00
NunoSempere	8af908dd84	add samplers from time to botec code	2024-06-09 14:21:01 +02:00
NunoSempere	ce399ac24e	reach feature parity with past parser	2024-06-09 14:08:19 +02:00
NunoSempere	5bb015ab69	simplify lognormal multiplication code	2024-06-09 14:00:30 +02:00
NunoSempere	3bb705d13d	get mvp of new parser!	2024-06-09 13:46:51 +02:00
NunoSempere	29ea338112	continue with new parser	2024-06-09 13:43:03 +02:00
NunoSempere	de72432c53	continue with new parser, steps now seem way more clear	2024-06-09 13:18:09 +02:00
NunoSempere	f5e7148148	fix compiler errors	2024-06-09 12:59:47 +02:00
NunoSempere	dfb9f3cc25	add some notion of what a more powerful parser could look like	2024-06-09 12:58:16 +02:00
NunoSempere	09583a312c	add spec for more expressive DSL	2024-06-03 09:28:16 +02:00