simplify fermi.go again

continue reflecting about mixtures & stuff
get some speed gains for mixtures thru a bit more complexity
2024-12-24 17:05:53 +01:00 · 2024-12-24 16:50:08 +01:00 · 2024-12-24 16:35:55 +01:00 · 2024-12-24 16:32:17 +01:00 · 2024-12-24 16:11:37 +01:00 · 2024-12-24 15:51:31 +01:00
7 changed files with 215 additions and 53 deletions
--- a/README.md
+++ b/README.md
@ -193,11 +193,13 @@ Done:
 - [x] Make -n flag work
 - [x] Add flag to repeat input lines (useful when reading from files)
 - [x] Add percentages
+- [x] Consider adding an understanding of percentages

 To (possibly) do:

+- [ ] Consider implications of sampling strategy for operating variables in this case.
+- [ ] Document mixture distributions
 - [ ] Fix lognormal multiplication and division by 0 or < 0
- [ ] Consider adding an understanding of percentages
 - [ ] With the -f command line option, the program doesn't read from stdin after finishing reading the file
 - [ ] Add functions. Now easier to do with an explicit representation of the stakc
 - [ ] Think about how to draw a histogram from samples
--- a/BIN
+++ b/BIN
--- a/fermi.go
+++ b/fermi.go
@ -20,7 +20,7 @@ type Stack struct {
 }

 type Dist interface {
-	Samples() []float64
+	Sampler(int, sample.State) float64
 }

 type Scalar float64
@ -41,27 +41,23 @@ type FilledSamples struct {

 /* Dist interface functions */
 // https://go.dev/tour/methods/9
-func (p Scalar) Samples() []float64 {
-	xs := make([]float64, N_SAMPLES)
-	for i := 0; i < N_SAMPLES; i++ {
-		xs[i] = float64(p)
-	}
-	return xs
+func (p Scalar) Sampler(i int, r sample.State) float64 {
+	return float64(p)
 }

-func (ln Lognormal) Samples() []float64 {
-	sampler := func(r sample.Src) float64 { return sample.Sample_to(ln.low, ln.high, r) }
-	// Can't do parallel because then I'd have to await throughout the code
-	return sample.Sample_serially(sampler, N_SAMPLES)
+func (ln Lognormal) Sampler(i int, r sample.State) float64 {
+	return sample.Sample_to(ln.low, ln.high, r)
 }

-func (beta Beta) Samples() []float64 {
-	sampler := func(r sample.Src) float64 { return sample.Sample_beta(beta.a, beta.b, r) }
-	return sample.Sample_serially(sampler, N_SAMPLES)
+func (beta Beta) Sampler(i int, r sample.State) float64 {
+	return sample.Sample_beta(beta.a, beta.b, r)
 }

-func (fs FilledSamples) Samples() []float64 {
-	return fs.xs
+func (fs FilledSamples) Sampler(i int, r sample.State) float64 {
+	// This is a bit subtle, because sampling from FilledSamples randomly iteratively converges
+	// to something different than the initial distribution
+	// So instead we have an i parameter.
+	return fs.xs[i]
 }

 /* Constants */
@ -156,13 +152,12 @@ func prettyPrintDist(dist Dist) {

 func printAndReturnErr(err_msg string) error {
 	fmt.Println(err_msg)
-	// fmt.Println(HELP_MSG)
 	fmt.Println("Type \"help\" (without quotes) to see a pseudogrammar and examples")
 	return errors.New(err_msg)
 }

 func prettyPrintStats(dist Dist) {
-	xs := dist.Samples()
+	xs := sample.Sample_serially(dist.Sampler, N_SAMPLES)
 	n := len(xs)

 	mean := 0.0
@ -197,15 +192,14 @@ func prettyPrintStats(dist Dist) {
 	print_ci(0.90, "ci 90%: ")
 	print_ci(0.95, "ci 95%: ")
 	print_ci(0.99, "ci 99%: ")
-
 }

 /* Operations */
 // Generic operations with samples
 func operateDistsAsSamples(dist1 Dist, dist2 Dist, op string) (Dist, error) {

-	xs := dist1.Samples()
-	ys := dist2.Samples()
+	xs := sample.Sample_serially(dist1.Sampler, N_SAMPLES)
+	ys := sample.Sample_serially(dist2.Sampler, N_SAMPLES)
 	zs := make([]float64, N_SAMPLES)

 	for i := 0; i < N_SAMPLES; i++ {
@ -354,6 +348,41 @@ func operateDists(old_dist Dist, new_dist Dist, op string) (Dist, error) {
 	}
 }

+/* Mixtures */
+
+// parseMixture builds a mixture distribution out of alternating
+// "variable weight" words, e.g. `x 2.5 y 8 z 10`.
+//
+// Every variable name is looked up in vars and contributes its Sampler; every
+// weight is parsed with pretty.ParseFloat. The mixture is then sampled
+// N_SAMPLES times and handed back as a FilledSamples. Any syntax or sampling
+// problem is printed through printAndReturnErr and returned as an error.
+func parseMixture(words []string, vars map[string]Dist) (Dist, error) {
+	// Words have to come in (variable, weight) pairs.
+	if len(words)%2 != 0 {
+		return nil, printAndReturnErr("Not a mixture. \nMixture syntax: \nmx x 2.5 y 8 z 10\ni.e.: mx var weight var2 weight2 ... var_n weight_n")
+	}
+
+	var samplers []func(int, sample.State) float64
+	var mixWeights []float64
+
+	// Walk the words two at a time: words[k] is the variable, words[k+1] its weight.
+	for k := 0; k+1 < len(words); k += 2 {
+		component, found := vars[words[k]]
+		if !found {
+			return nil, printAndReturnErr("Expected mixture variable but didn't get a variable. \nMixture syntax: \nmx x 2.5 y 8 z 10\ni.e.: mx var weight var2 weight2 ... var_n weight_n")
+		}
+		samplers = append(samplers, component.Sampler)
+
+		w, parseErr := pretty.ParseFloat(words[k+1])
+		if parseErr != nil {
+			return nil, printAndReturnErr("Expected mixture weight but didn't get a float. \nMixture syntax: \nmx x 2.5 y 8 z 10\ni.e.: mx var weight var2 weight2 ... var_n weight_n")
+		}
+		mixWeights = append(mixWeights, w)
+	}
+
+	// Draw the mixture once and freeze it as samples.
+	mixed, mixErr := sample.Sample_mixture_serially_from_samplers(samplers, mixWeights, N_SAMPLES)
+	if mixErr != nil {
+		return nil, printAndReturnErr(mixErr.Error())
+	}
+	return FilledSamples{xs: mixed}, nil
+}
+
 /* Parser and repl */
 func parseWordsErr(err_msg string) (string, Dist, error) {
 	return "", nil, printAndReturnErr(err_msg)
@ -364,11 +393,11 @@ func parseWordsIntoOpAndDist(words []string, vars map[string]Dist) (string, Dist
 	var dist Dist

 	switch words[0] {
-	case "*", "/", "+", "-":
+	case "*", "/", "+", "-", "mx":
 		op = words[0]
 		words = words[1:]
 	default:
-		op = "*" // later, change the below to
+		op = "*"
 	}

 	switch len(words) {
@ -400,18 +429,29 @@ func parseWordsIntoOpAndDist(words []string, vars map[string]Dist) (string, Dist
 		}
 		dist = Lognormal{low: new_low, high: new_high}
 	case 3:
-		if words[0] == "beta" || words[0] == "b" {
+		switch {
+		case words[0] == "beta" || words[0] == "b":
 			a, err1 := pretty.ParseFloat(words[1])
 			b, err2 := pretty.ParseFloat(words[2])
 			if err1 != nil || err2 != nil {
 				return parseWordsErr("Trying to specify a beta distribution? Try beta 1 2")
 			}
 			dist = Beta{a: a, b: b}
-		} else {
+		default:
 			return parseWordsErr("Input not understood or not implemented yet")
 		}
 	default:
-		return parseWordsErr("Input not understood or not implemented yet")
+		switch op {
+		case "mx":
+			tmp, err := parseMixture(words, vars)
+			if err != nil {
+				return parseWordsErr("Error parsing a mixture: " + err.Error())
+			}
+			dist = tmp
+			op = "*"
+		default:
+			return parseWordsErr("Input not understood or not implemented yet")
+		}
 	}
 	return op, dist, nil
 }
--- a/go.mod
+++ b/go.mod
@ -1,3 +1,5 @@
 module git.nunosempere.com/NunoSempere/fermi

 go 1.22.1
+
+require github.com/pkg/errors v0.9.1 // indirect
--- a/go.sum
+++ b/go.sum
@ -0,0 +1,2 @@
+github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
+github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
--- a/more/time-to-botec.fermi
+++ b/more/time-to-botec.fermi
@ -0,0 +1,17 @@
+0
+=. a
+
+1
+=. b 
+
+1 3
+=. c
+
+2 10
+=. d
+
+mx a 60% b 20% c 10% d 10%
+
+stats
+
+exit
--- a/sample/sample.go
+++ b/sample/sample.go
@ -1,44 +1,54 @@
 package sample

-import "math"
-import "sync"
-import rand "math/rand/v2"
+import (
+	"math"
+	"sync"
+
+	rand "math/rand/v2"
+
+	"github.com/pkg/errors"
+)

 // https://pkg.go.dev/math/rand/v2

-type Src = *rand.Rand
-type func64 = func(Src) float64
+type State = *rand.Rand
+type func64 = func(State) float64
+type func64i = func(int, State) float64

-var global_r = rand.New(rand.NewPCG(uint64(1), uint64(2)))
+var global_state = rand.New(rand.NewPCG(uint64(1), uint64(2)))

-func Sample_unit_uniform(r Src) float64 {
+// Sample_int returns r.IntN(n): an int drawn from the generator r with n as
+// the bound passed to IntN.
+// NOTE(review): math/rand/v2 documents IntN as panicking when n <= 0 — confirm
+// that callers never pass a non-positive n.
+func Sample_int(n int, r State) int {
+	return r.IntN(n)
+}
+
+func Sample_unit_uniform(r State) float64 {
 	return r.Float64()
 }

-func Sample_unit_normal(r Src) float64 {
+func Sample_unit_normal(r State) float64 {
 	return r.NormFloat64()
 }

-func Sample_uniform(start float64, end float64, r Src) float64 {
+func Sample_uniform(start float64, end float64, r State) float64 {
 	return Sample_unit_uniform(r)*(end-start) + start
 }

-func Sample_normal(mean float64, sigma float64, r Src) float64 {
+func Sample_normal(mean float64, sigma float64, r State) float64 {
 	return mean + Sample_unit_normal(r)*sigma
 }

-func Sample_lognormal(logmean float64, logstd float64, r Src) float64 {
+func Sample_lognormal(logmean float64, logstd float64, r State) float64 {
 	return (math.Exp(Sample_normal(logmean, logstd, r)))
 }

-func Sample_normal_from_90_ci(low float64, high float64, r Src) float64 {
+func Sample_normal_from_90_ci(low float64, high float64, r State) float64 {
 	var normal90 float64 = 1.6448536269514727
 	var mean float64 = (high + low) / 2.0
 	var std float64 = (high - low) / (2.0 * normal90)
 	return Sample_normal(mean, std, r)
 }

-func Sample_to(low float64, high float64, r Src) float64 {
+func Sample_to(low float64, high float64, r State) float64 {
 	// Given a (positive) 90% confidence interval,
 	// returns a sample from a lognorma with a matching 90% c.i.
 	// Key idea: If we want a lognormal with 90% confidence interval [a, b]
@ -49,7 +59,7 @@ func Sample_to(low float64, high float64, r Src) float64 {
 	return math.Exp(Sample_normal_from_90_ci(loglow, loghigh, r))
 }

-func Sample_gamma(alpha float64, r Src) float64 {
+func Sample_gamma(alpha float64, r State) float64 {

 	// a simple method for generating gamma variables, marsaglia and wan tsang, 2001
 	// https://dl.acm.org/doi/pdf/10.1145/358407.358414
@ -99,13 +109,13 @@ func Sample_gamma(alpha float64, r Src) float64 {
 	}
 }

-func Sample_beta(a float64, b float64, r Src) float64 {
+func Sample_beta(a float64, b float64, r State) float64 {
 	gamma_a := Sample_gamma(a, r)
 	gamma_b := Sample_gamma(b, r)
 	return gamma_a / (gamma_a + gamma_b)
 }

-func Sample_mixture(fs []func64, weights []float64, r Src) float64 {
+func Sample_mixture_once(fs []func64, weights []float64, r State) float64 {

 	// fmt.Println("weights initially: ", weights)
 	var sum_weights float64 = 0
@ -139,15 +149,104 @@ func Sample_mixture(fs []func64, weights []float64, r Src) float64 {

 }

-func Sample_serially(f func64, n_samples int) []float64 {
+func Sample_serially(f func64i, n_samples int) []float64 {
 	xs := make([]float64, n_samples)
-	// var global_r = rand.New(rand.NewPCG(uint64(1), uint64(2)))
+	// var global_state = rand.New(rand.NewPCG(uint64(1), uint64(2)))
 	for i := 0; i < n_samples; i++ {
-		xs[i] = f(global_r)
+		xs[i] = f(i, global_state)
 	}
 	return xs
 }

+// Sample_mixture_serially_from_samples draws n_samples values from a mixture
+// whose components are given as already-drawn sample slices.
+//
+// fs[j] holds the samples of component j and weights[j] its unnormalized
+// weight. For every output index i one uniform number is taken from
+// global_state to choose a component j, and xs[i] is set to fs[j][i]; each
+// component therefore needs at least n_samples values.
+//
+// An error is returned when len(fs) != len(weights), when a component has
+// fewer than n_samples values, when a weight is negative or NaN, or when the
+// weights do not add up to a positive, finite number.
+func Sample_mixture_serially_from_samples(fs [][]float64, weights []float64, n_samples int) ([]float64, error) {
+	// Checks
+	if len(weights) != len(fs) {
+		return nil, errors.New("Mixture must have dists and weights alternated")
+	}
+	for _, f := range fs {
+		if len(f) < n_samples {
+			return nil, errors.New("Mixture components don't have enough samples")
+		}
+	}
+
+	var sum_weights float64 = 0
+	for _, weight := range weights {
+		if weight < 0 || math.IsNaN(weight) {
+			return nil, errors.New("Mixture weights must be non-negative numbers")
+		}
+		sum_weights += weight
+	}
+	// Validate the sum before dividing by it: a zero sum would turn every
+	// normalized weight into NaN (0/0), no `p < cnw` comparison could succeed,
+	// and all draws would silently come from the last component.
+	if sum_weights <= 0 || math.IsInf(sum_weights, 1) {
+		return nil, errors.New("Cummulative sum of weights in mixture must be > 0.0")
+	}
+
+	cumsummed_normalized_weights := make([]float64, len(weights))
+	var total float64 = 0
+	for i, weight := range weights {
+		total += weight / sum_weights
+		cumsummed_normalized_weights[i] = total
+	}
+
+	xs := make([]float64, n_samples)
+	for i := 0; i < n_samples; i++ {
+		p := global_state.Float64()
+		// Default to the last component so that rounding in the cumulative
+		// sum (a final value slightly below 1) cannot leave a draw unassigned.
+		chosen := len(fs) - 1
+		for j, cnw := range cumsummed_normalized_weights {
+			if p < cnw {
+				chosen = j
+				break
+			}
+		}
+		xs[i] = fs[chosen][i]
+	}
+	return xs, nil
+}
+
+// Sample_mixture_serially_from_samplers draws n_samples values from a mixture
+// whose components are given as sampler functions.
+//
+// fs[j] is the sampler of component j and weights[j] its unnormalized weight.
+// For every output index i one uniform number is taken from global_state to
+// choose a component j, and xs[i] is set to fs[j](i, global_state).
+//
+// An error is returned when len(fs) != len(weights), when a weight is negative
+// or NaN, or when the weights do not add up to a positive, finite number.
+func Sample_mixture_serially_from_samplers(fs []func64i, weights []float64, n_samples int) ([]float64, error) {
+	// Checks
+	if len(weights) != len(fs) {
+		return nil, errors.New("Mixture must have dists and weights alternated")
+	}
+
+	var sum_weights float64 = 0
+	for _, weight := range weights {
+		if weight < 0 || math.IsNaN(weight) {
+			return nil, errors.New("Mixture weights must be non-negative numbers")
+		}
+		sum_weights += weight
+	}
+	// Validate the sum before dividing by it: a zero sum would turn every
+	// normalized weight into NaN (0/0), no `p < cnw` comparison could succeed,
+	// and all draws would silently come from the last component.
+	if sum_weights <= 0 || math.IsInf(sum_weights, 1) {
+		return nil, errors.New("Cummulative sum of weights in mixture must be > 0.0")
+	}
+
+	cumsummed_normalized_weights := make([]float64, len(weights))
+	var total float64 = 0
+	for i, weight := range weights {
+		total += weight / sum_weights
+		cumsummed_normalized_weights[i] = total
+	}
+
+	xs := make([]float64, n_samples)
+	for i := 0; i < n_samples; i++ {
+		p := global_state.Float64()
+		// Default to the last component so that rounding in the cumulative
+		// sum (a final value slightly below 1) cannot leave a draw unassigned.
+		chosen := len(fs) - 1
+		for j, cnw := range cumsummed_normalized_weights {
+			if p < cnw {
+				chosen = j
+				break
+			}
+		}
+		xs[i] = fs[chosen](i, global_state)
+	}
+	return xs, nil
+}
+
 func Sample_parallel(f func64, n_samples int) []float64 {
 	var num_threads = 16
 	var xs = make([]float64, n_samples)
@ -159,8 +258,8 @@ func Sample_parallel(f func64, n_samples int) []float64 {
 		go func(f func64) {
 			defer wg.Done()
 			var r = rand.New(rand.NewPCG(uint64(i), uint64(i+1)))
-			for i := range xs_i {
-				xs_i[i] = f(r)
+			for j := range xs_i {
+				xs_i[j] = f(r)
 			}
 		}(f)
 	}
@ -178,13 +277,13 @@ func main() {
 	var p_c float64 = p_a * p_b
 	ws := [4](float64){1 - p_c, p_c / 2, p_c / 4, p_c / 4}

-	Sample_0 := func(r Src) float64 { return 0 }
-	Sample_1 := func(r Src) float64 { return 1 }
-	Sample_few := func(r Src) float64 { return Sample_to(1, 3, r) }
-	Sample_many := func(r Src) float64 { return Sample_to(2, 10, r) }
+	Sample_0 := func(r State) float64 { return 0 }
+	Sample_1 := func(r State) float64 { return 1 }
+	Sample_few := func(r State) float64 { return Sample_to(1, 3, r) }
+	Sample_many := func(r State) float64 { return Sample_to(2, 10, r) }
 	fs := [4](func64){Sample_0, Sample_1, Sample_few, Sample_many}

-	model := func(r Src) float64 { return Sample_mixture(fs[0:], ws[0:], r) }
+	model := func(r State) float64 { return Sample_mixture(fs[0:], ws[0:], r) }
 	n_samples := 1_000_000
 	xs := Sample_parallel(model, n_samples)
 	var avg float64 = 0
Author	SHA1	Message	Date
NunoSempere	e93316446c	simplify fermi.go again	2024-12-24 17:05:53 +01:00
NunoSempere	2314bf5db2	continue reflecting about mixtures & stuff	2024-12-24 16:50:08 +01:00
NunoSempere	7d59beb4bf	get some speed gains for mixtures thru a bit more complexity	2024-12-24 16:35:55 +01:00
NunoSempere	3ca32655d5	rework sampling for mixtures a bit	2024-12-24 16:32:17 +01:00
NunoSempere	e473223bbd	become a bit confused about FilledSamples and mixtures	2024-12-24 16:11:37 +01:00
NunoSempere	7d8a582bd5	test the time to botec example https://github.com/NunoSempere/time-to-botec	2024-12-24 15:51:31 +01:00
NunoSempere	10941f00ac	mixture tweaks	2024-12-24 15:42:54 +01:00
NunoSempere	884adba214	debug mixture implementation	2024-12-24 15:39:45 +01:00
NunoSempere	2f663b1262	first pass mixture	2024-12-24 15:31:21 +01:00