fix correlation problem by using global variable
This commit is contained in:
parent
5b52cf3297
commit
a4263d0765
15
README.md
15
README.md
|
@ -1,6 +1,6 @@
|
||||||
# A minimalist calculator for fermi estimation
|
# A minimalist calculator for fermi estimation
|
||||||
|
|
||||||
This project contains a minimalist command-line calculator for Fermi estimation. For now, it just multiplies lognormals.
|
This project is a minimalist, stack-based DSL for Fermi estimation. It can multiply and divide scalars, lognormals and beta distributions.
|
||||||
|
|
||||||
## Motivation
|
## Motivation
|
||||||
|
|
||||||
|
@ -105,17 +105,18 @@ Conceptually clearer to have all the multiplications first and then all the divi
|
||||||
- [-] Think of some way of calling bc
|
- [-] Think of some way of calling bc
|
||||||
- [x] Think how to integrate with squiggle.c to draw samples
|
- [x] Think how to integrate with squiggle.c to draw samples
|
||||||
- [x] Copy the time to botec go code
|
- [x] Copy the time to botec go code
|
||||||
- [ ] Define samplers
|
- [x] Define samplers
|
||||||
- [ ] Call those samplers when operating on distributions that can't be operted on algebraically
|
- [x] Call those samplers when operating on distributions that can't be operted on algebraically
|
||||||
- [ ] Think about how to draw a histogram from samples
|
- [ ] Think about how to draw a histogram from samples
|
||||||
- [x] Display output more nicely, with K/M/B/T
|
- [x] Display output more nicely, with K/M/B/T
|
||||||
- [ ] Consider the following: make this into a stack-based DSL, with:
|
- [x] Consider the following: make this into a stack-based DSL, with:
|
||||||
- Variables that can be saved to and then displayed
|
- [x] Variables that can be saved to and then displayed
|
||||||
- Other types of distributions, particularly beta distributions? => But then this requires moving to bags of samples. It could still be ~instantaneous though.
|
- [x] Other types of distributions, particularly beta distributions? => But then this requires moving to bags of samples. It could still be ~instantaneous though.
|
||||||
- Figure out syntax for
|
- [x] Figure out go syntax for
|
||||||
- Maps
|
- Maps
|
||||||
- Joint types
|
- Joint types
|
||||||
- Enums
|
- Enums
|
||||||
|
- [ ] Fix correlation problem, by spinning up a new randomness thing every time some serial computation is done.
|
||||||
|
|
||||||
Some possible syntax for a more expressive stack-based DSL
|
Some possible syntax for a more expressive stack-based DSL
|
||||||
|
|
||||||
|
|
201
f.go
201
f.go
|
@ -14,7 +14,7 @@ import (
|
||||||
|
|
||||||
const NORMAL90CONFIDENCE = 1.6448536269514727
|
const NORMAL90CONFIDENCE = 1.6448536269514727
|
||||||
const GENERAL_ERR_MSG = "Valid inputs: 2 || * 2 || / 2 || 2 20 || * 2 20 || / 2 20 || clean || =: var || op var || clean || help || debug || exit"
|
const GENERAL_ERR_MSG = "Valid inputs: 2 || * 2 || / 2 || 2 20 || * 2 20 || / 2 20 || clean || =: var || op var || clean || help || debug || exit"
|
||||||
const N_SAMPLES = 10 // 1_000_000
|
const N_SAMPLES = 1_000_000
|
||||||
|
|
||||||
// Distribution interface
|
// Distribution interface
|
||||||
// https://go.dev/tour/methods/9
|
// https://go.dev/tour/methods/9
|
||||||
|
@ -22,46 +22,37 @@ const N_SAMPLES = 10 // 1_000_000
|
||||||
type Dist interface {
|
type Dist interface {
|
||||||
Samples() []float64
|
Samples() []float64
|
||||||
}
|
}
|
||||||
|
type Scalar float64
|
||||||
type Scalar struct {
|
type Lognormal struct {
|
||||||
p float64
|
low float64
|
||||||
|
high float64
|
||||||
|
}
|
||||||
|
type Beta struct {
|
||||||
|
a float64
|
||||||
|
b float64
|
||||||
|
}
|
||||||
|
type FilledSamples struct {
|
||||||
|
xs []float64
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p Scalar) Samples() []float64 {
|
func (p Scalar) Samples() []float64 {
|
||||||
xs := make([]float64, N_SAMPLES)
|
xs := make([]float64, N_SAMPLES)
|
||||||
for i := 0; i < N_SAMPLES; i++ {
|
for i := 0; i < N_SAMPLES; i++ {
|
||||||
xs[i] = p.p
|
xs[i] = float64(p)
|
||||||
}
|
}
|
||||||
return xs
|
return xs
|
||||||
}
|
}
|
||||||
|
|
||||||
type Lognormal struct {
|
|
||||||
low float64
|
|
||||||
high float64
|
|
||||||
}
|
|
||||||
|
|
||||||
func (ln Lognormal) Samples() []float64 {
|
func (ln Lognormal) Samples() []float64 {
|
||||||
sampler := func(r sample.Src) float64 { return sample.Sample_to(ln.low, ln.high, r) }
|
sampler := func(r sample.Src) float64 { return sample.Sample_to(ln.low, ln.high, r) }
|
||||||
// return sample.Sample_parallel(sampler, N_SAMPLES)
|
// return sample.Sample_parallel(sampler, N_SAMPLES)
|
||||||
// Can't do parallel because then I'd have to await throughout the code
|
// Can't do parallel because then I'd have to await throughout the code
|
||||||
return sample.Sample_serially(sampler, N_SAMPLES)
|
return sample.Sample_serially(sampler, N_SAMPLES)
|
||||||
}
|
}
|
||||||
|
|
||||||
type Beta struct {
|
|
||||||
a float64
|
|
||||||
b float64
|
|
||||||
}
|
|
||||||
|
|
||||||
func (beta Beta) Samples() []float64 {
|
func (beta Beta) Samples() []float64 {
|
||||||
sampler := func(r sample.Src) float64 { return sample.Sample_beta(beta.a, beta.b, r) }
|
sampler := func(r sample.Src) float64 { return sample.Sample_beta(beta.a, beta.b, r) }
|
||||||
// return sample.Sample_parallel(sampler, N_SAMPLES)
|
// return sample.Sample_parallel(sampler, N_SAMPLES)
|
||||||
return sample.Sample_serially(sampler, N_SAMPLES)
|
return sample.Sample_serially(sampler, N_SAMPLES)
|
||||||
}
|
}
|
||||||
|
|
||||||
type FilledSamples struct {
|
|
||||||
xs []float64
|
|
||||||
}
|
|
||||||
|
|
||||||
func (fs FilledSamples) Samples() []float64 {
|
func (fs FilledSamples) Samples() []float64 {
|
||||||
return fs.xs
|
return fs.xs
|
||||||
}
|
}
|
||||||
|
@ -80,16 +71,9 @@ func parseLine(line string, vars map[string]Dist) (string, Dist, error) {
|
||||||
var dist Dist
|
var dist Dist
|
||||||
|
|
||||||
switch words[0] {
|
switch words[0] {
|
||||||
case "*":
|
case "*", "/", "+", "-":
|
||||||
op = "*"
|
op = words[0]
|
||||||
words = words[1:]
|
words = words[1:]
|
||||||
case "/":
|
|
||||||
op = "/"
|
|
||||||
words = words[1:]
|
|
||||||
case "+":
|
|
||||||
return parseLineErr("+ operation not implemented yet")
|
|
||||||
case "-":
|
|
||||||
return parseLineErr("- operation not implemented yet")
|
|
||||||
default:
|
default:
|
||||||
op = "*" // later, change the below to
|
op = "*" // later, change the below to
|
||||||
}
|
}
|
||||||
|
@ -104,7 +88,7 @@ func parseLine(line string, vars map[string]Dist) (string, Dist, error) {
|
||||||
case var_word_exists:
|
case var_word_exists:
|
||||||
dist = var_word
|
dist = var_word
|
||||||
case err1 == nil:
|
case err1 == nil:
|
||||||
dist = Lognormal{low: single_float, high: single_float}
|
dist = Scalar(single_float)
|
||||||
case err1 != nil && !var_word_exists:
|
case err1 != nil && !var_word_exists:
|
||||||
return parseLineErr("Trying to operate on a scalar, but scalar is neither a float nor an assigned variable")
|
return parseLineErr("Trying to operate on a scalar, but scalar is neither a float nor an assigned variable")
|
||||||
}
|
}
|
||||||
|
@ -133,9 +117,6 @@ func parseLine(line string, vars map[string]Dist) (string, Dist, error) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Join distributions
|
|
||||||
// Multiply lognormals
|
|
||||||
|
|
||||||
func multiplyLogDists(l1 Lognormal, l2 Lognormal) Lognormal {
|
func multiplyLogDists(l1 Lognormal, l2 Lognormal) Lognormal {
|
||||||
logmean1 := (math.Log(l1.high) + math.Log(l1.low)) / 2.0
|
logmean1 := (math.Log(l1.high) + math.Log(l1.low)) / 2.0
|
||||||
logstd1 := (math.Log(l1.high) - math.Log(l1.low)) / (2.0 * NORMAL90CONFIDENCE)
|
logstd1 := (math.Log(l1.high) - math.Log(l1.low)) / (2.0 * NORMAL90CONFIDENCE)
|
||||||
|
@ -157,10 +138,8 @@ func multiplyBetaDists(beta1 Beta, beta2 Beta) Beta {
|
||||||
return Beta{a: beta1.a + beta2.a, b: beta1.b + beta2.b}
|
return Beta{a: beta1.a + beta2.a, b: beta1.b + beta2.b}
|
||||||
}
|
}
|
||||||
|
|
||||||
func multiplyAsSamples(dist1 Dist, dist2 Dist) Dist {
|
func operateAsSamples(dist1 Dist, dist2 Dist, op string) (Dist, error) {
|
||||||
// dist2 = Beta{a: 1, b: 2}
|
|
||||||
// fmt.Printf("dist1: %v\n", dist1)
|
|
||||||
// fmt.Printf("dist2: %v\n", dist2)
|
|
||||||
xs := dist1.Samples()
|
xs := dist1.Samples()
|
||||||
ys := dist2.Samples()
|
ys := dist2.Samples()
|
||||||
// fmt.Printf("xs: %v\n", xs)
|
// fmt.Printf("xs: %v\n", xs)
|
||||||
|
@ -168,11 +147,25 @@ func multiplyAsSamples(dist1 Dist, dist2 Dist) Dist {
|
||||||
zs := make([]float64, N_SAMPLES)
|
zs := make([]float64, N_SAMPLES)
|
||||||
|
|
||||||
for i := 0; i < N_SAMPLES; i++ {
|
for i := 0; i < N_SAMPLES; i++ {
|
||||||
|
switch op {
|
||||||
|
case "*":
|
||||||
zs[i] = xs[i] * ys[i]
|
zs[i] = xs[i] * ys[i]
|
||||||
|
case "/":
|
||||||
|
if ys[0] != 0 {
|
||||||
|
zs[i] = xs[i] / ys[i]
|
||||||
|
} else {
|
||||||
|
fmt.Println("Error: When dividing as samples, division by zero")
|
||||||
|
return nil, errors.New("Division by zero")
|
||||||
|
}
|
||||||
|
case "+":
|
||||||
|
zs[i] = xs[i] + ys[i]
|
||||||
|
case "-":
|
||||||
|
zs[i] = xs[i] - ys[i]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// fmt.Printf("%v\n", zs)
|
// fmt.Printf("%v\n", zs)
|
||||||
return FilledSamples{xs: zs}
|
return FilledSamples{xs: zs}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func multiplyDists(old_dist Dist, new_dist Dist) (Dist, error) {
|
func multiplyDists(old_dist Dist, new_dist Dist) (Dist, error) {
|
||||||
|
@ -184,23 +177,23 @@ func multiplyDists(old_dist Dist, new_dist Dist) (Dist, error) {
|
||||||
case Lognormal:
|
case Lognormal:
|
||||||
return multiplyLogDists(o, n), nil
|
return multiplyLogDists(o, n), nil
|
||||||
case Scalar:
|
case Scalar:
|
||||||
return multiplyLogDists(o, Lognormal{low: n.p, high: n.p}), nil
|
return multiplyLogDists(o, Lognormal{low: float64(n), high: float64(n)}), nil
|
||||||
default:
|
default:
|
||||||
return multiplyAsSamples(o, n), nil
|
return operateAsSamples(old_dist, new_dist, "*")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
case Scalar:
|
case Scalar:
|
||||||
{
|
{
|
||||||
if o.p == 1 {
|
if o == 1 {
|
||||||
return new_dist, nil
|
return new_dist, nil
|
||||||
}
|
}
|
||||||
switch n := new_dist.(type) {
|
switch n := new_dist.(type) {
|
||||||
case Lognormal:
|
case Lognormal:
|
||||||
return multiplyLogDists(Lognormal{low: o.p, high: o.p}, n), nil
|
return multiplyLogDists(Lognormal{low: float64(o), high: float64(o)}, n), nil
|
||||||
case Scalar:
|
case Scalar:
|
||||||
return Scalar{p: o.p * n.p}, nil
|
return Scalar(float64(o) * float64(n)), nil
|
||||||
default:
|
default:
|
||||||
return multiplyAsSamples(o, n), nil
|
return operateAsSamples(old_dist, new_dist, "*")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
case Beta:
|
case Beta:
|
||||||
|
@ -208,11 +201,40 @@ func multiplyDists(old_dist Dist, new_dist Dist) (Dist, error) {
|
||||||
case Beta:
|
case Beta:
|
||||||
return multiplyBetaDists(o, n), nil
|
return multiplyBetaDists(o, n), nil
|
||||||
default:
|
default:
|
||||||
return multiplyAsSamples(o, n), nil
|
return operateAsSamples(old_dist, new_dist, "*")
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
return multiplyAsSamples(old_dist, new_dist), nil
|
return operateAsSamples(old_dist, new_dist, "*")
|
||||||
// return nil, errors.New("Can't multiply dists")
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func divideDists(old_dist Dist, new_dist Dist) (Dist, error) {
|
||||||
|
|
||||||
|
switch o := old_dist.(type) {
|
||||||
|
case Lognormal:
|
||||||
|
{
|
||||||
|
switch n := new_dist.(type) {
|
||||||
|
case Lognormal:
|
||||||
|
return multiplyLogDists(o, Lognormal{low: 1.0 / n.high, high: 1.0 / n.low}), nil
|
||||||
|
case Scalar:
|
||||||
|
return multiplyLogDists(o, Lognormal{low: 1.0 / float64(n), high: 1.0 / float64(n)}), nil
|
||||||
|
default:
|
||||||
|
return operateAsSamples(old_dist, new_dist, "/")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case Scalar:
|
||||||
|
{
|
||||||
|
switch n := new_dist.(type) {
|
||||||
|
case Lognormal:
|
||||||
|
return multiplyLogDists(Lognormal{low: float64(o), high: float64(o)}, Lognormal{low: 1.0 / n.high, high: 1.0 / n.low}), nil
|
||||||
|
case Scalar:
|
||||||
|
return Scalar(float64(o) / float64(n)), nil
|
||||||
|
default:
|
||||||
|
return operateAsSamples(old_dist, new_dist, "/")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
return operateAsSamples(old_dist, new_dist, "/")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -221,57 +243,47 @@ func joinDists(old_dist Dist, new_dist Dist, op string) (Dist, error) {
|
||||||
switch op {
|
switch op {
|
||||||
case "*":
|
case "*":
|
||||||
return multiplyDists(old_dist, new_dist)
|
return multiplyDists(old_dist, new_dist)
|
||||||
|
case "/":
|
||||||
|
return divideDists(old_dist, new_dist)
|
||||||
|
case "+":
|
||||||
|
return operateAsSamples(old_dist, new_dist, "+")
|
||||||
|
case "-":
|
||||||
|
return operateAsSamples(old_dist, new_dist, "-")
|
||||||
default:
|
default:
|
||||||
return old_dist, errors.New("Can't combine distributions in this way")
|
return old_dist, errors.New("Can't combine distributions in this way")
|
||||||
}
|
}
|
||||||
/*
|
|
||||||
switch {
|
|
||||||
case old_dist.Type == "Lognormal" && new_dist.Type == "Lognormal" && op == "*":
|
|
||||||
return Dist{Type: "Lognormal", Lognormal: multiplyLogDists(old_dist.Lognormal, new_dist.Lognormal), Samples: nil}, nil
|
|
||||||
case old_dist.Type == "Lognormal" && new_dist.Type == "Lognormal" && op == "/":
|
|
||||||
tmp_dist := Lognormal{low: 1.0 / new_dist.Lognormal.high, high: 1.0 / new_dist.Lognormal.low}
|
|
||||||
return Dist{Type: "Lognormal", Lognormal: multiplyLogDists(old_dist.Lognormal, tmp_dist), Samples: nil}, nil
|
|
||||||
default:
|
|
||||||
fmt.Printf("For now, can't do anything besides multiplying lognormals\n")
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
// return old_dist, errors.New("Can't combine distributions in this way")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Pretty print distributions */
|
/* Pretty print distributions */
|
||||||
|
func prettyPrintFloat(f float64) {
|
||||||
|
switch {
|
||||||
|
case math.Abs(f) >= 1_000_000_000_000:
|
||||||
|
fmt.Printf("%.1fT", f/1_000_000_000_000)
|
||||||
|
case math.Abs(f) >= 1_000_000_000:
|
||||||
|
fmt.Printf("%.1fB", f/1_000_000_000)
|
||||||
|
case math.Abs(f) >= 1_000_000:
|
||||||
|
fmt.Printf("%.1fM", f/1_000_000)
|
||||||
|
case math.Abs(f) >= 1_000:
|
||||||
|
fmt.Printf("%.1fK", f/1_000)
|
||||||
|
|
||||||
|
case math.Abs(f) <= 0.0001:
|
||||||
|
fmt.Printf("%.5f", f)
|
||||||
|
case math.Abs(f) <= 0.001:
|
||||||
|
fmt.Printf("%.4f", f)
|
||||||
|
case math.Abs(f) <= 0.01:
|
||||||
|
fmt.Printf("%.3f", f)
|
||||||
|
case math.Abs(f) <= 0.1:
|
||||||
|
fmt.Printf("%.2f", f)
|
||||||
|
default:
|
||||||
|
fmt.Printf("%.1f", f)
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
func prettyPrint2Floats(low float64, high float64) {
|
func prettyPrint2Floats(low float64, high float64) {
|
||||||
// fmt.Printf("=> %.1f %.1f\n", low, high)
|
prettyPrintFloat(low)
|
||||||
switch {
|
|
||||||
case math.Abs(low) >= 1_000_000_000_000:
|
|
||||||
fmt.Printf("%.1fT", low/1_000_000_000_000)
|
|
||||||
case math.Abs(low) >= 1_000_000_000:
|
|
||||||
fmt.Printf("%.1fB", low/1_000_000_000)
|
|
||||||
case math.Abs(low) >= 1_000_000:
|
|
||||||
fmt.Printf("%.1fM", low/1_000_000)
|
|
||||||
case math.Abs(low) >= 1_000:
|
|
||||||
fmt.Printf("%.1fK", low/1_000)
|
|
||||||
case math.Abs(low) >= 1_000:
|
|
||||||
fmt.Printf("%.1fK", low/1_000)
|
|
||||||
default:
|
|
||||||
fmt.Printf("%.1f", low)
|
|
||||||
}
|
|
||||||
fmt.Printf(" ")
|
fmt.Printf(" ")
|
||||||
switch {
|
prettyPrintFloat(high)
|
||||||
case math.Abs(high) >= 1_000_000_000_000:
|
|
||||||
fmt.Printf("%.1fT", high/1_000_000_000_000)
|
|
||||||
case math.Abs(high) >= 1_000_000_000:
|
|
||||||
fmt.Printf("%.1fB", high/1_000_000_000)
|
|
||||||
case math.Abs(high) >= 1_000_000:
|
|
||||||
fmt.Printf("%.1fM", high/1_000_000)
|
|
||||||
case math.Abs(high) >= 1_000:
|
|
||||||
fmt.Printf("%.1fK", high/1_000)
|
|
||||||
case math.Abs(high) >= 1_000:
|
|
||||||
fmt.Printf("%.1fK", high/1_000)
|
|
||||||
default:
|
|
||||||
fmt.Printf("%.1f", high)
|
|
||||||
}
|
|
||||||
fmt.Printf("\n")
|
fmt.Printf("\n")
|
||||||
// fmt.Printf("=> %.1f %.1f\n", low, high)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func prettyPrintDist(dist Dist) {
|
func prettyPrintDist(dist Dist) {
|
||||||
|
@ -293,6 +305,11 @@ func prettyPrintDist(dist Dist) {
|
||||||
case Beta:
|
case Beta:
|
||||||
fmt.Printf("=> beta ")
|
fmt.Printf("=> beta ")
|
||||||
prettyPrint2Floats(v.a, v.b)
|
prettyPrint2Floats(v.a, v.b)
|
||||||
|
case Scalar:
|
||||||
|
fmt.Printf("=> scalar ")
|
||||||
|
w := float64(v)
|
||||||
|
prettyPrintFloat(w)
|
||||||
|
fmt.Println()
|
||||||
default:
|
default:
|
||||||
fmt.Printf("%v", v)
|
fmt.Printf("%v", v)
|
||||||
}
|
}
|
||||||
|
@ -303,7 +320,7 @@ func main() {
|
||||||
|
|
||||||
reader := bufio.NewReader(os.Stdin)
|
reader := bufio.NewReader(os.Stdin)
|
||||||
var init_dist Dist
|
var init_dist Dist
|
||||||
init_dist = Scalar{p: 1} // Lognormal{low: 1, high: 1}
|
init_dist = Scalar(1) // Lognormal{low: 1, high: 1}
|
||||||
old_dist := init_dist
|
old_dist := init_dist
|
||||||
vars := make(map[string]Dist)
|
vars := make(map[string]Dist)
|
||||||
// Could eventually be a more complex struct with:
|
// Could eventually be a more complex struct with:
|
||||||
|
|
|
@ -9,6 +9,8 @@ import rand "math/rand/v2"
|
||||||
type Src = *rand.Rand
|
type Src = *rand.Rand
|
||||||
type func64 = func(Src) float64
|
type func64 = func(Src) float64
|
||||||
|
|
||||||
|
var global_r = rand.New(rand.NewPCG(uint64(1), uint64(2)))
|
||||||
|
|
||||||
func Sample_unit_uniform(r Src) float64 {
|
func Sample_unit_uniform(r Src) float64 {
|
||||||
return r.Float64()
|
return r.Float64()
|
||||||
}
|
}
|
||||||
|
@ -138,10 +140,10 @@ func Sample_mixture(fs []func64, weights []float64, r Src) float64 {
|
||||||
}
|
}
|
||||||
|
|
||||||
func Sample_serially(f func64, n_samples int) []float64 {
|
func Sample_serially(f func64, n_samples int) []float64 {
|
||||||
var r = rand.New(rand.NewPCG(uint64(1), uint64(2)))
|
|
||||||
xs := make([]float64, n_samples)
|
xs := make([]float64, n_samples)
|
||||||
|
// var global_r = rand.New(rand.NewPCG(uint64(1), uint64(2)))
|
||||||
for i := 0; i < n_samples; i++ {
|
for i := 0; i < n_samples; i++ {
|
||||||
xs[i] = f(r)
|
xs[i] = f(global_r)
|
||||||
}
|
}
|
||||||
return xs
|
return xs
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user