fermi/sample/sample.go

package sample

import "math"
import "sync"
import rand "math/rand/v2"

// https://pkg.go.dev/math/rand/v2

type Src = *rand.Rand
type func64 = func(Src) float64

var global_r = rand.New(rand.NewPCG(uint64(1), uint64(2)))

func Sample_unit_uniform(r Src) float64 {
	return r.Float64()
}

func Sample_unit_normal(r Src) float64 {
	return r.NormFloat64()
}

func Sample_uniform(start float64, end float64, r Src) float64 {
	return Sample_unit_uniform(r)*(end-start) + start
}

func Sample_normal(mean float64, sigma float64, r Src) float64 {
	return mean + Sample_unit_normal(r)*sigma
}

func Sample_lognormal(logmean float64, logstd float64, r Src) float64 {
	return (math.Exp(Sample_normal(logmean, logstd, r)))
}

func Sample_normal_from_90_ci(low float64, high float64, r Src) float64 {
	var normal90 float64 = 1.6448536269514727
	var mean float64 = (high + low) / 2.0
	var std float64 = (high - low) / (2.0 * normal90)
	return Sample_normal(mean, std, r)
}

func Sample_to(low float64, high float64, r Src) float64 {
	// Given a (positive) 90% confidence interval,
	// returns a sample from a lognorma with a matching 90% c.i.
	// Key idea: If we want a lognormal with 90% confidence interval [a, b]
	// we need but get a normal with 90% confidence interval [log(a), log(b)].
	// Then see code for Sample_normal_from_90_ci
	var loglow float64 = math.Log(low)
	var loghigh float64 = math.Log(high)
	return math.Exp(Sample_normal_from_90_ci(loglow, loghigh, r))
}

func Sample_gamma(alpha float64, r Src) float64 {

	// a simple method for generating gamma variables, marsaglia and wan tsang, 2001
	// https://dl.acm.org/doi/pdf/10.1145/358407.358414
	// see also the references/ folder
	// note that the wikipedia page for the gamma distribution includes a scaling parameter
	// k or beta
	// https://en.wikipedia.org/wiki/gamma_distribution
	// such that gamma_k(alpha, k) = k * gamma(alpha)
	// or gamma_beta(alpha, beta) = gamma(alpha) / beta
	// so far i have not needed to use this, and thus the second parameter is by default 1.

	if alpha >= 1 {
		var d, c, x, v, u float64
		d = alpha - 1.0/3.0
		c = 1.0 / math.Sqrt(9.0*d)

		for {

		InnerLoop:
			for {
				x = Sample_unit_normal(r)
				v = 1.0 + c*x
				if v > 0.0 {
					break InnerLoop
				}
			}

			v = v * v * v
			u = Sample_unit_uniform(r)

			if u < 1.0-0.0331*(x*x*x*x) { // Condition 1
				// the 0.0331 doesn't inspire much confidence
				// however, this isn't the whole story
				// by knowing that Condition 1 implies condition 2
				// we realize that this is just a way of making the algorithm faster
				// i.e., of not using the logarithms
				return d * v
			}
			if math.Log(u) < 0.5*(x*x)+d*(1.0-v+math.Log(v)) { // Condition 2
				return d * v
			}

		}

	} else {
		return Sample_gamma(1.0+alpha, r) * math.Pow(Sample_unit_uniform(r), 1.0/alpha)
	}
}

func Sample_beta(a float64, b float64, r Src) float64 {
	gamma_a := Sample_gamma(a, r)
	gamma_b := Sample_gamma(b, r)
	return gamma_a / (gamma_a + gamma_b)
}

func Sample_mixture(fs []func64, weights []float64, r Src) float64 {

	// fmt.Println("weights initially: ", weights)
	var sum_weights float64 = 0
	for _, weight := range weights {
		sum_weights += weight
	}

	var total float64 = 0
	var cumsummed_normalized_weights = append([]float64(nil), weights...)
	for i, weight := range weights {
		total += weight / sum_weights
		cumsummed_normalized_weights[i] = total
	}

	var result float64
	var flag int = 0
	var p float64 = r.Float64()

	for i, cnw := range cumsummed_normalized_weights {
		if p < cnw {
			result = fs[i](r)
			flag = 1
			break
		}
	}

	if flag == 0 {
		result = fs[len(fs)-1](r)
	}
	return result

}

func Sample_serially(f func64, n_samples int) []float64 {
	xs := make([]float64, n_samples)
	// var global_r = rand.New(rand.NewPCG(uint64(1), uint64(2)))
	for i := 0; i < n_samples; i++ {
		xs[i] = f(global_r)
	}
	return xs
}

func Sample_parallel(f func64, n_samples int) []float64 {
	var num_threads = 16
	var xs = make([]float64, n_samples)
	var wg sync.WaitGroup
	var h = n_samples / num_threads
	wg.Add(num_threads)
	for i := range num_threads {
		var xs_i = xs[i*h : (i+1)*h]
		go func(f func64) {
			defer wg.Done()
			var r = rand.New(rand.NewPCG(uint64(i), uint64(i+1)))
			for i := range xs_i {
				xs_i[i] = f(r)
			}
		}(f)
	}

	wg.Wait()
	return xs

}

/*
func main() {

	var p_a float64 = 0.8
	var p_b float64 = 0.5
	var p_c float64 = p_a * p_b
	ws := [4](float64){1 - p_c, p_c / 2, p_c / 4, p_c / 4}

	Sample_0 := func(r Src) float64 { return 0 }
	Sample_1 := func(r Src) float64 { return 1 }
	Sample_few := func(r Src) float64 { return Sample_to(1, 3, r) }
	Sample_many := func(r Src) float64 { return Sample_to(2, 10, r) }
	fs := [4](func64){Sample_0, Sample_1, Sample_few, Sample_many}

	model := func(r Src) float64 { return Sample_mixture(fs[0:], ws[0:], r) }
	n_samples := 1_000_000
	xs := Sample_parallel(model, n_samples)
	var avg float64 = 0
	for _, x := range xs {
		avg += x
	}
	avg = avg / float64(n_samples)
	fmt.Printf("Average: %v\n", avg)
	/*
		  // Without concurrency:
			n_samples := 1_000_000
			var r = rand.New(rand.NewPCG(uint64(1), uint64(2)))
			var avg float64 = 0
			for i := 0; i < n_samples; i++ {
				avg += Sample_mixture(fs[0:], ws[0:], r)
			}
			avg = avg / float64(n_samples)
			fmt.Printf("Average: %v\n", avg)
}
*/
fix types 2024-06-09 20:46:08 +00:00			`package sample`
add samplers from time to botec code 2024-06-09 12:21:01 +00:00
			`import "math"`
			`import "sync"`
			`import rand "math/rand/v2"`

			`// https://pkg.go.dev/math/rand/v2`

fix types 2024-06-09 20:46:08 +00:00			`type Src = *rand.Rand`
			`type func64 = func(Src) float64`
add samplers from time to botec code 2024-06-09 12:21:01 +00:00
fix correlation problem by using global variable 2024-06-10 01:08:10 +00:00			`var global_r = rand.New(rand.NewPCG(uint64(1), uint64(2)))`

fix types 2024-06-09 20:46:08 +00:00			`func Sample_unit_uniform(r Src) float64 {`
add samplers from time to botec code 2024-06-09 12:21:01 +00:00			`return r.Float64()`
			`}`

fix types 2024-06-09 20:46:08 +00:00			`func Sample_unit_normal(r Src) float64 {`
add samplers from time to botec code 2024-06-09 12:21:01 +00:00			`return r.NormFloat64()`
			`}`

fix types 2024-06-09 20:46:08 +00:00			`func Sample_uniform(start float64, end float64, r Src) float64 {`
			`return Sample_unit_uniform(r)*(end-start) + start`
add samplers from time to botec code 2024-06-09 12:21:01 +00:00			`}`

fix types 2024-06-09 20:46:08 +00:00			`func Sample_normal(mean float64, sigma float64, r Src) float64 {`
			`return mean + Sample_unit_normal(r)*sigma`
add samplers from time to botec code 2024-06-09 12:21:01 +00:00			`}`

fix types 2024-06-09 20:46:08 +00:00			`func Sample_lognormal(logmean float64, logstd float64, r Src) float64 {`
			`return (math.Exp(Sample_normal(logmean, logstd, r)))`
add samplers from time to botec code 2024-06-09 12:21:01 +00:00			`}`

fix types 2024-06-09 20:46:08 +00:00			`func Sample_normal_from_90_ci(low float64, high float64, r Src) float64 {`
add samplers from time to botec code 2024-06-09 12:21:01 +00:00			`var normal90 float64 = 1.6448536269514727`
			`var mean float64 = (high + low) / 2.0`
			`var std float64 = (high - low) / (2.0 * normal90)`
fix types 2024-06-09 20:46:08 +00:00			`return Sample_normal(mean, std, r)`
add samplers from time to botec code 2024-06-09 12:21:01 +00:00			`}`

fix types 2024-06-09 20:46:08 +00:00			`func Sample_to(low float64, high float64, r Src) float64 {`
add samplers from time to botec code 2024-06-09 12:21:01 +00:00			`// Given a (positive) 90% confidence interval,`
			`// returns a sample from a lognorma with a matching 90% c.i.`
			`// Key idea: If we want a lognormal with 90% confidence interval [a, b]`
			`// we need but get a normal with 90% confidence interval [log(a), log(b)].`
fix types 2024-06-09 20:46:08 +00:00			`// Then see code for Sample_normal_from_90_ci`
add samplers from time to botec code 2024-06-09 12:21:01 +00:00			`var loglow float64 = math.Log(low)`
			`var loghigh float64 = math.Log(high)`
fix types 2024-06-09 20:46:08 +00:00			`return math.Exp(Sample_normal_from_90_ci(loglow, loghigh, r))`
add samplers from time to botec code 2024-06-09 12:21:01 +00:00			`}`

add implementation for gamma distribution 2024-06-09 21:01:49 +00:00			`func Sample_gamma(alpha float64, r Src) float64 {`

			`// a simple method for generating gamma variables, marsaglia and wan tsang, 2001`
			`// https://dl.acm.org/doi/pdf/10.1145/358407.358414`
			`// see also the references/ folder`
			`// note that the wikipedia page for the gamma distribution includes a scaling parameter`
			`// k or beta`
			`// https://en.wikipedia.org/wiki/gamma_distribution`
			`// such that gamma_k(alpha, k) = k * gamma(alpha)`
			`// or gamma_beta(alpha, beta) = gamma(alpha) / beta`
			`// so far i have not needed to use this, and thus the second parameter is by default 1.`

			`if alpha >= 1 {`
			`var d, c, x, v, u float64`
			`d = alpha - 1.0/3.0`
			`c = 1.0 / math.Sqrt(9.0*d)`

			`for {`

			`InnerLoop:`
			`for {`
			`x = Sample_unit_normal(r)`
			`v = 1.0 + c*x`
			`if v > 0.0 {`
			`break InnerLoop`
			`}`
			`}`

			`v = v * v * v`
			`u = Sample_unit_uniform(r)`

			`if u < 1.0-0.0331(xxxx) { // Condition 1`
			`// the 0.0331 doesn't inspire much confidence`
			`// however, this isn't the whole story`
			`// by knowing that Condition 1 implies condition 2`
			`// we realize that this is just a way of making the algorithm faster`
			`// i.e., of not using the logarithms`
			`return d * v`
			`}`
			`if math.Log(u) < 0.5(xx)+d*(1.0-v+math.Log(v)) { // Condition 2`
			`return d * v`
			`}`

			`}`

			`} else {`
			`return Sample_gamma(1.0+alpha, r) * math.Pow(Sample_unit_uniform(r), 1.0/alpha)`
			`}`
add beta sampler and implement beta type conforming to distribution 2024-06-09 21:08:15 +00:00			`}`
add implementation for gamma distribution 2024-06-09 21:01:49 +00:00
add beta sampler and implement beta type conforming to distribution 2024-06-09 21:08:15 +00:00			`func Sample_beta(a float64, b float64, r Src) float64 {`
			`gamma_a := Sample_gamma(a, r)`
			`gamma_b := Sample_gamma(b, r)`
			`return gamma_a / (gamma_a + gamma_b)`
add implementation for gamma distribution 2024-06-09 21:01:49 +00:00			`}`

fix types 2024-06-09 20:46:08 +00:00			`func Sample_mixture(fs []func64, weights []float64, r Src) float64 {`
add samplers from time to botec code 2024-06-09 12:21:01 +00:00
			`// fmt.Println("weights initially: ", weights)`
			`var sum_weights float64 = 0`
			`for _, weight := range weights {`
			`sum_weights += weight`
			`}`

			`var total float64 = 0`
			`var cumsummed_normalized_weights = append([]float64(nil), weights...)`
			`for i, weight := range weights {`
			`total += weight / sum_weights`
			`cumsummed_normalized_weights[i] = total`
			`}`

			`var result float64`
			`var flag int = 0`
			`var p float64 = r.Float64()`

			`for i, cnw := range cumsummed_normalized_weights {`
			`if p < cnw {`
			`result = fs[i](r)`
			`flag = 1`
			`break`
			`}`
			`}`

			`if flag == 0 {`
			`result = fs[len(fs)-1](r)`
			`}`
			`return result`

			`}`

add code to multiply beta distributions 2024-06-09 23:12:02 +00:00			`func Sample_serially(f func64, n_samples int) []float64 {`
			`xs := make([]float64, n_samples)`
fix correlation problem by using global variable 2024-06-10 01:08:10 +00:00			`// var global_r = rand.New(rand.NewPCG(uint64(1), uint64(2)))`
add code to multiply beta distributions 2024-06-09 23:12:02 +00:00			`for i := 0; i < n_samples; i++ {`
fix correlation problem by using global variable 2024-06-10 01:08:10 +00:00			`xs[i] = f(global_r)`
add code to multiply beta distributions 2024-06-09 23:12:02 +00:00			`}`
			`return xs`
			`}`

add samplers from time to botec code 2024-06-09 12:21:01 +00:00			`func Sample_parallel(f func64, n_samples int) []float64 {`
			`var num_threads = 16`
			`var xs = make([]float64, n_samples)`
			`var wg sync.WaitGroup`
			`var h = n_samples / num_threads`
			`wg.Add(num_threads)`
			`for i := range num_threads {`
			`var xs_i = xs[ih : (i+1)h]`
			`go func(f func64) {`
			`defer wg.Done()`
			`var r = rand.New(rand.NewPCG(uint64(i), uint64(i+1)))`
			`for i := range xs_i {`
			`xs_i[i] = f(r)`
			`}`
			`}(f)`
			`}`

			`wg.Wait()`
			`return xs`

			`}`
fix types 2024-06-09 20:46:08 +00:00
add samplers from time to botec code 2024-06-09 12:21:01 +00:00			`/*`
			`func main() {`

			`var p_a float64 = 0.8`
			`var p_b float64 = 0.5`
			`var p_c float64 = p_a * p_b`
			`ws := [4](float64){1 - p_c, p_c / 2, p_c / 4, p_c / 4}`

fix types 2024-06-09 20:46:08 +00:00			`Sample_0 := func(r Src) float64 { return 0 }`
			`Sample_1 := func(r Src) float64 { return 1 }`
			`Sample_few := func(r Src) float64 { return Sample_to(1, 3, r) }`
			`Sample_many := func(r Src) float64 { return Sample_to(2, 10, r) }`
			`fs := [4](func64){Sample_0, Sample_1, Sample_few, Sample_many}`
add samplers from time to botec code 2024-06-09 12:21:01 +00:00
fix types 2024-06-09 20:46:08 +00:00			`model := func(r Src) float64 { return Sample_mixture(fs[0:], ws[0:], r) }`
add samplers from time to botec code 2024-06-09 12:21:01 +00:00			`n_samples := 1_000_000`
fix types 2024-06-09 20:46:08 +00:00			`xs := Sample_parallel(model, n_samples)`
add samplers from time to botec code 2024-06-09 12:21:01 +00:00			`var avg float64 = 0`
			`for _, x := range xs {`
			`avg += x`
			`}`
			`avg = avg / float64(n_samples)`
			`fmt.Printf("Average: %v\n", avg)`
			`/*`
			`// Without concurrency:`
			`n_samples := 1_000_000`
			`var r = rand.New(rand.NewPCG(uint64(1), uint64(2)))`
			`var avg float64 = 0`
			`for i := 0; i < n_samples; i++ {`
fix types 2024-06-09 20:46:08 +00:00			`avg += Sample_mixture(fs[0:], ws[0:], r)`
add samplers from time to botec code 2024-06-09 12:21:01 +00:00			`}`
			`avg = avg / float64(n_samples)`
			`fmt.Printf("Average: %v\n", avg)`
			`}`
			`*/`