update with results
This commit is contained in:
parent
5c51b6a0a2
commit
207f77cdc7
57
README.md
Normal file
57
README.md
Normal file
|
@ -0,0 +1,57 @@
|
||||||
|
# A Bayesian Nerd-Snipe
|
||||||
|
|
||||||
|
Consider the number of people you know who share your birthday. Multiplied by 365, this seems like an unbiased estimate of the number of people you know well enough that, had they been born on the same day of the year as you, you'd know it. That estimate is in turn an estimate of how many people one knows at a somewhat non-superficial level of familiarity.
|
||||||
|
|
||||||
|
I asked my Twitter followers that question, and this is what they answered:
|
||||||
|
|
||||||
|
<blockquote class="twitter-tweet"><p lang="en" dir="ltr">How many people do you know that were born in the same day of the year as you?</p>— Nuño Sempere (@NunoSempere) <a href="https://twitter.com/NunoSempere/status/1760381265272885485?ref_src=twsrc%5Etfw">February 21, 2024</a></blockquote> <script async src="https://platform.twitter.com/widgets.js" charset="utf-8"></script>
|
||||||
|
|
||||||
|
Now, and here comes the nerd snipe: after seeing the results of that poll, what should my posterior estimate be for the distribution of how many people each of my followers knows well enough that they'd know that person's birthday if it fell on the same day as their own?
|
||||||
|
|
||||||
|
Here is a photo of two cats for those of my readers who don't want to be spoiled and want to sketch the solution before reading on.
|
||||||
|
|
||||||
|
![](https://gatitos.nunosempere.com/)
|
||||||
|
|
||||||
|
## The formal solution.
|
||||||
|
|
||||||
|
Consider distributions over how many people someone knows. Those distributions go from the natural numbers to a probability.
|
||||||
|
|
||||||
|
One example of such a distribution might be:
|
||||||
|
|
||||||
|
| Number of people known | Probability |
|
||||||
|
| 10 | 5% |
|
||||||
|
| 21 | 94% |
|
||||||
|
| 1001 | 1% |
|
||||||
|
|
||||||
|
Now, consider the likelihood of getting the Twitter poll results given a particular distribution. Multiply that by the prior for that distribution, normalize, and then integrate over distributions to get your final result.
|
||||||
|
|
||||||
|
## The practical solution
|
||||||
|
|
||||||
|
The above is computationally intractable, so we turn to Monte Carlo approximations and other shortcuts. After tinkering for a bit, I ended up considering distributions over logspace, and considering only numbers of people in the set: 16, 32, 64, 128, 256, 512, 1024, 2048.
|
||||||
|
|
||||||
|
You can see the code [here](https://git.nunosempere.com/NunoSempere/peopleprobs/src/branch/master/probppl.go). It's written in Go because I've recently been learning its syntax, and it's only [moderately slower than C](https://github.com/NunoSempere/time-to-botec/) in exchange for a nicer developer experience.
|
||||||
|
|
||||||
|
With that code, the posterior over the number of people my followers know stands as:
|
||||||
|
|
||||||
|
|
||||||
|
| Num people known | % |
|
||||||
|
| ---- | ----- |
|
||||||
|
| ~16 | 7.2% |
|
||||||
|
| ~32 | 8.6% |
|
||||||
|
| ~64 | 11.0% |
|
||||||
|
| ~128 | 15.7% |
|
||||||
|
| ~256 | 21.9% |
|
||||||
|
| ~512 | 22.6% |
|
||||||
|
| ~1024 | 9.5% |
|
||||||
|
| ~2048 | 3.4% |
|
||||||
|
|
||||||
|
## To do list
|
||||||
|
|
||||||
|
- [x] MVP of mappings
|
||||||
|
- [x] Try as cdfs: too much prob at the beginning
|
||||||
|
- [x] Try as pdfs: too evenly distributed
|
||||||
|
- [x] Try as pdfs over log space; 1, 2, 4, 8, 16, 32, ...
|
||||||
|
- Or maybe even more coarse, 1.x^n
|
||||||
|
- [x] Write blogpost
|
||||||
|
- [ ] Run model for ~half an hour
|
||||||
|
- [ ] Post blogpost
|
50
probppl.go
50
probppl.go
|
@ -66,15 +66,6 @@ func getMatchesDrawGivenNPeopleKnown(n int64, r src) int64 {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
Draw 148 times
|
|
||||||
How many people do you know that were born in the same day of the year as you?
|
|
||||||
0: 46.6% | 69
|
|
||||||
1: 31.1% | 46
|
|
||||||
2: 12.8% | 19
|
|
||||||
≥3: 9.5% | 14
|
|
||||||
*/
|
|
||||||
|
|
||||||
func drawFromDistributionWithReplacement(d IntProbs, r src) int64 {
|
func drawFromDistributionWithReplacement(d IntProbs, r src) int64 {
|
||||||
pp := r.Float64()
|
pp := r.Float64()
|
||||||
sum := 0.0
|
sum := 0.0
|
||||||
|
@ -107,10 +98,14 @@ func draw148PplFromDistributionAndCheck(d IntProbs, r src, show bool) int64 {
|
||||||
person_i_num_birthday_matches := getMatchesDrawGivenNPeopleKnown(person_i_ppl_known, r)
|
person_i_num_birthday_matches := getMatchesDrawGivenNPeopleKnown(person_i_ppl_known, r)
|
||||||
count[person_i_num_birthday_matches]++
|
count[person_i_num_birthday_matches]++
|
||||||
}
|
}
|
||||||
// if (count[0] == 69) && (count[1] == 46) && (count[2] == 19) && (count[3] == 14) {
|
/*
|
||||||
if show {
|
Draw 148 times
|
||||||
// fmt.Println(count)
|
How many people do you know that were born in the same day of the year as you?
|
||||||
}
|
0: 46.6% | 69
|
||||||
|
1: 31.1% | 46
|
||||||
|
2: 12.8% | 19
|
||||||
|
≥3: 9.5% | 14
|
||||||
|
*/
|
||||||
if aboutEq(count[0], 69) && aboutEq(count[1], 46) && aboutEq(count[2], 19) && aboutEq(count[3], 14) {
|
if aboutEq(count[0], 69) && aboutEq(count[1], 46) && aboutEq(count[2], 19) && aboutEq(count[3], 14) {
|
||||||
return 1
|
return 1
|
||||||
} else {
|
} else {
|
||||||
|
@ -120,21 +115,17 @@ func draw148PplFromDistributionAndCheck(d IntProbs, r src, show bool) int64 {
|
||||||
|
|
||||||
func getUnnormalizedBayesianUpdateForDistribution(d IntProbs, r src) int64 {
|
func getUnnormalizedBayesianUpdateForDistribution(d IntProbs, r src) int64 {
|
||||||
var sum int64 = 0
|
var sum int64 = 0
|
||||||
n := 30_000
|
n := 10_000
|
||||||
for i := 0; i < n; i++ {
|
for i := 0; i < n; i++ {
|
||||||
/* if i%1000 == 0 {
|
|
||||||
fmt.Println(i)
|
|
||||||
} */
|
|
||||||
draw_result := draw148PplFromDistributionAndCheck(d, r, i == 0)
|
draw_result := draw148PplFromDistributionAndCheck(d, r, i == 0)
|
||||||
// fmt.Println(draw_result)
|
|
||||||
sum += draw_result
|
sum += draw_result
|
||||||
}
|
}
|
||||||
return sum // float64(sum) / float64(n)
|
return sum
|
||||||
}
|
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
|
|
||||||
n_dists := 30_000
|
n_dists := 100_000
|
||||||
var dists = make([]IntProbsWeights, n_dists)
|
var dists = make([]IntProbsWeights, n_dists)
|
||||||
|
|
||||||
// Prepare for concurrency
|
// Prepare for concurrency
|
||||||
|
@ -146,30 +137,29 @@ func main() {
|
||||||
go func() {
|
go func() {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
var r = rand.New(rand.NewPCG(uint64(i), uint64(i+1)))
|
var r = rand.New(rand.NewPCG(uint64(i), uint64(i+1)))
|
||||||
|
|
||||||
for j := i * (n_dists / num_threads); j < (i+1)*(n_dists/num_threads); j++ {
|
for j := i * (n_dists / num_threads); j < (i+1)*(n_dists/num_threads); j++ {
|
||||||
|
|
||||||
|
// Get
|
||||||
people_known_distribution := generatePeopleKnownDistribution(r)
|
people_known_distribution := generatePeopleKnownDistribution(r)
|
||||||
result := getUnnormalizedBayesianUpdateForDistribution(people_known_distribution, r)
|
result := getUnnormalizedBayesianUpdateForDistribution(people_known_distribution, r)
|
||||||
/*
|
|
||||||
if i%10 == 0 {
|
|
||||||
fmt.Printf("%d/%d\n", i, n_dists)
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
if result > 0 {
|
if result > 0 {
|
||||||
dists[j] = IntProbsWeights{IntProbs: people_known_distribution, w: result}
|
dists[j] = IntProbsWeights{IntProbs: people_known_distribution, w: result}
|
||||||
|
} else {
|
||||||
|
dists[j] = IntProbsWeights{IntProbs: nil, w: 0} // make explicit that this is initialized to nil; we won't compute it later becasue weight is 0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}()
|
}()
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
// Now calculate the posterior
|
|
||||||
sum_weights := int64(0)
|
sum_weights := int64(0)
|
||||||
for _, dist := range dists {
|
for _, dist := range dists {
|
||||||
sum_weights += dist.w
|
sum_weights += dist.w
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fmt.Printf("\n| Num people known | %% |\n")
|
||||||
|
fmt.Printf("| ---- | ----- |\n")
|
||||||
for i := int64(16); i <= 2048; i *= 2 {
|
for i := int64(16); i <= 2048; i *= 2 {
|
||||||
p := 0.0
|
p := 0.0
|
||||||
for _, dist := range dists {
|
for _, dist := range dists {
|
||||||
|
@ -180,6 +170,6 @@ func main() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
p = p / float64(sum_weights)
|
p = p / float64(sum_weights)
|
||||||
fmt.Printf("%d: %f\n", i, p)
|
fmt.Printf("| %4d | %4.1f%% | \n", i, p*100)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
15
record.txt
Normal file
15
record.txt
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
make[1]: Entering directory '/home/loki/Documents/core/software/fresh/go/probability-people'
|
||||||
|
go build -o probppl
|
||||||
|
./probppl
|
||||||
|
|
||||||
|
| Num people known | % |
|
||||||
|
| ---- | ----- |
|
||||||
|
| 16 | 7.2% |
|
||||||
|
| 32 | 8.6% |
|
||||||
|
| 64 | 11.0% |
|
||||||
|
| 128 | 15.7% |
|
||||||
|
| 256 | 21.9% |
|
||||||
|
| 512 | 22.6% |
|
||||||
|
| 1024 | 9.5% |
|
||||||
|
| 2048 | 3.4% |
|
||||||
|
make[1]: Leaving directory '/home/loki/Documents/core/software/fresh/go/probability-people'
|
Loading…
Reference in New Issue
Block a user