diff --git a/README.md b/README.md
index de403ee..6989f5d 100644
--- a/README.md
+++ b/README.md
@@ -48,3 +48,5 @@ Remedy: consider the conditional probabilities? But how? Or, relax assumptions u
 - [ ] Exclude polls older than one month?
 - [ ] Exclude partisan polls
 - [ ] ...
+
+https://stats.stackexchange.com/questions/274211/calculating-the-probability-of-someone-winning-from-a-poll
diff --git a/main.go b/main.go
index 131fe70..fd47c03 100644
--- a/main.go
+++ b/main.go
@@ -3,6 +3,7 @@ package main
 import (
 	"encoding/csv"
 	"fmt"
+	"math"
 	rand "math/rand/v2"
 	"os"
 	"strconv"
@@ -30,8 +31,6 @@ type Poll struct {
 	Date time.Time
 }
 
-// type src = *rand.Rand
-
 /* Globals */
 var r = rand.New(rand.NewPCG(uint64(100), uint64(2224)))
 
@@ -186,6 +185,22 @@ func readStates() ([]State, error) {
 	return states_slice, nil
 }
 
+/* Sampling helper functions */
+func getNormalCDF(x float64, mean float64, std float64) float64 {
+	erf_term := (x - mean) / (std * math.Sqrt2)
+	return (1 + math.Erf(erf_term)) / 2
+}
+
+func getProbabilityAboveX(x float64, mean float64, std float64) float64 {
+	return 1 - getNormalCDF(x, mean, std)
+}
+
+func getChanceCandidateWinsFromPoll(candidate_p float64, poll_sample_size float64) float64 {
+	std := math.Sqrt(candidate_p * (1 - candidate_p) / poll_sample_size) // https://stats.stackexchange.com/questions/258879/how-to-interpret-margin-of-error-in-a-non-binary-poll
+	return getProbabilityAboveX(0.5, candidate_p, std)
+}
+
+/* Sample state by state */
 func sampleFromState(state State) VotesForEachParty {
 	fmt.Printf("\n\nState: %s", state.Name)
 	fmt.Printf("\n\tVotes: %d", state.Votes)
@@ -256,25 +271,40 @@ func sampleFromState(state State) VotesForEachParty {
 		if has_biden && has_trump {
 			recent_biden_trump_polls = append(recent_biden_trump_polls, recent_poll)
 		}
-		fmt.Printf("\n\tPoll: %+v", recent_poll)
 	}
 
 	num_biden_votes := 0.0
 	num_trump_votes := 0.0
 	for _, recent_biden_trump_poll := range recent_biden_trump_polls {
-		biden_percentage := 0.0
-		trump_percentage := 0.0
+		biden_share := 0.0
+		trump_share := 0.0
 		for candidate_name, candidate_percentage := range recent_biden_trump_poll.PollResults {
 			if candidate_name == "Biden" {
-				biden_percentage = candidate_percentage
+				biden_share = candidate_percentage / 100
 			} else if candidate_name == "Trump" {
-				trump_percentage = candidate_percentage
+				trump_share = candidate_percentage / 100
 			}
 		}
-		num_biden_votes += biden_percentage * float64(recent_biden_trump_poll.SampleSize)
-		num_trump_votes += trump_percentage * float64(recent_biden_trump_poll.SampleSize)
-	}
+		sample_size := float64(recent_biden_trump_poll.SampleSize)
+		poll_biden_votes := biden_share * sample_size
+		poll_trump_votes := trump_share * sample_size
+		joint_trump_biden_sample_size := poll_biden_votes + poll_trump_votes
+		normalized_trump_share := trump_share / (trump_share + biden_share)
+		normalized_biden_share := biden_share / (trump_share + biden_share)
+		std_poll := math.Sqrt((normalized_trump_share * normalized_biden_share) / joint_trump_biden_sample_size)
+		p_trump_more_votes := getProbabilityAboveX(0.5, normalized_trump_share, std_poll)
+
+		fmt.Printf("\n\tPoll: %+v", recent_biden_trump_poll)
+		fmt.Printf("\n\t\tChance of R win: %f", p_trump_more_votes)
+
+		// Update general tally
+		num_biden_votes += poll_biden_votes
+		num_trump_votes += poll_trump_votes
+	}
+	// total_sample_size := num_biden_votes + num_trump_votes
+
+	fmt.Println("")
 
 	if r.Float64() < p_republican {
 		return VotesForEachParty{Democrats: 0, Republicans: state.Votes}
 	} else {