From cbda7b1e5c1bbd68ae7263ffc59be4baedca6fa6 Mon Sep 17 00:00:00 2001 From: NunoSempere Date: Sun, 14 Apr 2024 21:42:38 -0400 Subject: [PATCH] move glue code function to the end --- main.go | 562 +++++++++++++++++++++++++++----------------------------- 1 file changed, 275 insertions(+), 287 deletions(-) diff --git a/main.go b/main.go index 36e0d58..569cd4e 100644 --- a/main.go +++ b/main.go @@ -34,9 +34,283 @@ type Poll struct { /* Globals */ var r = rand.New(rand.NewPCG(uint64(100), uint64(2224))) -var dev = false + +/* Sampling helper functions */ +func getNormalCDF(x float64, mean float64, std float64) float64 { + erf_term := (x - mean) / (std * math.Sqrt2) + return (1 + math.Erf(erf_term)) / 2 +} + +func getProbabilityAboveX(x float64, mean float64, std float64) float64 { + return 1 - getNormalCDF(x, mean, std) +} + +func getChanceCandidateWinsFromPollShare(candidate_p float64, poll_sample_size float64) float64 { + std := math.Sqrt(candidate_p * (1 - candidate_p) / poll_sample_size) // https://stats.stackexchange.com/questions/258879/how-to-interpret-margin-of-error-in-a-non-binary-poll + return getProbabilityAboveX(0.5, candidate_p, std) +} + +func getChanceRepublicanWinFromPoll(poll Poll, pretty_print bool) float64 { + biden_percentage, biden_exists := poll.PollResults["Biden"] + trump_percentage, trump_exists := poll.PollResults["Trump"] + if !biden_exists || !trump_exists { + panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump") + } + biden_share := biden_percentage / 100.0 // will panic if the item is not found, but we've previously filtered for it + trump_share := trump_percentage / 100.0 + + normalized_trump_share := trump_share / (trump_share + biden_share) + normalized_biden_share := biden_share / (trump_share + biden_share) + + joint_trump_biden_sample_size := (biden_share + trump_share) * float64(poll.SampleSize) + std_error_poll_mean := math.Sqrt((normalized_trump_share * normalized_biden_share) / joint_trump_biden_sample_size) + + p_republican_win := getProbabilityAboveX(0.5, normalized_trump_share, std_error_poll_mean) + + if pretty_print { + fmt.Printf("\n\t\tSample size: %f", joint_trump_biden_sample_size) + fmt.Printf("\n\t\tMean R: %f", 100.0*normalized_trump_share) + fmt.Printf("\n\t\tStd of mean R: %f", 100*std_error_poll_mean) + fmt.Printf("\n\t\tPoll says chance of R win: %f", p_republican_win) + } + return p_republican_win +} + +func getChanceRepublicanWinFromPollPlusUncertainty(poll Poll, state State, pretty_print bool) float64 { + // Uncertainty from the state + n_republican_win := 0 + for _, party := range state.PresidentialElectoralHistory { + if party == "R" { + n_republican_win++ + } + } + + // Get the uncertainty from the poll + biden_percentage, biden_exists := poll.PollResults["Biden"] + trump_percentage, trump_exists := poll.PollResults["Trump"] + if !biden_exists || !trump_exists { + panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump") + } + biden_share := biden_percentage / 100.0 // will panic if the item is not found, but we've previously filtered for it + trump_share := trump_percentage / 100.0 + + normalized_trump_share := trump_share / (trump_share + biden_share) + normalized_biden_share := biden_share / (trump_share + biden_share) + + joint_trump_biden_sample_size := (biden_share + trump_share) * float64(poll.SampleSize) + std_error_poll_mean := math.Sqrt((normalized_trump_share * normalized_biden_share) / joint_trump_biden_sample_size) + + /* Inject additional uncertainty */ + /* + Possible factors: + - National drift between now and the election (biggest one) + - States more uncertain than the national average + - Idiosyncratic factors + - Polls not being as good as gallup + - Increased polarization + Also note that the polls already have some error already + */ + std_additional_uncertainty := 5.0 / 100.0 + + if n_republican_win == 0 || n_republican_win == 6 { + // if solid states for the last 6 elections + std_additional_uncertainty = std_additional_uncertainty / 3.0 + + if pretty_print { + fmt.Printf("\n\t\tN republican wins: %d", n_republican_win) + fmt.Printf("\n\t\t=> Reducing additional uncertainty") + } + } + + std_error := std_error_poll_mean + std_additional_uncertainty + + // std_error := std_error_poll_mean + 0.065 + p_republican_win := getProbabilityAboveX(0.5, normalized_trump_share, std_error) + + if pretty_print { + fmt.Printf("\n\t\tStd with std_additional_uncertainty R: %f", 100*std_error) + fmt.Printf("\n\t\tPoll plus uncertainty says chance of R win: %f", p_republican_win) + } + return p_republican_win +} + +/* Print state by state data */ +func printStates(states []State) { + for _, state := range states { + fmt.Printf("\n\nState: %s", state.Name) + fmt.Printf("\n\tVotes: %d", state.Votes) + fmt.Printf("\n\tHistory: %s", state.PresidentialElectoralHistory) + + p_baserate_republican_win := 0.0 + for _, party := range state.PresidentialElectoralHistory { + if party == "R" { + p_baserate_republican_win++ + } + } + fmt.Printf("\n\tHistorical base rate of R win: %f", p_baserate_republican_win/float64(len(state.PresidentialElectoralHistory))) + + // Individual poll + for _, poll := range state.Polls { + fmt.Printf("\n\tPoll: %+v", poll) + _ = getChanceRepublicanWinFromPoll(poll, true) + _ = getChanceRepublicanWinFromPollPlusUncertainty(poll, state, true) + } + + // Aggregate poll + num_biden_votes := 0.0 + num_trump_votes := 0.0 + for _, poll := range state.Polls { + biden_percentage, biden_exists := poll.PollResults["Biden"] + trump_percentage, trump_exists := poll.PollResults["Trump"] + if !biden_exists || !trump_exists { + panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump") + } + num_biden_votes += (biden_percentage / 100.0) * float64(poll.SampleSize) + num_trump_votes += (trump_percentage / 100.0) * float64(poll.SampleSize) + } + aggregate_sample_size := num_biden_votes + num_trump_votes + if aggregate_sample_size != 0.0 { + var aggregate_poll = Poll{SampleSize: int(aggregate_sample_size), PollResults: make(map[string]float64)} + aggregate_poll.PollResults["Biden"] = 100.0 * num_biden_votes / aggregate_sample_size + aggregate_poll.PollResults["Trump"] = 100.0 * num_trump_votes / aggregate_sample_size + + fmt.Printf("\n\tAggregate poll: %+v", aggregate_poll) + _ = getChanceRepublicanWinFromPoll(aggregate_poll, true) + _ = getChanceRepublicanWinFromPollPlusUncertainty(aggregate_poll, state, true) + } + } +} + +/* Sample state by state */ +func sampleFromState(state State) VotesForEachParty { + switch state.Name { + case "Nebraska": + /* + 2000: R + 2004: R + 2008: Split, 1 D, 4 R + 2012: R + 2016: R + 2020: Split, 1 D, 4 R + */ + p_split := 2.0 / 6.0 + if r.Float64() < p_split { + return VotesForEachParty{Democrats: 1, Republicans: 4} + } else { + return VotesForEachParty{Democrats: 0, Republicans: 5} + } + case "Maine": + /* + 2000: D + 2004: D + 2008: D + 2012: D + 2016: Split: 3 D, 1 R + 2020: Split, 3 D, 1 R + */ + p_split := 2.0 / 6.0 + if r.Float64() < p_split { + return VotesForEachParty{Democrats: 3, Republicans: 1} + } else { + return VotesForEachParty{Democrats: 1, Republicans: 0} + } + default: + { + /* Consider the base rate for the state */ + p_baserate_republican_win := 0.0 + for _, party := range state.PresidentialElectoralHistory { + if party == "R" { + p_baserate_republican_win++ + } + } + p_baserate_republican_win = p_baserate_republican_win / float64(len(state.PresidentialElectoralHistory)) + p_republican_win := p_baserate_republican_win // if no polls + + /* Consider polls */ + num_biden_votes := 0.0 + num_trump_votes := 0.0 + for _, poll := range state.Polls { + biden_percentage, biden_exists := poll.PollResults["Biden"] + trump_percentage, trump_exists := poll.PollResults["Trump"] + if !biden_exists || !trump_exists { + panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump") + } + num_biden_votes += (biden_percentage / 100.0) * float64(poll.SampleSize) + num_trump_votes += (trump_percentage / 100.0) * float64(poll.SampleSize) + } + + aggregate_sample_size := num_biden_votes + num_trump_votes + if aggregate_sample_size != 0.0 { + var aggregate_poll = Poll{SampleSize: int(aggregate_sample_size), PollResults: make(map[string]float64)} + aggregate_poll.PollResults["Biden"] = 100.0 * num_biden_votes / aggregate_sample_size + aggregate_poll.PollResults["Trump"] = 100.0 * num_trump_votes / aggregate_sample_size + + p_republican_win_aggregate_polls := getChanceRepublicanWinFromPollPlusUncertainty(aggregate_poll, state, false) + // p_republican_win_aggregate_polls = getChanceRepublicanWinFromPoll(aggregate_poll, false) + + // weight_polls := 0.75 + // p_republican = weight_polls*p_republican_win_aggregate_polls + (1.0-weight_polls)*p_baserate_republican_win + p_republican_win = p_republican_win_aggregate_polls + } + + if r.Float64() < p_republican_win { + return VotesForEachParty{Democrats: 0, Republicans: state.Votes} + } else { + return VotesForEachParty{Democrats: state.Votes, Republicans: 0} + } + } + } +} + +/* Simulate election */ +func simulateElection(states []State) int { + republican_seats := 0 + for _, state := range states { + election_sample := sampleFromState(state) + republican_seats += election_sample.Republicans + } + return republican_seats +} + +/* Histogram */ +func barString(n int) string { + str := "" + for i := 0; i < n; i++ { + str += "█" + } + return str +} + +func printElectoralCollegeHistogram(samples []int) { + histogram := [538]int{} + for _, sample := range samples { + histogram[sample]++ + } + + max_count := 0 + for _, count := range histogram { + if count > max_count { + max_count = count + } + } + + cp := 0.0 + for i, count := range histogram { + bar_length := (count * 75) / max_count // Assuming max_count bar length is 50 characters + p := float64(count) / float64(len(samples)) * 100 + cp += p + + if i > 130 && i < 400 { + fmt.Printf("[ %2d, %4d): %s %.2f%% (%.0f%%)\n", i, i+1, barString(bar_length), p, cp) + } else if p >= 0.01 { + fmt.Printf(">0.01 probability outside of domain, you might want to change histogram parameters\n") + + } + } +} /* Load data from csvs */ +// Glue code func readStates() ([]State, error) { var states map[string]State = make(map[string]State) @@ -205,294 +479,9 @@ func readStates() ([]State, error) { for _, state := range states { states_slice = append(states_slice, state) } - return states_slice, nil } -/* Sampling helper functions */ -func getNormalCDF(x float64, mean float64, std float64) float64 { - erf_term := (x - mean) / (std * math.Sqrt2) - return (1 + math.Erf(erf_term)) / 2 -} - -func getProbabilityAboveX(x float64, mean float64, std float64) float64 { - return 1 - getNormalCDF(x, mean, std) -} - -func getChanceCandidateWinsFromPollShare(candidate_p float64, poll_sample_size float64) float64 { - std := math.Sqrt(candidate_p * (1 - candidate_p) / poll_sample_size) // https://stats.stackexchange.com/questions/258879/how-to-interpret-margin-of-error-in-a-non-binary-poll - return getProbabilityAboveX(0.5, candidate_p, std) -} - -func getChanceRepublicanWinFromPoll(poll Poll, pretty_print bool) float64 { - - biden_percentage, biden_exists := poll.PollResults["Biden"] - trump_percentage, trump_exists := poll.PollResults["Trump"] - if !biden_exists || !trump_exists { - panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump") - } - biden_share := biden_percentage / 100.0 // will panic if the item is not found, but we've previously filtered for it - trump_share := trump_percentage / 100.0 - - normalized_trump_share := trump_share / (trump_share + biden_share) - normalized_biden_share := biden_share / (trump_share + biden_share) - - joint_trump_biden_sample_size := (biden_share + trump_share) * float64(poll.SampleSize) - std_error_poll_mean := math.Sqrt((normalized_trump_share * normalized_biden_share) / joint_trump_biden_sample_size) - - p_republican_win := getProbabilityAboveX(0.5, normalized_trump_share, std_error_poll_mean) - - if pretty_print { - fmt.Printf("\n\t\tSample size: %f", joint_trump_biden_sample_size) - fmt.Printf("\n\t\tMean R: %f", 100.0*normalized_trump_share) - fmt.Printf("\n\t\tStd of mean R: %f", 100*std_error_poll_mean) - fmt.Printf("\n\t\tPoll says chance of R win: %f", p_republican_win) - } - return p_republican_win - -} - -func getChanceRepublicanWinFromPollPlusUncertainty(poll Poll, state State, pretty_print bool) float64 { - - // Uncertainty from the state - n_republican_win := 0 - for _, party := range state.PresidentialElectoralHistory { - if party == "R" { - n_republican_win++ - } - } - - // Get the uncertainty from the poll - biden_percentage, biden_exists := poll.PollResults["Biden"] - trump_percentage, trump_exists := poll.PollResults["Trump"] - if !biden_exists || !trump_exists { - panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump") - } - biden_share := biden_percentage / 100.0 // will panic if the item is not found, but we've previously filtered for it - trump_share := trump_percentage / 100.0 - - normalized_trump_share := trump_share / (trump_share + biden_share) - normalized_biden_share := biden_share / (trump_share + biden_share) - - joint_trump_biden_sample_size := (biden_share + trump_share) * float64(poll.SampleSize) - std_error_poll_mean := math.Sqrt((normalized_trump_share * normalized_biden_share) / joint_trump_biden_sample_size) - - /* Inject additional uncertainty */ - /* - Possible factors: - - National drift between now and the election (biggest one) - - States more uncertain than the national average - - Idiosyncratic factors - - Polls not being as good as gallup - - Increased polarization - Also note that the polls already have some error already - */ - std_additional_uncertainty := 5.0 / 100.0 - - if n_republican_win == 0 || n_republican_win == 6 { - // if solid states for the last 6 elections - std_additional_uncertainty = std_additional_uncertainty / 3.0 - - if pretty_print { - fmt.Printf("\n\t\tN republican wins: %d", n_republican_win) - fmt.Printf("\n\t\t=> Reducing additional uncertainty") - } - - } - - std_error := std_error_poll_mean + std_additional_uncertainty - - // std_error := std_error_poll_mean + 0.065 - p_republican_win := getProbabilityAboveX(0.5, normalized_trump_share, std_error) - - if pretty_print { - fmt.Printf("\n\t\tStd with std_additional_uncertainty R: %f", 100*std_error) - fmt.Printf("\n\t\tPoll plus uncertainty says chance of R win: %f", p_republican_win) - } - return p_republican_win - -} - -/* Print state by state data */ -func printStates(states []State) { - for _, state := range states { - fmt.Printf("\n\nState: %s", state.Name) - fmt.Printf("\n\tVotes: %d", state.Votes) - fmt.Printf("\n\tHistory: %s", state.PresidentialElectoralHistory) - - p_baserate_republican_win := 0.0 - for _, party := range state.PresidentialElectoralHistory { - if party == "R" { - p_baserate_republican_win++ - } - } - fmt.Printf("\n\tHistorical base rate of R win: %f", p_baserate_republican_win/float64(len(state.PresidentialElectoralHistory))) - - // Individual poll - for _, poll := range state.Polls { - fmt.Printf("\n\tPoll: %+v", poll) - _ = getChanceRepublicanWinFromPoll(poll, true) - _ = getChanceRepublicanWinFromPollPlusUncertainty(poll, state, true) - } - - // Aggregate poll - num_biden_votes := 0.0 - num_trump_votes := 0.0 - for _, poll := range state.Polls { - biden_percentage, biden_exists := poll.PollResults["Biden"] - trump_percentage, trump_exists := poll.PollResults["Trump"] - if !biden_exists || !trump_exists { - panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump") - } - num_biden_votes += (biden_percentage / 100.0) * float64(poll.SampleSize) - num_trump_votes += (trump_percentage / 100.0) * float64(poll.SampleSize) - } - aggregate_sample_size := num_biden_votes + num_trump_votes - if aggregate_sample_size != 0.0 { - var aggregate_poll = Poll{SampleSize: int(aggregate_sample_size), PollResults: make(map[string]float64)} - aggregate_poll.PollResults["Biden"] = 100.0 * num_biden_votes / aggregate_sample_size - aggregate_poll.PollResults["Trump"] = 100.0 * num_trump_votes / aggregate_sample_size - - fmt.Printf("\n\tAggregate poll: %+v", aggregate_poll) - _ = getChanceRepublicanWinFromPoll(aggregate_poll, true) - _ = getChanceRepublicanWinFromPollPlusUncertainty(aggregate_poll, state, true) - } - - } - -} - -/* Sample state by state */ -func sampleFromState(state State) VotesForEachParty { - switch state.Name { - case "Nebraska": - /* - 2000: R - 2004: R - 2008: Split, 1 D, 4 R - 2012: R - 2016: R - 2020: Split, 1 D, 4 R - */ - p_split := 2.0 / 6.0 - if r.Float64() < p_split { - return VotesForEachParty{Democrats: 1, Republicans: 4} - } else { - return VotesForEachParty{Democrats: 0, Republicans: 5} - } - case "Maine": - /* - 2000: D - 2004: D - 2008: D - 2012: D - 2016: Split: 3 D, 1 R - 2020: Split, 3 D, 1 R - */ - p_split := 2.0 / 6.0 - if r.Float64() < p_split { - return VotesForEachParty{Democrats: 3, Republicans: 1} - } else { - return VotesForEachParty{Democrats: 1, Republicans: 0} - } - default: - { - /* Consider the base rate for the state */ - p_baserate_republican_win := 0.0 - for _, party := range state.PresidentialElectoralHistory { - if party == "R" { - p_baserate_republican_win++ - } - } - p_baserate_republican_win = p_baserate_republican_win / float64(len(state.PresidentialElectoralHistory)) - p_republican_win := p_baserate_republican_win // if no polls - - /* Consider polls */ - num_biden_votes := 0.0 - num_trump_votes := 0.0 - for _, poll := range state.Polls { - biden_percentage, biden_exists := poll.PollResults["Biden"] - trump_percentage, trump_exists := poll.PollResults["Trump"] - if !biden_exists || !trump_exists { - panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump") - } - num_biden_votes += (biden_percentage / 100.0) * float64(poll.SampleSize) - num_trump_votes += (trump_percentage / 100.0) * float64(poll.SampleSize) - } - - aggregate_sample_size := num_biden_votes + num_trump_votes - if aggregate_sample_size != 0.0 { - var aggregate_poll = Poll{SampleSize: int(aggregate_sample_size), PollResults: make(map[string]float64)} - aggregate_poll.PollResults["Biden"] = 100.0 * num_biden_votes / aggregate_sample_size - aggregate_poll.PollResults["Trump"] = 100.0 * num_trump_votes / aggregate_sample_size - - p_republican_win_aggregate_polls := getChanceRepublicanWinFromPollPlusUncertainty(aggregate_poll, state, false) - // p_republican_win_aggregate_polls = getChanceRepublicanWinFromPoll(aggregate_poll, false) - - // weight_polls := 0.75 - // p_republican = weight_polls*p_republican_win_aggregate_polls + (1.0-weight_polls)*p_baserate_republican_win - p_republican_win = p_republican_win_aggregate_polls - } - - if r.Float64() < p_republican_win { - return VotesForEachParty{Democrats: 0, Republicans: state.Votes} - } else { - return VotesForEachParty{Democrats: state.Votes, Republicans: 0} - } - } - } -} - -/* Simulate election */ -func simulateElection(states []State) int { - - republican_seats := 0 - for _, state := range states { - election_sample := sampleFromState(state) - republican_seats += election_sample.Republicans - } - return republican_seats -} - -/* Histogram */ -func barString(n int) string { - str := "" - for i := 0; i < n; i++ { - str += "█" - } - return str -} - -func printElectoralCollegeHistogram(samples []int) { - - histogram := [538]int{} - for _, sample := range samples { - histogram[sample]++ - } - - max_count := 0 - for _, count := range histogram { - if count > max_count { - max_count = count - } - } - - cp := 0.0 - for i, count := range histogram { - bar_length := (count * 75) / max_count // Assuming max_count bar length is 50 characters - p := float64(count) / float64(len(samples)) * 100 - cp += p - - if i > 130 && i < 400 { - fmt.Printf("[ %2d, %4d): %s %.2f%% (%.0f%%)\n", i, i+1, barString(bar_length), p, cp) - } else if p >= 0.01 { - fmt.Printf(">0.01 probability outside of domain, you might want to change histogram parameters\n") - - } - } - -} - func main() { states, err := readStates() if err != nil { @@ -518,5 +507,4 @@ func main() { p_republicans = p_republicans / float64(n_sims) fmt.Printf("\n%% republicans: %f\n", p_republicans) - }