diff --git a/main.go b/main.go index d243e61..9916ee2 100644 --- a/main.go +++ b/main.go @@ -163,6 +163,7 @@ func readStates() ([]State, error) { // Add the aggregated poll data to the respective states for state_name, polls := range state_polls_map { + // Filter polls by recency and by having both Biden and Trump var recent_polls []Poll for _, poll := range polls { if poll.Date.After(time.Now().AddDate(0, 0, -30)) { @@ -218,19 +219,52 @@ func getProbabilityAboveX(x float64, mean float64, std float64) float64 { return 1 - getNormalCDF(x, mean, std) } -func getChanceCandidateWinsFromPoll(candidate_p float64, poll_sample_size float64) float64 { +func getChanceCandidateWinsFromPollShare(candidate_p float64, poll_sample_size float64) float64 { std := math.Sqrt(candidate_p * (1 - candidate_p) / poll_sample_size) // https://stats.stackexchange.com/questions/258879/how-to-interpret-margin-of-error-in-a-non-binary-poll return getProbabilityAboveX(0.5, candidate_p, std) } -/* Sample state by state */ -func sampleFromState(state State) VotesForEachParty { - if dev { +func getChanceRepublicanWinFromPoll(poll Poll) float64 { + + biden_percentage, biden_exists := poll.PollResults["Biden"] + trump_percentage, trump_exists := poll.PollResults["Trump"] + if !biden_exists || !trump_exists { + panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump") + } + biden_share := biden_percentage / 100.0 // will panic if the item is not found, but we've previously filtered for it + trump_share := trump_percentage / 100.0 + + normalized_trump_share := trump_share / (trump_share + biden_share) + normalized_biden_share := biden_share / (trump_share + biden_share) + + joint_trump_biden_sample_size := (biden_share + trump_share) * float64(poll.SampleSize) + std_error_poll_mean := math.Sqrt((normalized_trump_share * normalized_biden_share) / joint_trump_biden_sample_size) + + p_republican_win := getProbabilityAboveX(0.5, normalized_trump_share, std_error_poll_mean) + return p_republican_win + +} + +/* Print state by state data */ +func printStates(states []State) { + for _, state := range states { fmt.Printf("\n\nState: %s", state.Name) fmt.Printf("\n\tVotes: %d", state.Votes) fmt.Printf("\n\tHistory: %s", state.PresidentialElectoralHistory) // fmt.Printf("\n\tPolls: %s", state.Polls) + + for _, poll := range state.Polls { + p_republican_win_poll := getChanceRepublicanWinFromPoll(poll) + fmt.Printf("\n\tPoll: %+v", poll) + fmt.Printf("\n\t\tPoll says chance of R win: %f", p_republican_win_poll) + } + } + +} + +/* Sample state by state */ +func sampleFromState(state State) VotesForEachParty { switch state.Name { case "Nebraska": /* @@ -277,48 +311,31 @@ func sampleFromState(state State) VotesForEachParty { /* Consider polls */ num_biden_votes := 0.0 num_trump_votes := 0.0 - for _, recent_biden_trump_poll := range state.Polls { - biden_share := 0.0 - trump_share := 0.0 - for candidate_name, candidate_percentage := range recent_biden_trump_poll.PollResults { - if candidate_name == "Biden" { - biden_share = candidate_percentage / 100 - } else if candidate_name == "Trump" { - trump_share = candidate_percentage / 100 - } - } - sample_size := float64(recent_biden_trump_poll.SampleSize) - poll_biden_votes := biden_share * sample_size - poll_trump_votes := trump_share * sample_size - joint_trump_biden_sample_size := poll_biden_votes + poll_trump_votes - normalized_trump_share := trump_share / (trump_share + biden_share) - normalized_biden_share := biden_share / (trump_share + biden_share) - std_poll := math.Sqrt((normalized_trump_share * normalized_biden_share) / joint_trump_biden_sample_size) + for _, poll := range state.Polls { + // p_republican_win_poll = getChanceRepublicanWinFromPoll(poll) - p_trump_more_votes := getProbabilityAboveX(0.5, normalized_trump_share, std_poll) - if dev { - fmt.Printf("\n\tPoll: %+v", recent_biden_trump_poll) - fmt.Printf("\n\t\tPoll says chance of R win: %f", p_trump_more_votes) + biden_percentage, biden_exists := poll.PollResults["Biden"] + trump_percentage, trump_exists := poll.PollResults["Trump"] + if !biden_exists || !trump_exists { + panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump") } - - // Update general tally - num_biden_votes += poll_biden_votes - num_trump_votes += poll_trump_votes + num_biden_votes += (biden_percentage / 100.0) * float64(poll.SampleSize) + num_trump_votes += (trump_percentage / 100.0) * float64(poll.SampleSize) } - total_sample_size := num_biden_votes + num_trump_votes - if total_sample_size != 0.0 { + + aggregate_sample_size := num_biden_votes + num_trump_votes + if aggregate_sample_size != 0.0 { aggregate_trump_share := num_trump_votes / (num_trump_votes + num_biden_votes) aggregate_biden_share := num_biden_votes / (num_trump_votes + num_biden_votes) - std_all_polls := math.Sqrt((aggregate_trump_share * aggregate_biden_share) / total_sample_size) - - p_republican_according_to_polls := getProbabilityAboveX(0.5, aggregate_trump_share, std_all_polls) + std_mean_aggregate_polls := math.Sqrt((aggregate_trump_share * aggregate_biden_share) / aggregate_sample_size) + p_republican_aggregate_polls := getProbabilityAboveX(0.5, aggregate_trump_share, std_mean_aggregate_polls) if dev { - fmt.Printf("\n\tAggregating all polls naïvely says chance of R win: %f", p_republican_according_to_polls) + fmt.Printf("\n\tAggregating all polls naïvely says chance of R win: %f", p_republican_aggregate_polls) } weight_polls := 1.0 - p_republican = weight_polls*p_republican_according_to_polls + (1.0-weight_polls)*p_baserate_republican + p_republican = weight_polls*p_republican_aggregate_polls + (1.0-weight_polls)*p_baserate_republican }