package main import ( "encoding/csv" "fmt" "math" rand "math/rand/v2" "os" "strconv" "time" // "strings" ) /* Structs */ type State struct { Name string Votes int PresidentialElectoralHistory map[string]string Polls []Poll } type VotesForEachParty struct { Democrats int Republicans int } type Poll struct { PollId string SampleSize int PollResults map[string]float64 Date time.Time Partisan string } /* Globals */ var r = rand.New(rand.NewPCG(uint64(100), uint64(2224))) var dem_nominee_name = "Biden" var rep_nominee_name = "Trump" /* Sampling helper functions */ func getNormalCDF(x float64, mean float64, std float64) float64 { erf_term := (x - mean) / (std * math.Sqrt2) return (1 + math.Erf(erf_term)) / 2 } func getProbabilityAboveX(x float64, mean float64, std float64) float64 { return 1 - getNormalCDF(x, mean, std) } func getChanceCandidateWinsFromPollShare(candidate_p float64, poll_sample_size float64) float64 { std := math.Sqrt(candidate_p * (1 - candidate_p) / poll_sample_size) // https://stats.stackexchange.com/questions/258879/how-to-interpret-margin-of-error-in-a-non-binary-poll return getProbabilityAboveX(0.5, candidate_p, std) } func getChanceRepublicanWinFromPoll(poll Poll, pretty_print bool) float64 { biden_percentage, biden_exists := poll.PollResults[dem_nominee_name] trump_percentage, trump_exists := poll.PollResults[rep_nominee_name] if !biden_exists || !trump_exists { panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump") } biden_share := biden_percentage / 100.0 // will panic if the item is not found, but we've previously filtered for it trump_share := trump_percentage / 100.0 normalized_trump_share := trump_share / (trump_share + biden_share) normalized_biden_share := biden_share / (trump_share + biden_share) joint_trump_biden_sample_size := (biden_share + trump_share) * float64(poll.SampleSize) std_error_poll_mean := math.Sqrt((normalized_trump_share * normalized_biden_share) / joint_trump_biden_sample_size) p_republican_win := getProbabilityAboveX(0.5, normalized_trump_share, std_error_poll_mean) if pretty_print { fmt.Printf("\n\t\tSample size: %f", joint_trump_biden_sample_size) fmt.Printf("\n\t\tMean R: %f", 100.0*normalized_trump_share) fmt.Printf("\n\t\tStd of mean R: %f", 100*std_error_poll_mean) fmt.Printf("\n\t\tPoll says chance of R win: %f", p_republican_win) } return p_republican_win } func getChanceRepublicanWinFromPollPlusUncertainty(poll Poll, state State, pretty_print bool) float64 { // Uncertainty from the state n_republican_win := 0 for _, party := range state.PresidentialElectoralHistory { if party == "R" { n_republican_win++ } } // Get the uncertainty from the poll biden_percentage, biden_exists := poll.PollResults[dem_nominee_name] trump_percentage, trump_exists := poll.PollResults[rep_nominee_name] if !biden_exists || !trump_exists { panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump") } biden_share := biden_percentage / 100.0 // will panic if the item is not found, but we've previously filtered for it trump_share := trump_percentage / 100.0 normalized_trump_share := trump_share / (trump_share + biden_share) normalized_biden_share := biden_share / (trump_share + biden_share) joint_trump_biden_sample_size := (biden_share + trump_share) * float64(poll.SampleSize) std_error_poll_mean := math.Sqrt((normalized_trump_share * normalized_biden_share) / joint_trump_biden_sample_size) /* Inject additional uncertainty */ /* Possible factors: - National drift between now and the election (biggest one) - States more uncertain than the national average - Idiosyncratic factors - Polls not being as good as gallup - Increased polarization Also note that the polls already have some error already */ std_additional_uncertainty := 4.0 / 100.0 if n_republican_win == 0 || n_republican_win == 6 { // if solid states for the last 6 elections std_additional_uncertainty = std_additional_uncertainty / 3.0 if pretty_print { fmt.Printf("\n\t\tN republican wins: %d", n_republican_win) fmt.Printf("\n\t\t=> Reducing additional uncertainty") } } std_error := std_error_poll_mean + std_additional_uncertainty // std_error := std_error_poll_mean + 0.065 p_republican_win := getProbabilityAboveX(0.5, normalized_trump_share, std_error) if pretty_print { fmt.Printf("\n\t\tStd with std_additional_uncertainty R: %f", 100*std_error) fmt.Printf("\n\t\tPoll plus uncertainty says chance of R win: %f", p_republican_win) } return p_republican_win } /* Print state by state data */ func printStates(states []State) { for _, state := range states { fmt.Printf("\n\nState: %s", state.Name) fmt.Printf("\n\tVotes: %d", state.Votes) fmt.Printf("\n\tHistory: %s", state.PresidentialElectoralHistory) p_baserate_republican_win := 0.0 for _, party := range state.PresidentialElectoralHistory { if party == "R" { p_baserate_republican_win++ } } fmt.Printf("\n\tHistorical base rate of R win: %f", p_baserate_republican_win/float64(len(state.PresidentialElectoralHistory))) // Individual poll for _, poll := range state.Polls { fmt.Printf("\n\tPoll: %+v", poll) _ = getChanceRepublicanWinFromPoll(poll, true) _ = getChanceRepublicanWinFromPollPlusUncertainty(poll, state, true) } // Aggregate poll num_biden_votes := 0.0 num_trump_votes := 0.0 for _, poll := range state.Polls { biden_percentage, biden_exists := poll.PollResults[dem_nominee_name] trump_percentage, trump_exists := poll.PollResults[rep_nominee_name] if !biden_exists || !trump_exists { panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump") } num_biden_votes += (biden_percentage / 100.0) * float64(poll.SampleSize) num_trump_votes += (trump_percentage / 100.0) * float64(poll.SampleSize) } aggregate_sample_size := num_biden_votes + num_trump_votes if aggregate_sample_size != 0.0 { var aggregate_poll = Poll{SampleSize: int(aggregate_sample_size), PollResults: make(map[string]float64)} aggregate_poll.PollResults[dem_nominee_name] = 100.0 * num_biden_votes / aggregate_sample_size aggregate_poll.PollResults[rep_nominee_name] = 100.0 * num_trump_votes / aggregate_sample_size fmt.Printf("\n\tAggregate poll: %+v", aggregate_poll) _ = getChanceRepublicanWinFromPoll(aggregate_poll, true) _ = getChanceRepublicanWinFromPollPlusUncertainty(aggregate_poll, state, true) } } } /* Sample state by state */ func sampleFromState(state State) VotesForEachParty { switch state.Name { case "Nebraska": /* 2000: R 2004: R 2008: Split, 1 D, 4 R 2012: R 2016: R 2020: Split, 1 D, 4 R */ p_split := 2.0 / 6.0 if r.Float64() < p_split { return VotesForEachParty{Democrats: 1, Republicans: 4} } else { return VotesForEachParty{Democrats: 0, Republicans: 5} } case "Maine": /* 2000: D 2004: D 2008: D 2012: D 2016: Split: 3 D, 1 R 2020: Split, 3 D, 1 R */ p_split := 2.0 / 6.0 if r.Float64() < p_split { return VotesForEachParty{Democrats: 3, Republicans: 1} } else { return VotesForEachParty{Democrats: 1, Republicans: 0} } default: { /* Consider the base rate for the state */ p_baserate_republican_win := 0.0 for _, party := range state.PresidentialElectoralHistory { if party == "R" { p_baserate_republican_win++ } } p_baserate_republican_win = p_baserate_republican_win / float64(len(state.PresidentialElectoralHistory)) p_republican_win := p_baserate_republican_win // if no polls /* Consider polls */ num_biden_votes := 0.0 num_trump_votes := 0.0 for _, poll := range state.Polls { biden_percentage, biden_exists := poll.PollResults[dem_nominee_name] trump_percentage, trump_exists := poll.PollResults[rep_nominee_name] if !biden_exists || !trump_exists { panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump") } num_biden_votes += (biden_percentage / 100.0) * float64(poll.SampleSize) num_trump_votes += (trump_percentage / 100.0) * float64(poll.SampleSize) } aggregate_sample_size := num_biden_votes + num_trump_votes if aggregate_sample_size != 0.0 { var aggregate_poll = Poll{SampleSize: int(aggregate_sample_size), PollResults: make(map[string]float64)} aggregate_poll.PollResults[dem_nominee_name] = 100.0 * num_biden_votes / aggregate_sample_size aggregate_poll.PollResults[rep_nominee_name] = 100.0 * num_trump_votes / aggregate_sample_size p_republican_win_aggregate_polls := getChanceRepublicanWinFromPollPlusUncertainty(aggregate_poll, state, false) // p_republican_win_aggregate_polls = getChanceRepublicanWinFromPoll(aggregate_poll, false) weight_polls := 0.75 p_republican_win = weight_polls*p_republican_win_aggregate_polls + (1.0-weight_polls)*p_baserate_republican_win // p_republican_win = p_republican_win_aggregate_polls } if r.Float64() < p_republican_win { return VotesForEachParty{Democrats: 0, Republicans: state.Votes} } else { return VotesForEachParty{Democrats: state.Votes, Republicans: 0} } } } } /* Simulate election */ func simulateElection(states []State) int { republican_seats := 0 for _, state := range states { election_sample := sampleFromState(state) republican_seats += election_sample.Republicans } return republican_seats } /* Histogram */ func barString(n int) string { str := "" for i := 0; i < n; i++ { str += "█" } return str } func printElectoralCollegeHistogram(samples []int) { histogram := [538]int{} for _, sample := range samples { histogram[sample]++ } max_count := 0 for _, count := range histogram { if count > max_count { max_count = count } } cp := 0.0 for i, count := range histogram { bar_length := (count * 100) / max_count // Assuming max_count bar length is 50 characters. 75. p := float64(count) / float64(len(samples)) * 100 cp += p if i > 130 && i < 538 { fmt.Printf("[ %2d, %4d): %s %.2f%% (%.0f%%)\n", i, i+1, barString(bar_length), p, cp) } else if p >= 0.01 { fmt.Printf(">0.01 probability outside of domain, you might want to change histogram parameters\n") } } } /* Load data from csvs */ // Glue code func readStates() ([]State, error) { var states map[string]State = make(map[string]State) /* Electoral college votes for the 2024 election*/ votes_file, err := os.Open("data/num-electors/electoral-college-votes.csv") // votes_file, err := os.Open("data/electoral-college-votes-2010-census.csv") if err != nil { return nil, fmt.Errorf("error opening the votes file: %v", err) } defer votes_file.Close() votes_reader := csv.NewReader(votes_file) if _, err := votes_reader.Read(); err != nil { // Skip header return nil, fmt.Errorf("error reading votes header: %v", err) } for { csv_record, err := votes_reader.Read() if err != nil { break // EOF or an error } votes, err := strconv.Atoi(csv_record[1]) if err != nil { continue // Error in converting votes, skip this record } state := csv_record[0] if _, exists := states[state]; !exists { states[state] = State{Name: state, Votes: votes, PresidentialElectoralHistory: make(map[string]string)} } } /* Election results */ var years = []string{"2000", "2004", "2008", "2012", "2016", "2020"} for _, year := range years { electoral_history_filename := fmt.Sprintf("data/electoral-history/%s.csv", year) electoral_history_file, err := os.Open(electoral_history_filename) if err != nil { return nil, fmt.Errorf("error opening the electoral_history file for %s: %v", year, err) } electoral_history_reader := csv.NewReader(electoral_history_file) if _, err := electoral_history_reader.Read(); err != nil { // Skip header return nil, fmt.Errorf("error reading electoral_history header for %s: %v", year, err) } for { record, err := electoral_history_reader.Read() if err != nil { break // EOF or an error } state, party := record[0], record[1] data, exists := states[state] if !exists { continue // State not found in votes map, skip } // Update the party winning in the specific year data.PresidentialElectoralHistory[year] = party states[state] = data } electoral_history_file.Close() } /* Read polls */ polls_file, err := os.Open("data/polls/president_polls_state.csv") // Make sure to update this path if err != nil { return nil, fmt.Errorf("error opening the polls file: %v", err) } defer polls_file.Close() // Using a temporary map to group poll results by state and poll ID state_polls_map := make(map[string]map[string]Poll) polls_reader := csv.NewReader(polls_file) _, err = polls_reader.Read() // Skip the header if err != nil { return nil, fmt.Errorf("error reading polls header: %v", err) } for { record, err := polls_reader.Read() if err != nil { break // EOF or an error } poll_id := record[0] state_name := record[12] end_date := record[14] partisan := record[32] candidate_name := record[44] date_layout := "1/2/06" parsed_date, err := time.Parse(date_layout, end_date) if err != nil { fmt.Println("Error parsing date: ", err) } sample_size, err := strconv.Atoi(record[22]) if err != nil { continue // If error, skip this record } percentage, err := strconv.ParseFloat(record[47], 64) // percentage is in the 42nd column if err != nil { fmt.Printf("Error parsing percentage") continue // If error, skip this record } if _, exists := state_polls_map[state_name]; !exists { state_polls_map[state_name] = make(map[string]Poll) } poll, exists := state_polls_map[state_name][poll_id] if !exists { poll = Poll{ PollId: poll_id, SampleSize: sample_size, PollResults: make(map[string]float64), Date: parsed_date, Partisan: partisan, } } poll.PollResults[candidate_name] = percentage state_polls_map[state_name][poll_id] = poll } // Add the aggregated poll data to the respective states for state_name, polls := range state_polls_map { // Filter polls by recency and by having both Biden and Trump var recent_polls []Poll for _, poll := range polls { if poll.Date.After(time.Now().AddDate(0, 0, -30)) { recent_polls = append(recent_polls, poll) } } var recent_biden_trump_polls []Poll for _, recent_poll := range recent_polls { has_biden := false has_trump := false for candidate_name, _ := range recent_poll.PollResults { if candidate_name == dem_nominee_name { has_biden = true } else if candidate_name == rep_nominee_name { has_trump = true } } if has_biden && has_trump { recent_biden_trump_polls = append(recent_biden_trump_polls, recent_poll) } } if state, exists := states[state_name]; exists { state.Polls = recent_biden_trump_polls states[state_name] = state // Not redundant } else { // fmt.Printf("Encountered new state: %s\n", state_name) /* states[state_name] = State{ Name: state_name, Polls: polls_slice, } */ } } // Convert statesData map to a slice for returning var states_slice []State for _, state := range states { states_slice = append(states_slice, state) } return states_slice, nil } func main() { states, err := readStates() if err != nil { fmt.Println("Error:", err) return } n_sims := 1_000_000 printStates(states) fmt.Printf("\n\n") p_republicans := 0.0 results := make([]int, n_sims) for i := 0; i < n_sims; i++ { republican_seats := simulateElection(states) results[i] = republican_seats if republican_seats >= 270 { p_republicans++ } } printElectoralCollegeHistogram(results) p_republicans = p_republicans / float64(n_sims) fmt.Printf("\n%% republicans: %f\n", p_republicans) }