2024-election-modelling/main.go

package main

import (
	"encoding/csv"
	"errors"
	"fmt"
	"io"
	"math"
	rand "math/rand/v2"
	"os"
	"sort"
	"strconv"
	"time"
	// "strings"
)

/* Structs */
// State bundles the model inputs for one state as assembled by readStates.
//
// Name and Votes come from columns 0 and 1 of the electoral-college votes CSV.
// PresidentialElectoralHistory maps an election year (e.g. "2000") to the
// party value read from that year's results CSV; sampleFromState counts the
// entries equal to "R". Polls holds the polls that readStates grouped under
// this state's name.
type State struct {
	Name                         string
	Votes                        int
	PresidentialElectoralHistory map[string]string
	Polls                        []Poll
}

// VotesForEachParty is the result of sampling one state: the number of
// electoral votes awarded to each party. simulateElection sums the
// Republicans field across states.
type VotesForEachParty struct {
	Democrats   int
	Republicans int
}

// Poll is one poll of one state, assembled by readStates from the rows of the
// polls CSV that share a poll id.
//
// PollResults maps a candidate name to the percentage parsed from the CSV;
// sampleFromState divides these values by 100, so they are on a 0-100 scale.
// Date is the poll's end date. Partisan is copied verbatim from the CSV; its
// meaning is not used anywhere in this file.
type Poll struct {
	PollId      string
	SampleSize  int
	PollResults map[string]float64
	Date        time.Time
	Partisan    string
}

/* Globals */
var r = rand.New(rand.NewPCG(uint64(100), uint64(2224)))
var dev = true

/* Load data from csvs */
// readStates loads every model input from the CSV files under data/ and
// returns one State per usable row of the electoral-college votes file,
// sorted by state name.
//
// Three sources are combined:
//   - data/electoral-college-votes.csv: state name (column 0) and number of
//     electoral votes (column 1). Rows whose vote count is not an integer are
//     skipped, and only the first row seen for a state name is kept.
//   - data/results/<year>.csv for 2000, 2004, ..., 2020: state name (column 0)
//     and party (column 1), stored in PresidentialElectoralHistory under the
//     year.
//   - data/polls/president_polls_state.csv: one row per poll and candidate,
//     grouped into one Poll per (state, poll id).
//
// Result rows and polls that mention a state absent from the votes file are
// ignored.
//
// The returned slice and each state's Polls are sorted (by Name and PollId
// respectively). Map iteration order is random in Go, so without sorting the
// fixed-seed random source would be consumed in a different state order on
// every run and simulations would not be reproducible.
//
// An error is returned when a file cannot be opened, its header cannot be
// read, it has fewer columns than the loader indexes, or a row cannot be
// parsed as CSV.
func readStates() ([]State, error) {
	states := make(map[string]State)

	/* Electoral college votes for the 2024 election */
	// Alternative input file: "data/electoral-college-votes-2010-census.csv"
	err := forEachCSVRecord("data/electoral-college-votes.csv", "votes file", 2, func(record []string) {
		votes, err := strconv.Atoi(record[1])
		if err != nil {
			return // Vote count is not an integer, skip this record
		}
		name := record[0]
		if _, exists := states[name]; !exists {
			states[name] = State{Name: name, Votes: votes, PresidentialElectoralHistory: make(map[string]string)}
		}
	})
	if err != nil {
		return nil, err
	}

	/* Election results */
	for _, year := range []string{"2000", "2004", "2008", "2012", "2016", "2020"} {
		path := fmt.Sprintf("data/results/%s.csv", year)
		desc := fmt.Sprintf("results file for %s", year)
		err := forEachCSVRecord(path, desc, 2, func(record []string) {
			name, party := record[0], record[1]
			// The history map is shared with the State stored in states, so
			// writing through the copy is enough. Unknown states are skipped.
			if state, exists := states[name]; exists {
				state.PresidentialElectoralHistory[year] = party
			}
		})
		if err != nil {
			return nil, err
		}
	}

	/* Read polls */
	// 0-based column indices into the polls CSV.
	const (
		pollIDColumn     = 0
		stateColumn      = 12
		endDateColumn    = 14
		sampleSizeColumn = 22
		partisanColumn   = 32
		candidateColumn  = 44
		percentageColumn = 47
		pollsMinColumns  = percentageColumn + 1

		dateLayout = "1/2/06" // month/day/two-digit year
	)

	// Poll results grouped by state name, then by poll ID.
	pollsByState := make(map[string]map[string]Poll)
	err = forEachCSVRecord("data/polls/president_polls_state.csv", "polls file", pollsMinColumns, func(record []string) {
		pollID := record[pollIDColumn]
		stateName := record[stateColumn]

		endDate, err := time.Parse(dateLayout, record[endDateColumn])
		if err != nil {
			// The row is still kept; the poll then carries the zero time and
			// will never count as recent.
			fmt.Println("Error parsing date: ", err)
		}

		sampleSize, err := strconv.Atoi(record[sampleSizeColumn])
		if err != nil {
			return // No usable sample size, skip this record
		}

		percentage, err := strconv.ParseFloat(record[percentageColumn], 64)
		if err != nil {
			fmt.Println("Error parsing percentage: ", err)
			return // No usable percentage, skip this record
		}

		statePolls, exists := pollsByState[stateName]
		if !exists {
			statePolls = make(map[string]Poll)
			pollsByState[stateName] = statePolls
		}

		// The first row seen for a poll fixes its sample size, date and
		// partisan value; later rows only add candidate percentages.
		poll, exists := statePolls[pollID]
		if !exists {
			poll = Poll{
				PollId:      pollID,
				SampleSize:  sampleSize,
				PollResults: make(map[string]float64),
				Date:        endDate,
				Partisan:    record[partisanColumn],
			}
		}
		poll.PollResults[record[candidateColumn]] = percentage
		statePolls[pollID] = poll
	})
	if err != nil {
		return nil, err
	}

	// Add the aggregated poll data to the respective states
	for stateName, polls := range pollsByState {
		state, exists := states[stateName]
		if !exists {
			continue // Polls for a name that is not in the votes file are dropped
		}
		pollList := make([]Poll, 0, len(polls))
		for _, poll := range polls {
			pollList = append(pollList, poll)
		}
		sort.Slice(pollList, func(i, j int) bool { return pollList[i].PollId < pollList[j].PollId })
		state.Polls = pollList
		states[stateName] = state // Not redundant: State is stored by value
	}

	// Convert the states map to a slice with a stable order
	statesSlice := make([]State, 0, len(states))
	for _, state := range states {
		statesSlice = append(statesSlice, state)
	}
	sort.Slice(statesSlice, func(i, j int) bool { return statesSlice[i].Name < statesSlice[j].Name })

	return statesSlice, nil
}

// forEachCSVRecord opens the CSV file at path, skips its header row and calls
// handle once for every following record.
//
// desc names the file in error messages. minFields is the number of columns
// handle indexes into: the header must have at least that many. The CSV
// reader requires every later row to have the same number of fields as the
// header, so handle can index record[0] through record[minFields-1] safely.
// The file is closed before returning, on success and on error alike.
func forEachCSVRecord(path, desc string, minFields int, handle func(record []string)) error {
	file, err := os.Open(path)
	if err != nil {
		return fmt.Errorf("error opening the %s: %w", desc, err)
	}
	defer file.Close()

	reader := csv.NewReader(file)
	header, err := reader.Read()
	if err != nil {
		return fmt.Errorf("error reading the header of the %s: %w", desc, err)
	}
	if len(header) < minFields {
		return fmt.Errorf("the %s has %d columns, expected at least %d", desc, len(header), minFields)
	}

	for {
		record, err := reader.Read()
		if errors.Is(err, io.EOF) {
			return nil
		}
		if err != nil {
			// A malformed row used to end the loop silently, truncating the
			// data; surface it instead.
			return fmt.Errorf("error reading the %s: %w", desc, err)
		}
		handle(record)
	}
}

/* Sampling helper functions */
// getNormalCDF returns P(X <= x) for a normal random variable X with the
// given mean and standard deviation, computed through the error function:
// (1 + erf((x - mean) / (std * sqrt(2)))) / 2.
func getNormalCDF(x float64, mean float64, std float64) float64 {
	z := (x - mean) / (std * math.Sqrt2)
	return 0.5 * (1 + math.Erf(z))
}

// getProbabilityAboveX returns P(X > x) for a normal random variable X with
// the given mean and standard deviation, i.e. the complement of getNormalCDF.
func getProbabilityAboveX(x float64, mean float64, std float64) float64 {
	below := getNormalCDF(x, mean, std)
	return 1 - below
}

// getChanceCandidateWinsFromPoll returns the probability that a candidate's
// share exceeds 0.5, modelling the share as normal around the polled share
// candidate_p (a fraction, not a percentage) with standard deviation
// sqrt(p * (1 - p) / n), where n is poll_sample_size.
//
// See https://stats.stackexchange.com/questions/258879/how-to-interpret-margin-of-error-in-a-non-binary-poll
func getChanceCandidateWinsFromPoll(candidate_p float64, poll_sample_size float64) float64 {
	variance := candidate_p * (1 - candidate_p) / poll_sample_size
	return getProbabilityAboveX(0.5, candidate_p, math.Sqrt(variance))
}

/* Sample state by state */
// sampleFromState draws one simulated outcome for a state and returns how its
// electoral votes are allocated between the two parties. Exactly one number
// is drawn from the shared random source r per call.
//
// Nebraska and Maine can split their votes; each uses a fixed split
// probability taken from how often it split in the six elections 2000-2020.
//
// For every other state the winner takes all of state.Votes. The chance of a
// Republican win is the historical base rate (share of "R" entries in
// PresidentialElectoralHistory). When there are polls from the last 30 days
// containing both a "Biden" and a "Trump" result, those polls are pooled and
// the final probability is 0.75 * poll-based chance + 0.25 * base rate.
//
// When dev is true the intermediate figures are printed.
func sampleFromState(state State) VotesForEachParty {
	if dev {
		fmt.Printf("\n\nState: %s", state.Name)
		fmt.Printf("\n\tVotes: %d", state.Votes)
		fmt.Printf("\n\tHistory: %s", state.PresidentialElectoralHistory)
		// fmt.Printf("\n\tPolls: %s", state.Polls)
	}

	switch state.Name {
	case "Nebraska":
		/*
			2000: R
			2004: R
			2008: Split, 1 D, 4 R
			2012: R
			2016: R
			2020: Split, 1 D, 4 R
		*/
		pSplit := 2.0 / 6.0
		if r.Float64() < pSplit {
			return VotesForEachParty{Democrats: 1, Republicans: 4}
		}
		return VotesForEachParty{Democrats: 0, Republicans: 5}
	case "Maine":
		/*
			2000: D
			2004: D
			2008: D
			2012: D
			2016: Split: 3 D, 1 R
			2020: Split, 3 D, 1 R
		*/
		pSplit := 2.0 / 6.0
		if r.Float64() < pSplit {
			return VotesForEachParty{Democrats: 3, Republicans: 1}
		}
		// Unsplit, all 4 votes go to the Democrats (the split case also adds
		// up to 4). This previously returned only 1 vote.
		return VotesForEachParty{Democrats: 4, Republicans: 0}
	}

	/* Just considering the base rate for the state */
	// With no history loaded the ratio would be 0/0 = NaN, and every
	// comparison against NaN is false, which silently hands the state to the
	// Democrats. Fall back to an uninformative 0.5 instead.
	pBaseRateRepublican := 0.5
	if elections := len(state.PresidentialElectoralHistory); elections > 0 {
		republicanWins := 0
		for _, party := range state.PresidentialElectoralHistory {
			if party == "R" {
				republicanWins++
			}
		}
		pBaseRateRepublican = float64(republicanWins) / float64(elections)
	}
	pRepublican := pBaseRateRepublican // if no polls

	/* Considering polls */
	cutoff := time.Now().AddDate(0, 0, -30) // only polls dated after this count
	numBidenVotes := 0.0
	numTrumpVotes := 0.0
	for _, poll := range state.Polls {
		if !poll.Date.After(cutoff) {
			continue
		}
		bidenPercentage, hasBiden := poll.PollResults["Biden"]
		trumpPercentage, hasTrump := poll.PollResults["Trump"]
		if !hasBiden || !hasTrump {
			continue
		}

		// Convert percentages into respondent counts so that polls are
		// weighted by their sample size when pooled.
		bidenShare := bidenPercentage / 100
		trumpShare := trumpPercentage / 100
		sampleSize := float64(poll.SampleSize)
		pollBidenVotes := bidenShare * sampleSize
		pollTrumpVotes := trumpShare * sampleSize

		if dev {
			// Per-poll estimate, for debugging only: two-way share with a
			// binomial standard error over the Biden+Trump respondents.
			jointSampleSize := pollBidenVotes + pollTrumpVotes
			normalizedTrumpShare := trumpShare / (trumpShare + bidenShare)
			normalizedBidenShare := bidenShare / (trumpShare + bidenShare)
			stdPoll := math.Sqrt((normalizedTrumpShare * normalizedBidenShare) / jointSampleSize)
			pTrumpMoreVotes := getProbabilityAboveX(0.5, normalizedTrumpShare, stdPoll)
			fmt.Printf("\n\tPoll: %+v", poll)
			fmt.Printf("\n\t\tPoll says chance of R win: %f", pTrumpMoreVotes)
		}

		// Update general tally
		numBidenVotes += pollBidenVotes
		numTrumpVotes += pollTrumpVotes
	}

	totalSampleSize := numBidenVotes + numTrumpVotes
	if totalSampleSize > 0 {
		aggregateTrumpShare := numTrumpVotes / totalSampleSize
		aggregateBidenShare := numBidenVotes / totalSampleSize
		stdAllPolls := math.Sqrt((aggregateTrumpShare * aggregateBidenShare) / totalSampleSize)

		pRepublicanAccordingToPolls := getProbabilityAboveX(0.5, aggregateTrumpShare, stdAllPolls)
		if dev {
			fmt.Printf("\n\tAggregating all polls naïvely says chance of R win: %f", pRepublicanAccordingToPolls)
		}
		pRepublican = 0.75*pRepublicanAccordingToPolls + 0.25*pBaseRateRepublican
	}

	if dev {
		fmt.Printf("\n\tHistorical base rate: %f", pBaseRateRepublican)
	}
	if r.Float64() < pRepublican {
		return VotesForEachParty{Democrats: 0, Republicans: state.Votes}
	}
	return VotesForEachParty{Democrats: state.Votes, Republicans: 0}
}

// simulateElection samples every state once, adds up the electoral votes won
// by the Republicans and returns 1 if that total is at least 270, otherwise 0.
// When dev is true the Republican total is printed.
func simulateElection(states []State) int {
	var republicanTotal int
	for i := range states {
		republicanTotal += sampleFromState(states[i]).Republicans
	}

	if dev {
		fmt.Printf("\n\n(%d) ", republicanTotal)
	}

	if republicanTotal < 270 {
		return 0
	}
	return 1
}

// main loads the state data, runs a fixed number of simulated elections and
// prints the fraction (between 0 and 1) that the Republicans won. If the data
// cannot be loaded the error is printed and the program returns.
func main() {
	states, err := readStates()
	if err != nil {
		fmt.Println("Error:", err)
		return
	}

	const simulations = 10

	republicanWins := 0
	for sim := 0; sim < simulations; sim++ {
		outcome := simulateElection(states)
		if dev {
			fmt.Printf("Election result: %d", outcome)
		}
		if outcome == 1 {
			republicanWins++
		}
	}

	fraction := float64(republicanWins) / float64(simulations)
	fmt.Printf("\n\n\n%% republicans: %f\n", fraction)
}