2024-election-modelling/main.go

420 lines
12 KiB
Go
Raw Normal View History

2024-04-13 14:22:28 +00:00
package main
import (
"encoding/csv"
2024-04-13 14:22:28 +00:00
"fmt"
"math"
2024-04-13 19:26:18 +00:00
rand "math/rand/v2"
"os"
2024-04-13 15:19:35 +00:00
"strconv"
2024-04-14 13:58:18 +00:00
"time"
2024-04-13 19:26:18 +00:00
// "strings"
2024-04-13 14:22:28 +00:00
)
2024-04-13 19:26:18 +00:00
/* Structs */
2024-04-13 18:44:00 +00:00
type State struct {
2024-04-14 13:58:18 +00:00
Name string
Votes int
PresidentialElectoralHistory map[string]string
Polls []Poll
}
2024-04-13 19:26:18 +00:00
type VotesForEachParty struct {
Democrats int
Republicans int
}
type Poll struct {
PollId string
SampleSize int
PollResults map[string]float64
2024-04-14 13:58:18 +00:00
Date time.Time
2024-04-14 16:03:27 +00:00
Partisan string
}
2024-04-13 19:26:18 +00:00
/* Globals */
2024-04-14 00:37:55 +00:00
var r = rand.New(rand.NewPCG(uint64(100), uint64(2224)))
2024-04-14 18:06:02 +00:00
var dev = false
2024-04-13 19:26:18 +00:00
/* Load data from csvs */
2024-04-13 18:44:00 +00:00
func readStates() ([]State, error) {
var states map[string]State = make(map[string]State)
2024-04-13 15:19:35 +00:00
2024-04-13 18:44:00 +00:00
/* Electoral college votes for the 2024 election*/
votes_file, err := os.Open("data/electoral-college-votes.csv")
// votes_file, err := os.Open("data/electoral-college-votes-2010-census.csv")
if err != nil {
2024-04-13 16:42:13 +00:00
return nil, fmt.Errorf("error opening the votes file: %v", err)
}
2024-04-13 18:55:57 +00:00
defer votes_file.Close()
votes_reader := csv.NewReader(votes_file)
if _, err := votes_reader.Read(); err != nil { // Skip header
2024-04-13 16:42:13 +00:00
return nil, fmt.Errorf("error reading votes header: %v", err)
2024-04-13 15:19:35 +00:00
}
for {
2024-04-13 18:55:57 +00:00
csv_record, err := votes_reader.Read()
if err != nil {
2024-04-13 16:42:13 +00:00
break // EOF or an error
}
2024-04-13 18:55:57 +00:00
votes, err := strconv.Atoi(csv_record[1])
2024-04-13 15:19:35 +00:00
if err != nil {
2024-04-13 16:42:13 +00:00
continue // Error in converting votes, skip this record
2024-04-13 15:19:35 +00:00
}
2024-04-13 18:55:57 +00:00
state := csv_record[0]
2024-04-13 18:44:00 +00:00
if _, exists := states[state]; !exists {
2024-04-14 13:58:18 +00:00
states[state] = State{Name: state, Votes: votes, PresidentialElectoralHistory: make(map[string]string)}
}
}
2024-04-13 16:42:13 +00:00
/* Election results */
var years = []string{"2000", "2004", "2008", "2012", "2016", "2020"}
2024-04-13 16:42:13 +00:00
for _, year := range years {
2024-04-13 18:55:57 +00:00
results_filename := fmt.Sprintf("data/results/%s.csv", year)
results_file, err := os.Open(results_filename)
2024-04-13 16:35:52 +00:00
if err != nil {
2024-04-13 16:42:13 +00:00
return nil, fmt.Errorf("error opening the results file for %s: %v", year, err)
}
2024-04-13 18:55:57 +00:00
resultsReader := csv.NewReader(results_file)
2024-04-13 16:42:13 +00:00
if _, err := resultsReader.Read(); err != nil { // Skip header
return nil, fmt.Errorf("error reading results header for %s: %v", year, err)
}
for {
record, err := resultsReader.Read()
if err != nil {
break // EOF or an error
}
state, party := record[0], record[1]
2024-04-13 18:44:00 +00:00
data, exists := states[state]
2024-04-13 16:42:13 +00:00
if !exists {
continue // State not found in votes map, skip
}
// Update the party winning in the specific year
2024-04-14 13:58:18 +00:00
data.PresidentialElectoralHistory[year] = party
2024-04-13 18:44:00 +00:00
states[state] = data
2024-04-13 16:35:52 +00:00
}
2024-04-14 00:37:55 +00:00
results_file.Close()
2024-04-13 16:35:52 +00:00
}
/* Read polls */
polls_file, err := os.Open("data/polls/president_polls_state.csv") // Make sure to update this path
if err != nil {
return nil, fmt.Errorf("error opening the polls file: %v", err)
}
defer polls_file.Close()
// Using a temporary map to group poll results by state and poll ID
state_polls_map := make(map[string]map[string]Poll)
polls_reader := csv.NewReader(polls_file)
_, err = polls_reader.Read() // Skip the header
if err != nil {
return nil, fmt.Errorf("error reading polls header: %v", err)
}
for {
record, err := polls_reader.Read()
if err != nil {
break // EOF or an error
}
2024-04-14 13:58:18 +00:00
poll_id := record[0]
2024-04-14 13:58:18 +00:00
state_name := record[12]
end_date := record[14]
2024-04-14 16:03:27 +00:00
partisan := record[32]
candidate_name := record[44]
2024-04-14 13:58:18 +00:00
date_layout := "1/2/06"
parsed_date, err := time.Parse(date_layout, end_date)
if err != nil {
fmt.Println("Error parsing date: ", err)
}
sample_size, err := strconv.Atoi(record[22])
if err != nil {
continue // If error, skip this record
}
2024-04-14 13:58:18 +00:00
percentage, err := strconv.ParseFloat(record[47], 64) // percentage is in the 42nd column
if err != nil {
fmt.Printf("Error parsing percentage")
continue // If error, skip this record
}
if _, exists := state_polls_map[state_name]; !exists {
state_polls_map[state_name] = make(map[string]Poll)
}
poll, exists := state_polls_map[state_name][poll_id]
if !exists {
poll = Poll{
PollId: poll_id,
2024-04-14 13:58:18 +00:00
SampleSize: sample_size,
PollResults: make(map[string]float64),
2024-04-14 13:58:18 +00:00
Date: parsed_date,
2024-04-14 16:03:27 +00:00
Partisan: partisan,
}
}
2024-04-14 13:58:18 +00:00
poll.PollResults[candidate_name] = percentage
state_polls_map[state_name][poll_id] = poll
}
// Add the aggregated poll data to the respective states
for state_name, polls := range state_polls_map {
2024-04-14 18:46:45 +00:00
// Filter polls by recency and by having both Biden and Trump
var recent_polls []Poll
for _, poll := range polls {
if poll.Date.After(time.Now().AddDate(0, 0, -30)) {
recent_polls = append(recent_polls, poll)
}
}
var recent_biden_trump_polls []Poll
for _, recent_poll := range recent_polls {
has_biden := false
has_trump := false
for candidate_name, _ := range recent_poll.PollResults {
if candidate_name == "Biden" {
has_biden = true
} else if candidate_name == "Trump" {
has_trump = true
}
}
if has_biden && has_trump {
recent_biden_trump_polls = append(recent_biden_trump_polls, recent_poll)
}
}
if state, exists := states[state_name]; exists {
state.Polls = recent_biden_trump_polls
states[state_name] = state // Not redundant
} else {
// fmt.Printf("Encountered new state: %s\n", state_name)
/*
states[state_name] = State{
Name: state_name,
Polls: polls_slice,
}
*/
}
}
2024-04-13 16:42:13 +00:00
// Convert statesData map to a slice for returning
var states_slice []State
for _, state := range states {
states_slice = append(states_slice, state)
2024-04-13 16:35:52 +00:00
}
return states_slice, nil
2024-04-13 16:35:52 +00:00
}
/* Sampling helper functions */
func getNormalCDF(x float64, mean float64, std float64) float64 {
erf_term := (x - mean) / (std * math.Sqrt2)
return (1 + math.Erf(erf_term)) / 2
}
func getProbabilityAboveX(x float64, mean float64, std float64) float64 {
return 1 - getNormalCDF(x, mean, std)
}
2024-04-14 18:46:45 +00:00
func getChanceCandidateWinsFromPollShare(candidate_p float64, poll_sample_size float64) float64 {
std := math.Sqrt(candidate_p * (1 - candidate_p) / poll_sample_size) // https://stats.stackexchange.com/questions/258879/how-to-interpret-margin-of-error-in-a-non-binary-poll
return getProbabilityAboveX(0.5, candidate_p, std)
}
2024-04-14 18:46:45 +00:00
func getChanceRepublicanWinFromPoll(poll Poll) float64 {
biden_percentage, biden_exists := poll.PollResults["Biden"]
trump_percentage, trump_exists := poll.PollResults["Trump"]
if !biden_exists || !trump_exists {
panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
}
biden_share := biden_percentage / 100.0 // will panic if the item is not found, but we've previously filtered for it
trump_share := trump_percentage / 100.0
normalized_trump_share := trump_share / (trump_share + biden_share)
normalized_biden_share := biden_share / (trump_share + biden_share)
joint_trump_biden_sample_size := (biden_share + trump_share) * float64(poll.SampleSize)
std_error_poll_mean := math.Sqrt((normalized_trump_share * normalized_biden_share) / joint_trump_biden_sample_size)
p_republican_win := getProbabilityAboveX(0.5, normalized_trump_share, std_error_poll_mean)
return p_republican_win
}
/* Print state by state data */
func printStates(states []State) {
for _, state := range states {
2024-04-14 16:03:27 +00:00
fmt.Printf("\n\nState: %s", state.Name)
fmt.Printf("\n\tVotes: %d", state.Votes)
fmt.Printf("\n\tHistory: %s", state.PresidentialElectoralHistory)
2024-04-14 18:46:45 +00:00
2024-04-14 19:07:21 +00:00
p_baserate_republican := 0.0
for _, party := range state.PresidentialElectoralHistory {
if party == "R" {
p_baserate_republican++
}
}
fmt.Printf("\n\tHistorical base rate of R win: %f", p_baserate_republican)
// Individual poll
2024-04-14 18:46:45 +00:00
for _, poll := range state.Polls {
p_republican_win_poll := getChanceRepublicanWinFromPoll(poll)
fmt.Printf("\n\tPoll: %+v", poll)
fmt.Printf("\n\t\tPoll says chance of R win: %f", p_republican_win_poll)
}
2024-04-14 19:07:21 +00:00
// Aggregate poll
num_biden_votes := 0.0
num_trump_votes := 0.0
for _, poll := range state.Polls {
biden_percentage, biden_exists := poll.PollResults["Biden"]
trump_percentage, trump_exists := poll.PollResults["Trump"]
if !biden_exists || !trump_exists {
panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
}
num_biden_votes += (biden_percentage / 100.0) * float64(poll.SampleSize)
num_trump_votes += (trump_percentage / 100.0) * float64(poll.SampleSize)
}
aggregate_sample_size := num_biden_votes + num_trump_votes
if aggregate_sample_size != 0.0 {
var aggregate_poll = Poll{SampleSize: int(aggregate_sample_size), PollResults: make(map[string]float64)}
aggregate_poll.PollResults["Biden"] = 100.0 * num_biden_votes / aggregate_sample_size
aggregate_poll.PollResults["Trump"] = 100.0 * num_trump_votes / aggregate_sample_size
p_republican_win_aggregate_polls := getChanceRepublicanWinFromPoll(aggregate_poll)
fmt.Printf("\n\tAggregate poll: %+v", aggregate_poll)
fmt.Printf("\n\t\tAggregate Poll says chance of R win: %f", p_republican_win_aggregate_polls)
}
2024-04-14 16:03:27 +00:00
}
2024-04-14 18:46:45 +00:00
}
/* Sample state by state */
func sampleFromState(state State) VotesForEachParty {
2024-04-13 18:44:00 +00:00
switch state.Name {
case "Nebraska":
2024-04-14 14:09:46 +00:00
/*
2000: R
2004: R
2008: Split, 1 D, 4 R
2012: R
2016: R
2020: Split, 1 D, 4 R
*/
2024-04-14 00:37:55 +00:00
p_split := 2.0 / 6.0
if r.Float64() < p_split {
return VotesForEachParty{Democrats: 1, Republicans: 4}
} else {
return VotesForEachParty{Democrats: 0, Republicans: 5}
}
2024-04-13 18:44:00 +00:00
case "Maine":
2024-04-14 14:09:46 +00:00
/*
2000: D
2004: D
2008: D
2012: D
2016: Split: 3 D, 1 R
2020: Split, 3 D, 1 R
*/
2024-04-14 00:37:55 +00:00
p_split := 2.0 / 6.0
if r.Float64() < p_split {
return VotesForEachParty{Democrats: 3, Republicans: 1}
} else {
return VotesForEachParty{Democrats: 1, Republicans: 0}
}
2024-04-13 18:44:00 +00:00
default:
2024-04-13 19:26:18 +00:00
{
/* Consider the base rate for the state */
2024-04-14 16:03:27 +00:00
p_baserate_republican := 0.0
2024-04-14 13:58:18 +00:00
for _, party := range state.PresidentialElectoralHistory {
2024-04-13 19:26:18 +00:00
if party == "R" {
2024-04-14 16:03:27 +00:00
p_baserate_republican++
2024-04-13 19:26:18 +00:00
}
}
2024-04-14 16:03:27 +00:00
p_baserate_republican = p_baserate_republican / float64(len(state.PresidentialElectoralHistory))
p_republican := p_baserate_republican // if no polls
2024-04-14 14:09:46 +00:00
/* Consider polls */
num_biden_votes := 0.0
num_trump_votes := 0.0
2024-04-14 18:46:45 +00:00
for _, poll := range state.Polls {
// p_republican_win_poll = getChanceRepublicanWinFromPoll(poll)
2024-04-14 18:46:45 +00:00
biden_percentage, biden_exists := poll.PollResults["Biden"]
trump_percentage, trump_exists := poll.PollResults["Trump"]
if !biden_exists || !trump_exists {
panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
}
num_biden_votes += (biden_percentage / 100.0) * float64(poll.SampleSize)
num_trump_votes += (trump_percentage / 100.0) * float64(poll.SampleSize)
}
2024-04-14 18:46:45 +00:00
aggregate_sample_size := num_biden_votes + num_trump_votes
if aggregate_sample_size != 0.0 {
2024-04-14 19:07:21 +00:00
var aggregate_poll = Poll{SampleSize: int(aggregate_sample_size), PollResults: make(map[string]float64)}
aggregate_poll.PollResults["Biden"] = 100.0 * num_biden_votes / aggregate_sample_size
aggregate_poll.PollResults["Trump"] = 100.0 * num_trump_votes / aggregate_sample_size
p_republican_win_aggregate_polls := getChanceRepublicanWinFromPoll(aggregate_poll)
2024-04-14 18:06:02 +00:00
weight_polls := 1.0
2024-04-14 19:07:21 +00:00
p_republican = weight_polls*p_republican_win_aggregate_polls + (1.0-weight_polls)*p_baserate_republican
2024-04-14 15:35:24 +00:00
}
if r.Float64() < p_republican {
return VotesForEachParty{Democrats: 0, Republicans: state.Votes}
} else {
return VotesForEachParty{Democrats: state.Votes, Republicans: 0}
}
2024-04-13 19:26:18 +00:00
}
2024-04-13 18:44:00 +00:00
}
}
func simulateElection(states []State) int {
republican_seats := 0
for _, state := range states {
election_sample := sampleFromState(state)
republican_seats += election_sample.Republicans
}
2024-04-14 16:03:27 +00:00
if dev {
fmt.Printf("\n\n(%d) ", republican_seats)
}
2024-04-14 00:37:55 +00:00
if republican_seats >= 270 {
return 1
} else {
return 0
}
}
2024-04-13 14:22:28 +00:00
func main() {
2024-04-13 18:44:00 +00:00
states, err := readStates()
if err != nil {
fmt.Println("Error:", err)
return
}
n_sims := 10_000
2024-04-13 19:26:18 +00:00
2024-04-14 19:07:21 +00:00
printStates(states)
p_republicans := 0.0
2024-04-14 00:37:55 +00:00
for i := 0; i < n_sims; i++ {
result := simulateElection(states)
2024-04-14 16:03:27 +00:00
if dev {
fmt.Printf("Election result: %d", result)
}
if result == 1 {
p_republicans++
}
2024-04-13 16:35:52 +00:00
}
p_republicans = p_republicans / float64(n_sims)
2024-04-14 16:03:27 +00:00
fmt.Printf("\n\n\n%% republicans: %f\n", p_republicans)
2024-04-13 14:22:28 +00:00
}