513 lines
16 KiB
Go
513 lines
16 KiB
Go
package main
|
|
|
|
import (
|
|
"encoding/csv"
|
|
"fmt"
|
|
"math"
|
|
rand "math/rand/v2"
|
|
"os"
|
|
"strconv"
|
|
"time"
|
|
// "strings"
|
|
)
|
|
|
|
/* Structs */
|
|
type State struct {
|
|
Name string
|
|
Votes int
|
|
PresidentialElectoralHistory map[string]string
|
|
Polls []Poll
|
|
}
|
|
|
|
// VotesForEachParty is the outcome of sampling one state: how many electoral
// votes went to each of the two parties.
type VotesForEachParty struct {
	// Democrats is the number of electoral votes won by the Democratic ticket.
	Democrats int
	// Republicans is the number of electoral votes won by the Republican ticket.
	Republicans int
}
|
|
|
|
// Poll is a single poll, with one result entry per candidate.
type Poll struct {
	// PollId identifies the poll; rows sharing an id are merged into one Poll.
	PollId string
	// SampleSize is the number of respondents.
	SampleSize int
	// PollResults maps a candidate name to that candidate's percentage
	// (on a 0-100 scale; consumers divide by 100 to obtain a share).
	PollResults map[string]float64
	// Date is the poll's end date.
	Date time.Time
	// Partisan carries the partisan-sponsor column verbatim.
	Partisan string
}
|
|
|
|
/* Globals */
|
|
var r = rand.New(rand.NewPCG(uint64(100), uint64(2224)))
|
|
var dem_nominee_name = "Biden"
|
|
var rep_nominee_name = "Trump"
|
|
|
|
/* Sampling helper functions */
|
|
// getNormalCDF returns P(X <= x) for X ~ Normal(mean, std), computed through
// the error function: (1 + erf((x - mean) / (std * sqrt(2)))) / 2.
func getNormalCDF(x float64, mean float64, std float64) float64 {
	scale := std * math.Sqrt2
	z := (x - mean) / scale
	return (1 + math.Erf(z)) / 2
}
|
|
|
|
func getProbabilityAboveX(x float64, mean float64, std float64) float64 {
|
|
return 1 - getNormalCDF(x, mean, std)
|
|
}
|
|
|
|
func getChanceCandidateWinsFromPollShare(candidate_p float64, poll_sample_size float64) float64 {
|
|
std := math.Sqrt(candidate_p * (1 - candidate_p) / poll_sample_size) // https://stats.stackexchange.com/questions/258879/how-to-interpret-margin-of-error-in-a-non-binary-poll
|
|
return getProbabilityAboveX(0.5, candidate_p, std)
|
|
}
|
|
|
|
func getChanceRepublicanWinFromPoll(poll Poll, pretty_print bool) float64 {
|
|
biden_percentage, biden_exists := poll.PollResults[dem_nominee_name]
|
|
trump_percentage, trump_exists := poll.PollResults[rep_nominee_name]
|
|
if !biden_exists || !trump_exists {
|
|
panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
|
|
}
|
|
biden_share := biden_percentage / 100.0 // will panic if the item is not found, but we've previously filtered for it
|
|
trump_share := trump_percentage / 100.0
|
|
|
|
normalized_trump_share := trump_share / (trump_share + biden_share)
|
|
normalized_biden_share := biden_share / (trump_share + biden_share)
|
|
|
|
joint_trump_biden_sample_size := (biden_share + trump_share) * float64(poll.SampleSize)
|
|
std_error_poll_mean := math.Sqrt((normalized_trump_share * normalized_biden_share) / joint_trump_biden_sample_size)
|
|
|
|
p_republican_win := getProbabilityAboveX(0.5, normalized_trump_share, std_error_poll_mean)
|
|
|
|
if pretty_print {
|
|
fmt.Printf("\n\t\tSample size: %f", joint_trump_biden_sample_size)
|
|
fmt.Printf("\n\t\tMean R: %f", 100.0*normalized_trump_share)
|
|
fmt.Printf("\n\t\tStd of mean R: %f", 100*std_error_poll_mean)
|
|
fmt.Printf("\n\t\tPoll says chance of R win: %f", p_republican_win)
|
|
}
|
|
return p_republican_win
|
|
}
|
|
|
|
func getChanceRepublicanWinFromPollPlusUncertainty(poll Poll, state State, pretty_print bool) float64 {
|
|
// Uncertainty from the state
|
|
n_republican_win := 0
|
|
for _, party := range state.PresidentialElectoralHistory {
|
|
if party == "R" {
|
|
n_republican_win++
|
|
}
|
|
}
|
|
|
|
// Get the uncertainty from the poll
|
|
biden_percentage, biden_exists := poll.PollResults[dem_nominee_name]
|
|
trump_percentage, trump_exists := poll.PollResults[rep_nominee_name]
|
|
if !biden_exists || !trump_exists {
|
|
panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
|
|
}
|
|
biden_share := biden_percentage / 100.0 // will panic if the item is not found, but we've previously filtered for it
|
|
trump_share := trump_percentage / 100.0
|
|
|
|
normalized_trump_share := trump_share / (trump_share + biden_share)
|
|
normalized_biden_share := biden_share / (trump_share + biden_share)
|
|
|
|
joint_trump_biden_sample_size := (biden_share + trump_share) * float64(poll.SampleSize)
|
|
std_error_poll_mean := math.Sqrt((normalized_trump_share * normalized_biden_share) / joint_trump_biden_sample_size)
|
|
|
|
/* Inject additional uncertainty */
|
|
/*
|
|
Possible factors:
|
|
- National drift between now and the election (biggest one)
|
|
- States more uncertain than the national average
|
|
- Idiosyncratic factors
|
|
- Polls not being as good as gallup
|
|
- Increased polarization
|
|
Also note that the polls already have some error already
|
|
*/
|
|
std_additional_uncertainty := 5.0 / 100.0
|
|
|
|
if n_republican_win == 0 || n_republican_win == 6 {
|
|
// if solid states for the last 6 elections
|
|
std_additional_uncertainty = std_additional_uncertainty / 3.0
|
|
|
|
if pretty_print {
|
|
fmt.Printf("\n\t\tN republican wins: %d", n_republican_win)
|
|
fmt.Printf("\n\t\t=> Reducing additional uncertainty")
|
|
}
|
|
}
|
|
|
|
std_error := std_error_poll_mean + std_additional_uncertainty
|
|
|
|
// std_error := std_error_poll_mean + 0.065
|
|
p_republican_win := getProbabilityAboveX(0.5, normalized_trump_share, std_error)
|
|
|
|
if pretty_print {
|
|
fmt.Printf("\n\t\tStd with std_additional_uncertainty R: %f", 100*std_error)
|
|
fmt.Printf("\n\t\tPoll plus uncertainty says chance of R win: %f", p_republican_win)
|
|
}
|
|
return p_republican_win
|
|
}
|
|
|
|
/* Print state by state data */
|
|
func printStates(states []State) {
|
|
for _, state := range states {
|
|
fmt.Printf("\n\nState: %s", state.Name)
|
|
fmt.Printf("\n\tVotes: %d", state.Votes)
|
|
fmt.Printf("\n\tHistory: %s", state.PresidentialElectoralHistory)
|
|
|
|
p_baserate_republican_win := 0.0
|
|
for _, party := range state.PresidentialElectoralHistory {
|
|
if party == "R" {
|
|
p_baserate_republican_win++
|
|
}
|
|
}
|
|
fmt.Printf("\n\tHistorical base rate of R win: %f", p_baserate_republican_win/float64(len(state.PresidentialElectoralHistory)))
|
|
|
|
// Individual poll
|
|
for _, poll := range state.Polls {
|
|
fmt.Printf("\n\tPoll: %+v", poll)
|
|
_ = getChanceRepublicanWinFromPoll(poll, true)
|
|
_ = getChanceRepublicanWinFromPollPlusUncertainty(poll, state, true)
|
|
}
|
|
|
|
// Aggregate poll
|
|
num_biden_votes := 0.0
|
|
num_trump_votes := 0.0
|
|
for _, poll := range state.Polls {
|
|
biden_percentage, biden_exists := poll.PollResults[dem_nominee_name]
|
|
trump_percentage, trump_exists := poll.PollResults[rep_nominee_name]
|
|
if !biden_exists || !trump_exists {
|
|
panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
|
|
}
|
|
num_biden_votes += (biden_percentage / 100.0) * float64(poll.SampleSize)
|
|
num_trump_votes += (trump_percentage / 100.0) * float64(poll.SampleSize)
|
|
}
|
|
aggregate_sample_size := num_biden_votes + num_trump_votes
|
|
if aggregate_sample_size != 0.0 {
|
|
var aggregate_poll = Poll{SampleSize: int(aggregate_sample_size), PollResults: make(map[string]float64)}
|
|
aggregate_poll.PollResults[dem_nominee_name] = 100.0 * num_biden_votes / aggregate_sample_size
|
|
aggregate_poll.PollResults[rep_nominee_name] = 100.0 * num_trump_votes / aggregate_sample_size
|
|
|
|
fmt.Printf("\n\tAggregate poll: %+v", aggregate_poll)
|
|
_ = getChanceRepublicanWinFromPoll(aggregate_poll, true)
|
|
_ = getChanceRepublicanWinFromPollPlusUncertainty(aggregate_poll, state, true)
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Sample state by state */
|
|
func sampleFromState(state State) VotesForEachParty {
|
|
switch state.Name {
|
|
case "Nebraska":
|
|
/*
|
|
2000: R
|
|
2004: R
|
|
2008: Split, 1 D, 4 R
|
|
2012: R
|
|
2016: R
|
|
2020: Split, 1 D, 4 R
|
|
*/
|
|
p_split := 2.0 / 6.0
|
|
if r.Float64() < p_split {
|
|
return VotesForEachParty{Democrats: 1, Republicans: 4}
|
|
} else {
|
|
return VotesForEachParty{Democrats: 0, Republicans: 5}
|
|
}
|
|
case "Maine":
|
|
/*
|
|
2000: D
|
|
2004: D
|
|
2008: D
|
|
2012: D
|
|
2016: Split: 3 D, 1 R
|
|
2020: Split, 3 D, 1 R
|
|
*/
|
|
p_split := 2.0 / 6.0
|
|
if r.Float64() < p_split {
|
|
return VotesForEachParty{Democrats: 3, Republicans: 1}
|
|
} else {
|
|
return VotesForEachParty{Democrats: 1, Republicans: 0}
|
|
}
|
|
default:
|
|
{
|
|
/* Consider the base rate for the state */
|
|
p_baserate_republican_win := 0.0
|
|
for _, party := range state.PresidentialElectoralHistory {
|
|
if party == "R" {
|
|
p_baserate_republican_win++
|
|
}
|
|
}
|
|
p_baserate_republican_win = p_baserate_republican_win / float64(len(state.PresidentialElectoralHistory))
|
|
p_republican_win := p_baserate_republican_win // if no polls
|
|
|
|
/* Consider polls */
|
|
num_biden_votes := 0.0
|
|
num_trump_votes := 0.0
|
|
for _, poll := range state.Polls {
|
|
biden_percentage, biden_exists := poll.PollResults[dem_nominee_name]
|
|
trump_percentage, trump_exists := poll.PollResults[rep_nominee_name]
|
|
if !biden_exists || !trump_exists {
|
|
panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
|
|
}
|
|
num_biden_votes += (biden_percentage / 100.0) * float64(poll.SampleSize)
|
|
num_trump_votes += (trump_percentage / 100.0) * float64(poll.SampleSize)
|
|
}
|
|
|
|
aggregate_sample_size := num_biden_votes + num_trump_votes
|
|
if aggregate_sample_size != 0.0 {
|
|
var aggregate_poll = Poll{SampleSize: int(aggregate_sample_size), PollResults: make(map[string]float64)}
|
|
aggregate_poll.PollResults[dem_nominee_name] = 100.0 * num_biden_votes / aggregate_sample_size
|
|
aggregate_poll.PollResults[rep_nominee_name] = 100.0 * num_trump_votes / aggregate_sample_size
|
|
|
|
p_republican_win_aggregate_polls := getChanceRepublicanWinFromPollPlusUncertainty(aggregate_poll, state, false)
|
|
// p_republican_win_aggregate_polls = getChanceRepublicanWinFromPoll(aggregate_poll, false)
|
|
|
|
weight_polls := 0.75
|
|
p_republican_win = weight_polls*p_republican_win_aggregate_polls + (1.0-weight_polls)*p_baserate_republican_win
|
|
// p_republican_win = p_republican_win_aggregate_polls
|
|
}
|
|
|
|
if r.Float64() < p_republican_win {
|
|
return VotesForEachParty{Democrats: 0, Republicans: state.Votes}
|
|
} else {
|
|
return VotesForEachParty{Democrats: state.Votes, Republicans: 0}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Simulate election */
|
|
func simulateElection(states []State) int {
|
|
republican_seats := 0
|
|
for _, state := range states {
|
|
election_sample := sampleFromState(state)
|
|
republican_seats += election_sample.Republicans
|
|
}
|
|
return republican_seats
|
|
}
|
|
|
|
/* Histogram */
|
|
// barString returns a histogram bar made of n full-block characters. A
// non-positive n yields the empty string.
func barString(n int) string {
	if n <= 0 {
		return ""
	}
	blocks := make([]rune, n)
	for i := range blocks {
		blocks[i] = '█'
	}
	return string(blocks)
}
|
|
|
|
func printElectoralCollegeHistogram(samples []int) {
|
|
histogram := [538]int{}
|
|
for _, sample := range samples {
|
|
histogram[sample]++
|
|
}
|
|
|
|
max_count := 0
|
|
for _, count := range histogram {
|
|
if count > max_count {
|
|
max_count = count
|
|
}
|
|
}
|
|
|
|
cp := 0.0
|
|
for i, count := range histogram {
|
|
bar_length := (count * 100) / max_count // Assuming max_count bar length is 50 characters. 75.
|
|
p := float64(count) / float64(len(samples)) * 100
|
|
cp += p
|
|
|
|
if i > 130 && i < 400 {
|
|
fmt.Printf("[ %2d, %4d): %s %.2f%% (%.0f%%)\n", i, i+1, barString(bar_length), p, cp)
|
|
} else if p >= 0.01 {
|
|
fmt.Printf(">0.01 probability outside of domain, you might want to change histogram parameters\n")
|
|
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Load data from csvs */
|
|
// Glue code
|
|
func readStates() ([]State, error) {
|
|
var states map[string]State = make(map[string]State)
|
|
|
|
/* Electoral college votes for the 2024 election*/
|
|
votes_file, err := os.Open("data/num-electors/electoral-college-votes.csv")
|
|
// votes_file, err := os.Open("data/electoral-college-votes-2010-census.csv")
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error opening the votes file: %v", err)
|
|
}
|
|
defer votes_file.Close()
|
|
|
|
votes_reader := csv.NewReader(votes_file)
|
|
if _, err := votes_reader.Read(); err != nil { // Skip header
|
|
return nil, fmt.Errorf("error reading votes header: %v", err)
|
|
}
|
|
for {
|
|
csv_record, err := votes_reader.Read()
|
|
if err != nil {
|
|
break // EOF or an error
|
|
}
|
|
votes, err := strconv.Atoi(csv_record[1])
|
|
if err != nil {
|
|
continue // Error in converting votes, skip this record
|
|
}
|
|
state := csv_record[0]
|
|
if _, exists := states[state]; !exists {
|
|
states[state] = State{Name: state, Votes: votes, PresidentialElectoralHistory: make(map[string]string)}
|
|
}
|
|
}
|
|
|
|
/* Election results */
|
|
var years = []string{"2000", "2004", "2008", "2012", "2016", "2020"}
|
|
for _, year := range years {
|
|
electoral_history_filename := fmt.Sprintf("data/electoral-history/%s.csv", year)
|
|
electoral_history_file, err := os.Open(electoral_history_filename)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error opening the electoral_history file for %s: %v", year, err)
|
|
}
|
|
electoral_history_reader := csv.NewReader(electoral_history_file)
|
|
if _, err := electoral_history_reader.Read(); err != nil { // Skip header
|
|
return nil, fmt.Errorf("error reading electoral_history header for %s: %v", year, err)
|
|
}
|
|
for {
|
|
record, err := electoral_history_reader.Read()
|
|
if err != nil {
|
|
break // EOF or an error
|
|
}
|
|
state, party := record[0], record[1]
|
|
data, exists := states[state]
|
|
if !exists {
|
|
continue // State not found in votes map, skip
|
|
}
|
|
// Update the party winning in the specific year
|
|
data.PresidentialElectoralHistory[year] = party
|
|
states[state] = data
|
|
}
|
|
|
|
electoral_history_file.Close()
|
|
}
|
|
|
|
/* Read polls */
|
|
polls_file, err := os.Open("data/polls/president_polls_state.csv") // Make sure to update this path
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error opening the polls file: %v", err)
|
|
}
|
|
defer polls_file.Close()
|
|
|
|
// Using a temporary map to group poll results by state and poll ID
|
|
state_polls_map := make(map[string]map[string]Poll)
|
|
|
|
polls_reader := csv.NewReader(polls_file)
|
|
_, err = polls_reader.Read() // Skip the header
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error reading polls header: %v", err)
|
|
}
|
|
|
|
for {
|
|
record, err := polls_reader.Read()
|
|
if err != nil {
|
|
break // EOF or an error
|
|
}
|
|
|
|
poll_id := record[0]
|
|
state_name := record[12]
|
|
end_date := record[14]
|
|
partisan := record[32]
|
|
candidate_name := record[44]
|
|
|
|
date_layout := "1/2/06"
|
|
parsed_date, err := time.Parse(date_layout, end_date)
|
|
if err != nil {
|
|
fmt.Println("Error parsing date: ", err)
|
|
}
|
|
|
|
sample_size, err := strconv.Atoi(record[22])
|
|
if err != nil {
|
|
continue // If error, skip this record
|
|
}
|
|
|
|
percentage, err := strconv.ParseFloat(record[47], 64) // percentage is in the 42nd column
|
|
if err != nil {
|
|
fmt.Printf("Error parsing percentage")
|
|
continue // If error, skip this record
|
|
}
|
|
|
|
if _, exists := state_polls_map[state_name]; !exists {
|
|
state_polls_map[state_name] = make(map[string]Poll)
|
|
}
|
|
|
|
poll, exists := state_polls_map[state_name][poll_id]
|
|
if !exists {
|
|
poll = Poll{
|
|
PollId: poll_id,
|
|
SampleSize: sample_size,
|
|
PollResults: make(map[string]float64),
|
|
Date: parsed_date,
|
|
Partisan: partisan,
|
|
}
|
|
}
|
|
poll.PollResults[candidate_name] = percentage
|
|
state_polls_map[state_name][poll_id] = poll
|
|
}
|
|
|
|
// Add the aggregated poll data to the respective states
|
|
for state_name, polls := range state_polls_map {
|
|
|
|
// Filter polls by recency and by having both Biden and Trump
|
|
var recent_polls []Poll
|
|
for _, poll := range polls {
|
|
if poll.Date.After(time.Now().AddDate(0, 0, -30)) {
|
|
recent_polls = append(recent_polls, poll)
|
|
}
|
|
}
|
|
var recent_biden_trump_polls []Poll
|
|
for _, recent_poll := range recent_polls {
|
|
has_biden := false
|
|
has_trump := false
|
|
for candidate_name, _ := range recent_poll.PollResults {
|
|
if candidate_name == dem_nominee_name {
|
|
has_biden = true
|
|
} else if candidate_name == rep_nominee_name {
|
|
has_trump = true
|
|
}
|
|
}
|
|
if has_biden && has_trump {
|
|
recent_biden_trump_polls = append(recent_biden_trump_polls, recent_poll)
|
|
}
|
|
}
|
|
|
|
if state, exists := states[state_name]; exists {
|
|
state.Polls = recent_biden_trump_polls
|
|
states[state_name] = state // Not redundant
|
|
} else {
|
|
// fmt.Printf("Encountered new state: %s\n", state_name)
|
|
/*
|
|
states[state_name] = State{
|
|
Name: state_name,
|
|
Polls: polls_slice,
|
|
}
|
|
*/
|
|
}
|
|
}
|
|
|
|
// Convert statesData map to a slice for returning
|
|
var states_slice []State
|
|
for _, state := range states {
|
|
states_slice = append(states_slice, state)
|
|
}
|
|
return states_slice, nil
|
|
}
|
|
|
|
func main() {
|
|
states, err := readStates()
|
|
if err != nil {
|
|
fmt.Println("Error:", err)
|
|
return
|
|
}
|
|
|
|
n_sims := 1_000_000
|
|
|
|
printStates(states)
|
|
fmt.Printf("\n\n")
|
|
|
|
p_republicans := 0.0
|
|
results := make([]int, n_sims)
|
|
for i := 0; i < n_sims; i++ {
|
|
republican_seats := simulateElection(states)
|
|
results[i] = republican_seats
|
|
if republican_seats >= 270 {
|
|
p_republicans++
|
|
}
|
|
}
|
|
printElectoralCollegeHistogram(results)
|
|
|
|
p_republicans = p_republicans / float64(n_sims)
|
|
fmt.Printf("\n%% republicans: %f\n", p_republicans)
|
|
}
|