From e25942b2c30eb1b4dee31049a7f9c42dc09b074a Mon Sep 17 00:00:00 2001 From: NunoSempere Date: Sat, 13 Apr 2024 22:14:59 -0400 Subject: [PATCH] tweak polls in main and restructure README --- README.md | 55 ++++++++++++++++++++--------- data/polls/fields.sh | 2 ++ main.go | 84 +++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 124 insertions(+), 17 deletions(-) create mode 100644 data/polls/fields.sh diff --git a/README.md b/README.md index b2d23c0..81e7ec3 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,42 @@ -Electoral votes/election +# Nuño's dead simple election simulator -Year | Republican electoral college votes -2000 | 271 -2004 | 286 -2008 | 173 -2012 | 206 -2016 | 304 -2020 | 232 +## First round: just consider the base rates. -Year | Democrat electoral college votes -2000 | 266 -2004 | 251 -2008 | 365 -2012 | 332 -2016 | 227 -2020 | 232 +- [x] Get past electoral college results since 2000 +- [x] Get number of electors for each state with the new census +- [x] Combine the two to get an initial base rates analysis -Ok, so because Obama really trounced the opposition, it won by more than the Republicans won, which means that the baserates approach didn't really work. +This initial approach gives Republicans a 25% chance of winning the 2024 election. Why is this? Well, consider the number of electoral college votes: + +| Year | Republican electoral college votes | +| ---- | --- | +| 2000 | 271 | +| 2004 | 286 | +| 2008 | 173 | +| 2012 | 206 | +| 2016 | 304 | +| 2020 | 232 | + +| Year | Democrat electoral college votes | +| ---- | --- | +| 2000 | 266 | +| 2004 | 251 | +| 2008 | 365 | +| 2012 | 332 | +| 2016 | 227 | +| 2020 | 232 | + +When Democrats won with Obama, they won by a lot, whereas when Republicans won with Bush and Trump, they won by a smaller amount. Or, in other words, this initial approach *doesn't take into account that states are correlated*. + +Remedy: consider the conditional probabilities? But how? 
Or, relax assumptions using Laplace's law? + +## Second round: just consider polls + +- [ ] Download and format +- [ ] Read +- [ ] Consider what the standard error should be +- [ ] Aggregate polls? +- [ ] Exclude polls older than one month? +- [ ] ... + +## Third diff --git a/data/polls/fields.sh b/data/polls/fields.sh new file mode 100644 index 0000000..99803b2 --- /dev/null +++ b/data/polls/fields.sh @@ -0,0 +1,2 @@ + +cat president_polls_state.csv | head -n 2 | cut -d, -f13 diff --git a/main.go b/main.go index 3d47891..2b9334e 100644 --- a/main.go +++ b/main.go @@ -14,6 +14,7 @@ type State struct { Name string Votes int VictoriousPartyPerElection map[string]string + Polls []Poll } type VotesForEachParty struct { @@ -21,6 +22,12 @@ type VotesForEachParty struct { Republicans int } +type Poll struct { + PollId string + SampleSize int + PollResults map[string]float64 +} + // type src = *rand.Rand /* Globals */ @@ -87,6 +94,79 @@ func readStates() ([]State, error) { results_file.Close() } + /* Read polls */ + polls_file, err := os.Open("data/polls/president_polls_state.csv") // Make sure to update this path + if err != nil { + return nil, fmt.Errorf("error opening the polls file: %v", err) + } + defer polls_file.Close() + + // Using a temporary map to group poll results by state and poll ID + tmp_polls := make(map[string]map[string]Poll) + + polls_reader := csv.NewReader(polls_file) + _, err = polls_reader.Read() // Skip the header + if err != nil { + return nil, fmt.Errorf("error reading polls header: %v", err) + } + + for { + record, err := polls_reader.Read() + if err != nil { + break // EOF or an error + } + state_name := record[12] + // fmt.Printf("State: %s", state_name) + poll_id := record[0] + + sampleSize, err := strconv.Atoi(record[22]) + if err != nil { + continue // If error, skip this record + } + + candidateName := record[44] + pct, err := strconv.ParseFloat(record[47], 64) // pct is in the 42nd column + if err != nil { + fmt.Printf("Error parsing 
percentage") + continue // If error, skip this record + } + + if _, exists := tmp_polls[state_name]; !exists { + tmp_polls[state_name] = make(map[string]Poll) + } + + poll, exists := tmp_polls[state_name][poll_id] + if !exists { + poll = Poll{ + PollId: poll_id, + SampleSize: sampleSize, + PollResults: make(map[string]float64), + } + } + poll.PollResults[candidateName] = pct + tmp_polls[state_name][poll_id] = poll + } + + // Add the aggregated poll data to the respective states + for state_name, polls := range tmp_polls { + var pollsSlice []Poll + for _, poll := range polls { + pollsSlice = append(pollsSlice, poll) + } + if state, exists := states[state_name]; exists { + state.Polls = pollsSlice + states[state_name] = state // Not redundant + } else { + // fmt.Printf("Encountered new state: %s\n", state_name) + /* + states[state_name] = State{ + Name: state_name, + Polls: pollsSlice, + } + */ + } + } + // Convert statesData map to a slice for returning var states_slice []State for _, state := range states { @@ -97,6 +177,7 @@ func readStates() ([]State, error) { } func sampleFromState(state State) VotesForEachParty { + fmt.Printf("%s\n\n", state) switch state.Name { case "Nebraska": // 2000: R @@ -146,6 +227,7 @@ func simulateElection(states []State) int { republican_seats := 0 for _, state := range states { + // fmt.Printf("%s\n", state) election_sample := sampleFromState(state) republican_seats += election_sample.Republicans /* @@ -176,7 +258,7 @@ func main() { return } - n_sims := 100_000 + n_sims := 1 p_republicans := 0.0 for i := 0; i < n_sims; i++ {