move glue code function to the end

This commit is contained in:
NunoSempere 2024-04-14 21:42:38 -04:00
parent 212f72f596
commit cbda7b1e5c

562
main.go
View File

@ -34,9 +34,283 @@ type Poll struct {
/* Globals */
var r = rand.New(rand.NewPCG(uint64(100), uint64(2224)))
var dev = false
/* Sampling helper functions */
// getNormalCDF evaluates, at x, the cumulative distribution function of a
// normal distribution with the given mean and standard deviation, i.e. the
// probability that a draw is at most x.
func getNormalCDF(x float64, mean float64, std float64) float64 {
	// Distance from the mean in units of std*sqrt(2), the argument form
	// under which the normal CDF is (1 + erf(z)) / 2.
	scaled_distance := (x - mean) / (std * math.Sqrt2)
	erf_value := math.Erf(scaled_distance)
	return (1 + erf_value) / 2
}
// getProbabilityAboveX returns the probability that a normally distributed
// variable with the given mean and standard deviation exceeds x (the
// survival function, 1 - CDF).
//
// It is evaluated through the complementary error function instead of
// literally subtracting the CDF from 1: for x well above the mean the CDF
// rounds to exactly 1 in float64, so the subtraction would return 0 and
// discard the small but nonzero tail probability.
func getProbabilityAboveX(x float64, mean float64, std float64) float64 {
	// 1 - (1 + erf(z))/2 == erfc(z)/2
	z := (x - mean) / (std * math.Sqrt2)
	return math.Erfc(z) / 2
}
// getChanceCandidateWinsFromPollShare estimates the probability that a
// candidate's true share exceeds 50%, given the share candidate_p observed in
// a poll of poll_sample_size respondents. candidate_p is used as a fraction
// (it is compared against 0.5 and complemented against 1).
//
// The observed share is treated as normally distributed around candidate_p
// with the binomial standard error sqrt(p*(1-p)/n); see
// https://stats.stackexchange.com/questions/258879/how-to-interpret-margin-of-error-in-a-non-binary-poll
func getChanceCandidateWinsFromPollShare(candidate_p float64, poll_sample_size float64) float64 {
	variance := candidate_p * (1 - candidate_p) / poll_sample_size
	standard_error := math.Sqrt(variance)
	return getProbabilityAboveX(0.5, candidate_p, standard_error)
}
// getChanceRepublicanWinFromPoll returns the probability that Trump is ahead
// in the two-way race, judging only from the sampling error of one poll.
//
// The "Biden" and "Trump" percentages are renormalized so that together they
// sum to 1, and the sample size is scaled down by the same combined share.
// The normalized Trump share is then treated as normally distributed with the
// binomial standard error, and the result is the mass above 50%.
//
// When pretty_print is true the intermediate values are written to stdout.
// Panics if the poll has no "Biden" or no "Trump" entry.
func getChanceRepublicanWinFromPoll(poll Poll, pretty_print bool) float64 {
	biden_pct, has_biden := poll.PollResults["Biden"]
	trump_pct, has_trump := poll.PollResults["Trump"]
	if !(has_biden && has_trump) {
		// Callers are expected to have filtered polls already; a missing
		// entry is an invariant violation, caught here explicitly.
		panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
	}

	// Percentages -> fractions of all respondents.
	biden_fraction := biden_pct / 100.0
	trump_fraction := trump_pct / 100.0

	// Restrict to respondents who chose one of the two candidates.
	two_party_fraction := trump_fraction + biden_fraction
	trump_two_party := trump_fraction / two_party_fraction
	biden_two_party := biden_fraction / two_party_fraction
	effective_sample_size := two_party_fraction * float64(poll.SampleSize)

	// Binomial standard error of the two-party Trump share.
	poll_std := math.Sqrt((trump_two_party * biden_two_party) / effective_sample_size)
	win_probability := getProbabilityAboveX(0.5, trump_two_party, poll_std)

	if pretty_print {
		fmt.Printf("\n\t\tSample size: %f", effective_sample_size)
		fmt.Printf("\n\t\tMean R: %f", 100.0*trump_two_party)
		fmt.Printf("\n\t\tStd of mean R: %f", 100*poll_std)
		fmt.Printf("\n\t\tPoll says chance of R win: %f", win_probability)
	}
	return win_probability
}
// getChanceRepublicanWinFromPollPlusUncertainty returns the probability that
// Trump is ahead in the two-way race, starting from a poll's two-party Trump
// share and widening the poll's standard error with a fixed allowance for
// everything sampling error does not capture.
//
// The allowance is 5 percentage points, reduced to a third of that when the
// state's electoral history contains either 0 or 6 Republican wins. It is
// added linearly to the poll's own standard error.
//
// When pretty_print is true the intermediate values are written to stdout.
// Panics if the poll has no "Biden" or no "Trump" entry.
func getChanceRepublicanWinFromPollPlusUncertainty(poll Poll, state State, pretty_print bool) float64 {
	// How many entries of the state's recorded history went Republican.
	republican_wins := 0
	for _, winner := range state.PresidentialElectoralHistory {
		if winner == "R" {
			republican_wins++
		}
	}

	// Sampling error of the poll, restricted to Biden/Trump respondents.
	biden_pct, has_biden := poll.PollResults["Biden"]
	trump_pct, has_trump := poll.PollResults["Trump"]
	if !(has_biden && has_trump) {
		// Callers are expected to have filtered polls already; a missing
		// entry is an invariant violation, caught here explicitly.
		panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
	}
	biden_fraction := biden_pct / 100.0
	trump_fraction := trump_pct / 100.0
	two_party_fraction := trump_fraction + biden_fraction
	trump_two_party := trump_fraction / two_party_fraction
	biden_two_party := biden_fraction / two_party_fraction
	effective_sample_size := two_party_fraction * float64(poll.SampleSize)
	poll_std := math.Sqrt((trump_two_party * biden_two_party) / effective_sample_size)

	/*
		Extra uncertainty on top of the poll's sampling error. Possible sources:
		- National drift between now and the election (biggest one)
		- States more uncertain than the national average
		- Idiosyncratic factors
		- Polls not being as good as Gallup
		- Increased polarization
		Note that the polls already carry some error of their own.
	*/
	extra_std := 5.0 / 100.0
	switch republican_wins {
	case 0, 6:
		// Solid state: same party in all of the last 6 elections.
		extra_std = extra_std / 3.0
		if pretty_print {
			fmt.Printf("\n\t\tN republican wins: %d", republican_wins)
			fmt.Printf("\n\t\t=> Reducing additional uncertainty")
		}
	}

	// A flat 0.065 in place of extra_std was an earlier alternative.
	total_std := poll_std + extra_std
	win_probability := getProbabilityAboveX(0.5, trump_two_party, total_std)

	if pretty_print {
		fmt.Printf("\n\t\tStd with std_additional_uncertainty R: %f", 100*total_std)
		fmt.Printf("\n\t\tPoll plus uncertainty says chance of R win: %f", win_probability)
	}
	return win_probability
}
/* Print state by state data */
// printStates writes a report for every state to stdout: its name, electoral
// votes and electoral history, the historical share of Republican wins, the
// win-probability breakdown of each individual poll, and finally the same
// breakdown for a single aggregate poll that pools all of the state's polls
// weighted by sample size. The aggregate is omitted when the pooled
// Biden+Trump sample is zero.
//
// Panics if any poll has no "Biden" or no "Trump" entry.
func printStates(states []State) {
	for _, state := range states {
		fmt.Printf("\n\nState: %s", state.Name)
		fmt.Printf("\n\tVotes: %d", state.Votes)
		fmt.Printf("\n\tHistory: %s", state.PresidentialElectoralHistory)

		// Historical base rate: fraction of recorded elections won by "R".
		republican_wins := 0.0
		for _, winner := range state.PresidentialElectoralHistory {
			if winner == "R" {
				republican_wins++
			}
		}
		n_elections := float64(len(state.PresidentialElectoralHistory))
		fmt.Printf("\n\tHistorical base rate of R win: %f", republican_wins/n_elections)

		// Report each poll on its own while pooling respondent counts for
		// the aggregate poll printed afterwards.
		biden_respondents := 0.0
		trump_respondents := 0.0
		for _, poll := range state.Polls {
			fmt.Printf("\n\tPoll: %+v", poll)
			_ = getChanceRepublicanWinFromPoll(poll, true)
			_ = getChanceRepublicanWinFromPollPlusUncertainty(poll, state, true)

			biden_pct, has_biden := poll.PollResults["Biden"]
			trump_pct, has_trump := poll.PollResults["Trump"]
			if !(has_biden && has_trump) {
				panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
			}
			biden_respondents += (biden_pct / 100.0) * float64(poll.SampleSize)
			trump_respondents += (trump_pct / 100.0) * float64(poll.SampleSize)
		}

		pooled_sample_size := biden_respondents + trump_respondents
		if pooled_sample_size == 0.0 {
			// No usable polls for this state: nothing to aggregate.
			continue
		}
		pooled_poll := Poll{
			SampleSize: int(pooled_sample_size),
			PollResults: map[string]float64{
				"Biden": 100.0 * biden_respondents / pooled_sample_size,
				"Trump": 100.0 * trump_respondents / pooled_sample_size,
			},
		}
		fmt.Printf("\n\tAggregate poll: %+v", pooled_poll)
		_ = getChanceRepublicanWinFromPoll(pooled_poll, true)
		_ = getChanceRepublicanWinFromPollPlusUncertainty(pooled_poll, state, true)
	}
}
/* Sample state by state */
// sampleFromState draws one simulated outcome for a state and returns how
// many of its electoral votes go to each party.
//
// Nebraska and Maine are handled specially because they have split their
// votes in 2 of the last 6 elections: with probability 2/6 the historical
// split is returned, otherwise the usual winner takes every vote.
//
// Every other state is winner-take-all. The Republican win probability is
// the state's historical base rate when there are no usable polls; otherwise
// it is the probability derived from one aggregate poll (all polls pooled by
// sample size) via getChanceRepublicanWinFromPollPlusUncertainty.
//
// Exactly one number is drawn from the global source r per call.
// Panics if any poll has no "Biden" or no "Trump" entry.
func sampleFromState(state State) VotesForEachParty {
	switch state.Name {
	case "Nebraska":
		/*
			2000: R
			2004: R
			2008: Split, 1 D, 4 R
			2012: R
			2016: R
			2020: Split, 1 D, 4 R
		*/
		p_split := 2.0 / 6.0
		if r.Float64() < p_split {
			return VotesForEachParty{Democrats: 1, Republicans: 4}
		}
		return VotesForEachParty{Democrats: 0, Republicans: 5}
	case "Maine":
		/*
			2000: D
			2004: D
			2008: D
			2012: D
			2016: Split: 3 D, 1 R
			2020: Split, 3 D, 1 R
		*/
		p_split := 2.0 / 6.0
		if r.Float64() < p_split {
			return VotesForEachParty{Democrats: 3, Republicans: 1}
		}
		// Unsplit, all 4 of Maine's votes go to the Democrats (the split
		// case above also totals 4).
		return VotesForEachParty{Democrats: 4, Republicans: 0}
	default:
		/* Consider the base rate for the state */
		p_baserate_republican_win := 0.0
		for _, party := range state.PresidentialElectoralHistory {
			if party == "R" {
				p_baserate_republican_win++
			}
		}
		p_baserate_republican_win = p_baserate_republican_win / float64(len(state.PresidentialElectoralHistory))
		p_republican_win := p_baserate_republican_win // if no polls

		/* Consider polls: pool them into respondent counts per candidate */
		num_biden_votes := 0.0
		num_trump_votes := 0.0
		for _, poll := range state.Polls {
			biden_percentage, biden_exists := poll.PollResults["Biden"]
			trump_percentage, trump_exists := poll.PollResults["Trump"]
			if !biden_exists || !trump_exists {
				panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
			}
			num_biden_votes += (biden_percentage / 100.0) * float64(poll.SampleSize)
			num_trump_votes += (trump_percentage / 100.0) * float64(poll.SampleSize)
		}
		aggregate_sample_size := num_biden_votes + num_trump_votes
		if aggregate_sample_size != 0.0 {
			aggregate_poll := Poll{SampleSize: int(aggregate_sample_size), PollResults: make(map[string]float64)}
			aggregate_poll.PollResults["Biden"] = 100.0 * num_biden_votes / aggregate_sample_size
			aggregate_poll.PollResults["Trump"] = 100.0 * num_trump_votes / aggregate_sample_size
			// When polls exist they fully replace the base rate. Alternatives
			// considered: the raw poll probability
			// (getChanceRepublicanWinFromPoll), or a 0.75/0.25 blend of the
			// poll probability and the base rate.
			p_republican_win = getChanceRepublicanWinFromPollPlusUncertainty(aggregate_poll, state, false)
		}

		if r.Float64() < p_republican_win {
			return VotesForEachParty{Democrats: 0, Republicans: state.Votes}
		}
		return VotesForEachParty{Democrats: state.Votes, Republicans: 0}
	}
}
/* Simulate election */
// simulateElection runs one simulated election: it samples an outcome for
// every state in turn and returns the total number of electoral votes won by
// the Republicans.
func simulateElection(states []State) int {
	total := 0
	for i := range states {
		total += sampleFromState(states[i]).Republicans
	}
	return total
}
/* Histogram */
// barString returns a histogram bar made of n full-block characters ("█").
// It returns the empty string when n is zero or negative.
func barString(n int) string {
	if n <= 0 {
		return ""
	}
	const block = "█"
	// Build the bar in one preallocated buffer; appending to a string in a
	// loop would copy the whole bar on every iteration.
	bar := make([]byte, 0, n*len(block))
	for i := 0; i < n; i++ {
		bar = append(bar, block...)
	}
	return string(bar)
}
// printElectoralCollegeHistogram prints a text histogram of simulated
// electoral-vote totals to stdout. Each sample is a vote count and is used
// directly as a bucket index, so it must lie in 0..538.
//
// Only buckets 131..399 are drawn, one line each, showing the bucket, a bar
// scaled so that the fullest bucket is 75 characters wide, the bucket's
// percentage of all samples, and the cumulative percentage. For any bucket
// outside that window holding at least 0.01% of the samples, a warning line
// is printed instead. Nothing is printed when samples is empty.
func printElectoralCollegeHistogram(samples []int) {
	// With no samples there is nothing to draw, and both the bar scaling and
	// the percentage below would divide by zero.
	if len(samples) == 0 {
		return
	}

	// One bucket per possible total, 0 through 538 inclusive: a sweep of all
	// 538 votes needs index 538, hence 539 slots.
	histogram := [539]int{}
	for _, sample := range samples {
		histogram[sample]++
	}

	max_count := 0
	for _, count := range histogram {
		if count > max_count {
			max_count = count
		}
	}

	cp := 0.0 // cumulative percentage of samples up to and including bucket i
	for i, count := range histogram {
		bar_length := (count * 75) / max_count // the fullest bucket gets a 75-character bar
		p := float64(count) / float64(len(samples)) * 100
		cp += p
		if i > 130 && i < 400 {
			fmt.Printf("[ %2d, %4d): %s %.2f%% (%.0f%%)\n", i, i+1, barString(bar_length), p, cp)
		} else if p >= 0.01 {
			// p is a percentage here, so this fires from 0.01% upwards.
			fmt.Printf(">0.01 probability outside of domain, you might want to change histogram parameters\n")
		}
	}
}
/* Load data from csvs */
// Glue code
func readStates() ([]State, error) {
var states map[string]State = make(map[string]State)
@ -205,294 +479,9 @@ func readStates() ([]State, error) {
for _, state := range states {
states_slice = append(states_slice, state)
}
return states_slice, nil
}
/* Sampling helper functions */
// getNormalCDF evaluates, at x, the cumulative distribution function of a
// normal distribution with the given mean and standard deviation, i.e. the
// probability that a draw is at most x.
func getNormalCDF(x float64, mean float64, std float64) float64 {
	// Distance from the mean in units of std*sqrt(2), the argument form
	// under which the normal CDF is (1 + erf(z)) / 2.
	scaled_distance := (x - mean) / (std * math.Sqrt2)
	erf_value := math.Erf(scaled_distance)
	return (1 + erf_value) / 2
}
// getProbabilityAboveX returns the probability that a normally distributed
// variable with the given mean and standard deviation exceeds x (the
// survival function, 1 - CDF).
//
// It is evaluated through the complementary error function instead of
// literally subtracting the CDF from 1: for x well above the mean the CDF
// rounds to exactly 1 in float64, so the subtraction would return 0 and
// discard the small but nonzero tail probability.
func getProbabilityAboveX(x float64, mean float64, std float64) float64 {
	// 1 - (1 + erf(z))/2 == erfc(z)/2
	z := (x - mean) / (std * math.Sqrt2)
	return math.Erfc(z) / 2
}
// getChanceCandidateWinsFromPollShare estimates the probability that a
// candidate's true share exceeds 50%, given the share candidate_p observed in
// a poll of poll_sample_size respondents. candidate_p is used as a fraction
// (it is compared against 0.5 and complemented against 1).
//
// The observed share is treated as normally distributed around candidate_p
// with the binomial standard error sqrt(p*(1-p)/n); see
// https://stats.stackexchange.com/questions/258879/how-to-interpret-margin-of-error-in-a-non-binary-poll
func getChanceCandidateWinsFromPollShare(candidate_p float64, poll_sample_size float64) float64 {
	variance := candidate_p * (1 - candidate_p) / poll_sample_size
	standard_error := math.Sqrt(variance)
	return getProbabilityAboveX(0.5, candidate_p, standard_error)
}
// getChanceRepublicanWinFromPoll returns the probability that Trump is ahead
// in the two-way race, judging only from the sampling error of one poll.
//
// The "Biden" and "Trump" percentages are renormalized so that together they
// sum to 1, and the sample size is scaled down by the same combined share.
// The normalized Trump share is then treated as normally distributed with the
// binomial standard error, and the result is the mass above 50%.
//
// When pretty_print is true the intermediate values are written to stdout.
// Panics if the poll has no "Biden" or no "Trump" entry.
func getChanceRepublicanWinFromPoll(poll Poll, pretty_print bool) float64 {
	biden_pct, has_biden := poll.PollResults["Biden"]
	trump_pct, has_trump := poll.PollResults["Trump"]
	if !(has_biden && has_trump) {
		// Callers are expected to have filtered polls already; a missing
		// entry is an invariant violation, caught here explicitly.
		panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
	}

	// Percentages -> fractions of all respondents.
	biden_fraction := biden_pct / 100.0
	trump_fraction := trump_pct / 100.0

	// Restrict to respondents who chose one of the two candidates.
	two_party_fraction := trump_fraction + biden_fraction
	trump_two_party := trump_fraction / two_party_fraction
	biden_two_party := biden_fraction / two_party_fraction
	effective_sample_size := two_party_fraction * float64(poll.SampleSize)

	// Binomial standard error of the two-party Trump share.
	poll_std := math.Sqrt((trump_two_party * biden_two_party) / effective_sample_size)
	win_probability := getProbabilityAboveX(0.5, trump_two_party, poll_std)

	if pretty_print {
		fmt.Printf("\n\t\tSample size: %f", effective_sample_size)
		fmt.Printf("\n\t\tMean R: %f", 100.0*trump_two_party)
		fmt.Printf("\n\t\tStd of mean R: %f", 100*poll_std)
		fmt.Printf("\n\t\tPoll says chance of R win: %f", win_probability)
	}
	return win_probability
}
// getChanceRepublicanWinFromPollPlusUncertainty returns the probability that
// Trump is ahead in the two-way race, starting from a poll's two-party Trump
// share and widening the poll's standard error with a fixed allowance for
// everything sampling error does not capture.
//
// The allowance is 5 percentage points, reduced to a third of that when the
// state's electoral history contains either 0 or 6 Republican wins. It is
// added linearly to the poll's own standard error.
//
// When pretty_print is true the intermediate values are written to stdout.
// Panics if the poll has no "Biden" or no "Trump" entry.
func getChanceRepublicanWinFromPollPlusUncertainty(poll Poll, state State, pretty_print bool) float64 {
	// How many entries of the state's recorded history went Republican.
	republican_wins := 0
	for _, winner := range state.PresidentialElectoralHistory {
		if winner == "R" {
			republican_wins++
		}
	}

	// Sampling error of the poll, restricted to Biden/Trump respondents.
	biden_pct, has_biden := poll.PollResults["Biden"]
	trump_pct, has_trump := poll.PollResults["Trump"]
	if !(has_biden && has_trump) {
		// Callers are expected to have filtered polls already; a missing
		// entry is an invariant violation, caught here explicitly.
		panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
	}
	biden_fraction := biden_pct / 100.0
	trump_fraction := trump_pct / 100.0
	two_party_fraction := trump_fraction + biden_fraction
	trump_two_party := trump_fraction / two_party_fraction
	biden_two_party := biden_fraction / two_party_fraction
	effective_sample_size := two_party_fraction * float64(poll.SampleSize)
	poll_std := math.Sqrt((trump_two_party * biden_two_party) / effective_sample_size)

	/*
		Extra uncertainty on top of the poll's sampling error. Possible sources:
		- National drift between now and the election (biggest one)
		- States more uncertain than the national average
		- Idiosyncratic factors
		- Polls not being as good as Gallup
		- Increased polarization
		Note that the polls already carry some error of their own.
	*/
	extra_std := 5.0 / 100.0
	switch republican_wins {
	case 0, 6:
		// Solid state: same party in all of the last 6 elections.
		extra_std = extra_std / 3.0
		if pretty_print {
			fmt.Printf("\n\t\tN republican wins: %d", republican_wins)
			fmt.Printf("\n\t\t=> Reducing additional uncertainty")
		}
	}

	// A flat 0.065 in place of extra_std was an earlier alternative.
	total_std := poll_std + extra_std
	win_probability := getProbabilityAboveX(0.5, trump_two_party, total_std)

	if pretty_print {
		fmt.Printf("\n\t\tStd with std_additional_uncertainty R: %f", 100*total_std)
		fmt.Printf("\n\t\tPoll plus uncertainty says chance of R win: %f", win_probability)
	}
	return win_probability
}
/* Print state by state data */
// printStates writes a report for every state to stdout: its name, electoral
// votes and electoral history, the historical share of Republican wins, the
// win-probability breakdown of each individual poll, and finally the same
// breakdown for a single aggregate poll that pools all of the state's polls
// weighted by sample size. The aggregate is omitted when the pooled
// Biden+Trump sample is zero.
//
// Panics if any poll has no "Biden" or no "Trump" entry.
func printStates(states []State) {
	for _, state := range states {
		fmt.Printf("\n\nState: %s", state.Name)
		fmt.Printf("\n\tVotes: %d", state.Votes)
		fmt.Printf("\n\tHistory: %s", state.PresidentialElectoralHistory)

		// Historical base rate: fraction of recorded elections won by "R".
		republican_wins := 0.0
		for _, winner := range state.PresidentialElectoralHistory {
			if winner == "R" {
				republican_wins++
			}
		}
		n_elections := float64(len(state.PresidentialElectoralHistory))
		fmt.Printf("\n\tHistorical base rate of R win: %f", republican_wins/n_elections)

		// Report each poll on its own while pooling respondent counts for
		// the aggregate poll printed afterwards.
		biden_respondents := 0.0
		trump_respondents := 0.0
		for _, poll := range state.Polls {
			fmt.Printf("\n\tPoll: %+v", poll)
			_ = getChanceRepublicanWinFromPoll(poll, true)
			_ = getChanceRepublicanWinFromPollPlusUncertainty(poll, state, true)

			biden_pct, has_biden := poll.PollResults["Biden"]
			trump_pct, has_trump := poll.PollResults["Trump"]
			if !(has_biden && has_trump) {
				panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
			}
			biden_respondents += (biden_pct / 100.0) * float64(poll.SampleSize)
			trump_respondents += (trump_pct / 100.0) * float64(poll.SampleSize)
		}

		pooled_sample_size := biden_respondents + trump_respondents
		if pooled_sample_size == 0.0 {
			// No usable polls for this state: nothing to aggregate.
			continue
		}
		pooled_poll := Poll{
			SampleSize: int(pooled_sample_size),
			PollResults: map[string]float64{
				"Biden": 100.0 * biden_respondents / pooled_sample_size,
				"Trump": 100.0 * trump_respondents / pooled_sample_size,
			},
		}
		fmt.Printf("\n\tAggregate poll: %+v", pooled_poll)
		_ = getChanceRepublicanWinFromPoll(pooled_poll, true)
		_ = getChanceRepublicanWinFromPollPlusUncertainty(pooled_poll, state, true)
	}
}
/* Sample state by state */
// sampleFromState draws one simulated outcome for a state and returns how
// many of its electoral votes go to each party.
//
// Nebraska and Maine are handled specially because they have split their
// votes in 2 of the last 6 elections: with probability 2/6 the historical
// split is returned, otherwise the usual winner takes every vote.
//
// Every other state is winner-take-all. The Republican win probability is
// the state's historical base rate when there are no usable polls; otherwise
// it is the probability derived from one aggregate poll (all polls pooled by
// sample size) via getChanceRepublicanWinFromPollPlusUncertainty.
//
// Exactly one number is drawn from the global source r per call.
// Panics if any poll has no "Biden" or no "Trump" entry.
func sampleFromState(state State) VotesForEachParty {
	switch state.Name {
	case "Nebraska":
		/*
			2000: R
			2004: R
			2008: Split, 1 D, 4 R
			2012: R
			2016: R
			2020: Split, 1 D, 4 R
		*/
		p_split := 2.0 / 6.0
		if r.Float64() < p_split {
			return VotesForEachParty{Democrats: 1, Republicans: 4}
		}
		return VotesForEachParty{Democrats: 0, Republicans: 5}
	case "Maine":
		/*
			2000: D
			2004: D
			2008: D
			2012: D
			2016: Split: 3 D, 1 R
			2020: Split, 3 D, 1 R
		*/
		p_split := 2.0 / 6.0
		if r.Float64() < p_split {
			return VotesForEachParty{Democrats: 3, Republicans: 1}
		}
		// Unsplit, all 4 of Maine's votes go to the Democrats (the split
		// case above also totals 4).
		return VotesForEachParty{Democrats: 4, Republicans: 0}
	default:
		/* Consider the base rate for the state */
		p_baserate_republican_win := 0.0
		for _, party := range state.PresidentialElectoralHistory {
			if party == "R" {
				p_baserate_republican_win++
			}
		}
		p_baserate_republican_win = p_baserate_republican_win / float64(len(state.PresidentialElectoralHistory))
		p_republican_win := p_baserate_republican_win // if no polls

		/* Consider polls: pool them into respondent counts per candidate */
		num_biden_votes := 0.0
		num_trump_votes := 0.0
		for _, poll := range state.Polls {
			biden_percentage, biden_exists := poll.PollResults["Biden"]
			trump_percentage, trump_exists := poll.PollResults["Trump"]
			if !biden_exists || !trump_exists {
				panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
			}
			num_biden_votes += (biden_percentage / 100.0) * float64(poll.SampleSize)
			num_trump_votes += (trump_percentage / 100.0) * float64(poll.SampleSize)
		}
		aggregate_sample_size := num_biden_votes + num_trump_votes
		if aggregate_sample_size != 0.0 {
			aggregate_poll := Poll{SampleSize: int(aggregate_sample_size), PollResults: make(map[string]float64)}
			aggregate_poll.PollResults["Biden"] = 100.0 * num_biden_votes / aggregate_sample_size
			aggregate_poll.PollResults["Trump"] = 100.0 * num_trump_votes / aggregate_sample_size
			// When polls exist they fully replace the base rate. Alternatives
			// considered: the raw poll probability
			// (getChanceRepublicanWinFromPoll), or a 0.75/0.25 blend of the
			// poll probability and the base rate.
			p_republican_win = getChanceRepublicanWinFromPollPlusUncertainty(aggregate_poll, state, false)
		}

		if r.Float64() < p_republican_win {
			return VotesForEachParty{Democrats: 0, Republicans: state.Votes}
		}
		return VotesForEachParty{Democrats: state.Votes, Republicans: 0}
	}
}
/* Simulate election */
// simulateElection runs one simulated election: it samples an outcome for
// every state in turn and returns the total number of electoral votes won by
// the Republicans.
func simulateElection(states []State) int {
	total := 0
	for i := range states {
		total += sampleFromState(states[i]).Republicans
	}
	return total
}
/* Histogram */
// barString returns a histogram bar made of n full-block characters ("█").
// It returns the empty string when n is zero or negative.
func barString(n int) string {
	if n <= 0 {
		return ""
	}
	const block = "█"
	// Build the bar in one preallocated buffer; appending to a string in a
	// loop would copy the whole bar on every iteration.
	bar := make([]byte, 0, n*len(block))
	for i := 0; i < n; i++ {
		bar = append(bar, block...)
	}
	return string(bar)
}
// printElectoralCollegeHistogram prints a text histogram of simulated
// electoral-vote totals to stdout. Each sample is a vote count and is used
// directly as a bucket index, so it must lie in 0..538.
//
// Only buckets 131..399 are drawn, one line each, showing the bucket, a bar
// scaled so that the fullest bucket is 75 characters wide, the bucket's
// percentage of all samples, and the cumulative percentage. For any bucket
// outside that window holding at least 0.01% of the samples, a warning line
// is printed instead. Nothing is printed when samples is empty.
func printElectoralCollegeHistogram(samples []int) {
	// With no samples there is nothing to draw, and both the bar scaling and
	// the percentage below would divide by zero.
	if len(samples) == 0 {
		return
	}

	// One bucket per possible total, 0 through 538 inclusive: a sweep of all
	// 538 votes needs index 538, hence 539 slots.
	histogram := [539]int{}
	for _, sample := range samples {
		histogram[sample]++
	}

	max_count := 0
	for _, count := range histogram {
		if count > max_count {
			max_count = count
		}
	}

	cp := 0.0 // cumulative percentage of samples up to and including bucket i
	for i, count := range histogram {
		bar_length := (count * 75) / max_count // the fullest bucket gets a 75-character bar
		p := float64(count) / float64(len(samples)) * 100
		cp += p
		if i > 130 && i < 400 {
			fmt.Printf("[ %2d, %4d): %s %.2f%% (%.0f%%)\n", i, i+1, barString(bar_length), p, cp)
		} else if p >= 0.01 {
			// p is a percentage here, so this fires from 0.01% upwards.
			fmt.Printf(">0.01 probability outside of domain, you might want to change histogram parameters\n")
		}
	}
}
func main() {
states, err := readStates()
if err != nil {
@ -518,5 +507,4 @@ func main() {
p_republicans = p_republicans / float64(n_sims)
fmt.Printf("\n%% republicans: %f\n", p_republicans)
}