move glue code function to the end

2024-04-14 21:42:38 -04:00 · 2024-04-14 21:42:38 -04:00 · cbda7b1e5c
commit cbda7b1e5c
parent 212f72f596
1 changed files with 275 additions and 287 deletions
--- a/main.go
+++ b/main.go
@ -34,9 +34,283 @@ type Poll struct {
 /* Globals */
 var r = rand.New(rand.NewPCG(uint64(100), uint64(2224)))
-var dev = false
+
 /* Sampling helper functions */
 // getNormalCDF returns the cumulative distribution function of a normal
 // distribution with the given mean and standard deviation, evaluated at x.
 // It relies on the identity CDF(x) = (1 + erf((x - mean) / (std * sqrt(2)))) / 2.
 func getNormalCDF(x float64, mean float64, std float64) float64 {
 	scale := std * math.Sqrt2
 	standardized := (x - mean) / scale
 	return (1 + math.Erf(standardized)) / 2
 }
 // getProbabilityAboveX returns the probability that a normally distributed
 // variable with the given mean and standard deviation exceeds x, i.e. the
 // complement of the normal CDF at x.
 func getProbabilityAboveX(x float64, mean float64, std float64) float64 {
 	pAtOrBelow := getNormalCDF(x, mean, std)
 	return 1 - pAtOrBelow
 }
 // getChanceCandidateWinsFromPollShare returns the probability that a candidate
 // ends up above 50%, treating the polled share candidate_p as the mean of a
 // normal distribution whose standard deviation is sqrt(p * (1 - p) / n), with
 // n = poll_sample_size.
 // See https://stats.stackexchange.com/questions/258879/how-to-interpret-margin-of-error-in-a-non-binary-poll
 func getChanceCandidateWinsFromPollShare(candidate_p float64, poll_sample_size float64) float64 {
 	variance := candidate_p * (1 - candidate_p) / poll_sample_size
 	return getProbabilityAboveX(0.5, candidate_p, math.Sqrt(variance))
 }
 // getChanceRepublicanWinFromPoll estimates the probability of a Republican win
 // implied by a single poll, using only the poll's own sampling error.
 //
 // The Biden and Trump percentages are renormalized so that they sum to one, and
 // the sample size is scaled down by their combined share. Trump's normalized
 // share is then used as the mean of a normal distribution with standard
 // deviation sqrt(p * (1 - p) / n), and the result is the mass above 50%.
 //
 // It panics if the poll has no "Biden" or no "Trump" entry. When pretty_print
 // is true, the intermediate figures are printed to standard output.
 func getChanceRepublicanWinFromPoll(poll Poll, pretty_print bool) float64 {
 	bidenPct, hasBiden := poll.PollResults["Biden"]
 	trumpPct, hasTrump := poll.PollResults["Trump"]
 	if !(hasBiden && hasTrump) {
 		panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
 	}
 	// Turn percentages into fractions, then renormalize to a two-way race.
 	bidenShare := bidenPct / 100.0
 	trumpShare := trumpPct / 100.0
 	twoPartyShare := trumpShare + bidenShare
 	trumpTwoParty := trumpShare / twoPartyShare
 	bidenTwoParty := bidenShare / twoPartyShare
 	// Scale the sample size by the combined Biden + Trump share.
 	effectiveSampleSize := twoPartyShare * float64(poll.SampleSize)
 	stdErr := math.Sqrt((trumpTwoParty * bidenTwoParty) / effectiveSampleSize)
 	pWin := getProbabilityAboveX(0.5, trumpTwoParty, stdErr)
 	if !pretty_print {
 		return pWin
 	}
 	fmt.Printf("\n\t\tSample size: %f", effectiveSampleSize)
 	fmt.Printf("\n\t\tMean R: %f", 100.0*trumpTwoParty)
 	fmt.Printf("\n\t\tStd of mean R: %f", 100*stdErr)
 	fmt.Printf("\n\t\tPoll says chance of R win: %f", pWin)
 	return pWin
 }
 // getChanceRepublicanWinFromPollPlusUncertainty estimates the probability of a
 // Republican win from a poll, widening the poll's own sampling error with an
 // additional, hand-picked uncertainty term.
 //
 // The extra term is 5 percentage points, reduced to a third of that when the
 // state's electoral history contains either zero or exactly six "R" entries.
 // The extra term is added directly to the poll's standard error (the two are
 // summed, not combined in quadrature).
 //
 // It panics if the poll has no "Biden" or no "Trump" entry. When pretty_print
 // is true, the intermediate figures are printed to standard output.
 func getChanceRepublicanWinFromPollPlusUncertainty(poll Poll, state State, pretty_print bool) float64 {
 	// Count the "R" entries in the state's electoral history.
 	republicanWins := 0
 	for _, winner := range state.PresidentialElectoralHistory {
 		if winner == "R" {
 			republicanWins++
 		}
 	}
 	// Sampling error of the poll itself, on the two-way Biden/Trump split.
 	bidenPct, hasBiden := poll.PollResults["Biden"]
 	trumpPct, hasTrump := poll.PollResults["Trump"]
 	if !(hasBiden && hasTrump) {
 		panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
 	}
 	bidenShare := bidenPct / 100.0
 	trumpShare := trumpPct / 100.0
 	twoPartyShare := trumpShare + bidenShare
 	trumpTwoParty := trumpShare / twoPartyShare
 	bidenTwoParty := bidenShare / twoPartyShare
 	effectiveSampleSize := twoPartyShare * float64(poll.SampleSize)
 	pollStdErr := math.Sqrt((trumpTwoParty * bidenTwoParty) / effectiveSampleSize)
 	/*
 		Additional uncertainty injected on top of the poll. Possible factors:
 			- National drift between now and the election (biggest one)
 			- States more uncertain than the national average
 			- Idiosyncratic factors
 			- Polls not being as good as gallup
 			- Increased polarization
 		Also note that the polls already carry some error of their own.
 	*/
 	extraStd := 5.0 / 100.0
 	// The history is compared against 0 and 6, i.e. a clean sweep either way
 	// over six recorded elections.
 	isSolidState := republicanWins == 0 || republicanWins == 6
 	if isSolidState {
 		extraStd = extraStd / 3.0
 		if pretty_print {
 			fmt.Printf("\n\t\tN republican wins: %d", republicanWins)
 			fmt.Printf("\n\t\t=> Reducing additional uncertainty")
 		}
 	}
 	totalStd := pollStdErr + extraStd
 	// totalStd := pollStdErr + 0.065
 	pWin := getProbabilityAboveX(0.5, trumpTwoParty, totalStd)
 	if !pretty_print {
 		return pWin
 	}
 	fmt.Printf("\n\t\tStd with std_additional_uncertainty R: %f", 100*totalStd)
 	fmt.Printf("\n\t\tPoll plus uncertainty says chance of R win: %f", pWin)
 	return pWin
 }
 /* Print state by state data */
 // printStates prints a report for every state to standard output: its name,
 // votes, electoral history and historical Republican base rate, followed by
 // the win-probability breakdown for each individual poll and, when the polls
 // carry any Biden/Trump weight, for a single sample-size-weighted aggregate
 // poll.
 //
 // It panics if any poll has no "Biden" or no "Trump" entry.
 func printStates(states []State) {
 	for _, state := range states {
 		fmt.Printf("\n\nState: %s", state.Name)
 		fmt.Printf("\n\tVotes: %d", state.Votes)
 		fmt.Printf("\n\tHistory: %s", state.PresidentialElectoralHistory)
 		// Fraction of entries in the history that are "R".
 		republicanWins := 0.0
 		for _, winner := range state.PresidentialElectoralHistory {
 			if winner == "R" {
 				republicanWins += 1
 			}
 		}
 		electionCount := float64(len(state.PresidentialElectoralHistory))
 		fmt.Printf("\n\tHistorical base rate of R win: %f", republicanWins/electionCount)
 		// Per-poll breakdown; the helpers do the printing, so their results are discarded.
 		for _, poll := range state.Polls {
 			fmt.Printf("\n\tPoll: %+v", poll)
 			_ = getChanceRepublicanWinFromPoll(poll, true)
 			_ = getChanceRepublicanWinFromPollPlusUncertainty(poll, state, true)
 		}
 		// Pool all polls, weighting each percentage by its poll's sample size.
 		bidenVotes, trumpVotes := 0.0, 0.0
 		for _, poll := range state.Polls {
 			bidenPct, hasBiden := poll.PollResults["Biden"]
 			trumpPct, hasTrump := poll.PollResults["Trump"]
 			if !(hasBiden && hasTrump) {
 				panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
 			}
 			respondents := float64(poll.SampleSize)
 			bidenVotes += (bidenPct / 100.0) * respondents
 			trumpVotes += (trumpPct / 100.0) * respondents
 		}
 		pooledSampleSize := bidenVotes + trumpVotes
 		if pooledSampleSize == 0.0 {
 			// No polling weight for this state: nothing to aggregate.
 			continue
 		}
 		pooledPoll := Poll{
 			SampleSize: int(pooledSampleSize),
 			PollResults: map[string]float64{
 				"Biden": 100.0 * bidenVotes / pooledSampleSize,
 				"Trump": 100.0 * trumpVotes / pooledSampleSize,
 			},
 		}
 		fmt.Printf("\n\tAggregate poll: %+v", pooledPoll)
 		_ = getChanceRepublicanWinFromPoll(pooledPoll, true)
 		_ = getChanceRepublicanWinFromPollPlusUncertainty(pooledPoll, state, true)
 	}
 }
 /* Sample state by state */
 // sampleFromState draws one simulated outcome for a single state and returns
 // how many of its votes go to each party.
 //
 // Nebraska and Maine are handled separately, because they can split their
 // votes: each is modelled as splitting with probability 2/6, the frequency
 // listed in the per-state history below, and otherwise awarding all its votes
 // to one party. Every other state is winner-takes-all: the Republican win
 // probability comes from the sample-size-weighted aggregate of the state's
 // polls (with additional uncertainty), or from the historical base rate when
 // the polls carry no Biden/Trump weight.
 //
 // Randomness comes from the package-level generator r; exactly one draw is
 // consumed per call. It panics if any poll has no "Biden" or no "Trump" entry.
 func sampleFromState(state State) VotesForEachParty {
 	switch state.Name {
 	case "Nebraska":
 		/*
 			2000: R
 			2004: R
 			2008: Split, 1 D, 4 R
 			2012: R
 			2016: R
 			2020: Split, 1 D, 4 R
 		*/
 		p_split := 2.0 / 6.0
 		if r.Float64() < p_split {
 			return VotesForEachParty{Democrats: 1, Republicans: 4}
 		}
 		return VotesForEachParty{Democrats: 0, Republicans: 5}
 	case "Maine":
 		/*
 			2000: D
 			2004: D
 			2008: D
 			2012: D
 			2016: Split: 3 D, 1 R
 			2020: Split, 3 D, 1 R
 		*/
 		p_split := 2.0 / 6.0
 		if r.Float64() < p_split {
 			return VotesForEachParty{Democrats: 3, Republicans: 1}
 		}
 		// Without a split all 4 of Maine's votes go to the Democrats, matching
 		// the 3 + 1 total of the split case.
 		return VotesForEachParty{Democrats: 4, Republicans: 0}
 	default:
 		/* Consider the base rate for the state */
 		p_baserate_republican_win := 0.0
 		for _, party := range state.PresidentialElectoralHistory {
 			if party == "R" {
 				p_baserate_republican_win++
 			}
 		}
 		p_baserate_republican_win = p_baserate_republican_win / float64(len(state.PresidentialElectoralHistory))
 		p_republican_win := p_baserate_republican_win // if no polls
 		/* Consider polls: pool them, weighting each percentage by sample size */
 		num_biden_votes := 0.0
 		num_trump_votes := 0.0
 		for _, poll := range state.Polls {
 			biden_percentage, biden_exists := poll.PollResults["Biden"]
 			trump_percentage, trump_exists := poll.PollResults["Trump"]
 			if !biden_exists || !trump_exists {
 				panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
 			}
 			num_biden_votes += (biden_percentage / 100.0) * float64(poll.SampleSize)
 			num_trump_votes += (trump_percentage / 100.0) * float64(poll.SampleSize)
 		}
 		aggregate_sample_size := num_biden_votes + num_trump_votes
 		if aggregate_sample_size != 0.0 {
 			var aggregate_poll = Poll{SampleSize: int(aggregate_sample_size), PollResults: make(map[string]float64)}
 			aggregate_poll.PollResults["Biden"] = 100.0 * num_biden_votes / aggregate_sample_size
 			aggregate_poll.PollResults["Trump"] = 100.0 * num_trump_votes / aggregate_sample_size
 			// The polls fully replace the base rate; the two are not blended.
 			p_republican_win = getChanceRepublicanWinFromPollPlusUncertainty(aggregate_poll, state, false)
 		}
 		if r.Float64() < p_republican_win {
 			return VotesForEachParty{Democrats: 0, Republicans: state.Votes}
 		}
 		return VotesForEachParty{Democrats: state.Votes, Republicans: 0}
 	}
 }
 /* Simulate election */
 // simulateElection runs one simulated election: it samples every state once,
 // in slice order, and returns the total number of votes awarded to the
 // Republicans.
 func simulateElection(states []State) int {
 	republicanTotal := 0
 	for i := range states {
 		republicanTotal += sampleFromState(states[i]).Republicans
 	}
 	return republicanTotal
 }
 /* Histogram */
 // barString returns a bar made of n full-block characters ("█"). A zero or
 // negative n yields the empty string.
 func barString(n int) string {
 	if n <= 0 {
 		return ""
 	}
 	blocks := make([]rune, n)
 	for i := range blocks {
 		blocks[i] = '█'
 	}
 	return string(blocks)
 }
 // printElectoralCollegeHistogram prints a text histogram of simulated
 // Republican vote totals to standard output.
 //
 // Each element of samples is one simulated total and must lie in 0..538.
 // For every total strictly between 130 and 400 it prints one line with a bar
 // scaled so that the most frequent total is 75 characters wide, the
 // percentage of samples with that total, and the cumulative percentage so
 // far. For every total outside that window that still holds at least 0.01%
 // of the samples, a warning line is printed instead.
 //
 // An empty samples slice prints nothing.
 func printElectoralCollegeHistogram(samples []int) {
 	// Nothing to draw, and scaling by the largest bin would divide by zero.
 	if len(samples) == 0 {
 		return
 	}
 	// Totals run from 0 to 538 inclusive, so 539 bins are needed; with only
 	// 538 a clean sweep would index past the end of the array.
 	const max_electoral_votes = 538
 	const max_bar_length = 75
 	histogram := [max_electoral_votes + 1]int{}
 	for _, sample := range samples {
 		histogram[sample]++
 	}
 	max_count := 0
 	for _, count := range histogram {
 		if count > max_count {
 			max_count = count
 		}
 	}
 	cp := 0.0 // cumulative percentage of samples seen so far
 	for i, count := range histogram {
 		bar_length := (count * max_bar_length) / max_count // the fullest bin gets max_bar_length characters
 		p := float64(count) / float64(len(samples)) * 100
 		cp += p
 		if i > 130 && i < 400 {
 			fmt.Printf("[ %2d, %4d): %s %.2f%% (%.0f%%)\n", i, i+1, barString(bar_length), p, cp)
 		} else if p >= 0.01 {
 			// p is a percentage here, so the threshold is 0.01% of the samples.
 			fmt.Printf(">0.01 probability outside of domain, you might want to change histogram parameters\n")
 		}
 	}
 }
 /* Load data from csvs */
 // Glue code
 func readStates() ([]State, error) {
 	var states map[string]State = make(map[string]State)
@ -205,294 +479,9 @@ func readStates() ([]State, error) {
 	for _, state := range states {
 		states_slice = append(states_slice, state)
 	}
 	return states_slice, nil
 }
 /* Sampling helper functions */
 // getNormalCDF returns the cumulative distribution function of a normal
 // distribution with the given mean and standard deviation, evaluated at x.
 // It relies on the identity CDF(x) = (1 + erf((x - mean) / (std * sqrt(2)))) / 2.
 func getNormalCDF(x float64, mean float64, std float64) float64 {
 	scale := std * math.Sqrt2
 	standardized := (x - mean) / scale
 	return (1 + math.Erf(standardized)) / 2
 }
 // getProbabilityAboveX returns the probability that a normally distributed
 // variable with the given mean and standard deviation exceeds x, i.e. the
 // complement of the normal CDF at x.
 func getProbabilityAboveX(x float64, mean float64, std float64) float64 {
 	pAtOrBelow := getNormalCDF(x, mean, std)
 	return 1 - pAtOrBelow
 }
 // getChanceCandidateWinsFromPollShare returns the probability that a candidate
 // ends up above 50%, treating the polled share candidate_p as the mean of a
 // normal distribution whose standard deviation is sqrt(p * (1 - p) / n), with
 // n = poll_sample_size.
 // See https://stats.stackexchange.com/questions/258879/how-to-interpret-margin-of-error-in-a-non-binary-poll
 func getChanceCandidateWinsFromPollShare(candidate_p float64, poll_sample_size float64) float64 {
 	variance := candidate_p * (1 - candidate_p) / poll_sample_size
 	return getProbabilityAboveX(0.5, candidate_p, math.Sqrt(variance))
 }
 // getChanceRepublicanWinFromPoll estimates the probability of a Republican win
 // implied by a single poll, using only the poll's own sampling error.
 //
 // The Biden and Trump percentages are renormalized so that they sum to one, and
 // the sample size is scaled down by their combined share. Trump's normalized
 // share is then used as the mean of a normal distribution with standard
 // deviation sqrt(p * (1 - p) / n), and the result is the mass above 50%.
 //
 // It panics if the poll has no "Biden" or no "Trump" entry. When pretty_print
 // is true, the intermediate figures are printed to standard output.
 func getChanceRepublicanWinFromPoll(poll Poll, pretty_print bool) float64 {
 	bidenPct, hasBiden := poll.PollResults["Biden"]
 	trumpPct, hasTrump := poll.PollResults["Trump"]
 	if !(hasBiden && hasTrump) {
 		panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
 	}
 	// Turn percentages into fractions, then renormalize to a two-way race.
 	bidenShare := bidenPct / 100.0
 	trumpShare := trumpPct / 100.0
 	twoPartyShare := trumpShare + bidenShare
 	trumpTwoParty := trumpShare / twoPartyShare
 	bidenTwoParty := bidenShare / twoPartyShare
 	// Scale the sample size by the combined Biden + Trump share.
 	effectiveSampleSize := twoPartyShare * float64(poll.SampleSize)
 	stdErr := math.Sqrt((trumpTwoParty * bidenTwoParty) / effectiveSampleSize)
 	pWin := getProbabilityAboveX(0.5, trumpTwoParty, stdErr)
 	if !pretty_print {
 		return pWin
 	}
 	fmt.Printf("\n\t\tSample size: %f", effectiveSampleSize)
 	fmt.Printf("\n\t\tMean R: %f", 100.0*trumpTwoParty)
 	fmt.Printf("\n\t\tStd of mean R: %f", 100*stdErr)
 	fmt.Printf("\n\t\tPoll says chance of R win: %f", pWin)
 	return pWin
 }
 // getChanceRepublicanWinFromPollPlusUncertainty estimates the probability of a
 // Republican win from a poll, widening the poll's own sampling error with an
 // additional, hand-picked uncertainty term.
 //
 // The extra term is 5 percentage points, reduced to a third of that when the
 // state's electoral history contains either zero or exactly six "R" entries.
 // The extra term is added directly to the poll's standard error (the two are
 // summed, not combined in quadrature).
 //
 // It panics if the poll has no "Biden" or no "Trump" entry. When pretty_print
 // is true, the intermediate figures are printed to standard output.
 func getChanceRepublicanWinFromPollPlusUncertainty(poll Poll, state State, pretty_print bool) float64 {
 	// Count the "R" entries in the state's electoral history.
 	republicanWins := 0
 	for _, winner := range state.PresidentialElectoralHistory {
 		if winner == "R" {
 			republicanWins++
 		}
 	}
 	// Sampling error of the poll itself, on the two-way Biden/Trump split.
 	bidenPct, hasBiden := poll.PollResults["Biden"]
 	trumpPct, hasTrump := poll.PollResults["Trump"]
 	if !(hasBiden && hasTrump) {
 		panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
 	}
 	bidenShare := bidenPct / 100.0
 	trumpShare := trumpPct / 100.0
 	twoPartyShare := trumpShare + bidenShare
 	trumpTwoParty := trumpShare / twoPartyShare
 	bidenTwoParty := bidenShare / twoPartyShare
 	effectiveSampleSize := twoPartyShare * float64(poll.SampleSize)
 	pollStdErr := math.Sqrt((trumpTwoParty * bidenTwoParty) / effectiveSampleSize)
 	/*
 		Additional uncertainty injected on top of the poll. Possible factors:
 			- National drift between now and the election (biggest one)
 			- States more uncertain than the national average
 			- Idiosyncratic factors
 			- Polls not being as good as gallup
 			- Increased polarization
 		Also note that the polls already carry some error of their own.
 	*/
 	extraStd := 5.0 / 100.0
 	// The history is compared against 0 and 6, i.e. a clean sweep either way
 	// over six recorded elections.
 	isSolidState := republicanWins == 0 || republicanWins == 6
 	if isSolidState {
 		extraStd = extraStd / 3.0
 		if pretty_print {
 			fmt.Printf("\n\t\tN republican wins: %d", republicanWins)
 			fmt.Printf("\n\t\t=> Reducing additional uncertainty")
 		}
 	}
 	totalStd := pollStdErr + extraStd
 	// totalStd := pollStdErr + 0.065
 	pWin := getProbabilityAboveX(0.5, trumpTwoParty, totalStd)
 	if !pretty_print {
 		return pWin
 	}
 	fmt.Printf("\n\t\tStd with std_additional_uncertainty R: %f", 100*totalStd)
 	fmt.Printf("\n\t\tPoll plus uncertainty says chance of R win: %f", pWin)
 	return pWin
 }
 /* Print state by state data */
 // printStates prints a report for every state to standard output: its name,
 // votes, electoral history and historical Republican base rate, followed by
 // the win-probability breakdown for each individual poll and, when the polls
 // carry any Biden/Trump weight, for a single sample-size-weighted aggregate
 // poll.
 //
 // It panics if any poll has no "Biden" or no "Trump" entry.
 func printStates(states []State) {
 	for _, state := range states {
 		fmt.Printf("\n\nState: %s", state.Name)
 		fmt.Printf("\n\tVotes: %d", state.Votes)
 		fmt.Printf("\n\tHistory: %s", state.PresidentialElectoralHistory)
 		// Fraction of entries in the history that are "R".
 		republicanWins := 0.0
 		for _, winner := range state.PresidentialElectoralHistory {
 			if winner == "R" {
 				republicanWins += 1
 			}
 		}
 		electionCount := float64(len(state.PresidentialElectoralHistory))
 		fmt.Printf("\n\tHistorical base rate of R win: %f", republicanWins/electionCount)
 		// Per-poll breakdown; the helpers do the printing, so their results are discarded.
 		for _, poll := range state.Polls {
 			fmt.Printf("\n\tPoll: %+v", poll)
 			_ = getChanceRepublicanWinFromPoll(poll, true)
 			_ = getChanceRepublicanWinFromPollPlusUncertainty(poll, state, true)
 		}
 		// Pool all polls, weighting each percentage by its poll's sample size.
 		bidenVotes, trumpVotes := 0.0, 0.0
 		for _, poll := range state.Polls {
 			bidenPct, hasBiden := poll.PollResults["Biden"]
 			trumpPct, hasTrump := poll.PollResults["Trump"]
 			if !(hasBiden && hasTrump) {
 				panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
 			}
 			respondents := float64(poll.SampleSize)
 			bidenVotes += (bidenPct / 100.0) * respondents
 			trumpVotes += (trumpPct / 100.0) * respondents
 		}
 		pooledSampleSize := bidenVotes + trumpVotes
 		if pooledSampleSize == 0.0 {
 			// No polling weight for this state: nothing to aggregate.
 			continue
 		}
 		pooledPoll := Poll{
 			SampleSize: int(pooledSampleSize),
 			PollResults: map[string]float64{
 				"Biden": 100.0 * bidenVotes / pooledSampleSize,
 				"Trump": 100.0 * trumpVotes / pooledSampleSize,
 			},
 		}
 		fmt.Printf("\n\tAggregate poll: %+v", pooledPoll)
 		_ = getChanceRepublicanWinFromPoll(pooledPoll, true)
 		_ = getChanceRepublicanWinFromPollPlusUncertainty(pooledPoll, state, true)
 	}
 }
 /* Sample state by state */
 // sampleFromState draws one simulated outcome for a single state and returns
 // how many of its votes go to each party.
 //
 // Nebraska and Maine are handled separately, because they can split their
 // votes: each is modelled as splitting with probability 2/6, the frequency
 // listed in the per-state history below, and otherwise awarding all its votes
 // to one party. Every other state is winner-takes-all: the Republican win
 // probability comes from the sample-size-weighted aggregate of the state's
 // polls (with additional uncertainty), or from the historical base rate when
 // the polls carry no Biden/Trump weight.
 //
 // Randomness comes from the package-level generator r; exactly one draw is
 // consumed per call. It panics if any poll has no "Biden" or no "Trump" entry.
 func sampleFromState(state State) VotesForEachParty {
 	switch state.Name {
 	case "Nebraska":
 		/*
 			2000: R
 			2004: R
 			2008: Split, 1 D, 4 R
 			2012: R
 			2016: R
 			2020: Split, 1 D, 4 R
 		*/
 		p_split := 2.0 / 6.0
 		if r.Float64() < p_split {
 			return VotesForEachParty{Democrats: 1, Republicans: 4}
 		}
 		return VotesForEachParty{Democrats: 0, Republicans: 5}
 	case "Maine":
 		/*
 			2000: D
 			2004: D
 			2008: D
 			2012: D
 			2016: Split: 3 D, 1 R
 			2020: Split, 3 D, 1 R
 		*/
 		p_split := 2.0 / 6.0
 		if r.Float64() < p_split {
 			return VotesForEachParty{Democrats: 3, Republicans: 1}
 		}
 		// Without a split all 4 of Maine's votes go to the Democrats, matching
 		// the 3 + 1 total of the split case.
 		return VotesForEachParty{Democrats: 4, Republicans: 0}
 	default:
 		/* Consider the base rate for the state */
 		p_baserate_republican_win := 0.0
 		for _, party := range state.PresidentialElectoralHistory {
 			if party == "R" {
 				p_baserate_republican_win++
 			}
 		}
 		p_baserate_republican_win = p_baserate_republican_win / float64(len(state.PresidentialElectoralHistory))
 		p_republican_win := p_baserate_republican_win // if no polls
 		/* Consider polls: pool them, weighting each percentage by sample size */
 		num_biden_votes := 0.0
 		num_trump_votes := 0.0
 		for _, poll := range state.Polls {
 			biden_percentage, biden_exists := poll.PollResults["Biden"]
 			trump_percentage, trump_exists := poll.PollResults["Trump"]
 			if !biden_exists || !trump_exists {
 				panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
 			}
 			num_biden_votes += (biden_percentage / 100.0) * float64(poll.SampleSize)
 			num_trump_votes += (trump_percentage / 100.0) * float64(poll.SampleSize)
 		}
 		aggregate_sample_size := num_biden_votes + num_trump_votes
 		if aggregate_sample_size != 0.0 {
 			var aggregate_poll = Poll{SampleSize: int(aggregate_sample_size), PollResults: make(map[string]float64)}
 			aggregate_poll.PollResults["Biden"] = 100.0 * num_biden_votes / aggregate_sample_size
 			aggregate_poll.PollResults["Trump"] = 100.0 * num_trump_votes / aggregate_sample_size
 			// The polls fully replace the base rate; the two are not blended.
 			p_republican_win = getChanceRepublicanWinFromPollPlusUncertainty(aggregate_poll, state, false)
 		}
 		if r.Float64() < p_republican_win {
 			return VotesForEachParty{Democrats: 0, Republicans: state.Votes}
 		}
 		return VotesForEachParty{Democrats: state.Votes, Republicans: 0}
 	}
 }
 /* Simulate election */
 // simulateElection runs one simulated election: it samples every state once,
 // in slice order, and returns the total number of votes awarded to the
 // Republicans.
 func simulateElection(states []State) int {
 	republicanTotal := 0
 	for i := range states {
 		republicanTotal += sampleFromState(states[i]).Republicans
 	}
 	return republicanTotal
 }
 /* Histogram */
 // barString returns a bar made of n full-block characters ("█"). A zero or
 // negative n yields the empty string.
 func barString(n int) string {
 	if n <= 0 {
 		return ""
 	}
 	blocks := make([]rune, n)
 	for i := range blocks {
 		blocks[i] = '█'
 	}
 	return string(blocks)
 }
 // printElectoralCollegeHistogram prints a text histogram of simulated
 // Republican vote totals to standard output.
 //
 // Each element of samples is one simulated total and must lie in 0..538.
 // For every total strictly between 130 and 400 it prints one line with a bar
 // scaled so that the most frequent total is 75 characters wide, the
 // percentage of samples with that total, and the cumulative percentage so
 // far. For every total outside that window that still holds at least 0.01%
 // of the samples, a warning line is printed instead.
 //
 // An empty samples slice prints nothing.
 func printElectoralCollegeHistogram(samples []int) {
 	// Nothing to draw, and scaling by the largest bin would divide by zero.
 	if len(samples) == 0 {
 		return
 	}
 	// Totals run from 0 to 538 inclusive, so 539 bins are needed; with only
 	// 538 a clean sweep would index past the end of the array.
 	const max_electoral_votes = 538
 	const max_bar_length = 75
 	histogram := [max_electoral_votes + 1]int{}
 	for _, sample := range samples {
 		histogram[sample]++
 	}
 	max_count := 0
 	for _, count := range histogram {
 		if count > max_count {
 			max_count = count
 		}
 	}
 	cp := 0.0 // cumulative percentage of samples seen so far
 	for i, count := range histogram {
 		bar_length := (count * max_bar_length) / max_count // the fullest bin gets max_bar_length characters
 		p := float64(count) / float64(len(samples)) * 100
 		cp += p
 		if i > 130 && i < 400 {
 			fmt.Printf("[ %2d, %4d): %s %.2f%% (%.0f%%)\n", i, i+1, barString(bar_length), p, cp)
 		} else if p >= 0.01 {
 			// p is a percentage here, so the threshold is 0.01% of the samples.
 			fmt.Printf(">0.01 probability outside of domain, you might want to change histogram parameters\n")
 		}
 	}
 }
 func main() {
 	states, err := readStates()
 	if err != nil {
@ -518,5 +507,4 @@ func main() {
 	p_republicans = p_republicans / float64(n_sims)
 	fmt.Printf("\n%% republicans: %f\n", p_republicans)
 }