From cbda7b1e5c1bbd68ae7263ffc59be4baedca6fa6 Mon Sep 17 00:00:00 2001
From: NunoSempere <nuno.semperelh@protonmail.com>
Date: Sun, 14 Apr 2024 21:42:38 -0400
Subject: [PATCH] move glue code function to the end

---
 main.go | 562 +++++++++++++++++++++++++++-----------------------------
 1 file changed, 275 insertions(+), 287 deletions(-)

diff --git a/main.go b/main.go
index 36e0d58..569cd4e 100644
--- a/main.go
+++ b/main.go
@@ -34,9 +34,283 @@ type Poll struct {
 
 /* Globals */
 var r = rand.New(rand.NewPCG(uint64(100), uint64(2224)))
-var dev = false
+
+/* Sampling helper functions */
+func getNormalCDF(x float64, mean float64, std float64) float64 {
+	erf_term := (x - mean) / (std * math.Sqrt2)
+	return (1 + math.Erf(erf_term)) / 2
+}
+
+func getProbabilityAboveX(x float64, mean float64, std float64) float64 {
+	return 1 - getNormalCDF(x, mean, std)
+}
+
+func getChanceCandidateWinsFromPollShare(candidate_p float64, poll_sample_size float64) float64 {
+	std := math.Sqrt(candidate_p * (1 - candidate_p) / poll_sample_size) // https://stats.stackexchange.com/questions/258879/how-to-interpret-margin-of-error-in-a-non-binary-poll
+	return getProbabilityAboveX(0.5, candidate_p, std)
+}
+
+func getChanceRepublicanWinFromPoll(poll Poll, pretty_print bool) float64 {
+	biden_percentage, biden_exists := poll.PollResults["Biden"]
+	trump_percentage, trump_exists := poll.PollResults["Trump"]
+	if !biden_exists || !trump_exists {
+		panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
+	}
+	biden_share := biden_percentage / 100.0 // both keys are known to exist here: the check above panics otherwise
+	trump_share := trump_percentage / 100.0
+
+	normalized_trump_share := trump_share / (trump_share + biden_share)
+	normalized_biden_share := biden_share / (trump_share + biden_share)
+
+	joint_trump_biden_sample_size := (biden_share + trump_share) * float64(poll.SampleSize)
+	std_error_poll_mean := math.Sqrt((normalized_trump_share * normalized_biden_share) / joint_trump_biden_sample_size)
+
+	p_republican_win := getProbabilityAboveX(0.5, normalized_trump_share, std_error_poll_mean)
+
+	if pretty_print {
+		fmt.Printf("\n\t\tSample size: %f", joint_trump_biden_sample_size)
+		fmt.Printf("\n\t\tMean R: %f", 100.0*normalized_trump_share)
+		fmt.Printf("\n\t\tStd of mean R: %f", 100*std_error_poll_mean)
+		fmt.Printf("\n\t\tPoll says chance of R win: %f", p_republican_win)
+	}
+	return p_republican_win
+}
+
+func getChanceRepublicanWinFromPollPlusUncertainty(poll Poll, state State, pretty_print bool) float64 {
+	// Uncertainty from the state
+	n_republican_win := 0
+	for _, party := range state.PresidentialElectoralHistory {
+		if party == "R" {
+			n_republican_win++
+		}
+	}
+
+	// Get the uncertainty from the poll
+	biden_percentage, biden_exists := poll.PollResults["Biden"]
+	trump_percentage, trump_exists := poll.PollResults["Trump"]
+	if !biden_exists || !trump_exists {
+		panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
+	}
+	biden_share := biden_percentage / 100.0 // both keys are known to exist here: the check above panics otherwise
+	trump_share := trump_percentage / 100.0
+
+	normalized_trump_share := trump_share / (trump_share + biden_share)
+	normalized_biden_share := biden_share / (trump_share + biden_share)
+
+	joint_trump_biden_sample_size := (biden_share + trump_share) * float64(poll.SampleSize)
+	std_error_poll_mean := math.Sqrt((normalized_trump_share * normalized_biden_share) / joint_trump_biden_sample_size)
+
+	/* Inject additional uncertainty */
+	/*
+		Possible factors:
+			- National drift between now and the election (biggest one)
+			- States more uncertain than the national average
+			- Idiosyncratic factors
+			- Polls not being as good as Gallup
+			- Increased polarization
+		Also note that the polls already have some error of their own
+	*/
+	std_additional_uncertainty := 5.0 / 100.0
+
+	if n_republican_win == 0 || n_republican_win == 6 {
+		// if solid states for the last 6 elections
+		std_additional_uncertainty = std_additional_uncertainty / 3.0
+
+		if pretty_print {
+			fmt.Printf("\n\t\tN republican wins: %d", n_republican_win)
+			fmt.Printf("\n\t\t=> Reducing additional uncertainty")
+		}
+	}
+
+	std_error := std_error_poll_mean + std_additional_uncertainty
+
+	// std_error := std_error_poll_mean + 0.065
+	p_republican_win := getProbabilityAboveX(0.5, normalized_trump_share, std_error)
+
+	if pretty_print {
+		fmt.Printf("\n\t\tStd with std_additional_uncertainty R: %f", 100*std_error)
+		fmt.Printf("\n\t\tPoll plus uncertainty says chance of R win: %f", p_republican_win)
+	}
+	return p_republican_win
+}
+
+/* Print state by state data */
+func printStates(states []State) {
+	for _, state := range states {
+		fmt.Printf("\n\nState: %s", state.Name)
+		fmt.Printf("\n\tVotes: %d", state.Votes)
+		fmt.Printf("\n\tHistory: %s", state.PresidentialElectoralHistory)
+
+		p_baserate_republican_win := 0.0
+		for _, party := range state.PresidentialElectoralHistory {
+			if party == "R" {
+				p_baserate_republican_win++
+			}
+		}
+		fmt.Printf("\n\tHistorical base rate of R win: %f", p_baserate_republican_win/float64(len(state.PresidentialElectoralHistory)))
+
+		// Individual poll
+		for _, poll := range state.Polls {
+			fmt.Printf("\n\tPoll: %+v", poll)
+			_ = getChanceRepublicanWinFromPoll(poll, true)
+			_ = getChanceRepublicanWinFromPollPlusUncertainty(poll, state, true)
+		}
+
+		// Aggregate poll
+		num_biden_votes := 0.0
+		num_trump_votes := 0.0
+		for _, poll := range state.Polls {
+			biden_percentage, biden_exists := poll.PollResults["Biden"]
+			trump_percentage, trump_exists := poll.PollResults["Trump"]
+			if !biden_exists || !trump_exists {
+				panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
+			}
+			num_biden_votes += (biden_percentage / 100.0) * float64(poll.SampleSize)
+			num_trump_votes += (trump_percentage / 100.0) * float64(poll.SampleSize)
+		}
+		aggregate_sample_size := num_biden_votes + num_trump_votes
+		if aggregate_sample_size != 0.0 {
+			var aggregate_poll = Poll{SampleSize: int(aggregate_sample_size), PollResults: make(map[string]float64)}
+			aggregate_poll.PollResults["Biden"] = 100.0 * num_biden_votes / aggregate_sample_size
+			aggregate_poll.PollResults["Trump"] = 100.0 * num_trump_votes / aggregate_sample_size
+
+			fmt.Printf("\n\tAggregate poll: %+v", aggregate_poll)
+			_ = getChanceRepublicanWinFromPoll(aggregate_poll, true)
+			_ = getChanceRepublicanWinFromPollPlusUncertainty(aggregate_poll, state, true)
+		}
+	}
+}
+
+/* Sample state by state */
+func sampleFromState(state State) VotesForEachParty {
+	switch state.Name {
+	case "Nebraska":
+		/*
+			2000: R
+			2004: R
+			2008: Split, 1 D, 4 R
+			2012: R
+			2016: R
+			2020: Split, 1 D, 4 R
+		*/
+		p_split := 2.0 / 6.0
+		if r.Float64() < p_split {
+			return VotesForEachParty{Democrats: 1, Republicans: 4}
+		} else {
+			return VotesForEachParty{Democrats: 0, Republicans: 5}
+		}
+	case "Maine":
+		/*
+			2000: D
+			2004: D
+			2008: D
+			2012: D
+			2016: Split: 3 D, 1 R
+			2020: Split, 3 D, 1 R
+		*/
+		p_split := 2.0 / 6.0
+		if r.Float64() < p_split {
+			return VotesForEachParty{Democrats: 3, Republicans: 1}
+		} else {
+			return VotesForEachParty{Democrats: 1, Republicans: 0}
+		}
+	default:
+		{
+			/* Consider the base rate for the state */
+			p_baserate_republican_win := 0.0
+			for _, party := range state.PresidentialElectoralHistory {
+				if party == "R" {
+					p_baserate_republican_win++
+				}
+			}
+			p_baserate_republican_win = p_baserate_republican_win / float64(len(state.PresidentialElectoralHistory))
+			p_republican_win := p_baserate_republican_win // if no polls
+
+			/* Consider polls */
+			num_biden_votes := 0.0
+			num_trump_votes := 0.0
+			for _, poll := range state.Polls {
+				biden_percentage, biden_exists := poll.PollResults["Biden"]
+				trump_percentage, trump_exists := poll.PollResults["Trump"]
+				if !biden_exists || !trump_exists {
+					panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
+				}
+				num_biden_votes += (biden_percentage / 100.0) * float64(poll.SampleSize)
+				num_trump_votes += (trump_percentage / 100.0) * float64(poll.SampleSize)
+			}
+
+			aggregate_sample_size := num_biden_votes + num_trump_votes
+			if aggregate_sample_size != 0.0 {
+				var aggregate_poll = Poll{SampleSize: int(aggregate_sample_size), PollResults: make(map[string]float64)}
+				aggregate_poll.PollResults["Biden"] = 100.0 * num_biden_votes / aggregate_sample_size
+				aggregate_poll.PollResults["Trump"] = 100.0 * num_trump_votes / aggregate_sample_size
+
+				p_republican_win_aggregate_polls := getChanceRepublicanWinFromPollPlusUncertainty(aggregate_poll, state, false)
+				// p_republican_win_aggregate_polls = getChanceRepublicanWinFromPoll(aggregate_poll, false)
+
+				// weight_polls := 0.75
+				// p_republican = weight_polls*p_republican_win_aggregate_polls + (1.0-weight_polls)*p_baserate_republican_win
+				p_republican_win = p_republican_win_aggregate_polls
+			}
+
+			if r.Float64() < p_republican_win {
+				return VotesForEachParty{Democrats: 0, Republicans: state.Votes}
+			} else {
+				return VotesForEachParty{Democrats: state.Votes, Republicans: 0}
+			}
+		}
+	}
+}
+
+/* Simulate election */
+func simulateElection(states []State) int {
+	republican_seats := 0
+	for _, state := range states {
+		election_sample := sampleFromState(state)
+		republican_seats += election_sample.Republicans
+	}
+	return republican_seats
+}
+
+/* Histogram */
+func barString(n int) string {
+	str := ""
+	for i := 0; i < n; i++ {
+		str += "█"
+	}
+	return str
+}
+
+func printElectoralCollegeHistogram(samples []int) {
+	histogram := [538]int{}
+	for _, sample := range samples {
+		histogram[sample]++
+	}
+
+	max_count := 0
+	for _, count := range histogram {
+		if count > max_count {
+			max_count = count
+		}
+	}
+
+	cp := 0.0
+	for i, count := range histogram {
+		bar_length := (count * 75) / max_count // Scale so that the max_count bar is 75 characters long
+		p := float64(count) / float64(len(samples)) * 100
+		cp += p
+
+		if i > 130 && i < 400 {
+			fmt.Printf("[ %2d, %4d): %s %.2f%% (%.0f%%)\n", i, i+1, barString(bar_length), p, cp)
+		} else if p >= 0.01 {
+			fmt.Printf(">0.01 probability outside of domain, you might want to change histogram parameters\n")
+
+		}
+	}
+}
 
 /* Load data from csvs */
+// Glue code
 func readStates() ([]State, error) {
 	var states map[string]State = make(map[string]State)
 
@@ -205,294 +479,9 @@ func readStates() ([]State, error) {
 	for _, state := range states {
 		states_slice = append(states_slice, state)
 	}
-
 	return states_slice, nil
 }
 
-/* Sampling helper functions */
-func getNormalCDF(x float64, mean float64, std float64) float64 {
-	erf_term := (x - mean) / (std * math.Sqrt2)
-	return (1 + math.Erf(erf_term)) / 2
-}
-
-func getProbabilityAboveX(x float64, mean float64, std float64) float64 {
-	return 1 - getNormalCDF(x, mean, std)
-}
-
-func getChanceCandidateWinsFromPollShare(candidate_p float64, poll_sample_size float64) float64 {
-	std := math.Sqrt(candidate_p * (1 - candidate_p) / poll_sample_size) // https://stats.stackexchange.com/questions/258879/how-to-interpret-margin-of-error-in-a-non-binary-poll
-	return getProbabilityAboveX(0.5, candidate_p, std)
-}
-
-func getChanceRepublicanWinFromPoll(poll Poll, pretty_print bool) float64 {
-
-	biden_percentage, biden_exists := poll.PollResults["Biden"]
-	trump_percentage, trump_exists := poll.PollResults["Trump"]
-	if !biden_exists || !trump_exists {
-		panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
-	}
-	biden_share := biden_percentage / 100.0 // will panic if the item is not found, but we've previously filtered for it
-	trump_share := trump_percentage / 100.0
-
-	normalized_trump_share := trump_share / (trump_share + biden_share)
-	normalized_biden_share := biden_share / (trump_share + biden_share)
-
-	joint_trump_biden_sample_size := (biden_share + trump_share) * float64(poll.SampleSize)
-	std_error_poll_mean := math.Sqrt((normalized_trump_share * normalized_biden_share) / joint_trump_biden_sample_size)
-
-	p_republican_win := getProbabilityAboveX(0.5, normalized_trump_share, std_error_poll_mean)
-
-	if pretty_print {
-		fmt.Printf("\n\t\tSample size: %f", joint_trump_biden_sample_size)
-		fmt.Printf("\n\t\tMean R: %f", 100.0*normalized_trump_share)
-		fmt.Printf("\n\t\tStd of mean R: %f", 100*std_error_poll_mean)
-		fmt.Printf("\n\t\tPoll says chance of R win: %f", p_republican_win)
-	}
-	return p_republican_win
-
-}
-
-func getChanceRepublicanWinFromPollPlusUncertainty(poll Poll, state State, pretty_print bool) float64 {
-
-	// Uncertainty from the state
-	n_republican_win := 0
-	for _, party := range state.PresidentialElectoralHistory {
-		if party == "R" {
-			n_republican_win++
-		}
-	}
-
-	// Get the uncertainty from the poll
-	biden_percentage, biden_exists := poll.PollResults["Biden"]
-	trump_percentage, trump_exists := poll.PollResults["Trump"]
-	if !biden_exists || !trump_exists {
-		panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
-	}
-	biden_share := biden_percentage / 100.0 // will panic if the item is not found, but we've previously filtered for it
-	trump_share := trump_percentage / 100.0
-
-	normalized_trump_share := trump_share / (trump_share + biden_share)
-	normalized_biden_share := biden_share / (trump_share + biden_share)
-
-	joint_trump_biden_sample_size := (biden_share + trump_share) * float64(poll.SampleSize)
-	std_error_poll_mean := math.Sqrt((normalized_trump_share * normalized_biden_share) / joint_trump_biden_sample_size)
-
-	/* Inject additional uncertainty */
-	/*
-		Possible factors:
-			- National drift between now and the election (biggest one)
-			- States more uncertain than the national average
-			- Idiosyncratic factors
-			- Polls not being as good as gallup
-			- Increased polarization
-		Also note that the polls already have some error already
-	*/
-	std_additional_uncertainty := 5.0 / 100.0
-
-	if n_republican_win == 0 || n_republican_win == 6 {
-		// if solid states for the last 6 elections
-		std_additional_uncertainty = std_additional_uncertainty / 3.0
-
-		if pretty_print {
-			fmt.Printf("\n\t\tN republican wins: %d", n_republican_win)
-			fmt.Printf("\n\t\t=> Reducing additional uncertainty")
-		}
-
-	}
-
-	std_error := std_error_poll_mean + std_additional_uncertainty
-
-	// std_error := std_error_poll_mean + 0.065
-	p_republican_win := getProbabilityAboveX(0.5, normalized_trump_share, std_error)
-
-	if pretty_print {
-		fmt.Printf("\n\t\tStd with std_additional_uncertainty R: %f", 100*std_error)
-		fmt.Printf("\n\t\tPoll plus uncertainty says chance of R win: %f", p_republican_win)
-	}
-	return p_republican_win
-
-}
-
-/* Print state by state data */
-func printStates(states []State) {
-	for _, state := range states {
-		fmt.Printf("\n\nState: %s", state.Name)
-		fmt.Printf("\n\tVotes: %d", state.Votes)
-		fmt.Printf("\n\tHistory: %s", state.PresidentialElectoralHistory)
-
-		p_baserate_republican_win := 0.0
-		for _, party := range state.PresidentialElectoralHistory {
-			if party == "R" {
-				p_baserate_republican_win++
-			}
-		}
-		fmt.Printf("\n\tHistorical base rate of R win: %f", p_baserate_republican_win/float64(len(state.PresidentialElectoralHistory)))
-
-		// Individual poll
-		for _, poll := range state.Polls {
-			fmt.Printf("\n\tPoll: %+v", poll)
-			_ = getChanceRepublicanWinFromPoll(poll, true)
-			_ = getChanceRepublicanWinFromPollPlusUncertainty(poll, state, true)
-		}
-
-		// Aggregate poll
-		num_biden_votes := 0.0
-		num_trump_votes := 0.0
-		for _, poll := range state.Polls {
-			biden_percentage, biden_exists := poll.PollResults["Biden"]
-			trump_percentage, trump_exists := poll.PollResults["Trump"]
-			if !biden_exists || !trump_exists {
-				panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
-			}
-			num_biden_votes += (biden_percentage / 100.0) * float64(poll.SampleSize)
-			num_trump_votes += (trump_percentage / 100.0) * float64(poll.SampleSize)
-		}
-		aggregate_sample_size := num_biden_votes + num_trump_votes
-		if aggregate_sample_size != 0.0 {
-			var aggregate_poll = Poll{SampleSize: int(aggregate_sample_size), PollResults: make(map[string]float64)}
-			aggregate_poll.PollResults["Biden"] = 100.0 * num_biden_votes / aggregate_sample_size
-			aggregate_poll.PollResults["Trump"] = 100.0 * num_trump_votes / aggregate_sample_size
-
-			fmt.Printf("\n\tAggregate poll: %+v", aggregate_poll)
-			_ = getChanceRepublicanWinFromPoll(aggregate_poll, true)
-			_ = getChanceRepublicanWinFromPollPlusUncertainty(aggregate_poll, state, true)
-		}
-
-	}
-
-}
-
-/* Sample state by state */
-func sampleFromState(state State) VotesForEachParty {
-	switch state.Name {
-	case "Nebraska":
-		/*
-			2000: R
-			2004: R
-			2008: Split, 1 D, 4 R
-			2012: R
-			2016: R
-			2020: Split, 1 D, 4 R
-		*/
-		p_split := 2.0 / 6.0
-		if r.Float64() < p_split {
-			return VotesForEachParty{Democrats: 1, Republicans: 4}
-		} else {
-			return VotesForEachParty{Democrats: 0, Republicans: 5}
-		}
-	case "Maine":
-		/*
-			2000: D
-			2004: D
-			2008: D
-			2012: D
-			2016: Split: 3 D, 1 R
-			2020: Split, 3 D, 1 R
-		*/
-		p_split := 2.0 / 6.0
-		if r.Float64() < p_split {
-			return VotesForEachParty{Democrats: 3, Republicans: 1}
-		} else {
-			return VotesForEachParty{Democrats: 1, Republicans: 0}
-		}
-	default:
-		{
-			/* Consider the base rate for the state */
-			p_baserate_republican_win := 0.0
-			for _, party := range state.PresidentialElectoralHistory {
-				if party == "R" {
-					p_baserate_republican_win++
-				}
-			}
-			p_baserate_republican_win = p_baserate_republican_win / float64(len(state.PresidentialElectoralHistory))
-			p_republican_win := p_baserate_republican_win // if no polls
-
-			/* Consider polls */
-			num_biden_votes := 0.0
-			num_trump_votes := 0.0
-			for _, poll := range state.Polls {
-				biden_percentage, biden_exists := poll.PollResults["Biden"]
-				trump_percentage, trump_exists := poll.PollResults["Trump"]
-				if !biden_exists || !trump_exists {
-					panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
-				}
-				num_biden_votes += (biden_percentage / 100.0) * float64(poll.SampleSize)
-				num_trump_votes += (trump_percentage / 100.0) * float64(poll.SampleSize)
-			}
-
-			aggregate_sample_size := num_biden_votes + num_trump_votes
-			if aggregate_sample_size != 0.0 {
-				var aggregate_poll = Poll{SampleSize: int(aggregate_sample_size), PollResults: make(map[string]float64)}
-				aggregate_poll.PollResults["Biden"] = 100.0 * num_biden_votes / aggregate_sample_size
-				aggregate_poll.PollResults["Trump"] = 100.0 * num_trump_votes / aggregate_sample_size
-
-				p_republican_win_aggregate_polls := getChanceRepublicanWinFromPollPlusUncertainty(aggregate_poll, state, false)
-				// p_republican_win_aggregate_polls = getChanceRepublicanWinFromPoll(aggregate_poll, false)
-
-				// weight_polls := 0.75
-				// p_republican = weight_polls*p_republican_win_aggregate_polls + (1.0-weight_polls)*p_baserate_republican_win
-				p_republican_win = p_republican_win_aggregate_polls
-			}
-
-			if r.Float64() < p_republican_win {
-				return VotesForEachParty{Democrats: 0, Republicans: state.Votes}
-			} else {
-				return VotesForEachParty{Democrats: state.Votes, Republicans: 0}
-			}
-		}
-	}
-}
-
-/* Simulate election */
-func simulateElection(states []State) int {
-
-	republican_seats := 0
-	for _, state := range states {
-		election_sample := sampleFromState(state)
-		republican_seats += election_sample.Republicans
-	}
-	return republican_seats
-}
-
-/* Histogram */
-func barString(n int) string {
-	str := ""
-	for i := 0; i < n; i++ {
-		str += "█"
-	}
-	return str
-}
-
-func printElectoralCollegeHistogram(samples []int) {
-
-	histogram := [538]int{}
-	for _, sample := range samples {
-		histogram[sample]++
-	}
-
-	max_count := 0
-	for _, count := range histogram {
-		if count > max_count {
-			max_count = count
-		}
-	}
-
-	cp := 0.0
-	for i, count := range histogram {
-		bar_length := (count * 75) / max_count // Assuming max_count bar length is 50 characters
-		p := float64(count) / float64(len(samples)) * 100
-		cp += p
-
-		if i > 130 && i < 400 {
-			fmt.Printf("[ %2d, %4d): %s %.2f%% (%.0f%%)\n", i, i+1, barString(bar_length), p, cp)
-		} else if p >= 0.01 {
-			fmt.Printf(">0.01 probability outside of domain, you might want to change histogram parameters\n")
-
-		}
-	}
-
-}
-
 func main() {
 	states, err := readStates()
 	if err != nil {
@@ -518,5 +507,4 @@ func main() {
 
 	p_republicans = p_republicans / float64(n_sims)
 	fmt.Printf("\n%% republicans: %f\n", p_republicans)
-
 }