From 0b6b80accb27d5a30cf8598661b4c3f504b114c0 Mon Sep 17 00:00:00 2001
From: NunoSempere <nuno.semperelh@protonmail.com>
Date: Sun, 14 Apr 2024 11:21:50 -0400
Subject: [PATCH] add math to calculate chance of win per poll

---
 README.md |  2 ++
 main.go   | 50 ++++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 42 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index de403ee..6989f5d 100644
--- a/README.md
+++ b/README.md
@@ -48,3 +48,5 @@ Remedy: consider the conditional probabilities? But how? Or, relax assumptions u
 - [ ] Exclude polls older than one month?
 - [ ] Exclude partisan polls
 - [ ] ...
+
+Reference for computing the probability of winning from a poll: https://stats.stackexchange.com/questions/274211/calculating-the-probability-of-someone-winning-from-a-poll
diff --git a/main.go b/main.go
index 131fe70..fd47c03 100644
--- a/main.go
+++ b/main.go
@@ -3,6 +3,7 @@ package main
 import (
 	"encoding/csv"
 	"fmt"
+	"math"
 	rand "math/rand/v2"
 	"os"
 	"strconv"
@@ -30,8 +31,6 @@ type Poll struct {
 	Date        time.Time
 }
 
-// type src = *rand.Rand
-
 /* Globals */
 var r = rand.New(rand.NewPCG(uint64(100), uint64(2224)))
 
@@ -186,6 +185,22 @@ func readStates() ([]State, error) {
 	return states_slice, nil
 }
 
+/* Sampling helper functions */
+func getNormalCDF(x float64, mean float64, std float64) float64 { // CDF at x of a normal distribution with the given mean and standard deviation
+	erf_term := (x - mean) / (std * math.Sqrt2) // standardize x; the extra sqrt(2) is what the erf form of the CDF requires
+	return (1 + math.Erf(erf_term)) / 2 // Phi = (1 + erf((x - mean) / (std * sqrt(2)))) / 2; std == 0 is not guarded against here
+}
+
+func getProbabilityAboveX(x float64, mean float64, std float64) float64 { // P(X > x) for X ~ Normal(mean, std)
+	return 1 - getNormalCDF(x, mean, std) // complement of the CDF, i.e. the upper-tail probability
+}
+
+func getChanceCandidateWinsFromPoll(candidate_p float64, poll_sample_size float64) float64 { // probability mass above 0.5 under a normal distribution centred on the polled share
+	std := math.Sqrt(candidate_p * (1 - candidate_p) / poll_sample_size) // sqrt(p*(1-p)/n); presumably candidate_p is a fraction in [0, 1], not a percentage (confirm at call sites). See https://stats.stackexchange.com/questions/258879/how-to-interpret-margin-of-error-in-a-non-binary-poll
+	return getProbabilityAboveX(0.5, candidate_p, std) // upper tail beyond the 50% threshold
+}
+
+/* Sample state by state */
 func sampleFromState(state State) VotesForEachParty {
 	fmt.Printf("\n\nState: %s", state.Name)
 	fmt.Printf("\n\tVotes: %d", state.Votes)
@@ -256,25 +271,40 @@ func sampleFromState(state State) VotesForEachParty {
 				if has_biden && has_trump {
 					recent_biden_trump_polls = append(recent_biden_trump_polls, recent_poll)
 				}
-				fmt.Printf("\n\tPoll: %+v", recent_poll)
 			}
 
 			num_biden_votes := 0.0
 			num_trump_votes := 0.0
 			for _, recent_biden_trump_poll := range recent_biden_trump_polls {
-				biden_percentage := 0.0
-				trump_percentage := 0.0
+				biden_share := 0.0
+				trump_share := 0.0
 				for candidate_name, candidate_percentage := range recent_biden_trump_poll.PollResults {
 					if candidate_name == "Biden" {
-						biden_percentage = candidate_percentage
+						biden_share = candidate_percentage / 100
 					} else if candidate_name == "Trump" {
-						trump_percentage = candidate_percentage
+						trump_share = candidate_percentage / 100
 					}
 				}
-				num_biden_votes += biden_percentage * float64(recent_biden_trump_poll.SampleSize)
-				num_trump_votes += trump_percentage * float64(recent_biden_trump_poll.SampleSize)
-			}
+				sample_size := float64(recent_biden_trump_poll.SampleSize)
+				poll_biden_votes := biden_share * sample_size
+				poll_trump_votes := trump_share * sample_size
+				joint_trump_biden_sample_size := poll_biden_votes + poll_trump_votes
+				normalized_trump_share := trump_share / (trump_share + biden_share)
+				normalized_biden_share := biden_share / (trump_share + biden_share)
+				std_poll := math.Sqrt((normalized_trump_share * normalized_biden_share) / joint_trump_biden_sample_size)
 
+				p_trump_more_votes := getProbabilityAboveX(0.5, normalized_trump_share, std_poll)
+
+				fmt.Printf("\n\tPoll: %+v", recent_biden_trump_poll)
+				fmt.Printf("\n\t\tChance of R win: %f", p_trump_more_votes)
+
+				// Update general tally
+				num_biden_votes += poll_biden_votes
+				num_trump_votes += poll_trump_votes
+			}
+			// total_sample_size := num_biden_votes + num_trump_votes
+
+			fmt.Println("")
 			if r.Float64() < p_republican {
 				return VotesForEachParty{Democrats: 0, Republicans: state.Votes}
 			} else {