commit eb46da71b8802faa8ec3b568c042be957aa63df0
Author: NunoSempere <nuno.semperelh@protonmail.com>
Date:   Mon May 6 19:30:27 2024 +0100

    initial commit

diff --git a/code.R b/code.R
new file mode 100644
index 0000000..dce39ee
--- /dev/null
+++ b/code.R
@@ -0,0 +1,107 @@
+## ------------- secretary_problem.R ------------
+## Title: The Secretary (Marriage) Problem
+##
+## Description: Simulation of optimal halting problem. Trying
+##   to see how different utility scoring affects expected outcomes.
+##   - Uniform, set range to sqrt(12) for sd == 1
+##   - Normal, roughly the distribution of people?
+##   - Exponential, mostly not suitable, but some will change your world?
+##
+## Author: MollieTheMare
+## Date Created:  2024-04-19
+## Notes:
+##   - Subtracted 1 from exponential distribution to get mean 0
+##   - For ordinal information stopping at 1/e (~37%) is most
+##     likely to find the optimal partner, but you only have a 37% chance.
+##   - Wikipedia for the Cardinal payoff variant gives the solution to 
+##     a uniform as sqrt(n)
+##   - Also links Sakaguchi that gives a general solution, but looks
+##     like a pain to go through.
+## ---------------------------
+library(data.table)
+library(knitr)
+
+select_best_after_k <-
+  function(rnkx, k) {
+    n <- length(rnkx)
+    selected <- which(rnkx > max(rnkx[1:k]))
+    if (length(selected) == 0) {
+      selected <- n
+    } else if (length(selected) > 1) {
+      selected <- selected[[1]]
+    }
+    selected
+  }
+
+# x is a vector of utilities, the larger the better
+run_selection <- function(x) {
+  n <- length(x)
+  rnkx <- frank(x)
+  # actual best
+  i_best <- which(rnkx == length(x))
+  x_best <- x[[i_best]]
+  # exp stopping
+  i_exp <- select_best_after_k(rnkx, round(n / exp(1)))
+  x_exp <- x[[i_exp]]
+  # sqrt stopping
+  i_sqrt <- select_best_after_k(rnkx, round(sqrt(n)))
+  x_sqrt <- x[[i_sqrt]]
+  # fist love
+  x_1 <- x[[1]]
+  
+  result_table <-
+    data.table(
+      is_best = i_best == c(i_exp, i_sqrt),
+      x       = c(x_exp, x_sqrt),
+      loss    = x_best - c(x_exp, x_sqrt),
+      gain    = c(x_exp, x_sqrt) - x_1,
+      # best    = i_best,
+      r       = c("n/e","sqrt(n)")
+    )
+  
+  result_table
+}
+
+run_rounds <- function(FUN, rounds = 1000, n = 256, ...) {
+  runs <- list()
+  for (i in 1:rounds) {
+    x <- FUN(n, ...)
+    runs[[i]] <- run_selection(x)
+  }
+  rbindlist(runs)
+}
+
+# Expected 5%ile shortfall
+ES5 <- function(loss) {
+  mean(loss[loss > quantile(loss, 0.95)])
+}
+
+{# simulation runs ------------
+  set.seed(0)
+  rounds <- 50000
+  unif_runs <- run_rounds(runif, rounds, min = -sqrt(12)/2, max = sqrt(12)/2)
+  normal_runs <- run_rounds(rnorm, rounds)
+  exp_runs <- run_rounds(FUN = function(n) {rexp(n) - 1}, rounds)
+  unif_runs[, gen_dist := 'unif']
+  normal_runs[, gen_dist := 'norm']
+  exp_runs[, gen_dist := 'exp']
+  results <- rbind(unif_runs, normal_runs, exp_runs)
+}
+
+summary_table <-
+  results[, .(
+    `P(r)` = scales::label_percent()(sum(is_best) / .N),
+    `P(miss)`= scales::label_percent()(sum(gain < 0) / .N),
+    `<u>` = mean(x), #since we have set the mean 0 ~ gain
+    # `<gain>` = mean(gain),
+    `<loss>` = mean(loss),
+    sd_loss = sd(loss),
+    #sd_x = sd(x),
+    ES_5 = ES5(loss),
+    # best_stop = mean((best-1) / 256),
+    max_loss = max(loss)
+  ),
+  keyby = .(gen_dist, r)]
+summary_table
+
+kable(summary_table, digits = 1, align = c('c'))