fussy-suitor/code.R

## ------------- secretary_problem.R ------------
## Title: The Secretary (Marriage) Problem
##
## Description: Simulation of optimal halting problem. Trying
##   to see how different utility scoring affects expected outcomes.
##   - Uniform, set range to sqrt(12) for sd == 1
##   - Normal, roughly the distribution of people?
##   - Exponential, mostly not suitable, but some will change your world?
##
## Author: MollieTheMare
## Date Created:  2024-04-19
## Notes:
##   - Subtracted 1 from exponential distribution to get mean 0
##   - For ordinal information stopping at 1/e (~37%) is most
##     likely to find the optimal partner, but you only have a 37% chance.
##   - Wikipedia for the Cardinal payoff variant gives the solution to
##     a uniform as sqrt(n)
##   - Also links Sakaguchi that gives a general solution, but looks
##     like a pain to go through.
## ---------------------------
library(data.table)
library(knitr)

select_best_after_k <-
  function(rnkx, k) {
    n <- length(rnkx)
    selected <- which(rnkx > max(rnkx[1:k]))
    if (length(selected) == 0) {
      selected <- n
    } else if (length(selected) > 1) {
      selected <- selected[[1]]
    }
    selected
  }

# x is a vector of utilities, the larger the better
run_selection <- function(x) {
  n <- length(x)
  rnkx <- frank(x)
  # actual best
  i_best <- which(rnkx == length(x))
  x_best <- x[[i_best]]
  # exp stopping
  i_exp <- select_best_after_k(rnkx, round(n / exp(1)))
  x_exp <- x[[i_exp]]
  # sqrt stopping
  i_sqrt <- select_best_after_k(rnkx, round(sqrt(n)))
  x_sqrt <- x[[i_sqrt]]
  # fist love
  x_1 <- x[[1]]

  result_table <-
    data.table(
      is_best = i_best == c(i_exp, i_sqrt),
      x       = c(x_exp, x_sqrt),
      loss    = x_best - c(x_exp, x_sqrt),
      gain    = c(x_exp, x_sqrt) - x_1,
      # best    = i_best,
      r       = c("n/e","sqrt(n)")
    )

  result_table
}

run_rounds <- function(FUN, rounds = 1000, n = 256, ...) {
  runs <- list()
  for (i in 1:rounds) {
    x <- FUN(n, ...)
    runs[[i]] <- run_selection(x)
  }
  rbindlist(runs)
}

# Expected 5%ile shortfall
ES5 <- function(loss) {
  mean(loss[loss > quantile(loss, 0.95)])
}

{# simulation runs ------------
  set.seed(0)
  rounds <- 50000
  unif_runs <- run_rounds(runif, rounds, min = -sqrt(12)/2, max = sqrt(12)/2)
  normal_runs <- run_rounds(rnorm, rounds)
  exp_runs <- run_rounds(FUN = function(n) {rexp(n) - 1}, rounds)
  unif_runs[, gen_dist := 'unif']
  normal_runs[, gen_dist := 'norm']
  exp_runs[, gen_dist := 'exp']
  results <- rbind(unif_runs, normal_runs, exp_runs)
}

summary_table <-
  results[, .(
    `P(r)` = scales::label_percent()(sum(is_best) / .N),
    `P(miss)`= scales::label_percent()(sum(gain < 0) / .N),
    `<u>` = mean(x), #since we have set the mean 0 ~ gain
    # `<gain>` = mean(gain),
    `<loss>` = mean(loss),
    sd_loss = sd(loss),
    #sd_x = sd(x),
    ES_5 = ES5(loss),
    # best_stop = mean((best-1) / 256),
    max_loss = max(loss)
  ),
  keyby = .(gen_dist, r)]
summary_table

kable(summary_table, digits = 1, align = c('c'))