## ------------- secretary_problem.R ------------ ## Title: The Secretary (Marriage) Problem ## ## Description: Simulation of optimal halting problem. Trying ## to see how different utility scoring affects expected outcomes. ## - Uniform, set range to sqrt(12) for sd == 1 ## - Normal, roughly the distribution of people? ## - Exponential, mostly not suitable, but some will change your world? ## ## Author: MollieTheMare ## Date Created: 2024-04-19 ## Notes: ## - Subtracted 1 from exponential distribution to get mean 0 ## - For ordinal information stopping at 1/e (~37%) is most ## likely to find the optimal partner, but you only have a 37% chance. ## - Wikipedia for the Cardinal payoff variant gives the solution to ## a uniform as sqrt(n) ## - Also links Sakaguchi that gives a general solution, but looks ## like a pain to go through. ## --------------------------- library(data.table) library(knitr) select_best_after_k <- function(rnkx, k) { n <- length(rnkx) selected <- which(rnkx > max(rnkx[1:k])) if (length(selected) == 0) { selected <- n } else if (length(selected) > 1) { selected <- selected[[1]] } selected } # x is a vector of utilities, the larger the better run_selection <- function(x) { n <- length(x) rnkx <- frank(x) # actual best i_best <- which(rnkx == length(x)) x_best <- x[[i_best]] # exp stopping i_exp <- select_best_after_k(rnkx, round(n / exp(1))) x_exp <- x[[i_exp]] # sqrt stopping i_sqrt <- select_best_after_k(rnkx, round(sqrt(n))) x_sqrt <- x[[i_sqrt]] # fist love x_1 <- x[[1]] result_table <- data.table( is_best = i_best == c(i_exp, i_sqrt), x = c(x_exp, x_sqrt), loss = x_best - c(x_exp, x_sqrt), gain = c(x_exp, x_sqrt) - x_1, # best = i_best, r = c("n/e","sqrt(n)") ) result_table } run_rounds <- function(FUN, rounds = 1000, n = 256, ...) { runs <- list() for (i in 1:rounds) { x <- FUN(n, ...) runs[[i]] <- run_selection(x) } rbindlist(runs) } # Expected 5%ile shortfall ES5 <- function(loss) { mean(loss[loss > quantile(loss, 0.95)]) } {# simulation runs ------------ set.seed(0) rounds <- 50000 unif_runs <- run_rounds(runif, rounds, min = -sqrt(12)/2, max = sqrt(12)/2) normal_runs <- run_rounds(rnorm, rounds) exp_runs <- run_rounds(FUN = function(n) {rexp(n) - 1}, rounds) unif_runs[, gen_dist := 'unif'] normal_runs[, gen_dist := 'norm'] exp_runs[, gen_dist := 'exp'] results <- rbind(unif_runs, normal_runs, exp_runs) } summary_table <- results[, .( `P(r)` = scales::label_percent()(sum(is_best) / .N), `P(miss)`= scales::label_percent()(sum(gain < 0) / .N), `` = mean(x), #since we have set the mean 0 ~ gain # `` = mean(gain), `` = mean(loss), sd_loss = sd(loss), #sd_x = sd(x), ES_5 = ES5(loss), # best_stop = mean((best-1) / 256), max_loss = max(loss) ), keyby = .(gen_dist, r)] summary_table kable(summary_table, digits = 1, align = c('c'))