# compute-constrained-bayes/src/compute_constrained_bayes.nim

import print
import strutils
import sequtils
import std/math
import std/sugar
import std/algorithm

## Prediction type & helpers.
type prediction = (string, float)
# The string is a hypothesis (in this file: a candidate next term of a sequence).
# The float is the probability mass assigned to that hypothesis.
proc comparePredictions (x: prediction, y: prediction): int =
  ## Comparator that orders predictions by descending probability.
  ##
  ## Returns -1 when `x` is more probable than `y`, 1 when it is less
  ## probable, and 0 otherwise. The hypothesis strings are ignored.
  let px = x[1]
  let py = y[1]
  if px > py:
    -1
  elif px < py:
    1
  else:
    0

proc getProbability (t: prediction): float =
  ## Returns the probability component (second field) of a prediction.
  t[1]

proc getHypothesis (t: prediction): string =
  ## Returns the hypothesis component (first field) of a prediction.
  t[0]

## Utils
## Find index (or -1)
proc findIndex(xs: seq[string], y: string): int =
  ## Returns the position of the first element of `xs` equal to `y`,
  ## or -1 when `y` does not occur in `xs`.
  xs.find(y)

## Get sequences
## let file_path = "../data/one_to_three"
let file_path = "../data/stripped"
proc getOEIS(): seq[seq[string]] =
  ## Reads the file at `file_path` and returns one list of terms per data line.
  ##
  ## The first four lines are skipped (presumably a comment header -- confirm
  ## against the data file). Every remaining line is split on "," and its first
  ## and last fields are dropped; the fields in between are kept, as strings,
  ## in file order.
  ##
  ## Lines with fewer than two fields (for example a blank line) are ignored:
  ## dropping the first and last field of such a line would request an invalid
  ## slice and abort the whole load.
  ##
  ## Raises `IOError` when the file cannot be opened.
  let f = open(file_path)
  # Close the handle on every exit path, including an exception while reading.
  defer: f.close()
  var lineNumber = 0
  var line: string
  while f.readLine(line):
    if lineNumber > 3:
      let fields = split(line, ",")
      if fields.len >= 2:
        # Drop the first and last fields; a two-field line yields an empty list.
        result.add(fields[1 .. fields.len - 2])
    inc lineNumber
var seqs = getOEIS()

## Sequence helpers
proc startsWithSubsequence(subseq: seq[string], xs: seq[string]): bool =
  ## Returns true when `subseq` is a prefix of `xs`, i.e. `xs` begins with
  ## exactly the elements of `subseq`, in order.
  ##
  ## An empty `subseq` is a prefix of every sequence (including an empty one);
  ## a `subseq` longer than `xs` is never a prefix.
  ##
  ## Implemented as a single index loop so that no intermediate copies of the
  ## two sequences are made while comparing.
  if subseq.len > xs.len:
    return false
  for i in 0 ..< subseq.len:
    if subseq[i] != xs[i]:
      return false
  return true

proc getSequencesWithStart(seqs: seq[seq[string]], start: seq[string]): seq[seq[string]] =
  ## Returns, in their original order, the members of `seqs` that begin
  ## with the elements of `start` (as decided by `startsWithSubsequence`).
  seqs.filterIt(startsWithSubsequence(start, it))

## Pretty print sequences
# var start = @["1", "2", "3", "4", "5"]
# var continuations = getSequencesWithStart(seqs, start)
# print continuations

proc predictContinuation(seqs: seq[seq[string]], observations: seq[string]): seq[prediction] =
  ## Predicts the term that follows `observations`.
  ##
  ## Every sequence in `seqs` that starts with `observations` and has at least
  ## one further term casts one vote for that next term. The votes are
  ## normalised to sum to 1 and returned as (next term, probability) pairs,
  ## sorted by descending probability.
  ##
  ## Returns an empty list when no sequence offers a continuation.

  let continuations = getSequencesWithStart(seqs, observations)
  let l = observations.len
  var nexts: seq[string]
  var ps: seq[float]
  for c in continuations:
    # A sequence that matches the observations but ends exactly there has no
    # next term to vote for; indexing c[l] on it would be out of bounds.
    if c.len <= l:
      continue
    let next = c[l]
    let i = findIndex(nexts, next)
    if i == -1:
      nexts.add(next)
      ps.add(1.0)
    else:
      ps[i] = ps[i] + 1.0
  # When nothing voted, `ps` is empty and the division below never runs.
  let sum = foldl(ps, a + b, 0.0)
  ps = ps.map( p => p/sum)
  var next_and_ps = zip(nexts, ps)
  # Sorts in place; comparePredictions puts the most probable term first.
  sort(next_and_ps, comparePredictions)
  return next_and_ps

## Predict continuation but without access to all oeis sequences

proc predictContinuationWithTruncatedHypotheses(seqs: seq[seq[string]], start: seq[string], num_hypotheses: int): seq[prediction] =
  ## Same as `predictContinuation`, but only the first `num_hypotheses`
  ## entries of `seqs` are considered (all of them when `num_hypotheses`
  ## is at least `seqs.len`).
  let cutoff = min(num_hypotheses, seqs.len)
  predictContinuation(seqs[0 ..< cutoff], start)

proc showPredictionsWithIncreasinglyManyHypotheses(seqs: seq[seq[string]], start: seq[string]) =
  ## Prints the predicted continuations of `start` ten times, using the
  ## first 10%, 20%, ..., 100% of `seqs` as the set of hypotheses.
  echo "Showing predictions with increasingly many hypotheses after seeing ", start
  let total = seqs.len.float
  for tenth in 1..10:
    let fraction = tenth.float/10.0
    let percent = (100.0 * tenth.float/10.0).int
    echo "Predictions with ", percent, "% of the hypotheses"
    # The local keeps the name `predictions` because it is handed to `print`,
    # whose output may include the expression text (TODO confirm).
    let predictions = predictContinuationWithTruncatedHypotheses(seqs, start, (total * fraction).int)
    print predictions

## showPredictionsWithIncreasinglyManyHypotheses()

proc jitBayesLoop(
  seqs: seq[seq[string]],
  observations: seq[string],
  n_observations_seen: int,
  initial_num_hypotheses: int,
  num_hypotheses_step: int,
) =
  ## Predicts each observation from index `n_observations_seen` onwards using
  ## only a prefix of `seqs` as hypotheses, and enlarges that prefix on demand.
  ##
  ## The hypothesis set starts as the first `initial_num_hypotheses` entries of
  ## `seqs` (clamped to the range 0 .. `seqs.len`). Whenever the observed term
  ## is not among the predicted continuations, the set grows by
  ## `num_hypotheses_step` entries at a time (a step below 1 is treated as 1 so
  ## the search always makes progress) until some hypothesis agrees with all
  ## observations up to and including the new one, or until `seqs` is
  ## exhausted, in which case the proc reports that and returns.
  ##
  ## All results are written to standard output.
  print "## Prediction with limited number of hypotheses (~JIT-Bayes)"

  # Clamp so that slicing `seqs` cannot go out of range when fewer sequences
  # were loaded than the caller asked to start with.
  var num_hypotheses = max(0, min(initial_num_hypotheses, seqs.len))
  var hypotheses = seqs[0..<num_hypotheses]
  let step = max(1, num_hypotheses_step)

  for i in n_observations_seen..<observations.len:
    let predictions = predictContinuation(hypotheses, observations[0..<i])
    echo "### Prediction after seeing ", i, " observations: ", observations[0..<i]
    print predictions

    let correct_continuation = observations[i]
    let considered_continuations = predictions.map(prediction => getHypothesis(prediction))
    let correct_continuation_index = findIndex(considered_continuations, correct_continuation)

    if correct_continuation_index == -1:

      # Observations up to and including the one that was just missed; built
      # once here instead of once per hypothesis inside the filter below.
      let observed_so_far = observations[0..i]
      var found_concordant_hypothesis = false
      var concordant_hypotheses: seq[seq[string]]

      while (not found_concordant_hypothesis) and ( num_hypotheses < seqs.len ):
        echo "Correct continuation, " , correct_continuation, " not found in set of hypotheses of size ", num_hypotheses, "/", seqs.len, ". Increasing size of the set of hypotheses."
        num_hypotheses = min(num_hypotheses + step, seqs.len)
        hypotheses = seqs[0..<num_hypotheses]
        concordant_hypotheses = filter(hypotheses, proc(h: seq[string]): bool = (h.len > i) and startsWithSubsequence(observed_so_far, h))
        if concordant_hypotheses.len > 0:
          found_concordant_hypothesis = true

      if not found_concordant_hypothesis:
        echo "Increased number of hypotheses to ", num_hypotheses, ", but didn't find any hypotheses concordant with observations. Giving up."
        return
      else:
        echo "Increased number of hypotheses to ", num_hypotheses, ", and found ", concordant_hypotheses.len, " concordant hypotheses. Continuing"
        ## print concordant_hypotheses

    else:
      echo "Correct continuation was ", correct_continuation
      echo "It was assigned a probability of ", getProbability(predictions[correct_continuation_index])

## Infrabayesianism

proc miniInfraBayesArgminMaxLoss(
  seqs: seq[seq[string]],
  observations: seq[string],
  n_observations_seen: int,
  utility_function: string
  ) =
  ## For every observation from index `n_observations_seen` onwards, predicts
  ## it from all of `seqs`, then prints the probability given to the term that
  ## was actually observed, the natural log of that probability (reported as
  ## the "loss"), and the running total of those logs.
  ##
  ## Only `utility_function == "logloss"` is supported; any other value prints
  ## a notice and returns without predicting.
  ##
  ## When the observed term was not predicted at all it is treated as having
  ## probability 0.0 (so its log is `-inf`) instead of indexing the prediction
  ## list with -1.
  if utility_function != "logloss":
    echo "miniInfraBayes function only programmed for the logloss utility function"
    return
  echo "## Mini-infra-bayesianism over environments, where your utility in an environment is just the log-loss in the predictions you make until you become certain that you are in that environment."

  var losses: seq[float]
  for i in n_observations_seen..<observations.len:
    let predictions = predictContinuation(seqs, observations[0..<i]) ## See the README for why this ends up being equivalent.
    echo "### Prediction after seeing ", i, " observations: ", observations[0..<i]
    print predictions
    let correct_continuation = observations[i]
    let considered_continuations = predictions.map(prediction => getHypothesis(prediction))
    let correct_continuation_index = findIndex(considered_continuations, correct_continuation)
    # findIndex returns -1 when no hypothesis predicted the observed term.
    let p_correct_continuation =
      if correct_continuation_index == -1: 0.0
      else: getProbability(predictions[correct_continuation_index])
    let new_loss = ln(p_correct_continuation)
    losses.add(new_loss)

    echo "Correct continuation was ", correct_continuation
    echo "It was assigned a probability of ", p_correct_continuation
    echo "And hence a loss of ", new_loss
    echo "Total loss is: ", foldl(losses, a + b, 0.0)

proc getEvens(xs: seq[string]): seq[string] =
  ## Returns the elements of `xs` at even 0-based positions (0, 2, 4, ...),
  ## in their original order.
  for pos in countup(0, xs.high, 2):
    result.add(xs[pos])

proc getOdds(xs: seq[string]): seq[string] =
  ## Returns the elements of `xs` at odd 0-based positions (1, 3, 5, ...),
  ## in their original order.
  for pos in countup(1, xs.high, 2):
    result.add(xs[pos])

proc interleave(xs: seq[string], ys: seq[string]): seq[string] =
  ## Merges two equally long sequences by alternating their elements:
  ## xs[0], ys[0], xs[1], ys[1], ...
  ##
  ## When the lengths differ, a message is printed and an empty sequence
  ## is returned.
  if xs.len != ys.len:
    echo "Interleaved sequences have to have the same length; returning empty sequence."
    return @[]
  result = newSeqOfCap[string](2 * xs.len)
  for idx, x in xs:
    result.add(x)
    result.add(ys[idx])

proc miniInfraBayesArgminMaxLossInterleavedHypotheses(
  seqs: seq[seq[string]],
  observations: seq[string],
  n_observations_seen: int,
  utility_function: string
  ) =
  ## Variant of the mini-infra-Bayes loop in which `observations` is treated
  ## as two interleaved sequences (a1, b1, a2, b2, ...).
  ##
  ## For every observation from index `n_observations_seen` onwards, the term
  ## at index `i` is predicted from `seqs` using only the earlier observations
  ## that share its parity: the even-indexed ones when `i` is even, the
  ## odd-indexed ones otherwise. The proc then prints the probability given to
  ## the observed term, the natural log of that probability (reported as the
  ## "loss"), and the running total of those logs.
  ##
  ## Only `utility_function == "logloss"` is supported; any other value prints
  ## a notice and returns without predicting.
  ##
  ## When the observed term was not predicted at all it is treated as having
  ## probability 0.0 (so its log is `-inf`) instead of indexing the prediction
  ## list with -1.
  if utility_function != "logloss":
    echo "miniInfraBayes function only programmed for the logloss utility function"
    return
  echo "## Mini-infra-bayesianism over environments, where your utility in an environment is just the log-loss in the predictions you make until you become certain that you are in that environment. This time with a twist: You don't have hypotheses over the sequences you observe, but rather over their odd and even position, i.e., you think that you observe interleaved OEIS sequences, (a1, b1, a2, b2, a3, b3). See the README.md for more."

  var losses: seq[float]
  for i in n_observations_seen..<observations.len:
    let seen = observations[0..<i]
    # The term at index i continues whichever interleaved sequence has the
    # same parity as i.
    let parity_subsequence =
      if i mod 2 == 0: getEvens(seen)
      else: getOdds(seen)
    let predictions = predictContinuation(seqs, parity_subsequence)
    echo "### Prediction after seeing ", i, " observations: ", observations[0..<i]
    print predictions
    let correct_continuation = observations[i]
    let considered_continuations = predictions.map(prediction => getHypothesis(prediction))
    let correct_continuation_index = findIndex(considered_continuations, correct_continuation)
    # findIndex returns -1 when no hypothesis predicted the observed term.
    let p_correct_continuation =
      if correct_continuation_index == -1: 0.0
      else: getProbability(predictions[correct_continuation_index])
    let new_loss = ln(p_correct_continuation)
    losses.add(new_loss)

    echo "Correct continuation was ", correct_continuation
    echo "It was assigned a probability of ", p_correct_continuation
    echo "And hence a loss of ", new_loss
    echo "Total loss is: ", foldl(losses, a + b, 0.0)

## Display outputs
# Driver script: runs each experiment in turn against the sequences loaded
# into `seqs`, separating their outputs with blank lines.
echo ""

## var observations = @["1", "2", "3", "4", "5", "6"]

# Experiment 1: predict the term following 1, 2, 3 using every loaded sequence.
echo "## Full prediction with access to all hypotheses (~Solomonoff)"
var observations = @["1", "2", "3"]
echo "## Initial sequence: ", observations
let continuation_probabilities = predictContinuation(seqs, observations)
print continuation_probabilities
echo ""

# Experiment 2: the same kind of prediction, repeated with the first
# 10%, 20%, ..., 100% of the loaded sequences as hypotheses.
echo "## Predictions with increasingly many hypotheses"
observations = @["1", "2", "3", "23"]
showPredictionsWithIncreasinglyManyHypotheses(seqs, observations)
echo ""

# Experiment 3: predict from index 3 onwards, starting with 1_000 hypotheses
# and adding 30_000 more whenever the observed term was not predicted.
observations = @["1", "2", "3", "23", "11", "18", "77", "46", "84"]
jitBayesLoop(seqs, observations, 3, 1_000, 30_000)
echo ""

# Experiment 4: log-probability bookkeeping over the same observations,
# predicting from index 3 onwards with all sequences as hypotheses.
observations = @["1", "2", "3", "23", "11", "18", "77", "46", "84"]
miniInfraBayesArgminMaxLoss(seqs, observations, 3, "logloss")
echo ""

# Experiment 5: the observations are two sequences interleaved term by term;
# predictions start at index 6 of the interleaved list.
observations = interleave(@["1", "2", "3", "23", "11", "18", "77", "46", "84"], @["2", "11", "13", "23", "47", "59", "71", "83", "107"])
miniInfraBayesArgminMaxLossInterleavedHypotheses(seqs, observations, 6, "logloss")
echo ""