compute-constrained-bayes/src/jit_bayes.nim

import print
import strutils
import sequtils
import std/sugar
import std/algorithm

## Define type
type prediction = (string, float)
# The string component is a hypothesis (in this file: a candidate next term),
# the float component is the probability mass assigned to that hypothesis.

## Utils
proc findIndex(xs: seq[string], y: string): int =
  ## Returns the index of the first element of `xs` that equals `y`,
  ## or -1 when `y` does not occur in `xs`.
  xs.find(y)

## Do simple predictions
proc comparePredictions(x: prediction, y: prediction): int =
  ## Comparator that orders predictions by descending probability mass:
  ## returns -1 when `x` carries more mass than `y`, 1 when it carries
  ## less, and 0 otherwise. The hypothesis strings are ignored.
  let
    massX = x[1]
    massY = y[1]
  result =
    if massX > massY: -1
    elif massX < massY: 1
    else: 0

proc getProbability(t: prediction): float =
  ## Returns the probability mass, i.e. the second component of `t`.
  t[1]

proc getHypothesis(t: prediction): string =
  ## Returns the hypothesis, i.e. the first component of `t`.
  t[0]

## Get sequences
# Path of the data file opened by `getOEIS`. It is a relative path, so it
# depends on the directory the program is started from.
let file_path = "../data/one_to_three"
# Alternative data file, currently disabled:
## let file_path = "../data/stripped"
proc getOEIS(): seq[seq[string]] =
  ## Reads the data file at `file_path` and returns one `seq[string]` per
  ## data line.
  ##
  ## The first four lines of the file are skipped (presumably a comment
  ## header -- confirm against the data file). Every later line is split
  ## on "," and its first and last fields are dropped; the fields in
  ## between are kept, as strings, in their original order.
  ##
  ## Lines that contain no comma at all (for example a blank line) are
  ## skipped, because they have no first/last field pair to strip.
  ##
  ## Raises `IOError` when the file cannot be opened.
  let f = open(file_path)
  # Release the handle on every exit path, including when reading raises.
  defer: f.close()
  var
    lineNo = 0
    line: string
  while f.readLine(line):
    if lineNo > 3:
      let fields = line.split(",")
      # With fewer than two fields the slice below would have a negative
      # length and raise, so such lines are ignored.
      if fields.len >= 2:
        result.add(fields[1 .. fields.len - 2])
    inc lineNo
# All sequences loaded from the data file; read once at start-up and never
# reassigned afterwards, hence `let`.
let seqs = getOEIS()

## Sequence helpers
proc startsWithSubsequence(xs: seq[string], ys: seq[string]): bool =
  ## Returns true when `xs` is a prefix of `ys`, i.e. `ys` begins with
  ## every element of `xs` in the same order.
  ##
  ## An empty `xs` is a prefix of every sequence (including an empty one);
  ## a non-empty `xs` is never a prefix of a shorter `ys`.
  if xs.len > ys.len:
    return false
  # Compare element by element in place; no intermediate copies of the
  # sequences are created.
  for i, x in xs:
    if x != ys[i]:
      return false
  return true

proc getSequencesWithStart(seqs: seq[seq[string]], start: seq[string]): seq[seq[string]] =
  ## Returns, in their original order, the members of `seqs` for which
  ## `startsWithSubsequence(start, member)` holds.
  seqs.filterIt(startsWithSubsequence(start, it))

## Pretty print sequences

# var start = @["1", "2", "3", "4", "5"]
# var continuations = getSequencesWithStart(seqs, start)
# print continuations

proc predictContinuation(seqs: seq[seq[string]], observations: seq[string]): seq[prediction] =
  ## Predicts the term that follows `observations`.
  ##
  ## Every sequence in `seqs` that starts with `observations` and has at
  ## least one further term casts one vote for that next term. The votes
  ## are normalised so they sum to 1 and returned as
  ## (next term, probability) pairs sorted by descending probability.
  ##
  ## Returns an empty seq when no sequence provides a next term.
  let continuations = getSequencesWithStart(seqs, observations)
  let l = observations.len
  var nexts: seq[string]
  var ps: seq[float]
  for c in continuations:
    # A sequence that ends exactly where the observations end has no
    # next term; indexing it at `l` would raise, so it casts no vote.
    if c.len <= l:
      continue
    let next = c[l]
    let i = findIndex(nexts, next)
    if i == -1:
      # First vote for this term.
      nexts.add(next)
      ps.add(1.0)
    else:
      ps[i] = ps[i] + 1.0
  # Turn vote counts into probabilities. When there are no votes `ps` is
  # empty, so the division below is never evaluated.
  let sum = foldl(ps, a + b, 0.0)
  ps = ps.map(p => p/sum)
  var next_and_ps = zip(nexts, ps)
  # Sorts in place; openArray covers both arrays and sequences.
  sort(next_and_ps, comparePredictions)
  return next_and_ps

## Predict continuation but without access to all oeis sequences

proc predictContinuationWithTruncatedHypotheses(seqs: seq[seq[string]], start: seq[string], num_hypotheses: int): seq[prediction] =
  ## Runs `predictContinuation` on `start` using only the first
  ## `num_hypotheses` entries of `seqs` (or all of them when `seqs` holds
  ## fewer than that).
  let limit = min(num_hypotheses, seqs.len)
  predictContinuation(seqs[0 ..< limit], start)

proc showPredictionsWithMoreHypotheses(seqs: seq[seq[string]], start: seq[string]) =
  ## Prints the predicted continuation of `start` ten times, using the
  ## first 10%, 20%, ..., 100% of `seqs` as the hypothesis set.
  let total = seqs.len
  for tenth in 1..10:
    let
      budget = (total.float * (tenth.float/10.0)).int
      percent = (100.0 * tenth.float/10.0).int
    echo "Predictions with ", percent, "% of the hypotheses"
    # The local keeps the name `predictions` because it is handed to
    # `print`, which may include the identifier in its output.
    let predictions = predictContinuationWithTruncatedHypotheses(seqs, start, budget)
    print predictions

## showPredictionsWithMoreHypotheses()

proc jitBayesLoop(
  seqs: seq[seq[string]],
  observations: seq[string],
  n_observations_seen: int,
  initial_num_hypotheses: int,
  num_hypotheses_step: int,
) =
  ## Replays `observations` one term at a time and prints, for each prefix
  ## length from `n_observations_seen` up to `observations.len - 1`, the
  ## continuation predicted from a limited hypothesis set.
  ##
  ## The hypothesis set is the first `initial_num_hypotheses` entries of
  ## `seqs`, or all of `seqs` when it holds fewer; a negative count is
  ## treated as zero.
  ##
  ## `num_hypotheses_step` is accepted but not used yet.
  let l = observations.len

  # `..<` is half-open, so exactly `budget` hypotheses are taken, and the
  # clamp keeps the slice inside `seqs`.
  let budget = max(0, min(initial_num_hypotheses, seqs.len))
  let hypotheses = seqs[0 ..< budget]
  for i in n_observations_seen..<l:
    let predictions = predictContinuation(hypotheses, observations[0..<i])
    echo "Prediction after seeing ", i, " observations: ", observations[0..<i]
    print predictions

    ## let possible_continuations = predictions.map()

## Display outputs
# Observed prefix whose continuation is predicted below; never mutated.
let observations = @["1", "2", "3", "4", "5", "6"]
echo "Initial sequence: ", observations

# Prediction using every loaded sequence as a hypothesis.
print "Full prediction with access to all hypotheses (~Solomonoff)"
let continuation_probabilities = predictContinuation(seqs, observations)
print continuation_probabilities

# Prediction using only the first 1_000 sequences, starting after 3
# observations have been seen.
print "Prediction with limited number of hypotheses (~JIT-Bayes)"
jitBayesLoop(seqs, observations, 3, 1_000, 1_000)