tweak: small refactor; add a type for prediction

2023-05-24 15:16:50 -07:00 · 2023-05-24 15:16:50 -07:00 · c2e2836df5
commit c2e2836df5
parent d4581605e0
2 changed files with 53 additions and 33 deletions
--- a/src/jit_bayes
+++ b/src/jit_bayes
--- a/src/jit_bayes.nim
+++ b/src/jit_bayes.nim
@ -4,9 +4,38 @@ import sequtils
 import std/sugar
 import std/algorithm
 ## Define type
 type prediction = (string, float)
 # string represents a hypothesis,
 # float represents the probability mass assigned to that hypothesis
 ## Utils
 ## Find index (or -1)
 proc findIndex(xs: seq[string], y: string): int =
  ## Position of the first element of `xs` equal to `y`, or -1 when no
  ## element matches.
  # `find` from the system module does the same left-to-right scan and uses
  # the same -1 sentinel for "not found".
  xs.find(y)
 ## Do simple predictions
 proc comparePredictions (x: prediction, y: prediction): int =
  ## Comparator over the probability (second) component of two predictions.
  ## Returns 1 when `x` is less probable than `y`, -1 when it is more
  ## probable, and 0 otherwise, so `sort` places higher probabilities first.
  let lhs = x[1]
  let rhs = y[1]
  result =
    if lhs < rhs: 1
    elif lhs > rhs: -1
    else: 0
 proc getProbability (t: prediction): float =
  ## Probability of prediction `t`, i.e. the second tuple component.
  t[1]
 proc getHypothesis (t: prediction): string =
  ## Hypothesis of prediction `t`, i.e. the first tuple component.
  t[0]
 ## Get sequences
 ## let file_path = "../data/stripped"
 let file_path = "../data/one_to_three"
 ## let file_path = "../data/stripped"
 proc getOEIS(): seq[seq[string]] =
  let f = open(file_path)
  var i = 0
@ -47,25 +76,9 @@ proc getSequencesWithStart(seqs: seq[seq[string]], start: seq[string]): seq[seq[
 # var continuations = getSequencesWithStart(seqs, start)
 # print continuations
-## Find index (or -1)
+proc predictContinuation(seqs: seq[seq[string]], observations: seq[string]): seq[prediction] =
-
+  let continuations = getSequencesWithStart(seqs, observations)
-proc findIndex(xs: seq[string], y: string): int =
+  let l = observations.len
  for i, x in xs:
    if x == y:
      return i
  return -1
 ## Do simple predictions
 proc compareTuple (x: (string, float), y: (string, float)): int =
  ## Comparator over the float (second) component of two (string, float)
  ## pairs. Returns 1 when `x`'s float is smaller than `y`'s, -1 when it is
  ## larger, and 0 otherwise, so `sort` places larger floats first.
  let lhs = x[1]
  let rhs = y[1]
  result =
    if lhs < rhs: 1
    elif lhs > rhs: -1
    else: 0
 proc predictContinuation(seqs: seq[seq[string]], start: seq[string]): seq[(string, float)] =
  let continuations = getSequencesWithStart(seqs, start)
  let l = start.len
  var nexts: seq[string]
  var ps: seq[float]
  for c in continuations:
@ -79,26 +92,19 @@ proc predictContinuation(seqs: seq[seq[string]], start: seq[string]): seq[(strin
  let sum = foldl(ps, a + b, 0.0)
  ps = ps.map( p => p/sum)
  var next_and_ps = zip(nexts, ps)
-  # next_and_ps = sort(next_and_ps, compareTuple)
+  sort(next_and_ps, comparePredictions)
  sort(next_and_ps, compareTuple)
  # ^ sorts in place
  # also, openArray refers to both arrays and sequences.
  return next_and_ps
 var start = @["1", "2", "3", "4", "5", "6"]
 echo "Initial sequence", start
 print "Full prediction with access to all hypotheses:"
 let continuation_probabilities = predictContinuation(seqs, start)
 print continuation_probabilities
 ## Predict continuation but without access to all oeis sequences
-proc predictContinuationWithTruncatedHypotheses(seqs: seq[seq[string]], start: seq[string], num_hypotheses: int): seq[(string, float)] =
+proc predictContinuationWithTruncatedHypotheses(seqs: seq[seq[string]], start: seq[string], num_hypotheses: int): seq[prediction] =
  ## Runs `predictContinuation` on only the first `n` sequences of `seqs`,
  ## where `n` is `num_hypotheses` capped at `seqs.len`.
  let n = if num_hypotheses < seqs.len: num_hypotheses else: seqs.len
  # `0..<n` is a half-open slice: exactly `n` sequences are kept.
  let truncated_seqs = seqs[0..<n]
  return predictContinuation(truncated_seqs, start)
-proc showPredictionsWithMoreHypotheses() =
+proc showPredictionsWithMoreHypotheses(seqs: seq[seq[string]], start: seq[string]) =
  let l = seqs.len
  for i in 1..10:
    let n = (l.float * (i.float/10.0)).int
@ -110,16 +116,30 @@ proc showPredictionsWithMoreHypotheses() =
 proc jitBayesLoop(
  seqs: seq[seq[string]],
  observations: seq[string],
  n_observations_seen: int,
  initial_num_hypotheses: int,
  num_hypotheses_step: int,
  observations: seq[string]
 ) =
  ## For each prefix length `i` below `observations.len`, calls
  ## `predictContinuation` on a fixed slice of `seqs` with the prefix
  ## `observations[0..<i]`, then echoes the prefix and prints the predictions.
  ## The `+` loop header starts `i` at `n_observations_seen`; the `-` header
  ## started it at 1.
  # NOTE(review): `observations` is listed twice in the parameter list; the
  # trailing entry looks like the removed side of this diff. Confirm, since
  # Nim does not accept two parameters with the same name.
  # NOTE(review): `num_hypotheses_step` is not used anywhere in the visible
  # body, and `hypotheses` is declared `var` but never reassigned here.
  let l = observations.len
  # `0..initial_num_hypotheses` is an inclusive slice: it takes
  # `initial_num_hypotheses + 1` sequences and requires `seqs.len` to be
  # greater than `initial_num_hypotheses`.
  # NOTE(review): confirm whether `0..<initial_num_hypotheses` was intended.
  var hypotheses = seqs[0..initial_num_hypotheses]
-  for i in 1..<l:
+  for i in n_observations_seen..<l:
    let predictions = predictContinuation(hypotheses, observations[0..<i])
    echo "Prediction after seeing ", i, " observations: ", observations[0..<i]
    print predictions
    ## let possible_continuations = predictions.map()
 ## Display outputs
 var observations = @["1", "2", "3", "4", "5", "6"]
 echo "Initial sequence", observations
 print "Full prediction with access to all hypotheses (~Solomonoff)"
 let continuation_probabilities = predictContinuation(seqs, observations)
 print continuation_probabilities
 print "Prediction with limited number of hypotheses (~JIT-Bayes)"
 jitBayesLoop(seqs, observations, 3, 1_000, 1_000)
 jitBayesLoop()