tweak: small refactor; add a type for prediction

2023-05-24 15:16:50 -07:00 · 2023-05-24 15:16:50 -07:00 · c2e2836df5
commit c2e2836df5
parent d4581605e0
2 changed files with 53 additions and 33 deletions
--- a/src/jit_bayes
+++ b/src/jit_bayes
--- a/src/jit_bayes.nim
+++ b/src/jit_bayes.nim
@ -4,9 +4,38 @@ import sequtils
 import std/sugar
 import std/algorithm

+## Define type
+type prediction = (string, float)
+# string represents a hypothesis,
+# float represents its probability mass
+
+## Utils
+## Find index (or -1)
+proc findIndex(xs: seq[string], y: string): int =
+  for i, x in xs:
+    if x == y:
+      return i
+  return -1
+
+## Do simple predictions
+proc comparePredictions (x: prediction, y: prediction): int =
+  let (_, p1) = x
+  let (_, p2) = y
+  if p1 < p2: return 1
+  elif p1 > p2: return -1
+  else: return 0
+
+proc getProbability (t: prediction): float =
+  let (_, p) = t
+  return p
+
+proc getHypothesis (t: prediction): string =
+  let (h, _) = t
+  return h
+
 ## Get sequences
-## let file_path = "../data/stripped"
 let file_path = "../data/one_to_three"
+## let file_path = "../data/stripped"
 proc getOEIS(): seq[seq[string]] =
  let f = open(file_path)
  var i = 0
@ -47,25 +76,9 @@ proc getSequencesWithStart(seqs: seq[seq[string]], start: seq[string]): seq[seq[
 # var continuations = getSequencesWithStart(seqs, start)
 # print continuations

-## Find index (or -1)
-
-proc findIndex(xs: seq[string], y: string): int =
-  for i, x in xs:
-    if x == y:
-      return i
-  return -1
-
-## Do simple predictions
-proc compareTuple (x: (string, float), y: (string, float)): int =
-  let (_, p1) = x
-  let (_, p2) = y
-  if p1 < p2: return 1
-  elif p1 > p2: return -1
-  else: return 0
-
-proc predictContinuation(seqs: seq[seq[string]], start: seq[string]): seq[(string, float)] =
-  let continuations = getSequencesWithStart(seqs, start)
-  let l = start.len
+proc predictContinuation(seqs: seq[seq[string]], observations: seq[string]): seq[prediction] =
+  let continuations = getSequencesWithStart(seqs, observations)
+  let l = observations.len
  var nexts: seq[string]
  var ps: seq[float]
  for c in continuations:
@ -79,26 +92,19 @@ proc predictContinuation(seqs: seq[seq[string]], start: seq[string]): seq[(strin
  let sum = foldl(ps, a + b, 0.0)
  ps = ps.map( p => p/sum)
  var next_and_ps = zip(nexts, ps)
-  # next_and_ps = sort(next_and_ps, compareTuple)
-  sort(next_and_ps, compareTuple)
+  sort(next_and_ps, comparePredictions)
  # ^ sorts in place
  # also, openArray refers to both arrays and sequences.
  return next_and_ps

-var start = @["1", "2", "3", "4", "5", "6"]
-echo "Initial sequence", start
-print "Full prediction with access to all hypotheses:"
-let continuation_probabilities = predictContinuation(seqs, start)
-print continuation_probabilities
-
 ## Predict continuation but without access to all oeis sequences

-proc predictContinuationWithTruncatedHypotheses(seqs: seq[seq[string]], start: seq[string], num_hypotheses: int): seq[(string, float)] =
+proc predictContinuationWithTruncatedHypotheses(seqs: seq[seq[string]], start: seq[string], num_hypotheses: int): seq[prediction] =
  let n = if num_hypotheses < seqs.len: num_hypotheses else: seqs.len
  let truncated_seqs = seqs[0..<n]
  return predictContinuation(truncated_seqs, start)

-proc showPredictionsWithMoreHypotheses() =
+proc showPredictionsWithMoreHypotheses(seqs: seq[seq[string]], start: seq[string]) =
  let l = seqs.len
  for i in 1..10:
    let n = (l.float * (i.float/10.0)).int
@ -110,16 +116,30 @@ proc showPredictionsWithMoreHypotheses() =

 proc jitBayesLoop(
  seqs: seq[seq[string]],
+  observations: seq[string],
+  n_observations_seen: int,
  initial_num_hypotheses: int,
  num_hypotheses_step: int,
-  observations: seq[string]
 ) =
  let l = observations.len
  
  var hypotheses = seqs[0..initial_num_hypotheses]
-  for i in 1..<l:
+  for i in n_observations_seen..<l:
    let predictions = predictContinuation(hypotheses, observations[0..<i])
+    echo "Prediction after seeing ", i, " observations: ", observations[0..<i]
    print predictions
+    
+    ## let possible_continuations = predictions.map()
+
+## Display outputs
+var observations = @["1", "2", "3", "4", "5", "6"]
+echo "Initial sequence", observations
+
+print "Full prediction with access to all hypotheses (~Solomonoff)"
+let continuation_probabilities = predictContinuation(seqs, observations)
+print continuation_probabilities
+
+print "Prediction with limited number of hypotheses (~JIT-Bayes)"
+jitBayesLoop(seqs, observations, 3, 1_000, 1_000)

-jitBayesLoop()