step: savepoint & README tweaks

mini-infrabayes && use full oeis after switching to use danger compilation
danger compilation really increases speed
2023-05-24 18:07:58 -07:00 · 2023-05-24 17:35:46 -07:00 · 2023-05-24 17:25:49 -07:00 · 2023-05-24 17:12:47 -07:00 · 2023-05-24 17:12:15 -07:00 · 2023-05-24 16:58:38 -07:00
6 changed files with 264 additions and 142 deletions
--- a/index.md
+++ b/index.md
@ -1,14 +1,11 @@
 ## Dependencies
 nimble install https://github.com/nim-lang/bigints
 https://nimdocs.com/nim-lang/bigints/bigints.html
 nimble install print
 https://github.com/treeform/print
 ## Dependencies
 The data folder is not included, but its contents are:
 .
 ├── data
 │   ├── stripped
@ -16,9 +13,13 @@ The data folder is not included, but its contents are:
 Where stripped.gz can be found at <https://oeis.org/wiki/JSON_Format,_Compressed_Files>
 ## Gotchas
 Nimble doesn't play well with relative directories. Ignore and use make instead?
 ## To do
- [ ] Exploration of OEIS data
+- [x] Exploration of OEIS data
 - [ ] Subdivide subsequent tasks into steps
  - [x] Simple prediction of the next integer
    - [x] Simple predictions v1
@ -26,11 +27,15 @@ Where stripped.gz can be found at <https://oeis.org/wiki/JSON_Format,_Compressed
    - [-] Maybe add some caching, e.g., write continuations to file, and read them next time.
  - [ ] JIT Bayesianism:
    - [x] Function to predict with a variable number of hypotheses
-    - [ ] Function to start predicting with a small number of hypotheses, and get more if the initial ones aren't enough. 
+    - [x] Function to start predicting with a small number of hypotheses, and get more if the initial ones aren't enough. 
-    - [ ] Add the loop of: start with some small number of sequences, and if these aren't enough, read more.
+    - [x] Add the loop of: start with some small number of sequences, and if these aren't enough, read more.
-    - [ ] ...
+    - [x] Clean-up
-  - [ ] Infrabayesianism x1: Predicting interleaved sequences
+  - [ ] Infrabayesianism
-  - [ ] Infrabayesianism x2: Deterministic game of producing a fixed deterministic prediction, and then the adversary picking whatever minimizes your loss
+      - [ ] Infrabayesianism x1: Predicting interleaved sequences.
        - Yeah, actually, I think this just captures an implicit assumption of Bayesianism as actually practiced.
      - [ ] Infrabayesianism x2: Deterministic game of producing a fixed deterministic prediction, and then the adversary picking whatever minimizes your loss
        - I am actually not sure what exactly the procedure for computing that loss is. Do you minimize over subsequent rounds of the game, or only over the first round? Look this up.
        - Also maybe ask for help from e.g., Alex Mennen.
    - [ ] Write the actor 
 --- 
--- a/src/compute_constrained_bayes
+++ b/src/compute_constrained_bayes
--- a/src/compute_constrained_bayes.nim
+++ b/src/compute_constrained_bayes.nim
@ -0,0 +1,224 @@
 import print
 import strutils
 import sequtils
 import std/math
 import std/sugar
 import std/algorithm
 ## Prediction type & helpers.
 type prediction = (string, float)
 # The string is a hypothesis (in this file: a candidate next term);
 # the float is the probability mass assigned to it.
 proc comparePredictions (x: prediction, y: prediction): int =
   ## Comparator that orders predictions by decreasing probability:
   ## 1 when `x` is less probable than `y`, -1 when more probable, 0 otherwise.
   if x[1] < y[1]: 1
   elif x[1] > y[1]: -1
   else: 0
 proc getProbability (t: prediction): float =
   ## Returns the probability (second field) of a prediction.
   t[1]
 proc getHypothesis (t: prediction): string =
   ## Returns the hypothesis (first field) of a prediction.
   t[0]
 ## Utils
 ## Find index (or -1)
 proc findIndex(xs: seq[string], y: string): int =
   ## Index of the first element of `xs` equal to `y`, or -1 when there is none.
   xs.find(y)
 ## Get sequences
 ## let file_path = "../data/one_to_three"
 let file_path = "../data/stripped"
 proc getOEIS(): seq[seq[string]] =
   ## Reads the file at `file_path` and returns one seq of string terms per line.
   ##
   ## The first four lines of the file are skipped. Every later line is split
   ## on "," and its first and last fields are dropped (presumably the
   ## sequence id and the empty field after a trailing comma; confirm against
   ## the data file). Lines with fewer than two fields are ignored.
   let f = open(file_path)
   # Close the handle even if reading or splitting raises.
   defer: f.close()
   var line_number = 0
   var line: string
   while f.readLine(line):
     if line_number > 3:
       let fields = split(line, ",")
       # With fewer than two fields (e.g. a blank line) the slice
       # 1..(len-2) would have negative length and fail.
       if fields.len >= 2:
         result.add(fields[1..(fields.len - 2)])
     inc line_number
 var seqs = getOEIS()
 ## Sequence helpers
 proc startsWithSubsequence(subseq: seq[string], xs: seq[string]): bool =
   ## True when `subseq` is a prefix of `xs`; an empty `subseq` is a prefix
   ## of everything.
   if subseq.len > xs.len:
     return false
   for i in 0 ..< subseq.len:
     if subseq[i] != xs[i]:
       return false
   return true
 proc getSequencesWithStart(seqs: seq[seq[string]], start: seq[string]): seq[seq[string]] =
   ## Returns, in their original order, the sequences of `seqs` that begin
   ## with `start`.
   seqs.filterIt(startsWithSubsequence(start, it))
 ## Pretty print sequences
 # var start = @["1", "2", "3", "4", "5"]
 # var continuations = getSequencesWithStart(seqs, start)
 # print continuations
 proc predictContinuation(seqs: seq[seq[string]], observations: seq[string]): seq[prediction] =
   ## Predicts the term that follows `observations`.
   ##
   ## Every sequence in `seqs` that starts with `observations` and has at
   ## least one further term votes for that next term. The result pairs each
   ## distinct next term with its share of the votes, sorted by decreasing
   ## probability. It is empty when no sequence offers a continuation.
   let continuations = getSequencesWithStart(seqs, observations)
   let l = observations.len
   var nexts: seq[string]
   var ps: seq[float]
   for c in continuations:
     # A sequence that ends exactly where the observations end has no next
     # term; indexing c[l] would be out of range (and unchecked with -d:danger).
     if c.len <= l:
       continue
     let next = c[l]
     let i = findIndex(nexts, next)
     if i == -1:
       nexts.add(next)
       ps.add(1.0)
     else:
       ps[i] = ps[i] + 1.0
   # Normalise the counts into probabilities.
   let sum = foldl(ps, a + b, 0.0)
   ps = ps.map(p => p/sum)
   var next_and_ps = zip(nexts, ps)
   # sort works in place; comparePredictions puts the most probable first.
   sort(next_and_ps, comparePredictions)
   return next_and_ps
 ## Predict continuation but without access to all oeis sequences
 proc predictContinuationWithTruncatedHypotheses(seqs: seq[seq[string]], start: seq[string], num_hypotheses: int): seq[prediction] =
   ## Same as predictContinuation, but only the first `num_hypotheses`
   ## sequences of `seqs` (or all of them, if there are fewer) are consulted.
   let n = min(num_hypotheses, seqs.len)
   predictContinuation(seqs[0..<n], start)
 proc showPredictionsWithIncreasinglyManyHypotheses(seqs: seq[seq[string]], start: seq[string]) =
   ## Prints the predicted continuation of `start` ten times, using the first
   ## 10%, 20%, ..., 100% of `seqs` as the hypothesis set.
   echo "Showing predictions with increasingly many hypotheses after seeing ", start
   let total = seqs.len
   for tenth in 1..10:
     # Number of hypotheses for this round, truncated towards zero.
     let n = (total.float * (tenth.float/10.0)).int
     echo "Predictions with ", (100.0 * tenth.float/10.0).int, "% of the hypotheses"
     let predictions = predictContinuationWithTruncatedHypotheses(seqs, start, n)
     print predictions
 ## showPredictionsWithIncreasinglyManyHypotheses()
 proc jitBayesLoop(
   seqs: seq[seq[string]],
   observations: seq[string],
   n_observations_seen: int,
   initial_num_hypotheses: int,
   num_hypotheses_step: int,
 ) =
   ## Predicts each observation from the ones before it, using a limited set
   ## of hypotheses that grows only when needed.
   ##
   ## Starts with the first `initial_num_hypotheses` sequences of `seqs`
   ## (clamped to the range 0..seqs.len). For each index i from
   ## `n_observations_seen` on, it prints the predictions for observations[i].
   ## If the true value is not among them, the hypothesis set is enlarged by
   ## `num_hypotheses_step` (at least 1) at a time until some hypothesis
   ## agrees with observations[0..i] or all of `seqs` is in use. In the latter
   ## case the proc reports failure and returns.
   print "## Prediction with limited number of hypotheses (~JIT-Bayes)"
   # Clamp so the initial slice never runs past the end of `seqs`.
   var num_hypotheses = max(0, min(initial_num_hypotheses, seqs.len))
   # A non-positive step would never enlarge the set: the loop below would
   # spin forever (step 0) or build a negative-length slice (step < 0).
   let step = max(num_hypotheses_step, 1)
   var hypotheses = seqs[0..<num_hypotheses]
   for i in n_observations_seen..<observations.len:
     let predictions = predictContinuation(hypotheses, observations[0..<i])
     echo "### Prediction after seeing ", i, " observations: ", observations[0..<i]
     print predictions
     let correct_continuation = observations[i]
     let considered_continuations = predictions.map(prediction => getHypothesis(prediction))
     let correct_continuation_index = findIndex(considered_continuations, correct_continuation)
     if correct_continuation_index == -1:
       var found_concordant_hypothesis = false
       var concordant_hypotheses: seq[seq[string]]
       while (not found_concordant_hypothesis) and ( num_hypotheses < seqs.len ):
         echo "Correct continuation, " , correct_continuation, " not found in set of hypotheses of size ", num_hypotheses, "/", seqs.len, ". Increasing size of the set of hypotheses."
         num_hypotheses = min(num_hypotheses + step, seqs.len)
         hypotheses = seqs[0..<num_hypotheses]
         # Concordant = long enough to contain index i and agreeing with
         # every observation up to and including it.
         concordant_hypotheses = filter(hypotheses, proc(h: seq[string]): bool = (h.len > i) and startsWithSubsequence(observations[0..i], h))
         if concordant_hypotheses.len > 0:
           found_concordant_hypothesis = true
       if not found_concordant_hypothesis:
         echo "Increased number of hypotheses to ", num_hypotheses, ", but didn't find any hypotheses concordant with observations. Giving up."
         return
       else:
         echo "Increased number of hypotheses to ", num_hypotheses, ", and found ", concordant_hypotheses.len, " concordant hypotheses. Continuing"
         ## print concordant_hypotheses
     else:
       echo "Correct continuation was ", correct_continuation
       echo "It was assigned a probability of ", getProbability(predictions[correct_continuation_index])
 ## Infrabayesianism
 proc miniInfraBayes(
   seqs: seq[seq[string]],
   observations: seq[string],
   n_observations_seen: int,
   utility_function: string
   ) =
   ## Prints, for each index i from `n_observations_seen` on, the predictions
   ## for observations[i], the probability given to the true value, its
   ## natural log (reported as the "loss"), and the running total.
   ##
   ## Only `utility_function == "logloss"` is supported; any other value
   ## prints a message and returns. A true value that no hypothesis predicts
   ## gets probability 0.0, so its loss is ln(0.0).
   if utility_function != "logloss":
     echo "miniInfraBayes function only programmed for the logloss utility function"
     return
   else:
     echo "## Mini-infra-bayesianism over environments, where your utility in an environment is just the log-loss in the predictions you make until you become certain that you are in that environment."
   var total_loss = 0.0
   for i in n_observations_seen..<observations.len:
     let predictions = predictContinuation(seqs, observations[0..<i]) ## See the README for why this ends up being equivalent.
     echo "### Prediction after seeing ", i, " observations: ", observations[0..<i]
     print predictions
     let correct_continuation = observations[i]
     let considered_continuations = predictions.map(prediction => getHypothesis(prediction))
     let correct_continuation_index = findIndex(considered_continuations, correct_continuation)
     # findIndex returns -1 when the true value was not predicted at all;
     # indexing predictions[-1] would be out of range.
     let p_correct_continuation =
       if correct_continuation_index == -1: 0.0
       else: getProbability(predictions[correct_continuation_index])
     let new_loss = ln(p_correct_continuation)
     total_loss = total_loss + new_loss
     echo "Correct continuation was ", correct_continuation
     echo "It was assigned a probability of ", p_correct_continuation
     echo "And hence a loss of ", new_loss
     echo "Total loss is: ", total_loss
 ## Infrabayesianism. Part 1: Have hypotheses over just part of the world.
 ## Infrabayesianism. Part 2: Take the infimum over the possible loss.
 ## Display outputs
 # Driver script: runs each prediction mode in turn on hard-coded observations,
 # using the sequences loaded into `seqs`.
 echo ""
 ## var observations = @["1", "2", "3", "4", "5", "6"]
 # 1. Prediction using every sequence in `seqs`.
 echo "## Full prediction with access to all hypotheses (~Solomonoff)"
 var observations = @["1", "2", "3"]
 echo "## Initial sequence: ", observations
 let continuation_probabilities = predictContinuation(seqs, observations)
 print continuation_probabilities
 echo ""
 # 2. The same kind of prediction with 10%, 20%, ..., 100% of `seqs`.
 echo "## Predictions with increasingly many hypotheses"
 observations = @["1", "2", "3", "23"]
 showPredictionsWithIncreasinglyManyHypotheses(seqs, observations)
 echo "" 
 # 3. JIT-Bayes: 3 observations already seen, 1_000 initial hypotheses,
 #    growing by 30_000 at a time.
 observations = @["1", "2", "3", "23", "11", "18", "77", "46", "84"]
 jitBayesLoop(seqs, observations, 3, 1_000, 30_000)
 echo ""
 # 4. Mini-infra-Bayes with the log-loss utility on the same observations
 #    (the reassignment below repeats the value set just above).
 observations = @["1", "2", "3", "23", "11", "18", "77", "46", "84"]
 miniInfraBayes(seqs, observations, 3, "logloss")
 echo ""
--- a/src/jit_bayes
+++ b/src/jit_bayes
--- a/src/jit_bayes.nim
+++ b/src/jit_bayes.nim
@ -1,125 +0,0 @@
 import print
 import strutils
 import sequtils
 import std/sugar
 import std/algorithm
 ## Get sequences
 ## let file_path = "../data/stripped"
 let file_path = "../data/one_to_three"
 proc getOEIS(): seq[seq[string]] =
   ## Reads the file at `file_path` and returns one seq of string terms per line.
   ##
   ## The first four lines are skipped. Every later line is split on "," and
   ## its first and last fields are dropped. Lines with fewer than two fields
   ## are ignored.
   let f = open(file_path)
   # Close the handle even if reading or splitting raises.
   defer: f.close()
   var line_number = 0
   var line: string
   while f.readLine(line):
     if line_number > 3:
       let fields = split(line, ",")
       # With fewer than two fields the slice 1..(len-2) would have
       # negative length and fail.
       if fields.len >= 2:
         result.add(fields[1..(fields.len - 2)])
     inc line_number
 var seqs = getOEIS()
 ## Sequence helpers
 proc startsWithSubsequence(xs: seq[string], ys: seq[string]): bool =
   ## True when `xs` is a prefix of `ys`; an empty `xs` is a prefix of
   ## everything.
   if xs.len > ys.len:
     return false
   for i in 0 ..< xs.len:
     if xs[i] != ys[i]:
       return false
   return true
 proc getSequencesWithStart(seqs: seq[seq[string]], start: seq[string]): seq[seq[string]] =
   ## Returns, in their original order, the sequences of `seqs` that begin
   ## with `start`.
   seqs.filterIt(startsWithSubsequence(start, it))
 ## Pretty print sequences
 # var start = @["1", "2", "3", "4", "5"]
 # var continuations = getSequencesWithStart(seqs, start)
 # print continuations
 ## Find index (or -1)
 proc findIndex(xs: seq[string], y: string): int =
   ## Index of the first element of `xs` equal to `y`, or -1 when there is none.
   xs.find(y)
 ## Do simple predictions
 proc compareTuple (x: (string, float), y: (string, float)): int =
   ## Comparator that orders (term, probability) pairs by decreasing
   ## probability: 1 when `x` is less probable than `y`, -1 when more
   ## probable, 0 otherwise.
   if x[1] < y[1]: 1
   elif x[1] > y[1]: -1
   else: 0
 proc predictContinuation(seqs: seq[seq[string]], start: seq[string]): seq[(string, float)] =
   ## Predicts the term that follows `start`.
   ##
   ## Every sequence in `seqs` that begins with `start` and has at least one
   ## further term votes for that next term. The result pairs each distinct
   ## next term with its share of the votes, sorted by decreasing probability.
   let continuations = getSequencesWithStart(seqs, start)
   let l = start.len
   var nexts: seq[string]
   var ps: seq[float]
   for c in continuations:
     # A sequence no longer than `start` has no next term; c[l] would be
     # out of range.
     if c.len <= l:
       continue
     let next = c[l]
     let i = findIndex(nexts, next)
     if i == -1:
       nexts.add(next)
       ps.add(1.0)
     else:
       ps[i] = ps[i] + 1.0
   # Normalise the counts into probabilities.
   let sum = foldl(ps, a + b, 0.0)
   ps = ps.map(p => p/sum)
   var next_and_ps = zip(nexts, ps)
   # sort works in place; compareTuple puts the most probable first.
   sort(next_and_ps, compareTuple)
   return next_and_ps
 # Observed prefix used by the demo below (and read as a global by
 # showPredictionsWithMoreHypotheses).
 var start = @["1", "2", "3", "4", "5", "6"]
 echo "Initial sequence", start
 print "Full prediction with access to all hypotheses:"
 # Predict the next term using every sequence in `seqs`.
 let continuation_probabilities = predictContinuation(seqs, start)
 print continuation_probabilities
 ## Predict continuation but without access to all oeis sequences
 proc predictContinuationWithTruncatedHypotheses(seqs: seq[seq[string]], start: seq[string], num_hypotheses: int): seq[(string, float)] =
   ## Same as predictContinuation, but only the first `num_hypotheses`
   ## sequences of `seqs` (or all of them, if there are fewer) are consulted.
   let n = min(num_hypotheses, seqs.len)
   predictContinuation(seqs[0..<n], start)
 proc showPredictionsWithMoreHypotheses() =
   ## Prints the predicted continuation of the global `start` ten times,
   ## using the first 10%, 20%, ..., 100% of the global `seqs`.
   let total = seqs.len
   for tenth in 1..10:
     # Number of hypotheses for this round, truncated towards zero.
     let n = (total.float * (tenth.float/10.0)).int
     echo "Predictions with ", (100.0 * tenth.float/10.0).int, "% of the hypotheses"
     let predictions = predictContinuationWithTruncatedHypotheses(seqs, start, n)
     print predictions
 ## showPredictionsWithMoreHypotheses()
 proc jitBayesLoop(
   seqs: seq[seq[string]],
   initial_num_hypotheses: int,
   num_hypotheses_step: int,
   observations: seq[string]
 ) =
   ## Prints the predicted next-term distribution after each proper prefix of
   ## `observations` (lengths 1 to len-1), using only the first
   ## `initial_num_hypotheses` sequences of `seqs` (clamped to 0..seqs.len).
   ## `num_hypotheses_step` is accepted but not used by this version.
   # `0..<n` takes exactly n hypotheses; the inclusive `0..n` took n+1 and
   # indexed past the end when n == seqs.len.
   let n = max(0, min(initial_num_hypotheses, seqs.len))
   let hypotheses = seqs[0..<n]
   for i in 1..<observations.len:
     let predictions = predictContinuation(hypotheses, observations[0..<i])
     print predictions
 jitBayesLoop()
--- a/src/makefile
+++ b/src/makefile
@ -1,16 +1,34 @@
 ## make
 ## make = make build
 ## make deps
 ## make build
 ## make fast ## < recommended for users
 ## make run
 ## make examine
 ## make time
 SHELL := /bin/bash ## <= required to use time
 VERBOSE=--verbosity:0
-build: jit_bayes.nim
+build: compute_constrained_bayes.nim
-	nim c $(VERBOSE) jit_bayes.nim
+	nim c $(VERBOSE) compute_constrained_bayes.nim
-run: jit_bayes 
+fast: 
-	./jit_bayes $(VERBOSE)
+	nim c $(VERBOSE) -d:danger compute_constrained_bayes.nim && echo
-examine: jit_bayes
+deps: 
-	nim c $(VERBOSE) jit_bayes.nim && time ./jit_bayes $(VERBOSE) && echo
+	nimble install print@1.0.2
-	nim c $(VERBOSE) -d:release jit_bayes.nim && time ./jit_bayes $(VERBOSE) && echo
+	nimble install https://github.com/CosmicToast/pipe ## backup at github.com/NunoSempere/nim-pipe
-	nim c $(VERBOSE) -d:danger jit_bayes.nim && time ./jit_bayes $(VERBOSE)
+	gzip -d ../data/stripped.gz -c > ../data/stripped
 ## Runs the compiled binary, passing $(VERBOSE) to it as an argument.
 ## NOTE(review): the prerequisite is the binary's file name; confirm make has a rule that produces it (the compile rule visible in this makefile is named `build`).
 run: compute_constrained_bayes 
 	./compute_constrained_bayes $(VERBOSE)
 ## Misc: 
 ## Compiles and times the program three times: default flags, -d:release, and -d:danger.
 examine: compute_constrained_bayes
 	nim c $(VERBOSE) compute_constrained_bayes.nim && time ./compute_constrained_bayes $(VERBOSE) && echo
 	nim c $(VERBOSE) -d:release compute_constrained_bayes.nim && time ./compute_constrained_bayes $(VERBOSE) && echo
 	nim c $(VERBOSE) -d:danger compute_constrained_bayes.nim && time ./compute_constrained_bayes $(VERBOSE)
 ## Times the default `make` goal, then times `make run`.
 time:
 	time make && echo && time make run
Author	SHA1	Message	Date
NunoSempere	824593accc	step: savepoint & README tweaks	2023-05-24 18:07:58 -07:00
NunoSempere	32e8a32e4c	mini-infrabayes && use full oeis after switching to use danger compilation danger compilation really increases speed	2023-05-24 17:35:46 -07:00
NunoSempere	98d982066c	step: use "showPredictionsWithIncreasinglyManyHypotheses" func.	2023-05-24 17:25:49 -07:00
NunoSempere	7dbd689548	tweak: delete .nimble file	2023-05-24 17:12:47 -07:00
NunoSempere	34fb95ff37	tweak: experiment with nimble file, add deps to makefile.	2023-05-24 17:12:15 -07:00
NunoSempere	73372ad2ab	mv jit_bayes => compute_constrained_bayes	2023-05-24 16:58:38 -07:00
NunoSempere	90739ce4a8	finalize jit-bayes	2023-05-24 16:28:31 -07:00
NunoSempere	218df1179f	savepoint.	2023-05-24 15:58:31 -07:00
NunoSempere	d70eac48f0	step: develop jit-bayes loop.	2023-05-24 15:56:08 -07:00
NunoSempere	c2e2836df5	tweak: small refactor; add a type for prediction	2023-05-24 15:16:50 -07:00