compute-constrained-bayes/src/jit_bayes.nim
NunoSempere 8ec8f1eda9 tweak: use only subset of all oeis sequences
full set is too expensive to process,
making it too expensive.
2023-05-24 14:21:48 -07:00

108 lines
3.0 KiB
Nim

import print
import strutils
import sequtils
import std/sugar
import std/algorithm
## Get sequences
## Alternative input, currently disabled (presumably the full OEIS dump,
## which is more expensive to process -- confirm before re-enabling):
## let file_path = "../data/stripped"
## Relative path of the data file that getOEIS opens.
let file_path = "../data/one_to_three"
proc getOEIS(): seq[seq[string]] =
  ## Reads the data file at `file_path` and returns one seq of terms per line.
  ##
  ## The first four lines of the file are skipped. Every later line is split
  ## on "," and its first and last fields are dropped (presumably the sequence
  ## id and the empty field after a trailing comma -- confirm against the data
  ## file). The remaining fields are returned as strings, unparsed.
  ##
  ## Raises `IOError` if the file cannot be opened.
  const headerLines = 4 # leading lines that are never treated as sequences
  var lineNo = 0
  # The `lines` iterator opens the file and closes it again even when the
  # loop body raises, so the handle cannot leak.
  for line in lines(file_path):
    if lineNo >= headerLines:
      let fields = line.split(",")
      # A line with fewer than two fields (e.g. a blank line) has nothing
      # between its first and last field; slicing it as 1 .. len-2 would
      # request a negative length and raise, so such lines are skipped.
      if fields.len >= 2:
        result.add(fields[1 .. fields.len - 2])
    inc lineNo
# Load the sequences once at start-up; the top-level code below passes
# `seqs` to the prediction procs.
var seqs = getOEIS()
## Sequence helpers
proc startsWithSubsequence(xs: seq[string], ys: seq[string]): bool =
  ## Returns true when `ys` begins with all elements of `xs`, in order,
  ## i.e. when `xs` is a prefix of `ys`.
  ##
  ## An empty `xs` matches every `ys` (including an empty one); a non-empty
  ## `xs` never matches a shorter `ys`.
  # Iterative on purpose: comparing heads and recursing on sliced tails would
  # copy both seqs at every step (quadratic work, one stack frame per item).
  if xs.len > ys.len:
    return false
  for i, x in xs:
    if x != ys[i]:
      return false
  return true
proc getSequencesWithStart(seqs: seq[seq[string]],
                           start: seq[string]): seq[seq[string]] =
  ## Returns, in their original order, the members of `seqs` for which
  ## `startsWithSubsequence(start, candidate)` is true.
  result = seqs.filterIt(startsWithSubsequence(start, it))
## Pretty print sequences
# var start = @["1", "2", "3", "4", "5"]
# var continuations = getSequencesWithStart(seqs, start)
# print continuations
## Find index (or -1)
proc findIndex(xs: seq[string], y: string): int =
  ## Returns the index of the first element of `xs` equal to `y`,
  ## or -1 when `y` does not occur in `xs`.
  # `system.find` has exactly this contract, so no hand-written loop is needed.
  xs.find(y)
## Do simple predictions
proc compareTuple(x: (string, float), y: (string, float)): int =
  ## Comparator on the float component of two `(string, float)` pairs.
  ##
  ## Returns -1 when `x` carries the larger float, 1 when `y` does, and 0
  ## otherwise, so sorting with it puts the largest float first.
  let weightX = x[1]
  let weightY = y[1]
  result =
    if weightX > weightY: -1
    elif weightX < weightY: 1
    else: 0
proc predictContinuation(seqs: seq[seq[string]],
                         start: seq[string]): seq[(string, float)] =
  ## Estimates the distribution of the term that follows `start`.
  ##
  ## Every member of `seqs` that begins with `start` and has at least one
  ## further term casts one vote for that term. The result holds one
  ## `(term, share of votes)` pair per distinct term, ordered by
  ## `compareTuple` (largest share first). It is empty when no sequence
  ## provides a next term.
  let continuations = getSequencesWithStart(seqs, start)
  let l = start.len
  var nexts: seq[string] # distinct next terms, in order of first appearance
  var ps: seq[float]     # ps[i] is the vote count for nexts[i]
  for c in continuations:
    # A sequence that ends exactly where `start` ends still matches the
    # prefix test but has no next term; c[l] would raise IndexDefect.
    if c.len <= l:
      continue
    let next = c[l]
    let i = nexts.find(next)
    if i == -1:
      nexts.add(next)
      ps.add(1.0)
    else:
      ps[i] += 1.0
  # Normalise the counts so that they sum to 1.
  let total = foldl(ps, a + b, 0.0)
  result = zip(nexts, ps.mapIt(it / total))
  # `sort` works in place; it accepts an openArray, which covers seqs too.
  sort(result, compareTuple)
# Prefix whose next term is to be predicted.
var start = @["1", "2", "3", "4", "5", "6"]
echo "Initial sequence", start
print "Full prediction with access to all hypotheses:"
# Baseline run: every loaded sequence is used as a hypothesis.
let continuation_probabilities = predictContinuation(seqs, start)
print continuation_probabilities
## Predict continuation but without access to all oeis sequences
proc predictContinuationWithTruncatedHypotheses(
    seqs: seq[seq[string]],
    start: seq[string],
    num_hypotheses: int): seq[(string, float)] =
  ## Runs `predictContinuation` on only the first `num_hypotheses` entries of
  ## `seqs`, or on all of them when `seqs` has fewer entries than that.
  let limit = min(num_hypotheses, seqs.len)
  predictContinuation(seqs[0 ..< limit], start)
let l = seqs.len
# Repeat the prediction with the first 10%, 20%, ..., 100% of the hypotheses.
for i in 1..10:
  # Integer arithmetic on purpose: the float form l.float * (i.float / 10.0)
  # can land just below the exact value and truncate one short
  # (e.g. 90 * 0.7 evaluates to 62.99999999999999, giving 62 instead of 63).
  let n = (l * i) div 10
  echo "Predictions with ", i * 10, "% of the hypotheses"
  let predictions = predictContinuationWithTruncatedHypotheses(seqs, start, n)
  print predictions