182 lines
5.7 KiB
Nim
182 lines
5.7 KiB
Nim
import print
|
|
import strutils
|
|
import sequtils
|
|
import std/sugar
|
|
import std/algorithm
|
|
|
|
## Prediction type & helpers.
|
|
# A prediction pairs a candidate next term (the string) with the probability
# assigned to it (the float).
type prediction = (string, float)
|
|
# string represents a hypothesis,
|
|
# float represents the probability mass
|
|
proc comparePredictions (x: prediction, y: prediction): int =
  ## Comparator that ranks the more probable prediction first.
  ##
  ## Returns -1 when `x` has the larger probability, 1 when `y` has the
  ## larger probability, and 0 when neither is larger than the other.
  let lhs = x[1]
  let rhs = y[1]
  if lhs > rhs: -1
  elif lhs < rhs: 1
  else: 0
|
|
|
|
proc getProbability (t: prediction): float =
  ## Returns the probability component (second field) of a prediction.
  t[1]
|
|
|
|
proc getHypothesis (t: prediction): string =
  ## Returns the hypothesis component (first field) of a prediction.
  t[0]
|
|
|
|
## Utils
|
|
## Find index (or -1)
|
|
proc findIndex(xs: seq[string], y: string): int =
  ## Index of the first element of `xs` equal to `y`, or -1 when `y` is absent.
  xs.find(y)
|
|
|
|
## Get sequences
|
|
# Path of the sequence data file opened by getOEIS.
let file_path = "../data/one_to_three"
|
|
## let file_path = "../data/stripped"
|
|
proc getOEIS(): seq[seq[string]] =
  ## Reads the data file at `file_path` and returns one seq of string fields
  ## per data line.
  ##
  ## The first four lines of the file are skipped. Every later line is split
  ## on "," and its first and last fields are dropped; the fields in between
  ## are kept, as strings, in their original order.
  ## NOTE(review): presumably the dropped fields are the sequence id and the
  ## empty string after a trailing comma -- confirm against the data file.
  ##
  ## Raises `IOError` if the file cannot be opened.
  const headerLines = 4
  let f = open(file_path)
  # Release the handle even when reading raises part-way through the file.
  defer: f.close()
  var lineNo = 0
  var line: string
  while f.readLine(line):
    if lineNo >= headerLines:
      let fields = split(line, ",")
      # A line with fewer than two fields (e.g. a blank line) has no interior
      # to slice; `fields[1..(len-2)]` would be an invalid range, so skip it.
      if fields.len >= 2:
        result.add(fields[1..(fields.len - 2)])
    inc lineNo
|
|
# All sequences loaded from the data file; never reassigned, so immutable.
let seqs = getOEIS()
|
|
|
|
## Sequence helpers
|
|
proc startsWithSubsequence(xs: seq[string], ys: seq[string]): bool =
  ## Returns true when `ys` begins with `xs`, i.e. `xs` is a prefix of `ys`.
  ##
  ## An empty `xs` is a prefix of every sequence (including an empty one);
  ## a non-empty `xs` is never a prefix of a shorter `ys`.
  ##
  ## Implemented as a single index loop so that no intermediate slices are
  ## allocated while comparing.
  if xs.len > ys.len:
    return false
  for i in 0..<xs.len:
    if xs[i] != ys[i]:
      return false
  return true
|
|
|
|
proc getSequencesWithStart(seqs: seq[seq[string]], start: seq[string]): seq[seq[string]] =
  ## Returns, in their original order, the members of `seqs` for which
  ## `startsWithSubsequence(start, member)` holds.
  seqs.filterIt(startsWithSubsequence(start, it))
|
|
|
|
## Pretty print sequences
|
|
# var start = @["1", "2", "3", "4", "5"]
|
|
# var continuations = getSequencesWithStart(seqs, start)
|
|
# print continuations
|
|
|
|
proc predictContinuation(seqs: seq[seq[string]], observations: seq[string]): seq[prediction] =
  ## Predicts the term that follows `observations`.
  ##
  ## Every sequence in `seqs` that begins with `observations` and has at least
  ## one further term votes for that term. The result lists each distinct
  ## next term with its share of the votes, ordered by `comparePredictions`
  ## (largest share first). The result is empty when no sequence votes.
  let continuations = getSequencesWithStart(seqs, observations)
  let l = observations.len
  var nexts: seq[string]
  var ps: seq[float]
  for c in continuations:
    # A sequence that ends exactly where the observations end has no next
    # term to vote for; reading c[l] would raise IndexDefect.
    if c.len <= l:
      continue
    let next = c[l]
    let i = findIndex(nexts, next)
    if i == -1:
      nexts.add(next)
      ps.add(1.0)
    else:
      ps[i] += 1.0
  # Normalise the vote counts so that they sum to 1.
  let total = foldl(ps, a + b, 0.0)
  ps = ps.map(p => p / total)
  var next_and_ps = zip(nexts, ps)
  sort(next_and_ps, comparePredictions)
  # ^ sorts in place
  # also, openArray refers to both arrays and sequences.
  return next_and_ps
|
|
|
|
## Predict continuation but without access to all oeis sequences
|
|
|
|
proc predictContinuationWithTruncatedHypotheses(seqs: seq[seq[string]], start: seq[string], num_hypotheses: int): seq[prediction] =
  ## Runs `predictContinuation` on only the first `num_hypotheses` entries of
  ## `seqs` (or on all of them when `seqs` is shorter than that).
  let limit = min(num_hypotheses, seqs.len)
  predictContinuation(seqs[0..<limit], start)
|
|
|
|
proc showPredictionsWithMoreHypotheses(seqs: seq[seq[string]], start: seq[string]) =
  ## Prints the predicted continuations of `start` ten times, using the first
  ## 10%, 20%, ..., 100% of `seqs` as the hypothesis pool.
  let total = seqs.len.float
  for tenth in 1..10:
    let fraction = tenth.float/10.0
    let budget = (total * fraction).int
    echo "Predictions with ", (100.0 * tenth.float/10.0).int, "% of the hypotheses"
    # The variable keeps its name because `print` may show it in the output.
    let predictions = predictContinuationWithTruncatedHypotheses(seqs, start, budget)
    print predictions
|
|
|
|
## showPredictionsWithMoreHypotheses()
|
|
|
|
proc jitBayesLoop(
  seqs: seq[seq[string]],
  observations: seq[string],
  n_observations_seen: int,
  initial_num_hypotheses: int,
  num_hypotheses_step: int,
) =
  ## Reveals `observations` one term at a time, predicting each next term from
  ## a limited pool of hypotheses and enlarging the pool only when the
  ## prediction misses the term that actually follows.
  ##
  ## * `seqs`: every available hypothesis; the pool is always `seqs[0..<k]`.
  ## * `observations`: the sequence being revealed.
  ## * `n_observations_seen`: how many leading terms are known before the
  ##   first prediction; must be 1 or greater.
  ## * `initial_num_hypotheses`: starting pool size, clamped to `0..seqs.len`.
  ## * `num_hypotheses_step`: how many hypotheses each enlargement adds; must
  ##   be 1 or greater.
  ##
  ## All results are printed. The proc returns early when an argument is
  ## invalid, or when even the full `seqs` holds no hypothesis that agrees
  ## with the observations revealed so far.
  # NOTE(review): this heading goes through `print` while every other message
  # uses `echo` -- confirm that is intended.
  print "## Prediction with limited number of hypotheses (~JIT-Bayes)"
  if n_observations_seen < 1:
    echo "in jitBayesLoop function, n_observations_seen must be 1 or greater"
    return
  # A step below 1 never grows the pool, so the enlargement loop further down
  # could never terminate.
  if num_hypotheses_step < 1:
    echo "in jitBayesLoop function, num_hypotheses_step must be 1 or greater"
    return

  # Clamp so the initial slice cannot run past the end of `seqs`.
  var num_hypotheses = clamp(initial_num_hypotheses, 0, seqs.len)
  var hypotheses = seqs[0..<num_hypotheses]

  let l = observations.len
  for i in n_observations_seen..<l: # to do: make so that this can start at 0.
    let seen = observations[0..<i]
    let predictions = predictContinuation(hypotheses, seen)
    echo "### Prediction after seeing ", i, " observations: ", seen
    print predictions

    let correct_continuation = observations[i]
    let considered_continuations = predictions.map(p => getHypothesis(p))
    let correct_continuation_index = findIndex(considered_continuations, correct_continuation)

    if correct_continuation_index == -1:
      echo "Correct continuation not found in set of hypotheses of size ", num_hypotheses, "/", seqs.len, ". Increasing size of the set of hypotheses."

      # A concordant hypothesis must reproduce everything revealed so far,
      # including the term that was just missed -- not merely match at index i.
      let revealed = observations[0..i]
      var found_concordant_hypothesis = false
      var concordant_hypotheses: seq[seq[string]]

      while (not found_concordant_hypothesis) and (num_hypotheses < seqs.len):
        num_hypotheses = min(num_hypotheses + num_hypotheses_step, seqs.len)
        hypotheses = seqs[0..<num_hypotheses]
        concordant_hypotheses = getSequencesWithStart(hypotheses, revealed)
        if concordant_hypotheses.len > 0:
          found_concordant_hypothesis = true

      if not found_concordant_hypothesis:
        echo "Increased number of hypotheses to ", num_hypotheses, ", but didn't find any hypotheses concordant with observations. Giving up."
        return
      else:
        echo "Increased number of hypotheses to ", num_hypotheses, ", and found ", concordant_hypotheses.len, " concordant hypotheses. Continuing"

    else:
      echo "Correct continuation was ", correct_continuation
      echo "It was assigned a probability of ", getProbability(predictions[correct_continuation_index])

    echo ""
|
|
|
|
## Display outputs
|
|
echo ""

# Demo input. The commented-out line is an alternative run with a plain
# counting sequence.
## var observations = @["1", "2", "3", "4", "5", "6"]
let observations = @["1", "2", "3", "109", "5", "6"]

# First predict with every loaded sequence available as a hypothesis.
echo "## Full prediction with access to all hypotheses (~Solomonoff)"
echo "## Initial sequence: ", observations
let continuation_probabilities = predictContinuation(seqs, observations)
print continuation_probabilities
echo ""

# Then replay the observations with a pool that starts at 1000 hypotheses and
# grows by 2000 whenever a prediction misses; the first 3 terms are given.
jitBayesLoop(seqs, observations, 3, 1_000, 2_000)

echo ""
|
|
|