compute-constrained-bayes/src/compute_constrained_bayes.nim

282 lines
10 KiB
Nim
Raw Normal View History

2023-05-23 22:29:41 +00:00
import print
2023-05-23 00:19:04 +00:00
import strutils
import sequtils
2023-05-25 01:07:58 +00:00
import std/math
2023-05-23 22:29:41 +00:00
import std/sugar
2023-05-24 04:19:05 +00:00
import std/algorithm
2023-05-23 00:19:04 +00:00
2023-05-24 22:56:08 +00:00
## Prediction type & helpers.
# A prediction pairs a hypothesis (the candidate next term, kept as a string)
# with the probability mass assigned to that hypothesis (a float).
type prediction = (string, float)

proc comparePredictions(x: prediction, y: prediction): int =
  ## Comparator that orders predictions by descending probability:
  ## returns -1 when `x` is more probable than `y`, 1 when it is less
  ## probable, and 0 otherwise.
  result =
    if x[1] > y[1]: -1
    elif x[1] < y[1]: 1
    else: 0

proc getProbability(t: prediction): float =
  ## Returns the probability component (second field) of a prediction.
  t[1]

proc getHypothesis(t: prediction): string =
  ## Returns the hypothesis component (first field) of a prediction.
  t[0]
2023-05-24 22:56:08 +00:00
## Utils
## Find index (or -1)
proc findIndex(xs: seq[string], y: string): int =
  ## Returns the position of the first element of `xs` equal to `y`,
  ## or -1 when `y` does not occur in `xs`.
  xs.find(y)
2023-05-23 01:17:29 +00:00
## Get sequences
## Path of the data file read by getOEIS. It is a relative path, so it is
## resolved against the directory the program is run from.
## Alternative data file, currently disabled:
## let file_path = "../data/one_to_three"
let file_path = "../data/stripped"
2023-05-23 01:12:29 +00:00
proc getOEIS(): seq[seq[string]] =
  ## Reads the data file at `file_path` and returns one seq of terms
  ## (kept as strings) per data line.
  ##
  ## The first four lines of the file are skipped. Every other line is split
  ## on commas, and its first and last fields are dropped, so only the fields
  ## in between are kept as the terms of the sequence.
  ##
  ## Lines with fewer than two comma-separated fields (e.g. blank lines) are
  ## skipped; previously they made the slice bounds invalid and raised.
  ## Raises whatever `open` raises when the file cannot be opened.
  let f = open(file_path)
  # Close the handle even if reading raises part-way through.
  defer: f.close()
  var lineNumber = 0
  var line: string
  while f.readLine(line):
    if lineNumber > 3:
      let fields = split(line, ",")
      if fields.len >= 2:
        # Drop the leading field and the trailing field.
        result.add(fields[1 .. (fields.len - 2)])
    inc lineNumber
# Every sequence from the data file, read once when the module is initialized.
var seqs = getOEIS()
2023-05-23 00:19:04 +00:00
2023-05-23 01:17:29 +00:00
## Sequence helpers
2023-05-24 23:28:31 +00:00
proc startsWithSubsequence(subseq: seq[string], xs: seq[string]): bool =
  ## Returns true when `xs` begins with the elements of `subseq`, in order.
  ##
  ## An empty `subseq` is a prefix of every sequence (including the empty
  ## one); a `subseq` longer than `xs` is never a prefix.
  ##
  ## Implemented as an index loop so that no intermediate slices are
  ## allocated and no recursion depth is consumed.
  if subseq.len > xs.len:
    return false
  for i in 0 ..< subseq.len:
    if subseq[i] != xs[i]:
      return false
  return true
2023-05-24 21:47:34 +00:00
proc getSequencesWithStart(seqs: seq[seq[string]], start: seq[string]): seq[seq[string]] =
  ## Returns, in their original order, the members of `seqs` that begin
  ## with `start` (as decided by startsWithSubsequence).
  seqs.filterIt(startsWithSubsequence(start, it))
2023-05-23 01:17:29 +00:00
## Pretty print sequences
2023-05-23 22:29:41 +00:00
# var start = @["1", "2", "3", "4", "5"]
# var continuations = getSequencesWithStart(seqs, start)
# print continuations
proc predictContinuation(seqs: seq[seq[string]], observations: seq[string]): seq[prediction] =
  ## Predicts the term that follows `observations`.
  ##
  ## Every sequence in `seqs` that starts with `observations` casts one vote
  ## for its next term. The votes are normalized to sum to 1, and the result
  ## lists each distinct next term with its share, most probable first.
  ## Returns an empty seq when no sequence offers a next term.
  let continuations = getSequencesWithStart(seqs, observations)
  let l = observations.len
  var nexts: seq[string]
  var counts: seq[float]
  for c in continuations:
    # A sequence that is exactly equal to the observations passes the prefix
    # test but has no next term; indexing c[l] on it would raise.
    if c.len <= l:
      continue
    let next = c[l]
    let i = findIndex(nexts, next)
    if i == -1:
      nexts.add(next)
      counts.add(1.0)
    else:
      counts[i] = counts[i] + 1.0
  let total = foldl(counts, a + b, 0.0)
  # When `counts` is empty this maps over nothing, so no division by zero.
  let ps = counts.map(p => p/total)
  var next_and_ps = zip(nexts, ps)
  # sort works in place; comparePredictions puts the highest probability first.
  sort(next_and_ps, comparePredictions)
  return next_and_ps
2023-05-23 22:29:41 +00:00
## Predict continuation but without access to all oeis sequences
proc predictContinuationWithTruncatedHypotheses(seqs: seq[seq[string]], start: seq[string], num_hypotheses: int): seq[prediction] =
  ## Same as predictContinuation, but only the first `num_hypotheses`
  ## entries of `seqs` are considered (all of them when `num_hypotheses`
  ## is at least `seqs.len`).
  let limit = min(num_hypotheses, seqs.len)
  predictContinuation(seqs[0 ..< limit], start)
proc showPredictionsWithIncreasinglyManyHypotheses(seqs: seq[seq[string]], start: seq[string]) =
  ## Prints the predicted continuations of `start` ten times, using the
  ## first 10%, 20%, ..., 100% of `seqs` as the set of hypotheses.
  echo "Showing predictions with increasingly many hypotheses after seeing ", start
  let total = seqs.len
  for decile in 1..10:
    let percent = (100.0 * decile.float/10.0).int
    # Number of hypotheses in this slice, truncated towards zero.
    let cutoff = (total.float * (decile.float/10.0)).int
    echo "Predictions with ", percent, "% of the hypotheses"
    let predictions = predictContinuationWithTruncatedHypotheses(seqs, start, cutoff)
    print predictions
## showPredictionsWithIncreasinglyManyHypotheses()
2023-05-24 21:47:34 +00:00
proc jitBayesLoop(
  seqs: seq[seq[string]],
  observations: seq[string],
  n_observations_seen: int,
  initial_num_hypotheses: int,
  num_hypotheses_step: int,
) =
  ## Walks through `observations`, predicting each term from the ones before
  ## it while only looking at a prefix of `seqs` as the set of hypotheses.
  ##
  ## - `n_observations_seen`: index of the first observation to predict; the
  ##   observations before it are taken as already seen.
  ## - `initial_num_hypotheses`: how many entries of `seqs` to start with.
  ##   Clamped to the range 0 .. seqs.len, so asking for more hypotheses than
  ##   exist no longer raises on the initial slice.
  ## - `num_hypotheses_step`: how many entries to add each time the current
  ##   set fails to contain the observed term. Values below 1 are treated as
  ##   1, because a non-positive step would never let the growth loop end.
  ##
  ## When the observed term is not among the predictions, the set is grown
  ## step by step until some hypothesis agrees with all observations up to and
  ## including that term. If all of `seqs` is in use and none agrees, the proc
  ## reports that and returns. All results are written to standard output.
  print "## Prediction with limited number of hypotheses (~JIT-Bayes)"
  let step = max(1, num_hypotheses_step)
  var num_hypotheses = max(0, min(initial_num_hypotheses, seqs.len))
  var hypotheses = seqs[0..<num_hypotheses]

  for i in n_observations_seen..<observations.len:
    let predictions = predictContinuation(hypotheses, observations[0..<i])
    echo "### Prediction after seeing ", i, " observations: ", observations[0..<i]
    print predictions

    let correct_continuation = observations[i]
    let considered_continuations = predictions.mapIt(getHypothesis(it))
    let correct_continuation_index = findIndex(considered_continuations, correct_continuation)

    if correct_continuation_index == -1:
      # Observations up to and including the term that was just missed;
      # the same for every pass of the growth loop below.
      let seen = observations[0..i]
      var found_concordant_hypothesis = false
      var concordant_hypotheses: seq[seq[string]]
      while (not found_concordant_hypothesis) and (num_hypotheses < seqs.len):
        echo "Correct continuation, " , correct_continuation, " not found in set of hypotheses of size ", num_hypotheses, "/", seqs.len, ". Increasing size of the set of hypotheses."
        num_hypotheses = min(num_hypotheses + step, seqs.len)
        hypotheses = seqs[0..<num_hypotheses]
        concordant_hypotheses = hypotheses.filterIt((it.len > i) and startsWithSubsequence(seen, it))
        if concordant_hypotheses.len > 0:
          found_concordant_hypothesis = true
      if not found_concordant_hypothesis:
        echo "Increased number of hypotheses to ", num_hypotheses, ", but didn't find any hypotheses concordant with observations. Giving up."
        return
      else:
        echo "Increased number of hypotheses to ", num_hypotheses, ", and found ", concordant_hypotheses.len, " concordant hypotheses. Continuing"
        ## print concordant_hypotheses
    else:
      echo "Correct continuation was ", correct_continuation
      echo "It was assigned a probability of ", getProbability(predictions[correct_continuation_index])
## Infrabayesianism
proc miniInfraBayesArgminMaxLoss(
  seqs: seq[seq[string]],
  observations: seq[string],
  n_observations_seen: int,
  utility_function: string
) =
  ## Scores the predictions made from `seqs` along `observations`.
  ##
  ## Starting at index `n_observations_seen`, each observation is predicted
  ## from the ones before it; the natural logarithm of the probability given
  ## to the term actually observed is recorded as that step's loss (so each
  ## recorded value is at most 0). The per-step results and the final sum are
  ## written to standard output.
  ##
  ## Only `utility_function == "logloss"` is supported; any other value prints
  ## a notice and returns.
  ##
  ## When no hypothesis predicts the observed term, it is treated as having
  ## been given probability 0 (a loss of negative infinity). Previously the
  ## not-found index -1 was used to index the predictions, which raised.
  if utility_function != "logloss":
    echo "miniInfraBayes function only programmed for the logloss utility function"
    return

  echo "## Mini-infra-bayesianism over environments, where your utility in an environment is just the log-loss in the predictions you make until you become certain that you are in that environment."

  var losses: seq[float]
  for i in n_observations_seen..<observations.len:
    let predictions = predictContinuation(seqs, observations[0..<i]) ## See the README for why this ends up being equivalent.
    echo "### Prediction after seeing ", i, " observations: ", observations[0..<i]
    print predictions
    let correct_continuation = observations[i]
    let considered_continuations = predictions.mapIt(getHypothesis(it))
    let correct_continuation_index = findIndex(considered_continuations, correct_continuation)
    let p_correct_continuation =
      if correct_continuation_index == -1: 0.0
      else: getProbability(predictions[correct_continuation_index])
    let new_loss = ln(p_correct_continuation)
    losses.add(new_loss)
    echo "Correct continuation was ", correct_continuation
    echo "It was assigned a probability of ", p_correct_continuation
    echo "And hence a loss of ", new_loss
  echo "Total loss is: ", foldl(losses, a + b, 0.0)
proc getEvens(xs: seq[string]): seq[string] =
  ## Returns the elements of `xs` found at even 0-based positions
  ## (0, 2, 4, ...), in order.
  for idx in countup(0, xs.len - 1, 2):
    result.add(xs[idx])
proc getOdds(xs: seq[string]): seq[string] =
  ## Returns the elements of `xs` found at odd 0-based positions
  ## (1, 3, 5, ...), in order.
  for idx in countup(1, xs.len - 1, 2):
    result.add(xs[idx])
proc interleave(xs: seq[string], ys: seq[string]): seq[string] =
  ## Merges two equally long sequences by alternating their elements:
  ## xs[0], ys[0], xs[1], ys[1], ...
  ## When the lengths differ, prints a notice and returns an empty seq.
  if xs.len != ys.len:
    echo "Interleaved sequences have to have the same length; returning empty sequence."
    return @[]
  for idx, x in xs:
    result.add(x)
    result.add(ys[idx])
proc miniInfraBayesArgminMaxLossInterleavedHypotheses(
  seqs: seq[seq[string]],
  observations: seq[string],
  n_observations_seen: int,
  utility_function: string
) =
  ## Scores predictions along `observations` under the assumption that the
  ## observations are two interleaved sequences (a1, b1, a2, b2, ...).
  ##
  ## Starting at index `n_observations_seen`, the observation at index i is
  ## predicted from the earlier observations that share its parity: those at
  ## even positions when i is even, those at odd positions when i is odd. The
  ## natural logarithm of the probability given to the observed term is
  ## recorded as that step's loss (so each recorded value is at most 0). The
  ## per-step results and the final sum are written to standard output.
  ##
  ## Only `utility_function == "logloss"` is supported; any other value prints
  ## a notice and returns.
  ##
  ## When no hypothesis predicts the observed term, it is treated as having
  ## been given probability 0 (a loss of negative infinity). Previously the
  ## not-found index -1 was used to index the predictions, which raised.
  if utility_function != "logloss":
    echo "miniInfraBayes function only programmed for the logloss utility function"
    return

  echo "## Mini-infra-bayesianism over environments, where your utility in an environment is just the log-loss in the predictions you make until you become certain that you are in that environment. This time with a twist: You don't have hypotheses over the sequences you observe, but rather over their odd and even position, i.e., you think that you observe interleaved OEIS sequences, (a1, b1, a2, b2, a3, b3). See the README.md for more."

  var losses: seq[float]
  for i in n_observations_seen..<observations.len:
    let seen = observations[0..<i]
    # Only the earlier observations with the same parity as position i
    # are used to predict the observation at position i.
    let parity_subsequence =
      if i mod 2 == 0: getEvens(seen)
      else: getOdds(seen)
    let predictions = predictContinuation(seqs, parity_subsequence)
    echo "### Prediction after seeing ", i, " observations: ", seen
    print predictions
    let correct_continuation = observations[i]
    let considered_continuations = predictions.mapIt(getHypothesis(it))
    let correct_continuation_index = findIndex(considered_continuations, correct_continuation)
    let p_correct_continuation =
      if correct_continuation_index == -1: 0.0
      else: getProbability(predictions[correct_continuation_index])
    let new_loss = ln(p_correct_continuation)
    losses.add(new_loss)
    echo "Correct continuation was ", correct_continuation
    echo "It was assigned a probability of ", p_correct_continuation
    echo "And hence a loss of ", new_loss
  echo "Total loss is: ", foldl(losses, a + b, 0.0)
## Display outputs
2023-05-24 22:56:08 +00:00
echo ""

## var observations = @["1", "2", "3", "4", "5", "6"]

# Observation list shared by the demonstrations below.
let surprising_observations = @["1", "2", "3", "23", "11", "18", "77", "46", "84"]

# 1. Prediction using every sequence as a hypothesis.
echo "## Full prediction with access to all hypotheses (~Solomonoff)"
var observations = @["1", "2", "3"]
echo "## Initial sequence: ", observations
let continuation_probabilities = predictContinuation(seqs, observations)
print continuation_probabilities
echo ""

# 2. The same kind of prediction, repeated on growing fractions of the hypotheses.
echo "## Predictions with increasingly many hypotheses"
observations = @["1", "2", "3", "23"]
showPredictionsWithIncreasinglyManyHypotheses(seqs, observations)
echo ""

# 3. Start from 1_000 hypotheses and add 30_000 more whenever they fall short.
observations = surprising_observations
jitBayesLoop(seqs, observations, 3, 1_000, 30_000)
echo ""

# 4. Log-loss scoring over the same observations.
observations = surprising_observations
miniInfraBayesArgminMaxLoss(seqs, observations, 3, "logloss")
echo ""

# 5. Log-loss scoring where the observations are two interleaved sequences.
observations = interleave(surprising_observations, @["2", "11", "13", "23", "47", "59", "71", "83", "107"])
miniInfraBayesArgminMaxLossInterleavedHypotheses(seqs, observations, 6, "logloss")
echo ""