Compare commits
10 Commits
d4581605e0
...
824593accc
Author | SHA1 | Date | |
---|---|---|---|
824593accc | |||
32e8a32e4c | |||
98d982066c | |||
7dbd689548 | |||
34fb95ff37 | |||
73372ad2ab | |||
90739ce4a8 | |||
218df1179f | |||
d70eac48f0 | |||
c2e2836df5 |
23
index.md
23
index.md
|
@ -1,14 +1,11 @@
|
|||
## Dependencies
|
||||
|
||||
nimble install https://github.com/nim-lang/bigints
|
||||
https://nimdocs.com/nim-lang/bigints/bigints.html
|
||||
nimble install print
|
||||
https://github.com/treeform/print
|
||||
|
||||
## Dependencies
|
||||
|
||||
The data folder is not included, but its contents are:
|
||||
|
||||
.
|
||||
├── data
|
||||
│ ├── stripped
|
||||
|
@ -16,9 +13,13 @@ The data folder is not included, but its contents are:
|
|||
|
||||
Where stripped.gz can be found at <https://oeis.org/wiki/JSON_Format,_Compressed_Files>
|
||||
|
||||
## Gotchas
|
||||
|
||||
Nimble doesn't play well with relative directories. Ignore and use make instead?
|
||||
|
||||
## To do
|
||||
|
||||
- [ ] Exploration of OEIS data
|
||||
- [x] Exploration of OEIS data
|
||||
- [ ] Subdivide subsequent tasks into steps
|
||||
- [x] Simple prediction of the next integer
|
||||
- [x] Simple predictions v1
|
||||
|
@ -26,11 +27,15 @@ Where stripped.gz can be found at <https://oeis.org/wiki/JSON_Format,_Compressed
|
|||
- [-] Maybe add some caching, e.g., write continuations to file, and read them next time.
|
||||
- [ ] JIT Bayesianism:
|
||||
- [x] Function to predict with a variable number of hypotheses
|
||||
- [ ] Function to start predicting with a small number of hypotheses, and get more if the initial ones aren't enough.
|
||||
- [ ] Add the loop of: start with some small number of sequences, and if these aren't enough, read more.
|
||||
- [ ] ...
|
||||
- [ ] Infrabayesianism x1: Predicting interleaved sequences
|
||||
- [ ] Infrabayesianism x2: Deterministic game of producing a fixed deterministic prediction, and then the adversary picking whatever minimizes your loss
|
||||
- [x] Function to start predicting with a small number of hypotheses, and get more if the initial ones aren't enough.
|
||||
- [x] Add the loop of: start with some small number of sequences, and if these aren't enough, read more.
|
||||
- [x] Clean-up
|
||||
- [ ] Infrabayesianism
|
||||
- [ ] Infrabayesianism x1: Predicting interleaved sequences.
|
||||
- Yeah, actually, I think this just captures an implicit assumption of Bayesianism as actually practiced.
|
||||
- [ ] Infrabayesianism x2: Deterministic game of producing a fixed deterministic prediction, and then the adversary picking whatever minimizes your loss
|
||||
- I am actually not sure what exactly the procedure for computing that loss is. Do you minimize over subsequent rounds of the game, or only over the first round? Look this up.
|
||||
- Also maybe ask for help from e.g., Alex Mennen.
|
||||
- [ ] Write the actor
|
||||
|
||||
---
|
||||
|
|
BIN
src/compute_constrained_bayes
Executable file
BIN
src/compute_constrained_bayes
Executable file
Binary file not shown.
224
src/compute_constrained_bayes.nim
Normal file
224
src/compute_constrained_bayes.nim
Normal file
|
@ -0,0 +1,224 @@
|
|||
import print
|
||||
import strutils
|
||||
import sequtils
|
||||
import std/math
|
||||
import std/sugar
|
||||
import std/algorithm
|
||||
|
||||
## Prediction type & helpers.
|
||||
# A prediction pairs a candidate next term with its probability mass:
# the string is the hypothesis, the float is the probability assigned to it.
type prediction = (string, float)

proc comparePredictions(x: prediction, y: prediction): int =
  ## Comparator that orders predictions by descending probability.
  ## Yields 1 when `x` is less probable than `y`, -1 when it is more
  ## probable, and 0 in every other case.
  let lhs = x[1]
  let rhs = y[1]
  if lhs < rhs:
    result = 1
  elif lhs > rhs:
    result = -1
  else:
    result = 0

proc getProbability(t: prediction): float =
  ## Probability component (second field) of a prediction.
  t[1]

proc getHypothesis(t: prediction): string =
  ## Hypothesis component (first field) of a prediction.
  t[0]
|
||||
|
||||
## Utils
|
||||
## Find index (or -1)
|
||||
proc findIndex(xs: seq[string], y: string): int =
  ## Position of the first element of `xs` equal to `y`, or -1 when no
  ## element matches.
  xs.find(y)
|
||||
|
||||
## Get sequences
|
||||
## let file_path = "../data/one_to_three"
|
||||
let file_path = "../data/stripped"
|
||||
proc getOEIS(): seq[seq[string]] =
  ## Reads the file at `file_path` and returns one list of terms per data line.
  ##
  ## The first four lines are skipped. Every later line is split on ",";
  ## its first and last fields are dropped and the fields in between are
  ## kept, as strings, as that line's terms.
  ## NOTE(review): presumably the skipped lines are the header of the OEIS
  ## `stripped` file, the first field is the sequence id and the last field
  ## is the empty string after a trailing comma -- confirm against the data.
  let f = open(file_path)
  # Release the handle even if reading raises part-way through.
  defer: f.close()

  var line: string
  var lineNo = 0
  while f.readLine(line):
    if lineNo > 3:
      let fields = line.split(",")
      # A line without any comma has a single field; slicing 1..(len-2) on
      # it would request a negative length, so such lines are skipped.
      if fields.len >= 2:
        result.add(fields[1 .. ^2])
    inc lineNo
|
||||
var seqs = getOEIS()
|
||||
|
||||
## Sequence helpers
|
||||
proc startsWithSubsequence(subseq: seq[string], xs: seq[string]): bool =
  ## True when `subseq` is a prefix of `xs`, i.e. `xs` has at least as many
  ## elements as `subseq` and agrees with it position by position.
  ## An empty `subseq` is a prefix of every sequence.
  ##
  ## Implemented as a single linear scan: no tail copies are made and the
  ## stack depth does not grow with the input length.
  if subseq.len > xs.len:
    return false
  for i in 0 ..< subseq.len:
    if subseq[i] != xs[i]:
      return false
  return true
|
||||
|
||||
proc getSequencesWithStart(seqs: seq[seq[string]], start: seq[string]): seq[seq[string]] =
  ## Keeps, in their original order, the sequences of `seqs` for which
  ## `startsWithSubsequence(start, sequence)` holds.
  seqs.filterIt(startsWithSubsequence(start, it))
|
||||
|
||||
## Pretty print sequences
|
||||
# var start = @["1", "2", "3", "4", "5"]
|
||||
# var continuations = getSequencesWithStart(seqs, start)
|
||||
# print continuations
|
||||
|
||||
proc predictContinuation(seqs: seq[seq[string]], observations: seq[string]): seq[prediction] =
  ## Predicts the term that follows `observations`.
  ##
  ## Every sequence in `seqs` that starts with `observations` and has at
  ## least one further term casts one vote for that term. The votes are
  ## normalised into probabilities and the result is sorted by descending
  ## probability. Returns an empty seq when no sequence qualifies.
  let l = observations.len
  var nexts: seq[string]
  var ps: seq[float]
  for c in getSequencesWithStart(seqs, observations):
    # A sequence that ends exactly where the observations end has no next
    # term; indexing c[l] on it would be out of bounds, so it casts no vote.
    if c.len <= l:
      continue
    let next = c[l]
    let i = findIndex(nexts, next)
    if i == -1:
      nexts.add(next)
      ps.add(1.0)
    else:
      ps[i] += 1.0

  let total = foldl(ps, a + b, 0.0)
  # When `ps` is empty nothing is divided, so a zero total is harmless.
  ps = ps.map(p => p / total)

  var next_and_ps = zip(nexts, ps)
  # `sort` works in place; comparePredictions puts the most probable first.
  sort(next_and_ps, comparePredictions)
  return next_and_ps
|
||||
|
||||
## Predict continuation but without access to all oeis sequences
|
||||
|
||||
proc predictContinuationWithTruncatedHypotheses(seqs: seq[seq[string]], start: seq[string], num_hypotheses: int): seq[prediction] =
  ## Same as predictContinuation, but only the first `num_hypotheses`
  ## sequences of `seqs` (or all of them, if there are fewer) serve as
  ## hypotheses.
  let limit = min(num_hypotheses, seqs.len)
  predictContinuation(seqs[0 ..< limit], start)
|
||||
|
||||
proc showPredictionsWithIncreasinglyManyHypotheses(seqs: seq[seq[string]], start: seq[string]) =
  ## Prints the prediction for the term after `start` ten times, using the
  ## first 10%, 20%, ..., 100% of `seqs` as hypotheses.
  echo "Showing predictions with increasingly many hypotheses after seeing ", start
  let total = seqs.len.float
  for decile in 1..10:
    let fraction = decile.float / 10.0
    # Truncating conversion: the number of hypotheses used in this round.
    let n = (total * fraction).int
    echo "Predictions with ", (100.0 * decile.float / 10.0).int, "% of the hypotheses"
    # Name kept as `predictions` in case `print` labels its output with it.
    let predictions = predictContinuationWithTruncatedHypotheses(seqs, start, n)
    print predictions
|
||||
|
||||
## showPredictionsWithIncreasinglyManyHypotheses()
|
||||
|
||||
proc jitBayesLoop(
  seqs: seq[seq[string]],
  observations: seq[string],
  n_observations_seen: int,
  initial_num_hypotheses: int,
  num_hypotheses_step: int,
) =
  ## Predicts each term of `observations` from index `n_observations_seen`
  ## onwards using only a prefix of `seqs` as the hypothesis set.
  ##
  ## The set starts with the first `initial_num_hypotheses` sequences. When
  ## the observed term is not among the predicted continuations, the set is
  ## enlarged by `num_hypotheses_step` sequences at a time until some
  ## hypothesis agrees with the observations up to and including that term.
  ## If all of `seqs` is in use and none agrees, the proc reports it and
  ## returns. All results are written to standard output.
  print "## Prediction with limited number of hypotheses (~JIT-Bayes)"

  # Clamp the initial size so that slicing never runs past the end of `seqs`.
  var num_hypotheses = max(0, min(initial_num_hypotheses, seqs.len))
  var hypotheses = seqs[0..<num_hypotheses]
  # A non-positive step would never enlarge the set, and the growth loop
  # below would never terminate; always grow by at least one sequence.
  let step = max(1, num_hypotheses_step)

  for i in n_observations_seen..<observations.len:
    let seen = observations[0..<i]
    let predictions = predictContinuation(hypotheses, seen)
    echo "### Prediction after seeing ", i, " observations: ", seen
    print predictions

    let correct_continuation = observations[i]
    let considered_continuations = predictions.mapIt(getHypothesis(it))
    let correct_continuation_index = findIndex(considered_continuations, correct_continuation)

    if correct_continuation_index != -1:
      echo "Correct continuation was ", correct_continuation
      echo "It was assigned a probability of ", getProbability(predictions[correct_continuation_index])
      continue

    # The observed term was not predicted: enlarge the hypothesis set until
    # some hypothesis matches the observations including the new term.
    let seen_and_next = observations[0..i]
    var concordant_hypotheses: seq[seq[string]]
    while concordant_hypotheses.len == 0 and num_hypotheses < seqs.len:
      echo "Correct continuation, " , correct_continuation, " not found in set of hypotheses of size ", num_hypotheses, "/", seqs.len, ". Increasing size of the set of hypotheses."
      num_hypotheses = min(num_hypotheses + step, seqs.len)
      hypotheses = seqs[0..<num_hypotheses]
      concordant_hypotheses = hypotheses.filterIt(
        it.len > i and startsWithSubsequence(seen_and_next, it))

    if concordant_hypotheses.len == 0:
      echo "Increased number of hypotheses to ", num_hypotheses, ", but didn't find any hypotheses concordant with observations. Giving up."
      return
    echo "Increased number of hypotheses to ", num_hypotheses, ", and found ", concordant_hypotheses.len, " concordant hypotheses. Continuing"
|
||||
|
||||
## Infrabayesianism
|
||||
|
||||
proc miniInfraBayes(
  seqs: seq[seq[string]],
  observations: seq[string],
  n_observations_seen: int,
  utility_function: string
) =
  ## Scores the predictions made for each term of `observations` from index
  ## `n_observations_seen` onwards, using all of `seqs` as hypotheses.
  ##
  ## For every step the natural log of the probability assigned to the
  ## observed term is recorded as that step's loss and printed; after the
  ## last step the sum of the losses is printed. These values are
  ## log-probabilities, so they are never positive.
  ## Only `utility_function == "logloss"` is supported; any other value
  ## prints a notice and returns.
  if utility_function != "logloss":
    echo "miniInfraBayes function only programmed for the logloss utility function"
    return

  echo "## Mini-infra-bayesianism over environments, where your utility in an environment is just the log-loss in the predictions you make until you become certain that you are in that environment."

  var losses: seq[float]
  for i in n_observations_seen..<observations.len:
    let seen = observations[0..<i]
    # See the README for why this ends up being equivalent.
    let predictions = predictContinuation(seqs, seen)
    echo "### Prediction after seeing ", i, " observations: ", seen
    print predictions

    let correct_continuation = observations[i]
    let considered_continuations = predictions.mapIt(getHypothesis(it))
    let correct_continuation_index = findIndex(considered_continuations, correct_continuation)
    # findIndex yields -1 when no hypothesis predicted the observed term;
    # indexing `predictions` with it would be out of bounds. Such a term
    # was given no probability mass at all.
    let p_correct_continuation =
      if correct_continuation_index == -1: 0.0
      else: getProbability(predictions[correct_continuation_index])
    # Zero probability is scored explicitly as negative infinity.
    let new_loss =
      if p_correct_continuation > 0.0: ln(p_correct_continuation)
      else: NegInf
    losses.add(new_loss)

    echo "Correct continuation was ", correct_continuation
    echo "It was assigned a probability of ", p_correct_continuation
    echo "And hence a loss of ", new_loss
  echo "Total loss is: ", foldl(losses, a + b, 0.0)
|
||||
|
||||
|
||||
## Infrabayesianism. Part 1: Have hypotheses over just part of the world.
|
||||
|
||||
|
||||
## Infrabayesianism. Part 2: Take the infimum over the possible loss.
|
||||
|
||||
## Display outputs
|
||||
# Driver: runs each of the demonstrations above on prefixes of one fixed
# sequence of observations and prints the results.
let all_observations = @["1", "2", "3", "23", "11", "18", "77", "46", "84"]

echo ""

## var observations = @["1", "2", "3", "4", "5", "6"]

# 1. Prediction from the first three terms, using every hypothesis.
echo "## Full prediction with access to all hypotheses (~Solomonoff)"
var observations = all_observations[0..<3]
echo "## Initial sequence: ", observations
let continuation_probabilities = predictContinuation(seqs, observations)
print continuation_probabilities
echo ""

# 2. Same kind of prediction from the first four terms, with a growing
#    share of the hypotheses.
echo "## Predictions with increasingly many hypotheses"
observations = all_observations[0..<4]
showPredictionsWithIncreasinglyManyHypotheses(seqs, observations)
echo ""

# 3. Term-by-term prediction with a hypothesis set that grows on demand.
observations = all_observations
jitBayesLoop(seqs, observations, 3, 1_000, 30_000)
echo ""

# 4. Term-by-term log scoring over the same observations.
miniInfraBayes(seqs, observations, 3, "logloss")
echo ""
|
BIN
src/jit_bayes
BIN
src/jit_bayes
Binary file not shown.
|
@ -1,125 +0,0 @@
|
|||
import print
|
||||
import strutils
|
||||
import sequtils
|
||||
import std/sugar
|
||||
import std/algorithm
|
||||
|
||||
## Get sequences
|
||||
## let file_path = "../data/stripped"
|
||||
let file_path = "../data/one_to_three"
|
||||
proc getOEIS(): seq[seq[string]] =
|
||||
let f = open(file_path)
|
||||
var i = 0
|
||||
var line : string
|
||||
var seqs: seq[seq[string]]
|
||||
while f.read_line(line):
|
||||
if i > 3:
|
||||
let seq = split(line, ",")
|
||||
let l = seq.len
|
||||
let nums = seq[1..(l-2)]
|
||||
seqs.add(nums)
|
||||
i = i + 1
|
||||
f.close()
|
||||
return seqs
|
||||
var seqs = getOEIS()
|
||||
|
||||
## Sequence helpers
|
||||
proc startsWithSubsequence(xs: seq[string], ys: seq[string]): bool =
|
||||
if xs.len == 0:
|
||||
return true
|
||||
elif ys.len == 0:
|
||||
return false
|
||||
elif xs[0] == ys[0]:
|
||||
return startsWithSubsequence(xs[1..<xs.len], ys[1..<ys.len])
|
||||
else:
|
||||
return false
|
||||
|
||||
proc getSequencesWithStart(seqs: seq[seq[string]], start: seq[string]): seq[seq[string]] =
|
||||
var continuations: seq[seq[string]]
|
||||
for seq in seqs:
|
||||
if startsWithSubsequence(start, seq):
|
||||
continuations.add(seq)
|
||||
return continuations
|
||||
|
||||
## Pretty print sequences
|
||||
|
||||
# var start = @["1", "2", "3", "4", "5"]
|
||||
# var continuations = getSequencesWithStart(seqs, start)
|
||||
# print continuations
|
||||
|
||||
## Find index (or -1)
|
||||
|
||||
proc findIndex(xs: seq[string], y: string): int =
|
||||
for i, x in xs:
|
||||
if x == y:
|
||||
return i
|
||||
return -1
|
||||
|
||||
## Do simple predictions
|
||||
proc compareTuple (x: (string, float), y: (string, float)): int =
|
||||
let (_, p1) = x
|
||||
let (_, p2) = y
|
||||
if p1 < p2: return 1
|
||||
elif p1 > p2: return -1
|
||||
else: return 0
|
||||
|
||||
proc predictContinuation(seqs: seq[seq[string]], start: seq[string]): seq[(string, float)] =
|
||||
let continuations = getSequencesWithStart(seqs, start)
|
||||
let l = start.len
|
||||
var nexts: seq[string]
|
||||
var ps: seq[float]
|
||||
for c in continuations:
|
||||
let next = c[l]
|
||||
let i = findIndex(nexts, next)
|
||||
if i == -1:
|
||||
nexts.add(next)
|
||||
ps.add(1.0)
|
||||
else:
|
||||
ps[i] = ps[i] + 1.0
|
||||
let sum = foldl(ps, a + b, 0.0)
|
||||
ps = ps.map( p => p/sum)
|
||||
var next_and_ps = zip(nexts, ps)
|
||||
# next_and_ps = sort(next_and_ps, compareTuple)
|
||||
sort(next_and_ps, compareTuple)
|
||||
# ^ sorts in place
|
||||
# also, openArray refers to both arrays and sequences.
|
||||
return next_and_ps
|
||||
|
||||
var start = @["1", "2", "3", "4", "5", "6"]
|
||||
echo "Initial sequence", start
|
||||
print "Full prediction with access to all hypotheses:"
|
||||
let continuation_probabilities = predictContinuation(seqs, start)
|
||||
print continuation_probabilities
|
||||
|
||||
## Predict continuation but without access to all oeis sequences
|
||||
|
||||
proc predictContinuationWithTruncatedHypotheses(seqs: seq[seq[string]], start: seq[string], num_hypotheses: int): seq[(string, float)] =
|
||||
let n = if num_hypotheses < seqs.len: num_hypotheses else: seqs.len
|
||||
let truncated_seqs = seqs[0..<n]
|
||||
return predictContinuation(truncated_seqs, start)
|
||||
|
||||
proc showPredictionsWithMoreHypotheses() =
|
||||
let l = seqs.len
|
||||
for i in 1..10:
|
||||
let n = (l.float * (i.float/10.0)).int
|
||||
echo "Predictions with ", (100.0 * i.float/10.0).int, "% of the hypotheses"
|
||||
let predictions = predictContinuationWithTruncatedHypotheses(seqs, start, n)
|
||||
print predictions
|
||||
|
||||
## showPredictionsWithMoreHypotheses()
|
||||
|
||||
proc jitBayesLoop(
|
||||
seqs: seq[seq[string]],
|
||||
initial_num_hypotheses: int,
|
||||
num_hypotheses_step: int,
|
||||
observations: seq[string]
|
||||
) =
|
||||
let l = observations.len
|
||||
|
||||
var hypotheses = seqs[0..initial_num_hypotheses]
|
||||
for i in 1..<l:
|
||||
let predictions = predictContinuation(hypotheses, observations[0..<i])
|
||||
print predictions
|
||||
|
||||
jitBayesLoop()
|
||||
|
34
src/makefile
34
src/makefile
|
@ -1,16 +1,34 @@
|
|||
## make
|
||||
## make = make build
|
||||
## make deps
|
||||
## make build
|
||||
## make fast ## < recommended for users
|
||||
## make run
|
||||
## make examine
|
||||
## make time
|
||||
|
||||
SHELL := /bin/bash ## <= required to use time
|
||||
VERBOSE=--verbosity:0
|
||||
|
||||
build: jit_bayes.nim
|
||||
nim c $(VERBOSE) jit_bayes.nim
|
||||
build: compute_constrained_bayes.nim
|
||||
nim c $(VERBOSE) compute_constrained_bayes.nim
|
||||
|
||||
run: jit_bayes
|
||||
./jit_bayes $(VERBOSE)
|
||||
fast:
|
||||
nim c $(VERBOSE) -d:danger compute_constrained_bayes.nim && echo
|
||||
|
||||
examine: jit_bayes
|
||||
nim c $(VERBOSE) jit_bayes.nim && time ./jit_bayes $(VERBOSE) && echo
|
||||
nim c $(VERBOSE) -d:release jit_bayes.nim && time ./jit_bayes $(VERBOSE) && echo
|
||||
nim c $(VERBOSE) -d:danger jit_bayes.nim && time ./jit_bayes $(VERBOSE)
|
||||
deps:
|
||||
nimble install print@1.0.2
|
||||
nimble install https://github.com/CosmicToast/pipe ## backup at github.com/NunoSempere/nim-pipe
|
||||
gzip -d ../data/stripped.gz -c > ../data/stripped
|
||||
|
||||
run: compute_constrained_bayes
|
||||
./compute_constrained_bayes $(VERBOSE)
|
||||
|
||||
## Misc:
|
||||
examine: compute_constrained_bayes
|
||||
nim c $(VERBOSE) compute_constrained_bayes.nim && time ./compute_constrained_bayes $(VERBOSE) && echo
|
||||
nim c $(VERBOSE) -d:release compute_constrained_bayes.nim && time ./compute_constrained_bayes $(VERBOSE) && echo
|
||||
nim c $(VERBOSE) -d:danger compute_constrained_bayes.nim && time ./compute_constrained_bayes $(VERBOSE)
|
||||
|
||||
time:
|
||||
time make && echo && time make run
|
||||
|
|
Loading…
Reference in New Issue
Block a user