Added docstrings and cleanup

This commit is contained in:
Ozzie Gooen 2022-04-28 07:56:23 -04:00
parent 335d0b5d04
commit 2eec389b55
4 changed files with 36 additions and 9 deletions

View File

@ -2,7 +2,7 @@ open Jest
open TestHelpers open TestHelpers
let prepareInputs = (ar, minWeight) => let prepareInputs = (ar, minWeight) =>
E.A.Sorted.Floats.splitContinuousAndDiscreteForMinWeight(ar, minWeight) |> ( E.A.Sorted.Floats.splitContinuousAndDiscreteForMinWeight(ar, ~minDiscreteWeight=minWeight) |> (
((c, disc)) => (c, disc |> E.FloatFloatMap.toArray) ((c, disc)) => (c, disc |> E.FloatFloatMap.toArray)
) )
@ -33,14 +33,14 @@ describe("Continuous and discrete splits", () => {
let (_, discrete1) = E.A.Sorted.Floats.splitContinuousAndDiscreteForMinWeight( let (_, discrete1) = E.A.Sorted.Floats.splitContinuousAndDiscreteForMinWeight(
makeDuplicatedArray(10), makeDuplicatedArray(10),
2, ~minDiscreteWeight=2,
) )
let toArr1 = discrete1 |> E.FloatFloatMap.toArray let toArr1 = discrete1 |> E.FloatFloatMap.toArray
makeTest("splitMedium at count=10", toArr1 |> Belt.Array.length, 10) makeTest("splitMedium at count=10", toArr1 |> Belt.Array.length, 10)
let (_c, discrete2) = E.A.Sorted.Floats.splitContinuousAndDiscreteForMinWeight( let (_c, discrete2) = E.A.Sorted.Floats.splitContinuousAndDiscreteForMinWeight(
makeDuplicatedArray(500), makeDuplicatedArray(500),
2, ~minDiscreteWeight=2,
) )
let toArr2 = discrete2 |> E.FloatFloatMap.toArray let toArr2 = discrete2 |> E.FloatFloatMap.toArray
makeTest("splitMedium at count=500", toArr2 |> Belt.Array.length, 500) makeTest("splitMedium at count=500", toArr2 |> Belt.Array.length, 500)

View File

@ -63,8 +63,11 @@ let toPointSetDist = (
(), (),
): Internals.Types.outputs => { ): Internals.Types.outputs => {
Array.fast_sort(compare, samples) Array.fast_sort(compare, samples)
let minDiscreteToKeep = max(2, E.A.length(samples) / 10); let minDiscreteToKeep = MagicNumbers.ToPointSet.minDiscreteToKeep(samples)
let (continuousPart, discretePart) = E.A.Sorted.Floats.splitContinuousAndDiscreteForMinWeight(samples, minDiscreteToKeep) let (continuousPart, discretePart) = E.A.Sorted.Floats.splitContinuousAndDiscreteForMinWeight(
samples,
~minDiscreteWeight=minDiscreteToKeep,
)
let length = samples |> E.A.length |> float_of_int let length = samples |> E.A.length |> float_of_int
let discrete: PointSetTypes.discreteShape = let discrete: PointSetTypes.discreteShape =
discretePart discretePart

View File

@ -22,3 +22,16 @@ module OpCost = {
let wildcardCost = 1000 let wildcardCost = 1000
let monteCarloCost = Environment.defaultSampleCount let monteCarloCost = Environment.defaultSampleCount
} }
module ToPointSet = {
/*
This function chooses the minimum amount of duplicate samples that need
to exist in order for this to be considered discrete. The tricky thing
is that there are some operations that create duplicate continuous samples,
so we can't guarantee that these only will occur because the fundamental
structure is meant to be discrete. I chose this heuristic because I think
it would strike a reasonable trade-off, but Im really unsure whats
best right now.
*/
let minDiscreteToKeep = samples => max(20, E.A.length(samples) / 50)
}

View File

@ -522,7 +522,12 @@ module A = {
let makeIncrementalDown = (a, b) => let makeIncrementalDown = (a, b) =>
Array.make(a - b + 1, a) |> Array.mapi((i, c) => c - i) |> Belt.Array.map(_, float_of_int) Array.make(a - b + 1, a) |> Array.mapi((i, c) => c - i) |> Belt.Array.map(_, float_of_int)
let splitContinuousAndDiscreteForDuplicates = (sortedArray: array<float>) => { /*
This function goes through a sorted array and divides it into two different clusters:
continuous samples and discrete samples. The discrete samples are stored in a mutable map.
Samples are thought to be discrete if they have any duplicates.
*/
let _splitContinuousAndDiscreteForDuplicates = (sortedArray: array<float>) => {
let continuous: array<float> = [] let continuous: array<float> = []
let discrete = FloatFloatMap.empty() let discrete = FloatFloatMap.empty()
Belt.Array.forEachWithIndex(sortedArray, (index, element) => { Belt.Array.forEachWithIndex(sortedArray, (index, element) => {
@ -545,11 +550,18 @@ module A = {
(continuous, discrete) (continuous, discrete)
} }
/*
This function works very similarly to splitContinuousAndDiscreteForDuplicates. The one major difference
is that you can specify a minDiscreteWeight. If the min discreet weight is 4, that would mean that
at least four elements needed from a specific value for that to be kept as discrete. This is important
because in some cases, we can expect that some common elements will be generated by regular operations.
The final continous array will be sorted.
*/
let splitContinuousAndDiscreteForMinWeight = ( let splitContinuousAndDiscreteForMinWeight = (
sortedArray: array<float>, sortedArray: array<float>,
minDiscreteWeight: int, ~minDiscreteWeight: int,
) => { ) => {
let (continuous, discrete) = splitContinuousAndDiscreteForDuplicates(sortedArray) let (continuous, discrete) = _splitContinuousAndDiscreteForDuplicates(sortedArray)
let keepFn = v => Belt.Float.toInt(v) >= minDiscreteWeight let keepFn = v => Belt.Float.toInt(v) >= minDiscreteWeight
let (discreteToKeep, discreteToIntegrate) = FloatFloatMap.partition( let (discreteToKeep, discreteToIntegrate) = FloatFloatMap.partition(
((_, v)) => keepFn(v), ((_, v)) => keepFn(v),
@ -559,7 +571,6 @@ module A = {
discreteToIntegrate->FloatFloatMap.toArray discreteToIntegrate->FloatFloatMap.toArray
|> fmap(((k, v)) => Belt.Array.makeBy(Belt.Float.toInt(v), _ => k)) |> fmap(((k, v)) => Belt.Array.makeBy(Belt.Float.toInt(v), _ => k))
|> Belt.Array.concatMany |> Belt.Array.concatMany
let newContinuous = concat(continuous, newContinousSamples) let newContinuous = concat(continuous, newContinousSamples)
newContinuous |> Array.fast_sort(floatCompare) newContinuous |> Array.fast_sort(floatCompare)
(newContinuous, discreteToKeep) (newContinuous, discreteToKeep)