Merge pull request #409 from quantified-uncertainty/splidcontinuousDiscrete-refactor
Refactor of splitContinuousAndDiscrete to allow for customization
This commit is contained in:
commit
e1551cb1d7
|
@ -1,41 +0,0 @@
|
||||||
open Jest
|
|
||||||
open TestHelpers
|
|
||||||
|
|
||||||
describe("Continuous and discrete splits", () => {
|
|
||||||
makeTest(
|
|
||||||
"splits (1)",
|
|
||||||
SampleSetDist_ToPointSet.Internals.T.splitContinuousAndDiscrete([1.432, 1.33455, 2.0]),
|
|
||||||
([1.432, 1.33455, 2.0], E.FloatFloatMap.empty()),
|
|
||||||
)
|
|
||||||
makeTest(
|
|
||||||
"splits (2)",
|
|
||||||
SampleSetDist_ToPointSet.Internals.T.splitContinuousAndDiscrete([
|
|
||||||
1.432,
|
|
||||||
1.33455,
|
|
||||||
2.0,
|
|
||||||
2.0,
|
|
||||||
2.0,
|
|
||||||
2.0,
|
|
||||||
]) |> (((c, disc)) => (c, disc |> E.FloatFloatMap.toArray)),
|
|
||||||
([1.432, 1.33455], [(2.0, 4.0)]),
|
|
||||||
)
|
|
||||||
|
|
||||||
let makeDuplicatedArray = count => {
|
|
||||||
let arr = Belt.Array.range(1, count) |> E.A.fmap(float_of_int)
|
|
||||||
let sorted = arr |> Belt.SortArray.stableSortBy(_, compare)
|
|
||||||
E.A.concatMany([sorted, sorted, sorted, sorted]) |> Belt.SortArray.stableSortBy(_, compare)
|
|
||||||
}
|
|
||||||
|
|
||||||
let (_, discrete1) = SampleSetDist_ToPointSet.Internals.T.splitContinuousAndDiscrete(
|
|
||||||
makeDuplicatedArray(10),
|
|
||||||
)
|
|
||||||
let toArr1 = discrete1 |> E.FloatFloatMap.toArray
|
|
||||||
makeTest("splitMedium at count=10", toArr1 |> Belt.Array.length, 10)
|
|
||||||
|
|
||||||
let (_c, discrete2) = SampleSetDist_ToPointSet.Internals.T.splitContinuousAndDiscrete(
|
|
||||||
makeDuplicatedArray(500),
|
|
||||||
)
|
|
||||||
let toArr2 = discrete2 |> E.FloatFloatMap.toArray
|
|
||||||
makeTest("splitMedium at count=500", toArr2 |> Belt.Array.length, 500)
|
|
||||||
// makeTest("foo", [] |> Belt.Array.length, 500)
|
|
||||||
})
|
|
|
@ -0,0 +1,48 @@
|
||||||
|
open Jest
|
||||||
|
open TestHelpers
|
||||||
|
|
||||||
|
let prepareInputs = (ar, minWeight) =>
|
||||||
|
E.A.Sorted.Floats.splitContinuousAndDiscreteForMinWeight(ar, ~minDiscreteWeight=minWeight) |> (
|
||||||
|
((c, disc)) => (c, disc |> E.FloatFloatMap.toArray)
|
||||||
|
)
|
||||||
|
|
||||||
|
describe("Continuous and discrete splits", () => {
|
||||||
|
makeTest(
|
||||||
|
"is empty, with no common elements",
|
||||||
|
prepareInputs([1.432, 1.33455, 2.0], 2),
|
||||||
|
([1.33455, 1.432, 2.0], []),
|
||||||
|
)
|
||||||
|
|
||||||
|
makeTest(
|
||||||
|
"only stores 3.5 as discrete when minWeight is 3",
|
||||||
|
prepareInputs([1.432, 1.33455, 2.0, 2.0, 3.5, 3.5, 3.5], 3),
|
||||||
|
([1.33455, 1.432, 2.0, 2.0], [(3.5, 3.0)]),
|
||||||
|
)
|
||||||
|
|
||||||
|
makeTest(
|
||||||
|
"doesn't store 3.5 as discrete when minWeight is 5",
|
||||||
|
prepareInputs([1.432, 1.33455, 2.0, 2.0, 3.5, 3.5, 3.5], 5),
|
||||||
|
([1.33455, 1.432, 2.0, 2.0, 3.5, 3.5, 3.5], []),
|
||||||
|
)
|
||||||
|
|
||||||
|
let makeDuplicatedArray = count => {
|
||||||
|
let arr = Belt.Array.range(1, count) |> E.A.fmap(float_of_int)
|
||||||
|
let sorted = arr |> Belt.SortArray.stableSortBy(_, compare)
|
||||||
|
E.A.concatMany([sorted, sorted, sorted, sorted]) |> Belt.SortArray.stableSortBy(_, compare)
|
||||||
|
}
|
||||||
|
|
||||||
|
let (_, discrete1) = E.A.Sorted.Floats.splitContinuousAndDiscreteForMinWeight(
|
||||||
|
makeDuplicatedArray(10),
|
||||||
|
~minDiscreteWeight=2,
|
||||||
|
)
|
||||||
|
let toArr1 = discrete1 |> E.FloatFloatMap.toArray
|
||||||
|
makeTest("splitMedium at count=10", toArr1 |> Belt.Array.length, 10)
|
||||||
|
|
||||||
|
let (_c, discrete2) = E.A.Sorted.Floats.splitContinuousAndDiscreteForMinWeight(
|
||||||
|
makeDuplicatedArray(500),
|
||||||
|
~minDiscreteWeight=2,
|
||||||
|
)
|
||||||
|
let toArr2 = discrete2 |> E.FloatFloatMap.toArray
|
||||||
|
makeTest("splitMedium at count=500", toArr2 |> Belt.Array.length, 500)
|
||||||
|
// makeTest("foo", [] |> Belt.Array.length, 500)
|
||||||
|
})
|
|
@ -39,28 +39,6 @@ module Internals = {
|
||||||
module T = {
|
module T = {
|
||||||
type t = array<float>
|
type t = array<float>
|
||||||
|
|
||||||
let splitContinuousAndDiscrete = (sortedArray: t) => {
|
|
||||||
let continuous = []
|
|
||||||
let discrete = E.FloatFloatMap.empty()
|
|
||||||
Belt.Array.forEachWithIndex(sortedArray, (index, element) => {
|
|
||||||
let maxIndex = (sortedArray |> Array.length) - 1
|
|
||||||
let possiblySimilarElements = switch index {
|
|
||||||
| 0 => [index + 1]
|
|
||||||
| n if n == maxIndex => [index - 1]
|
|
||||||
| _ => [index - 1, index + 1]
|
|
||||||
} |> Belt.Array.map(_, r => sortedArray[r])
|
|
||||||
let hasSimilarElement = Belt.Array.some(possiblySimilarElements, r => r == element)
|
|
||||||
hasSimilarElement
|
|
||||||
? E.FloatFloatMap.increment(element, discrete)
|
|
||||||
: {
|
|
||||||
let _ = Js.Array.push(element, continuous)
|
|
||||||
}
|
|
||||||
|
|
||||||
()
|
|
||||||
})
|
|
||||||
(continuous, discrete)
|
|
||||||
}
|
|
||||||
|
|
||||||
let xWidthToUnitWidth = (samples, outputXYPoints, xWidth) => {
|
let xWidthToUnitWidth = (samples, outputXYPoints, xWidth) => {
|
||||||
let xyPointRange = E.A.Sorted.range(samples) |> E.O.default(0.0)
|
let xyPointRange = E.A.Sorted.range(samples) |> E.O.default(0.0)
|
||||||
let xyPointWidth = xyPointRange /. float_of_int(outputXYPoints)
|
let xyPointWidth = xyPointRange /. float_of_int(outputXYPoints)
|
||||||
|
@ -85,7 +63,11 @@ let toPointSetDist = (
|
||||||
(),
|
(),
|
||||||
): Internals.Types.outputs => {
|
): Internals.Types.outputs => {
|
||||||
Array.fast_sort(compare, samples)
|
Array.fast_sort(compare, samples)
|
||||||
let (continuousPart, discretePart) = E.A.Sorted.Floats.split(samples)
|
let minDiscreteToKeep = MagicNumbers.ToPointSet.minDiscreteToKeep(samples)
|
||||||
|
let (continuousPart, discretePart) = E.A.Sorted.Floats.splitContinuousAndDiscreteForMinWeight(
|
||||||
|
samples,
|
||||||
|
~minDiscreteWeight=minDiscreteToKeep,
|
||||||
|
)
|
||||||
let length = samples |> E.A.length |> float_of_int
|
let length = samples |> E.A.length |> float_of_int
|
||||||
let discrete: PointSetTypes.discreteShape =
|
let discrete: PointSetTypes.discreteShape =
|
||||||
discretePart
|
discretePart
|
||||||
|
|
|
@ -22,3 +22,16 @@ module OpCost = {
|
||||||
let wildcardCost = 1000
|
let wildcardCost = 1000
|
||||||
let monteCarloCost = Environment.defaultSampleCount
|
let monteCarloCost = Environment.defaultSampleCount
|
||||||
}
|
}
|
||||||
|
|
||||||
|
module ToPointSet = {
|
||||||
|
/*
|
||||||
|
This function chooses the minimum number of duplicate samples that need
|
||||||
|
to exist in order for a value to be considered discrete. The tricky thing
|
||||||
|
is that there are some operations that create duplicate continuous samples,
|
||||||
|
so we can't guarantee that duplicates occur only because the fundamental
|
||||||
|
structure is meant to be discrete. I chose this heuristic because I think
|
||||||
|
it would strike a reasonable trade-off, but I’m really unsure what’s
|
||||||
|
best right now.
|
||||||
|
*/
|
||||||
|
let minDiscreteToKeep = samples => max(20, E.A.length(samples) / 50)
|
||||||
|
}
|
||||||
|
|
|
@ -8,7 +8,7 @@ module FloatFloatMap = {
|
||||||
type t = Belt.MutableMap.t<Id.t, float, Id.identity>
|
type t = Belt.MutableMap.t<Id.t, float, Id.identity>
|
||||||
|
|
||||||
let fromArray = (ar: array<(float, float)>) => Belt.MutableMap.fromArray(ar, ~id=module(Id))
|
let fromArray = (ar: array<(float, float)>) => Belt.MutableMap.fromArray(ar, ~id=module(Id))
|
||||||
let toArray = (t: t) => Belt.MutableMap.toArray(t)
|
let toArray = (t: t): array<(float, float)> => Belt.MutableMap.toArray(t)
|
||||||
let empty = () => Belt.MutableMap.make(~id=module(Id))
|
let empty = () => Belt.MutableMap.make(~id=module(Id))
|
||||||
let increment = (el, t: t) =>
|
let increment = (el, t: t) =>
|
||||||
Belt.MutableMap.update(t, el, x =>
|
Belt.MutableMap.update(t, el, x =>
|
||||||
|
@ -20,6 +20,10 @@ module FloatFloatMap = {
|
||||||
|
|
||||||
let get = (el, t: t) => Belt.MutableMap.get(t, el)
|
let get = (el, t: t) => Belt.MutableMap.get(t, el)
|
||||||
let fmap = (fn, t: t) => Belt.MutableMap.map(t, fn)
|
let fmap = (fn, t: t) => Belt.MutableMap.map(t, fn)
|
||||||
|
let partition = (fn, t: t) => {
|
||||||
|
let (match, noMatch) = Belt.Array.partition(toArray(t), fn)
|
||||||
|
(fromArray(match), fromArray(noMatch))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
module Int = {
|
module Int = {
|
||||||
|
@ -518,18 +522,22 @@ module A = {
|
||||||
let makeIncrementalDown = (a, b) =>
|
let makeIncrementalDown = (a, b) =>
|
||||||
Array.make(a - b + 1, a) |> Array.mapi((i, c) => c - i) |> Belt.Array.map(_, float_of_int)
|
Array.make(a - b + 1, a) |> Array.mapi((i, c) => c - i) |> Belt.Array.map(_, float_of_int)
|
||||||
|
|
||||||
let split = (sortedArray: array<float>) => {
|
/*
|
||||||
let continuous = []
|
This function goes through a sorted array and divides it into two different clusters:
|
||||||
|
continuous samples and discrete samples. The discrete samples are stored in a mutable map.
|
||||||
|
Samples are thought to be discrete if they have any duplicates.
|
||||||
|
*/
|
||||||
|
let _splitContinuousAndDiscreteForDuplicates = (sortedArray: array<float>) => {
|
||||||
|
let continuous: array<float> = []
|
||||||
let discrete = FloatFloatMap.empty()
|
let discrete = FloatFloatMap.empty()
|
||||||
Belt.Array.forEachWithIndex(sortedArray, (_, element) => {
|
Belt.Array.forEachWithIndex(sortedArray, (index, element) => {
|
||||||
// let maxIndex = (sortedArray |> Array.length) - 1
|
let maxIndex = (sortedArray |> Array.length) - 1
|
||||||
// let possiblySimilarElements = switch index {
|
let possiblySimilarElements = switch index {
|
||||||
// | 0 => [index + 1]
|
| 0 => [index + 1]
|
||||||
// | n if n == maxIndex => [index - 1]
|
| n if n == maxIndex => [index - 1]
|
||||||
// | _ => [index - 1, index + 1]
|
| _ => [index - 1, index + 1]
|
||||||
// } |> Belt.Array.map(_, r => sortedArray[r])
|
} |> Belt.Array.map(_, r => sortedArray[r])
|
||||||
// let hasSimilarElement = Belt.Array.some(possiblySimilarElements, r => r == element)
|
let hasSimilarElement = Belt.Array.some(possiblySimilarElements, r => r == element)
|
||||||
let hasSimilarElement = false
|
|
||||||
hasSimilarElement
|
hasSimilarElement
|
||||||
? FloatFloatMap.increment(element, discrete)
|
? FloatFloatMap.increment(element, discrete)
|
||||||
: {
|
: {
|
||||||
|
@ -541,6 +549,32 @@ module A = {
|
||||||
|
|
||||||
(continuous, discrete)
|
(continuous, discrete)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
This function works very similarly to _splitContinuousAndDiscreteForDuplicates. The one major difference
|
||||||
|
is that you can specify a minDiscreteWeight. If the min discrete weight is 4, that would mean that
|
||||||
|
at least four elements with a specific value are needed for that value to be kept as discrete. This is important
|
||||||
|
because in some cases, we can expect that some common elements will be generated by regular operations.
|
||||||
|
The final continuous array will be sorted.
|
||||||
|
*/
|
||||||
|
let splitContinuousAndDiscreteForMinWeight = (
|
||||||
|
sortedArray: array<float>,
|
||||||
|
~minDiscreteWeight: int,
|
||||||
|
) => {
|
||||||
|
let (continuous, discrete) = _splitContinuousAndDiscreteForDuplicates(sortedArray)
|
||||||
|
let keepFn = v => Belt.Float.toInt(v) >= minDiscreteWeight
|
||||||
|
let (discreteToKeep, discreteToIntegrate) = FloatFloatMap.partition(
|
||||||
|
((_, v)) => keepFn(v),
|
||||||
|
discrete,
|
||||||
|
)
|
||||||
|
let newContinousSamples =
|
||||||
|
discreteToIntegrate->FloatFloatMap.toArray
|
||||||
|
|> fmap(((k, v)) => Belt.Array.makeBy(Belt.Float.toInt(v), _ => k))
|
||||||
|
|> Belt.Array.concatMany
|
||||||
|
let newContinuous = concat(continuous, newContinousSamples)
|
||||||
|
newContinuous |> Array.fast_sort(floatCompare)
|
||||||
|
(newContinuous, discreteToKeep)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user