Merge pull request #409 from quantified-uncertainty/splidcontinuousDiscrete-refactor
Refactor of splitContinuousAndDiscrete to allow for customization
This commit is contained in:
		
						commit
						e1551cb1d7
					
				|  | @ -1,41 +0,0 @@ | ||||||
| open Jest |  | ||||||
| open TestHelpers |  | ||||||
| 
 |  | ||||||
| describe("Continuous and discrete splits", () => { |  | ||||||
|   makeTest( |  | ||||||
|     "splits (1)", |  | ||||||
|     SampleSetDist_ToPointSet.Internals.T.splitContinuousAndDiscrete([1.432, 1.33455, 2.0]), |  | ||||||
|     ([1.432, 1.33455, 2.0], E.FloatFloatMap.empty()), |  | ||||||
|   ) |  | ||||||
|   makeTest( |  | ||||||
|     "splits (2)", |  | ||||||
|     SampleSetDist_ToPointSet.Internals.T.splitContinuousAndDiscrete([ |  | ||||||
|       1.432, |  | ||||||
|       1.33455, |  | ||||||
|       2.0, |  | ||||||
|       2.0, |  | ||||||
|       2.0, |  | ||||||
|       2.0, |  | ||||||
|     ]) |> (((c, disc)) => (c, disc |> E.FloatFloatMap.toArray)), |  | ||||||
|     ([1.432, 1.33455], [(2.0, 4.0)]), |  | ||||||
|   ) |  | ||||||
| 
 |  | ||||||
|   let makeDuplicatedArray = count => { |  | ||||||
|     let arr = Belt.Array.range(1, count) |> E.A.fmap(float_of_int) |  | ||||||
|     let sorted = arr |> Belt.SortArray.stableSortBy(_, compare) |  | ||||||
|     E.A.concatMany([sorted, sorted, sorted, sorted]) |> Belt.SortArray.stableSortBy(_, compare) |  | ||||||
|   } |  | ||||||
| 
 |  | ||||||
|   let (_, discrete1) = SampleSetDist_ToPointSet.Internals.T.splitContinuousAndDiscrete( |  | ||||||
|     makeDuplicatedArray(10), |  | ||||||
|   ) |  | ||||||
|   let toArr1 = discrete1 |> E.FloatFloatMap.toArray |  | ||||||
|   makeTest("splitMedium at count=10", toArr1 |> Belt.Array.length, 10) |  | ||||||
| 
 |  | ||||||
|   let (_c, discrete2) = SampleSetDist_ToPointSet.Internals.T.splitContinuousAndDiscrete( |  | ||||||
|     makeDuplicatedArray(500), |  | ||||||
|   ) |  | ||||||
|   let toArr2 = discrete2 |> E.FloatFloatMap.toArray |  | ||||||
|   makeTest("splitMedium at count=500", toArr2 |> Belt.Array.length, 500) |  | ||||||
|   // makeTest("foo", [] |> Belt.Array.length, 500) |  | ||||||
| }) |  | ||||||
|  | @ -0,0 +1,48 @@ | ||||||
|  | open Jest | ||||||
|  | open TestHelpers | ||||||
|  | 
 | ||||||
|  | let prepareInputs = (ar, minWeight) => | ||||||
|  |   E.A.Sorted.Floats.splitContinuousAndDiscreteForMinWeight(ar, ~minDiscreteWeight=minWeight) |> ( | ||||||
|  |     ((c, disc)) => (c, disc |> E.FloatFloatMap.toArray) | ||||||
|  |   ) | ||||||
|  | 
 | ||||||
|  | describe("Continuous and discrete splits", () => { | ||||||
|  |   makeTest( | ||||||
|  |     "is empty, with no common elements", | ||||||
|  |     prepareInputs([1.432, 1.33455, 2.0], 2), | ||||||
|  |     ([1.33455, 1.432, 2.0], []), | ||||||
|  |   ) | ||||||
|  | 
 | ||||||
|  |   makeTest( | ||||||
|  |     "only stores 3.5 as discrete when minWeight is 3", | ||||||
|  |     prepareInputs([1.432, 1.33455, 2.0, 2.0, 3.5, 3.5, 3.5], 3), | ||||||
|  |     ([1.33455, 1.432, 2.0, 2.0], [(3.5, 3.0)]), | ||||||
|  |   ) | ||||||
|  | 
 | ||||||
|  |   makeTest( | ||||||
|  |     "doesn't store 3.5 as discrete when minWeight is 5", | ||||||
|  |     prepareInputs([1.432, 1.33455, 2.0, 2.0, 3.5, 3.5, 3.5], 5), | ||||||
|  |     ([1.33455, 1.432, 2.0, 2.0, 3.5, 3.5, 3.5], []), | ||||||
|  |   ) | ||||||
|  | 
 | ||||||
|  |   let makeDuplicatedArray = count => { | ||||||
|  |     let arr = Belt.Array.range(1, count) |> E.A.fmap(float_of_int) | ||||||
|  |     let sorted = arr |> Belt.SortArray.stableSortBy(_, compare) | ||||||
|  |     E.A.concatMany([sorted, sorted, sorted, sorted]) |> Belt.SortArray.stableSortBy(_, compare) | ||||||
|  |   } | ||||||
|  | 
 | ||||||
|  |   let (_, discrete1) = E.A.Sorted.Floats.splitContinuousAndDiscreteForMinWeight( | ||||||
|  |     makeDuplicatedArray(10), | ||||||
|  |     ~minDiscreteWeight=2, | ||||||
|  |   ) | ||||||
|  |   let toArr1 = discrete1 |> E.FloatFloatMap.toArray | ||||||
|  |   makeTest("splitMedium at count=10", toArr1 |> Belt.Array.length, 10) | ||||||
|  | 
 | ||||||
|  |   let (_c, discrete2) = E.A.Sorted.Floats.splitContinuousAndDiscreteForMinWeight( | ||||||
|  |     makeDuplicatedArray(500), | ||||||
|  |     ~minDiscreteWeight=2, | ||||||
|  |   ) | ||||||
|  |   let toArr2 = discrete2 |> E.FloatFloatMap.toArray | ||||||
|  |   makeTest("splitMedium at count=500", toArr2 |> Belt.Array.length, 500) | ||||||
|  |   // makeTest("foo", [] |> Belt.Array.length, 500) | ||||||
|  | }) | ||||||
|  | @ -39,28 +39,6 @@ module Internals = { | ||||||
|   module T = { |   module T = { | ||||||
|     type t = array<float> |     type t = array<float> | ||||||
| 
 | 
 | ||||||
|     let splitContinuousAndDiscrete = (sortedArray: t) => { |  | ||||||
|       let continuous = [] |  | ||||||
|       let discrete = E.FloatFloatMap.empty() |  | ||||||
|       Belt.Array.forEachWithIndex(sortedArray, (index, element) => { |  | ||||||
|         let maxIndex = (sortedArray |> Array.length) - 1 |  | ||||||
|         let possiblySimilarElements = switch index { |  | ||||||
|         | 0 => [index + 1] |  | ||||||
|         | n if n == maxIndex => [index - 1] |  | ||||||
|         | _ => [index - 1, index + 1] |  | ||||||
|         } |> Belt.Array.map(_, r => sortedArray[r]) |  | ||||||
|         let hasSimilarElement = Belt.Array.some(possiblySimilarElements, r => r == element) |  | ||||||
|         hasSimilarElement |  | ||||||
|           ? E.FloatFloatMap.increment(element, discrete) |  | ||||||
|           : { |  | ||||||
|               let _ = Js.Array.push(element, continuous) |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|         () |  | ||||||
|       }) |  | ||||||
|       (continuous, discrete) |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     let xWidthToUnitWidth = (samples, outputXYPoints, xWidth) => { |     let xWidthToUnitWidth = (samples, outputXYPoints, xWidth) => { | ||||||
|       let xyPointRange = E.A.Sorted.range(samples) |> E.O.default(0.0) |       let xyPointRange = E.A.Sorted.range(samples) |> E.O.default(0.0) | ||||||
|       let xyPointWidth = xyPointRange /. float_of_int(outputXYPoints) |       let xyPointWidth = xyPointRange /. float_of_int(outputXYPoints) | ||||||
|  | @ -85,7 +63,11 @@ let toPointSetDist = ( | ||||||
|   (), |   (), | ||||||
| ): Internals.Types.outputs => { | ): Internals.Types.outputs => { | ||||||
|   Array.fast_sort(compare, samples) |   Array.fast_sort(compare, samples) | ||||||
|   let (continuousPart, discretePart) = E.A.Sorted.Floats.split(samples) |   let minDiscreteToKeep = MagicNumbers.ToPointSet.minDiscreteToKeep(samples) | ||||||
|  |   let (continuousPart, discretePart) = E.A.Sorted.Floats.splitContinuousAndDiscreteForMinWeight( | ||||||
|  |     samples, | ||||||
|  |     ~minDiscreteWeight=minDiscreteToKeep, | ||||||
|  |   ) | ||||||
|   let length = samples |> E.A.length |> float_of_int |   let length = samples |> E.A.length |> float_of_int | ||||||
|   let discrete: PointSetTypes.discreteShape = |   let discrete: PointSetTypes.discreteShape = | ||||||
|     discretePart |     discretePart | ||||||
|  |  | ||||||
|  | @ -22,3 +22,16 @@ module OpCost = { | ||||||
|   let wildcardCost = 1000 |   let wildcardCost = 1000 | ||||||
|   let monteCarloCost = Environment.defaultSampleCount |   let monteCarloCost = Environment.defaultSampleCount | ||||||
| } | } | ||||||
|  | 
 | ||||||
|  | module ToPointSet = { | ||||||
|  |   /* | ||||||
|  |   This function chooses the minimum number of duplicate samples that must | ||||||
|  |   exist for a value to be considered discrete. The tricky thing is that | ||||||
|  |   some operations create duplicate continuous samples, so we can't assume | ||||||
|  |   that duplicates occur only because the underlying structure is meant to | ||||||
|  |   be discrete. The heuristic requires at least 20 duplicates, or 1/50 of | ||||||
|  |   the sample count (integer division) if that is larger. I chose it because | ||||||
|  |   I think it strikes a reasonable trade-off, but I’m really unsure what’s | ||||||
|  |   best right now. | ||||||
|  |  */ | ||||||
|  |   let minDiscreteToKeep = samples => max(20, E.A.length(samples) / 50) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | @ -8,7 +8,7 @@ module FloatFloatMap = { | ||||||
|   type t = Belt.MutableMap.t<Id.t, float, Id.identity> |   type t = Belt.MutableMap.t<Id.t, float, Id.identity> | ||||||
| 
 | 
 | ||||||
|   let fromArray = (ar: array<(float, float)>) => Belt.MutableMap.fromArray(ar, ~id=module(Id)) |   let fromArray = (ar: array<(float, float)>) => Belt.MutableMap.fromArray(ar, ~id=module(Id)) | ||||||
|   let toArray = (t: t) => Belt.MutableMap.toArray(t) |   let toArray = (t: t): array<(float, float)> => Belt.MutableMap.toArray(t) | ||||||
|   let empty = () => Belt.MutableMap.make(~id=module(Id)) |   let empty = () => Belt.MutableMap.make(~id=module(Id)) | ||||||
|   let increment = (el, t: t) => |   let increment = (el, t: t) => | ||||||
|     Belt.MutableMap.update(t, el, x => |     Belt.MutableMap.update(t, el, x => | ||||||
|  | @ -20,6 +20,10 @@ module FloatFloatMap = { | ||||||
| 
 | 
 | ||||||
|   let get = (el, t: t) => Belt.MutableMap.get(t, el) |   let get = (el, t: t) => Belt.MutableMap.get(t, el) | ||||||
|   let fmap = (fn, t: t) => Belt.MutableMap.map(t, fn) |   let fmap = (fn, t: t) => Belt.MutableMap.map(t, fn) | ||||||
|  |   let partition = (fn, t: t) => { | ||||||
|  |     let (match, noMatch) = Belt.Array.partition(toArray(t), fn) | ||||||
|  |     (fromArray(match), fromArray(noMatch)) | ||||||
|  |   } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| module Int = { | module Int = { | ||||||
|  | @ -518,18 +522,22 @@ module A = { | ||||||
|       let makeIncrementalDown = (a, b) => |       let makeIncrementalDown = (a, b) => | ||||||
|         Array.make(a - b + 1, a) |> Array.mapi((i, c) => c - i) |> Belt.Array.map(_, float_of_int) |         Array.make(a - b + 1, a) |> Array.mapi((i, c) => c - i) |> Belt.Array.map(_, float_of_int) | ||||||
| 
 | 
 | ||||||
|       let split = (sortedArray: array<float>) => { |       /* | ||||||
|         let continuous = [] |       This function goes through a sorted array and divides it into two different clusters: | ||||||
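|  |       continuous samples and discrete samples. The discrete samples are stored in a mutable map | ||||||
|  |       that counts how many times each value occurs. A sample is treated as discrete if it has at | ||||||
|  |       least one duplicate; because the array is sorted, duplicates are adjacent, so each element | ||||||
|  |       only needs to be compared with its immediate neighbours. | ||||||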
|  |  */ | ||||||
|  |       let _splitContinuousAndDiscreteForDuplicates = (sortedArray: array<float>) => { | ||||||
|  |         let continuous: array<float> = [] | ||||||
|         let discrete = FloatFloatMap.empty() |         let discrete = FloatFloatMap.empty() | ||||||
|         Belt.Array.forEachWithIndex(sortedArray, (_, element) => { |         Belt.Array.forEachWithIndex(sortedArray, (index, element) => { | ||||||
|           // let maxIndex = (sortedArray |> Array.length) - 1 |           let maxIndex = (sortedArray |> Array.length) - 1 | ||||||
|           // let possiblySimilarElements = switch index { |           let possiblySimilarElements = switch index { | ||||||
|           // | 0 => [index + 1] |           | 0 => [index + 1] | ||||||
|           // | n if n == maxIndex => [index - 1] |           | n if n == maxIndex => [index - 1] | ||||||
|           // | _ => [index - 1, index + 1] |           | _ => [index - 1, index + 1] | ||||||
|           // } |> Belt.Array.map(_, r => sortedArray[r]) |           } |> Belt.Array.map(_, r => sortedArray[r]) | ||||||
|           // let hasSimilarElement = Belt.Array.some(possiblySimilarElements, r => r == element) |           let hasSimilarElement = Belt.Array.some(possiblySimilarElements, r => r == element) | ||||||
|           let hasSimilarElement = false |  | ||||||
|           hasSimilarElement |           hasSimilarElement | ||||||
|             ? FloatFloatMap.increment(element, discrete) |             ? FloatFloatMap.increment(element, discrete) | ||||||
|             : { |             : { | ||||||
|  | @ -541,6 +549,32 @@ module A = { | ||||||
| 
 | 
 | ||||||
|         (continuous, discrete) |         (continuous, discrete) | ||||||
|       } |       } | ||||||
|  | 
 | ||||||
|  |       /* | ||||||
|  |       This function works very similarly to _splitContinuousAndDiscreteForDuplicates. The one major | ||||||
|  |       difference is that you can specify a minDiscreteWeight. If the minimum discrete weight is 4, a | ||||||
|  |       value must occur at least four times to be kept as discrete. This is important because in some | ||||||
|  |       cases we can expect that some common elements will be generated by regular operations. Values | ||||||
|  |       below the threshold are added back to the continuous samples, once per occurrence. | ||||||
|  |       The final continuous array will be sorted. | ||||||
|  |  */ | ||||||
|  |       let splitContinuousAndDiscreteForMinWeight = ( | ||||||
|  |         sortedArray: array<float>, | ||||||
|  |         ~minDiscreteWeight: int, | ||||||
|  |       ) => { | ||||||
|  |         let (continuous, discrete) = _splitContinuousAndDiscreteForDuplicates(sortedArray) | ||||||
|  |         let keepFn = v => Belt.Float.toInt(v) >= minDiscreteWeight | ||||||
|  |         let (discreteToKeep, discreteToIntegrate) = FloatFloatMap.partition( | ||||||
|  |           ((_, v)) => keepFn(v), | ||||||
|  |           discrete, | ||||||
|  |         ) | ||||||
|  |         let newContinousSamples = | ||||||
|  |           discreteToIntegrate->FloatFloatMap.toArray | ||||||
|  |           |> fmap(((k, v)) => Belt.Array.makeBy(Belt.Float.toInt(v), _ => k)) | ||||||
|  |           |> Belt.Array.concatMany | ||||||
|  |         let newContinuous = concat(continuous, newContinousSamples) | ||||||
|  |         newContinuous |> Array.fast_sort(floatCompare) | ||||||
|  |         (newContinuous, discreteToKeep) | ||||||
|  |       } | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user