567 lines
14 KiB
JavaScript
567 lines
14 KiB
JavaScript
(function(jStat, Math) {
|
|
|
|
var isFunction = jStat.utils.isFunction;
|
|
|
|
// Ascending functions for sort
|
|
function ascNum(a, b) {
  // Comparator for Array.prototype.sort: the sign of the difference puts the
  // smaller number first.
  var difference = a - b;
  return difference;
}
|
|
|
|
// Limit arg to at most max, then raise the result to at least min.
function clip(arg, min, max) {
  var capped = Math.min(arg, max);
  return Math.max(min, capped);
}
|
|
|
|
|
|
// Sum of the entries of an array.
// Entries are added from the last index down to the first; an empty array
// yields 0.
jStat.sum = function sum(arr) {
  var total = 0;
  for (var idx = arr.length - 1; idx >= 0; idx--) {
    total += arr[idx];
  }
  return total;
};
|
|
|
|
|
|
// Sum of the squares of the entries of an array.
// Entries are visited from the last index down to the first.
jStat.sumsqrd = function sumsqrd(arr) {
  var total = 0;
  for (var idx = arr.length - 1; idx >= 0; idx--) {
    total += arr[idx] * arr[idx];
  }
  return total;
};
|
|
|
|
|
|
// Sum of squared errors of prediction (SSE): the sum of the squared
// differences between each entry and jStat.mean(arr).
jStat.sumsqerr = function sumsqerr(arr) {
  var center = jStat.mean(arr);
  var total = 0;
  for (var idx = arr.length - 1; idx >= 0; idx--) {
    var delta = arr[idx] - center;
    total += delta * delta;
  }
  return total;
};
|
|
|
|
// Sum of the entries of a single row (one array).
// The computation is the same as jStat.sum: entries are added from the last
// index down to the first.
jStat.sumrow = function sumrow(arr) {
  var total = 0;
  for (var idx = arr.length - 1; idx >= 0; idx--) {
    total += arr[idx];
  }
  return total;
};
|
|
|
|
// Product of the entries of an array.
// Entries are multiplied from the last index down to the first; an empty
// array yields 1.
jStat.product = function product(arr) {
  var result = 1;
  for (var idx = arr.length - 1; idx >= 0; idx--) {
    result *= arr[idx];
  }
  return result;
};
|
|
|
|
|
|
// Minimum value of an array.
// Starts from arr[0], so an empty array yields undefined.
jStat.min = function min(arr) {
  var smallest = arr[0];
  for (var idx = 1; idx < arr.length; idx++) {
    if (arr[idx] < smallest) {
      smallest = arr[idx];
    }
  }
  return smallest;
};
|
|
|
|
|
|
// Maximum value of an array.
// Starts from arr[0], so an empty array yields undefined.
jStat.max = function max(arr) {
  var largest = arr[0];
  for (var idx = 1; idx < arr.length; idx++) {
    if (arr[idx] > largest) {
      largest = arr[idx];
    }
  }
  return largest;
};
|
|
|
|
|
|
// Unique values of an array, in order of first appearance.
// Values are compared through their property-key (string) form, so 1 and '1'
// count as the same value and only the first one encountered is kept.
jStat.unique = function unique(arr) {
  // The "seen" table has no prototype, so values such as 'constructor',
  // 'toString' or '__proto__' are not mistaken for already-seen entries via
  // properties inherited from Object.prototype.
  var seen = Object.create(null);
  var result = [];
  for (var i = 0; i < arr.length; i++) {
    if (!seen[arr[i]]) {
      seen[arr[i]] = true;
      result.push(arr[i]);
    }
  }
  return result;
};
|
|
|
|
|
|
// Arithmetic mean of an array: jStat.sum(arr) divided by the entry count.
jStat.mean = function mean(arr) {
  var total = jStat.sum(arr);
  return total / arr.length;
};
|
|
|
|
|
|
// Mean squared error (MSE): jStat.sumsqerr(arr) divided by the entry count.
jStat.meansqerr = function meansqerr(arr) {
  var squaredErrors = jStat.sumsqerr(arr);
  return squaredErrors / arr.length;
};
|
|
|
|
|
|
// Geometric mean of an array, computed as the exponential of the mean of the
// natural logarithms of the entries.
jStat.geomean = function geomean(arr) {
  var logs = arr.map(function (value) {
    return Math.log(value);
  });
  return Math.exp(jStat.mean(logs));
};
|
|
|
|
|
|
// Median of an array. The input is copied before sorting, so arr itself is
// left in its original order.
jStat.median = function median(arr) {
  var count = arr.length;
  var sorted = arr.slice().sort(ascNum);
  var half = count / 2;
  if (count & 1) {
    // Odd count: the single middle entry.
    return sorted[half | 0];
  }
  // Even count: average of the two middle entries.
  return (sorted[half - 1] + sorted[half]) / 2;
};
|
|
|
|
|
|
// Cumulative sum of an array: delegates to jStat.cumreduce with addition as
// the combining function.
jStat.cumsum = function cumsum(arr) {
  var add = function (a, b) {
    return a + b;
  };
  return jStat.cumreduce(arr, add);
};
|
|
|
|
|
|
// Cumulative product of an array: delegates to jStat.cumreduce with
// multiplication as the combining function.
jStat.cumprod = function cumprod(arr) {
  var multiply = function (a, b) {
    return a * b;
  };
  return jStat.cumreduce(arr, multiply);
};
|
|
|
|
|
|
// Successive differences of a sequence: entry k of the result is
// arr[k + 1] - arr[k], so the result is one entry shorter than a non-empty
// input.
jStat.diff = function diff(arr) {
  var count = arr.length;
  var steps = [];
  var idx = 0;
  while (++idx < count) {
    steps.push(arr[idx] - arr[idx - 1]);
  }
  return steps;
};
|
|
|
|
|
|
// Ranks of an array (1-based). Equal values share the average of the ranks
// they would occupy.
jStat.rank = function (arr) {
  var occurrences = {};
  var distinct = [];
  var idx;
  var value;

  // Count how often each value occurs and collect the distinct values.
  for (idx = 0; idx < arr.length; idx++) {
    value = arr[idx];
    if (occurrences[value]) {
      occurrences[value]++;
    } else {
      occurrences[value] = 1;
      distinct.push(value);
    }
  }

  distinct.sort(ascNum);

  // Walk the distinct values in ascending order, handing each one the mean of
  // the rank positions its duplicates span.
  var rankOf = {};
  var nextRank = 1;
  for (idx = 0; idx < distinct.length; idx++) {
    value = distinct[idx];
    var ties = occurrences[value];
    var lastRank = nextRank + ties - 1;
    rankOf[value] = (nextRank + lastRank) / 2;
    nextRank += ties;
  }

  return arr.map(function (entry) {
    return rankOf[entry];
  });
};
|
|
|
|
|
|
// Mode of an array.
// When a single value is the most frequent it is returned on its own; when
// several values tie for the highest frequency an array of all of them is
// returned.
jStat.mode = function mode(arr) {
  var total = arr.length;
  var sorted = arr.slice().sort(ascNum);
  var runLength = 1;
  var bestRun = 0;
  var tiedModes = 0;
  var modes = [];

  for (var idx = 0; idx < total; idx++) {
    // Still inside a run of equal values: keep counting.
    if (sorted[idx] === sorted[idx + 1]) {
      runLength++;
      continue;
    }
    // The run ended at idx; compare it with the best run so far.
    if (runLength > bestRun) {
      modes = [sorted[idx]];
      bestRun = runLength;
      tiedModes = 0;
    } else if (runLength === bestRun) {
      modes.push(sorted[idx]);
      tiedModes++;
    }
    // Start counting the next value.
    runLength = 1;
  }

  return tiedModes === 0 ? modes[0] : modes;
};
|
|
|
|
|
|
// Range of an array: jStat.max(arr) minus jStat.min(arr).
jStat.range = function range(arr) {
  var highest = jStat.max(arr);
  var lowest = jStat.min(arr);
  return highest - lowest;
};
|
|
|
|
// Variance of an array.
// A truthy flag selects the sample variance (divide by n - 1); otherwise the
// population variance (divide by n) is returned.
jStat.variance = function variance(arr, flag) {
  var squaredErrors = jStat.sumsqerr(arr);
  var divisor = flag ? arr.length - 1 : arr.length;
  return squaredErrors / divisor;
};
|
|
|
|
// Pooled variance of an array of arrays: the summed squared errors of every
// sample divided by (total number of entries - number of samples).
jStat.pooledvariance = function pooledvariance(arr) {
  var squaredErrors = 0;
  var entries = 0;
  arr.forEach(function (samples) {
    squaredErrors += jStat.sumsqerr(samples);
    entries += samples.length;
  });
  return squaredErrors / (entries - arr.length);
};
|
|
|
|
// Deviation of an array: a new array holding each entry minus
// jStat.mean(arr), in the original order.
jStat.deviation = function (arr) {
  var center = jStat.mean(arr);
  var count = arr.length;
  var offsets = [];
  var idx = -1;
  while (++idx < count) {
    offsets.push(arr[idx] - center);
  }
  return offsets;
};
|
|
|
|
// Standard deviation of an array: the square root of jStat.variance.
// flag is forwarded unchanged; a truthy flag selects the sample form instead
// of the population form.
jStat.stdev = function stdev(arr, flag) {
  var spreadSquared = jStat.variance(arr, flag);
  return Math.sqrt(spreadSquared);
};
|
|
|
|
// Pooled standard deviation of an array of arrays: the square root of
// jStat.pooledvariance.
jStat.pooledstdev = function pooledstdev(arr) {
  var pooled = jStat.pooledvariance(arr);
  return Math.sqrt(pooled);
};
|
|
|
|
// Mean deviation (mean absolute deviation) of an array: the mean of the
// absolute distances between each entry and jStat.mean(arr).
jStat.meandev = function meandev(arr) {
  var center = jStat.mean(arr);
  var distances = [];
  var idx = arr.length;
  // Distances are collected from the last entry to the first.
  while (--idx >= 0) {
    distances.push(Math.abs(arr[idx] - center));
  }
  return jStat.mean(distances);
};
|
|
|
|
|
|
// Median deviation (median absolute deviation) of an array: the median of the
// absolute distances between each entry and jStat.median(arr).
jStat.meddev = function meddev(arr) {
  var center = jStat.median(arr);
  var distances = [];
  var idx = arr.length;
  // Distances are collected from the last entry to the first.
  while (--idx >= 0) {
    distances.push(Math.abs(arr[idx] - center));
  }
  return jStat.median(distances);
};
|
|
|
|
|
|
// Coefficient of variation: jStat.stdev(arr), called without a flag, divided
// by jStat.mean(arr).
jStat.coeffvar = function coeffvar(arr) {
  var spread = jStat.stdev(arr);
  var center = jStat.mean(arr);
  return spread / center;
};
|
|
|
|
|
|
// Quartiles of an array, returned as [first, second, third].
// Each quartile is the entry of the sorted copy at position
// round(n * q) - 1 for q = 1/4, 1/2 and 3/4; no interpolation is done.
jStat.quartiles = function quartiles(arr) {
  var count = arr.length;
  var sorted = arr.slice().sort(ascNum);
  var lowerPos = Math.round(count / 4) - 1;
  var middlePos = Math.round(count / 2) - 1;
  var upperPos = Math.round(count * 3 / 4) - 1;
  return [sorted[lowerPos], sorted[middlePos], sorted[upperPos]];
};
|
|
|
|
|
|
// Arbitrary quantiles of an array. Direct port of the scipy.stats
// implementation by Pierre GF Gerard-Marchant.
//
// arr            - values to take quantiles of (copied before sorting).
// quantilesArray - the quantile probabilities to evaluate.
// alphap, betap  - plotting-position parameters; each defaults to 3/8 when
//                  undefined.
// Returns an array with one value per entry of quantilesArray (an empty array
// when quantilesArray is empty).
jStat.quantiles = function quantiles(arr, quantilesArray, alphap, betap) {
  var sortedArray = arr.slice().sort(ascNum);
  // Start empty: the previous `[quantilesArray.length]` literal created a
  // one-element array holding the length, which leaked out as `[0]` when no
  // quantiles were requested.
  var quantileVals = [];
  var n = arr.length;
  var i, p, m, aleph, k, gamma;

  if (typeof alphap === 'undefined')
    alphap = 3 / 8;
  if (typeof betap === 'undefined')
    betap = 3 / 8;

  for (i = 0; i < quantilesArray.length; i++) {
    p = quantilesArray[i];
    m = alphap + p * (1 - alphap - betap);
    aleph = n * p + m;
    // k is the 1-based position of the lower neighbour, gamma the weight of
    // the upper neighbour in the linear interpolation.
    k = Math.floor(clip(aleph, 1, n - 1));
    gamma = clip(aleph - k, 0, 1);
    quantileVals[i] = (1 - gamma) * sortedArray[k - 1] + gamma * sortedArray[k];
  }

  return quantileVals;
};
|
|
|
|
// Return the k-th percentile of values in a range, where k is in the range
// 0..1, inclusive.
// Passing true for the exclusive parameter excludes both endpoints of the
// range.
//
// The 1-based fractional rank is k * (n - 1) + 1 in inclusive mode and
// k * (n + 1) in exclusive mode; the result is linearly interpolated between
// the two sorted entries surrounding that rank. A rank at or beyond the last
// entry yields the last entry; a rank below the first entry (possible only in
// exclusive mode) yields NaN.
jStat.percentile = function percentile(arr, k, exclusive) {
  var _arr = arr.slice().sort(ascNum);
  var realIndex = k * (_arr.length + (exclusive ? 1 : -1)) + (exclusive ? 0 : 1);
  // Truncate numerically. parseInt() goes through the string form of the
  // number and misreads exponent notation (e.g. 5e-8 would become 5).
  var index = realIndex < 0 ? Math.ceil(realIndex) : Math.floor(realIndex);
  var frac = realIndex - index;
  // Interpolate whenever an upper neighbour _arr[index] exists. The previous
  // guard (index + 1 < length) skipped interpolation between the last two
  // entries and returned the lower one instead.
  if (index < _arr.length) {
    return _arr[index - 1] + frac * (_arr[index] - _arr[index - 1]);
  }
  return _arr[index - 1];
};
|
|
|
|
// The percentile rank of score in a given array. Returns the fraction of all
// values in the input array that are less than (kind='strict') or less than
// or equal to (any other kind, the default) score.
jStat.percentileOfScore = function percentileOfScore(arr, score, kind) {
  var total = arr.length;
  var isStrict = kind === 'strict';
  var matches = 0;

  for (var idx = 0; idx < total; idx++) {
    var entry = arr[idx];
    if (isStrict ? entry < score : entry <= score) {
      matches++;
    }
  }

  return matches / total;
};
|
|
|
|
|
|
// Histogram (bin count) data.
// Splits the span between jStat.min(arr) and jStat.max(arr) into binCnt
// equal-width bins (4 when binCnt is omitted or falsy) and returns an array
// holding the number of entries that fall into each bin.
jStat.histogram = function histogram(arr, binCnt) {
  binCnt = binCnt || 4;
  var first = jStat.min(arr);
  var binWidth = (jStat.max(arr) - first) / binCnt;
  var len = arr.length;
  var bins = [];
  var i, slot;

  for (i = 0; i < binCnt; i++)
    bins[i] = 0;
  for (i = 0; i < len; i++) {
    // When every entry is equal the bin width is 0 and the division would be
    // 0 / 0 = NaN, so the entry would be counted in no bin at all (and a
    // stray "NaN" property would be written onto the result). Such entries
    // are counted in the first bin instead.
    slot = binWidth > 0 ? Math.floor((arr[i] - first) / binWidth) : 0;
    // The maximum sits exactly on the upper edge; fold it into the last bin.
    bins[Math.min(slot, binCnt - 1)] += 1;
  }

  return bins;
};
|
|
|
|
|
|
// Covariance of two arrays: the sum of the products of the paired deviations
// from each array's mean, divided by (arr1.length - 1).
jStat.covariance = function covariance(arr1, arr2) {
  var mean1 = jStat.mean(arr1);
  var mean2 = jStat.mean(arr2);
  var count = arr1.length;
  var products = [];

  for (var idx = 0; idx < count; idx++) {
    products.push((arr1[idx] - mean1) * (arr2[idx] - mean2));
  }

  return jStat.sum(products) / (count - 1);
};
|
|
|
|
|
|
// (pearson's) correlation coefficient, rho: jStat.covariance of the two
// arrays divided by each array's standard deviation, both taken with the
// sample flag set.
jStat.corrcoeff = function corrcoeff(arr1, arr2) {
  var cov = jStat.covariance(arr1, arr2);
  var spread1 = jStat.stdev(arr1, 1);
  var spread2 = jStat.stdev(arr2, 1);
  return cov / spread1 / spread2;
};
|
|
|
|
// (spearman's) rank correlation coefficient, sp: pearson's correlation
// (jStat.corrcoeff) applied to the ranks of the two arrays.
jStat.spearmancoeff = function (arr1, arr2) {
  var ranks1 = jStat.rank(arr1);
  var ranks2 = jStat.rank(arr2);
  return jStat.corrcoeff(ranks1, ranks2);
};
|
|
|
|
|
|
// Statistical standardized moment of order n (general form of skew/kurt):
// the mean of ((x - mean) / stdev) raised to the n-th power, where stdev is
// jStat.stdev called without a flag.
jStat.stanMoment = function stanMoment(arr, n) {
  var center = jStat.mean(arr);
  var spread = jStat.stdev(arr);
  var count = arr.length;
  var momentSum = 0;
  var idx = -1;

  while (++idx < count) {
    momentSum += Math.pow((arr[idx] - center) / spread, n);
  }

  return momentSum / arr.length;
};
|
|
|
|
// (pearson's) moment coefficient of skewness: the third standardized moment.
jStat.skewness = function skewness(arr) {
  var thirdMoment = jStat.stanMoment(arr, 3);
  return thirdMoment;
};
|
|
|
|
// (pearson's) (excess) kurtosis: the fourth standardized moment minus 3.
jStat.kurtosis = function kurtosis(arr) {
  var fourthMoment = jStat.stanMoment(arr, 4);
  return fourthMoment - 3;
};
|
|
|
|
|
|
var jProto = jStat.prototype;
|
|
|
|
|
|
// Extend jProto with methods for calculating cumulative sums and products.
// Unlike the no-argument extension, the per-vector results are returned as
// they are: cumsum and cumprod are not run a second time when
// fullbool === true.
// If a matrix is passed, the operation is done on the columns unless
// fullbool === true, in which case it is done on the rows.
(function(funcs) {
  funcs.forEach(function(passfunc) {
    // Signature: ([fullbool], [func]). func is an optional callback.
    jProto[passfunc] = function(fullbool, func) {
      var self = this;
      var flag = fullbool;
      var callback = func;

      // A function in first position is the callback, not the flag.
      if (isFunction(fullbool)) {
        callback = fullbool;
        flag = false;
      }

      // With a callback the computation is deferred via setTimeout, the
      // result is handed to the callback, and the instance is returned so
      // calls can be chained.
      if (callback) {
        setTimeout(function() {
          callback.call(self, jProto[passfunc].call(self, flag));
        });
        return this;
      }

      // More than one row: treat as a matrix and work vector by vector.
      if (this.length > 1) {
        var source = flag === true ? this : this.transpose();
        var results = [];
        for (var idx = 0; idx < source.length; idx++) {
          results[idx] = jStat[passfunc](source[idx]);
        }
        return results;
      }

      // Single row: run on that vector, forwarding the flag.
      return jStat[passfunc](this[0], flag);
    };
  });
})(('cumsum cumprod').split(' '));
|
|
|
|
|
|
// Extend jProto with methods which don't require arguments and work on
// columns.
// If a matrix is passed, the operation is done on the columns; with
// fullbool === true it is done on the rows and the per-row results are then
// reduced once more to a single value. 'sumrow' always works on the rows.
(function(funcs) {
  funcs.forEach(function(passfunc) {
    // Signature: ([fullbool], [func]). func is an optional callback.
    jProto[passfunc] = function(fullbool, func) {
      var self = this;
      var flag = fullbool;
      var callback = func;

      // A function in first position is the callback, not the flag.
      if (isFunction(fullbool)) {
        callback = fullbool;
        flag = false;
      }

      // With a callback the computation is deferred via setTimeout, the
      // result is handed to the callback, and the instance is returned so
      // calls can be chained.
      if (callback) {
        setTimeout(function() {
          callback.call(self, jProto[passfunc].call(self, flag));
        });
        return this;
      }

      // More than one row: treat as a matrix and work vector by vector.
      if (this.length > 1) {
        var source = this;
        if (passfunc !== 'sumrow') {
          source = flag === true ? this : this.transpose();
        }
        var results = [];
        for (var idx = 0; idx < source.length; idx++) {
          results[idx] = jStat[passfunc](source[idx]);
        }
        if (flag === true) {
          return jStat[passfunc](jStat.utils.toVector(results));
        }
        return results;
      }

      // Single row: run on that vector, forwarding the flag (used by
      // variance and stdev).
      return jStat[passfunc](this[0], flag);
    };
  });
})(('sum sumsqrd sumsqerr sumrow product min max unique mean meansqerr ' +
    'geomean median diff rank mode range variance deviation stdev meandev ' +
    'meddev coeffvar quartiles histogram skewness kurtosis').split(' '));
|
|
|
|
|
|
// Extend jProto with functions that take arguments. Operations on matrices
// are done on columns.
(function(funcs) {
  funcs.forEach(function(passfunc) {
    jProto[passfunc] = function() {
      var context = this;
      var args = Array.prototype.slice.call(arguments);
      var lastArg = args[args.length - 1];

      // If the last argument is a function, it is taken to be a callback: it
      // is stripped from the arguments, the method is re-run without it via
      // setTimeout, and the result is handed to the callback.
      if (isFunction(lastArg)) {
        var argsToPass = args.slice(0, args.length - 1);
        setTimeout(function() {
          lastArg.call(context, jProto[passfunc].apply(context, argsToPass));
        });
        return this;
      }

      // Otherwise the call arguments are appended after the vector. `context`
      // is read at call time, so in the matrix case below the static function
      // receives the transposed instance as its `this`.
      var applyTo = function(vector) {
        return jStat[passfunc].apply(context, [vector].concat(args));
      };

      // If this is a matrix, run column-by-column.
      if (this.length > 1) {
        context = context.transpose();
        var results = [];
        for (var idx = 0; idx < context.length; idx++) {
          results[idx] = applyTo(context[idx]);
        }
        return results;
      }

      // Otherwise run on the vector.
      return applyTo(this[0]);
    };
  });
})('quantiles percentileOfScore'.split(' '));
|
|
|
|
}(jStat, Math));
|