
223 lines
8.0 KiB
Raw Normal View History

/**
* @license Apache-2.0
*
* Copyright (c) 2018 The Stdlib Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
'use strict';
var isNonNegativeInteger = require( '@stdlib/assert/is-nonnegative-integer' ).isPrimitive;
var isCollection = require( '@stdlib/assert/is-collection' );
var isndarrayLike = require( '@stdlib/assert/is-ndarray-like' );
var isNumber = require( '@stdlib/assert/is-number' ).isPrimitive;
var isString = require( '@stdlib/assert/is-string' ).isPrimitive;
var absdiff = require( '@stdlib/math/base/utils/absolute-difference' );
var FLOAT64_SQRT_EPS = require( '@stdlib/constants/float64/sqrt-eps' );
var PINF = require( '@stdlib/constants/float64/pinf' );
var chisqCDF = require( './../../base/dists/chisquare/cdf' );
var isnan = require( '@stdlib/assert/is-nan' );
var daxpy = require( '@stdlib/blas/base/daxpy' );
var dscal = require( '@stdlib/blas/base/dscal' );
var dsumpw = require( '@stdlib/blas/ext/base/dsumpw' );
var Float64Array = require( '@stdlib/array/float64' );
var defaults = require( './defaults.js' );
var validate = require( './validate.js' );
var getPMF = require( './get_pmf.js' );
var testStatistic = require( './statistic.js' );
var simulate = require( './simulate.js' );
var Results = require( './results.js' );
// MAIN //
/**
* Performs a chi-square goodness-of-fit test.
*
* ## Notes
*
* -   The second argument is interpreted in one of three ways:
*
*     -   a string: the name of a discrete probability distribution whose probability mass function (PMF) is evaluated at `0, 1, ..., N-1` (shifted by the first distribution parameter for `'discrete-uniform'`); any probability mass left over is collected in one additional trailing category.
*     -   an array-like object or 1-dimensional ndarray whose elements all lie in `[0,1]` and sum to one (within `sqrt(eps)`): category probabilities, which are scaled by the total number of observations.
*     -   any other array-like object or 1-dimensional ndarray of nonnegative numbers: expected frequencies, used as provided.
*
* -   When `options.simulate` is `true`, the returned degrees of freedom are `null`.
*
* @param {(Collection|VectorLike)} x - observation frequencies
* @param {(Collection|VectorLike|string)} y - expected frequencies or a discrete probability distribution name
* @param {...number} [args] - probability mass function (PMF) arguments
* @param {Options} [options] - function options
* @param {number} [options.alpha=0.05] - significance level
* @param {NonNegativeInteger} [options.ddof=0] - degrees of freedom adjustment
* @param {boolean} [options.simulate=false] - boolean indicating whether to compute p-values by Monte Carlo simulation
* @param {PositiveInteger} [options.iterations=500] - number of Monte Carlo iterations
* @throws {TypeError} first argument must be an array-like object or a 1-dimensional array containing nonnegative integers
* @throws {TypeError} second argument must be either an array-like object (or a 1-dimensional array) of nonnegative numbers, an array-like object (or a 1-dimensional array) of probabilities summing to one, or a discrete probability distribution name
* @throws {TypeError} options argument must be an object
* @throws {TypeError} must provide valid options
* @throws {Error} first and second arguments must have the same length
* @throws {Error} first argument must contain at least one element greater than zero
* @throws {RangeError} significance level must be a number on the interval `[0,1]`
* @throws {TypeError} probability mass function (PMF) arguments must be number primitives
* @returns {Object} test results
*
* @example
* var x = [ 89, 37, 30, 28, 2 ];
* var p = [ 0.40, 0.20, 0.20, 0.15, 0.05 ];
*
* var out = chi2gof( x, p );
*
* var o = out.toJSON();
* // returns { 'pValue': ~0.0406, 'statistic': ~9.9901, ... }
*/
function chi2gof( x, y ) {
	var expected;
	var nargs;
	var args;
	var opts;
	var pval;
	var stat;
	var obs;
	var err;
	var pmf;
	var sum;
	var df;
	var N;
	var d;
	var s;
	var o;
	var n;
	var p;
	var v;
	var i;

	// Resolve how to walk the observations: via an ndarray's underlying buffer (offset + stride) or by direct indexing...
	if ( isndarrayLike( x ) && x.ndims === 1 && x.strides.length === 1 ) { // is ndarray-like vector?
		d = x.data;
		s = x.strides[ 0 ];
		o = x.offset;
	} else if ( isCollection( x ) ) {
		d = x;
		s = 1;
		o = 0;
	} else {
		throw new TypeError( 'invalid argument. First argument must be either an array-like object or a 1-dimensional ndarray. Value: `' + x + '`.' );
	}
	N = x.length;

	// Initialize an array for storing a copy of the observations array:
	obs = new Float64Array( N+1 ); // Note: `N+1` is intentional in the event that we need to add a remaining category for all values greater than or equal to `N`

	// Copy and validate the observations while accumulating the total number of observations:
	n = 0;
	for ( i = 0; i < N; i++ ) {
		v = d[ o+(s*i) ];
		if ( !isNonNegativeInteger( v ) ) {
			throw new TypeError( 'invalid argument. First argument must contain nonnegative integers. Index: `' + i + '`. Value: `' + v + '`.' );
		}
		obs[ i ] = v;
		n += v;
	}
	if ( n === 0 ) {
		throw new Error( 'invalid argument. First argument must contain at least one element greater than zero (i.e., the total number of observations must be greater than zero).' );
	}
	// NOTE: `obs` is now a single-segment contiguous Float64Array

	nargs = 0;
	if ( isString( y ) ) {
		pmf = getPMF( y );
		if ( pmf instanceof Error ) {
			throw pmf;
		}
		nargs += pmf.length - 1; // WARNING: this relies on PMF functions having an explicit arity

		// Collect the distribution parameters; slot `0` is reserved for the value at which to evaluate the PMF:
		args = [ 0 ];
		for ( i = 0; i < nargs; i++ ) {
			v = arguments[ i+2 ];
			if ( !isNumber( v ) || isnan( v ) ) {
				throw new TypeError( 'invalid argument. Probability mass function (PMF) arguments must be number primitives. Argument: `' + (i+2) + '`. Value: `' + v + '`.' );
			}
			args.push( v );
		}
		// Compute the expected frequency of each category from the PMF:
		expected = new Float64Array( N+1 );
		sum = 0.0;
		for ( i = 0; i < N; i++ ) {
			args[ 0 ] = i;
			if ( y === 'discrete-uniform' ) {
				args[ 0 ] += args[ 1 ]; // scales the value at which to evaluate the PMF based on the minimum support of the distribution (which should have been provided as the first distribution parameter)
			}
			v = pmf.apply( null, args );
			sum += v;
			expected[ i ] = v * n;
		}
		// Check whether we need to add a remaining category for all values greater than or equal to `N`...
		if ( sum < 1.0 ) {
			expected[ N ] = (1.0-sum) * n;
			N += 1;
		}
	} else {
		if ( isndarrayLike( y ) && y.ndims === 1 && y.strides.length === 1 ) { // is ndarray-like vector?
			d = y.data;
			s = y.strides[ 0 ];
			o = y.offset;
		} else if ( isCollection( y ) ) {
			d = y;
			s = 1;
			o = 0;
		} else {
			throw new TypeError( 'invalid argument. Second argument must be either an array-like object (or 1-dimensional ndarray) of probabilities summing to one, an array-like object (or 1-dimensional ndarray) of expected frequencies, or a discrete probability distribution name. Value: `' + y + '`.' );
		}
		if ( y.length !== N ) {
			throw new Error( 'invalid arguments. First and second arguments must have the same length.' );
		}
		expected = new Float64Array( N );
		sum = 0.0;
		for ( i = 0; i < N; i++ ) {
			v = d[ o+(s*i) ];

			// Reject `NaN` explicitly (as is done for PMF arguments); otherwise, it would pass the comparisons below and yield a `NaN` test statistic:
			if ( !isNumber( v ) || isnan( v ) ) {
				throw new TypeError( 'invalid argument. Second argument must only contain numbers. Index: `' + i + '`. Value: `' + v + '`.' );
			}
			if ( v < 0.0 ) {
				throw new TypeError( 'invalid argument. Second argument must only contain nonnegative numbers. Index: `' + i + '`. Value: `' + v + '`.' );
			} else if ( v > 1.0 ) {
				// A value greater than one cannot be a probability, so poison the sum to guarantee that the unity check below fails:
				sum += PINF;
			} else {
				sum += v;
			}
			expected[ i ] = v;
		}
		// Check if provided a unity probability array (otherwise, assume provided an expected frequencies array)...
		if ( absdiff( sum, 1.0 ) <= FLOAT64_SQRT_EPS ) {
			// Snapshot the probabilities from the contiguous `expected` buffer *before* scaling. Handing `y` itself downstream would pass along an ndarray-like object (whose elements live in `y.data` behind an offset and a stride) whenever `y` is not a plain collection.
			// NOTE(review): assumes `simulate` (in `./simulate.js`, not visible here) expects directly indexable probabilities — confirm.
			p = new Float64Array( expected );
			expected = dscal( N, n, expected, 1 );
		}
	}
	// NOTE: `expected` is now a single-segment contiguous Float64Array

	// The options object, if provided, follows any PMF arguments:
	opts = defaults();
	if ( arguments.length > 2+nargs ) {
		err = validate( opts, arguments[ 2+nargs ] );
		if ( err ) {
			throw err;
		}
	}
	stat = testStatistic( N, obs, 1, expected, 1 ); // TODO: consider replacing with low-level double-precision strided interface

	if ( opts.simulate ) {
		if ( p === void 0 ) {
			// No probabilities were provided, so derive them by normalizing the expected frequencies:
			v = dsumpw( N, expected, 1 );
			p = daxpy( N, 1.0/v, expected, 1, new Float64Array( N ), 1 );
		}
		pval = simulate( N, expected, p, stat, n, opts.iterations );
	} else {
		df = N - 1 - opts.ddof;
		pval = 1.0 - chisqCDF( stat, df );
	}
	// `df` is only computed in the non-simulated case; report `null` otherwise:
	return new Results( pval, opts.alpha, stat, ( df === void 0 ) ? null : df );
}
// EXPORTS //

// Expose the chi-square goodness-of-fit test function as this module's sole export:
module.exports = chi2gof;