/**
* @license Apache-2.0
*
* Copyright (c) 2019 The Stdlib Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*
* ## Notice
*
* This code is a modification of an existing JavaScript implementation of ther [Porter stemming algorithm]{@link https://tartarus.org/martin/PorterStemmer/}.
*
* ```text
* Release 1 be 'andargor', Jul 2004
* Release 2 (substantially revised) by Christopher McKenzie, Aug 2009
* ```
*/

'use strict';

// MODULES //

var isString = require( '@stdlib/assert/is-string' ).isPrimitive;
var endsWith = require( '@stdlib/string/ends-with' );
var lowercase = require( '@stdlib/string/lowercase' );
var replace = require( '@stdlib/string/replace' );


// VARIABLES //

var step2list = {
	'ational': 'ate',
	'tional': 'tion',
	'enci': 'ence',
	'anci': 'ance',
	'izer': 'ize',
	'bli': 'ble',
	'alli': 'al',
	'entli': 'ent',
	'eli': 'e',
	'ousli': 'ous',
	'ization': 'ize',
	'ation': 'ate',
	'ator': 'ate',
	'alism': 'al',
	'iveness': 'ive',
	'fulness': 'ful',
	'ousness': 'ous',
	'aliti': 'al',
	'iviti': 'ive',
	'biliti': 'ble',
	'logi': 'log'
};
var step3list = {
	'icate': 'ic',
	'ative': '',
	'alize': 'al',
	'iciti': 'ic',
	'ical': 'ic',
	'ful': '',
	'ness': ''
};
var c = '[^aeiou]'; // consonant
var v = '[aeiouy]'; // vowel
var C = c + '[^aeiouy]*'; // consonant sequence
var V = v + '[aeiou]*'; // vowel sequence
var RE_CV = new RegExp( '^' + C + v + '[^aeiouwxy]$' );
var mgr0 = '^(' + C + ')?' + V + C; // [C]VC... is m>0
var RE_MGR0 = new RegExp( mgr0 );
var meq1 = '^(' + C + ')?' + V + C + '(' + V + ')?$'; // [C]VC[V] is m=1
var RE_MEQ1 = new RegExp( meq1 );
var mgr1 = '^(' + C + ')?' + V + C + V + C; // [C]VCVC... is m>1
var RE_MGR1 = new RegExp( mgr1 );
var sV = '^(' + C + ')?' + v; // vowel in stem
var RE_SV = new RegExp( sV );
var RE_STEP1A = /^(.+?)(ss|i)es$/;
var RE2_STEP1A = /^(.+?)([^s])s$/;
var RE_STEP1B = /^(.+?)eed$/;
var RE2_STEP1B = /^(.+?)(ed|ing)$/;
var RE_STEP1C = /^(.+?)y$/;
var RE_STEP2 = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
var RE_STEP3 = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
var RE_STEP4 = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
var RE2_STEP4 = /^(.+?)(s|t)(ion)$/;
var RE_STEP5 = /^(.+?)e$/;
var RE_LAST = /.$/;
var RE_ATBLIZ = /(at|bl|iz)$/;
var RE_DOUBLE = new RegExp( '([^aeiouylsz])\\1$' );


// MAIN //

/**
* Extracts the stem of a given word using the Porter stemming algorithm.
*
* ## References
*
* -   Porter, Michael F. 1980. "An algorithm for suffix stripping." _Program_ 13 (3): 130–37. doi:[10.1108/eb046814][@porter:1980].
*
* [@porter:1980]: https://doi.org/10.1108/eb046814
*
* @param {string} word - input word
* @throws {TypeError} first argument must be a string primitive
* @returns {string} word stem
*
* @example
* var out = porterStemmer( 'walking' );
* // returns 'walk'
*
* @example
* var out = porterStemmer( 'walked' );
* // returns 'walk'
*
* @example
* var out = porterStemmer( 'walks' );
* // returns 'walk'
*
* @example
* var out = porterStemmer( 'worldwide' );
* // returns 'worldwid'
*
* @example
* var out = porterStemmer( '' );
* // returns ''
*/
function porterStemmer( word ) {
	var firstch;
	var suffix;
	var stem;
	var fp;

	if ( !isString( word ) ) {
		throw new TypeError( 'invalid argument. First argument must be a string primitive. Value: `' + word + '`.' );
	}
	if ( word.length < 3 ) {
		return word;
	}
	word = lowercase( word );
	firstch = word[ 0 ];
	if ( firstch === 'y' ) {
		word = firstch.toUpperCase() + word.substr( 1 );
	}

	// Step 1a:
	if ( RE_STEP1A.test( word ) ) {
		word = replace( word, RE_STEP1A, '$1$2' );
	} else if ( RE2_STEP1A.test( word ) ) {
		word = replace( word, RE2_STEP1A, '$1$2' );
	}

	// Step 1b:
	if ( RE_STEP1B.test( word ) ) {
		fp = RE_STEP1B.exec( word );
		if ( RE_MGR0.test( fp[ 1 ] ) ) {
			word = replace( word, RE_LAST, '' );
		}
	} else if ( RE2_STEP1B.test( word ) ) {
		fp = RE2_STEP1B.exec( word );
		stem = fp[ 1 ];
		if ( RE_SV.test( stem ) ) {
			word = stem;
			if ( RE_ATBLIZ.test( word ) ) {
				word += 'e';
			} else if ( RE_DOUBLE.test( word ) ) {
				word = replace( word, RE_LAST, '' );
			} else if ( RE_CV.test( word ) ) {
				word += 'e';
			}
		}
	}

	// Step 1c:
	if ( RE_STEP1C.test( word ) ) {
		fp = RE_STEP1C.exec( word );
		stem = fp[ 1 ];
		if ( RE_SV.test( stem ) ) {
			word = stem + 'i';
		}
	}

	// Step 2:
	if ( RE_STEP2.test( word ) ) {
		fp = RE_STEP2.exec( word );
		stem = fp[ 1 ];
		suffix = fp[ 2 ];
		if ( RE_MGR0.test( stem ) ) {
			word = stem + step2list[ suffix ];
		}
	}

	// Step 3:
	if ( RE_STEP3.test( word ) ) {
		fp = RE_STEP3.exec( word );
		stem = fp[ 1 ];
		suffix = fp[ 2 ];
		if ( RE_MGR0.test( stem ) ) {
			word = stem + step3list[ suffix ];
		}
	}

	// Step 4:
	if ( RE_STEP4.test( word ) ) {
		fp = RE_STEP4.exec( word );
		stem = fp[ 1 ];
		if ( RE_MGR1.test( stem ) ) {
			word = stem;
		}
	} else if ( RE2_STEP4.test( word ) ) {
		fp = RE2_STEP4.exec( word );
		stem = fp[ 1 ] + fp[ 2 ];
		if ( RE_MGR1.test( stem ) ) {
			word = stem;
		}
	}

	// Step 5:
	if ( RE_STEP5.test( word ) ) {
		fp = RE_STEP5.exec( word );
		stem = fp[ 1 ];
		if (
			RE_MGR1.test( stem ) ||
			( RE_MEQ1.test( stem ) && !( RE_CV.test( stem ) ) )
		) {
			word = stem;
		}
	}
	if ( endsWith( word, 'll' ) && RE_MGR1.test( word ) ) {
		word = replace( word, RE_LAST, '' );
	}

	// Turn initial Y back to y:
	if ( firstch === 'y' ) {
		word = firstch.toLowerCase() + word.substr( 1 );
	}
	return word;
}


// EXPORTS //

module.exports = porterStemmer;