|  | ||
|---|---|---|
| .. | ||
| docs | ||
| lib | ||
| package.json | ||
| README.md | ||
tokenize
Tokenize a string.
Usage
var tokenize = require( '@stdlib/nlp/tokenize' );
tokenize( str[, keepWhitespace] )
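Splits a string into an array of tokens, such as words and punctuation marks. By default, whitespace is not included in the output.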
var str = 'Hello Mrs. Maple, could you call me back?';
var out = tokenize( str );
// returns [ 'Hello', 'Mrs.', 'Maple', ',', 'could', 'you', 'call', 'me', 'back', '?' ]
To include whitespace characters (spaces, tabs, line breaks) in the output array, set `keepWhitespace` to `true`.
var str = 'Hello World!\n';
var out = tokenize( str, true );
// returns [ 'Hello', ' ', 'World', '!', '\n' ]
Examples
var tokenize = require( '@stdlib/nlp/tokenize' );
console.log( tokenize( 'Hello World!' ) );
// => [ 'Hello', 'World', '!' ]
console.log( tokenize( '' ) );
// => []
var str = 'Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod.';
console.log( tokenize( str ) );
/* =>
    [
        'Lorem',
        'ipsum',
        'dolor',
        'sit',
        'amet',
        ',',
        'consetetur',
        'sadipscing',
        'elitr',
        ',',
        'sed',
        'diam',
        'nonumy',
        'eirmod',
        '.'
    ]
*/