/** * @license Apache-2.0 * * Copyright (c) 2020 The Stdlib Authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ 'use strict'; // MODULES // var isBoolean = require( '@stdlib/assert/is-boolean' ).isPrimitive; var isString = require( '@stdlib/assert/is-string' ).isPrimitive; var isNonNegativeInteger = require( '@stdlib/assert/is-nonnegative-integer' ).isPrimitive; var format = require( './../../format' ); // VARIABLES // // Factors for converting individual surrogates var Ox10000 = 0x10000|0; // 65536 var Ox400 = 0x400|0; // 1024 // Range for a high surrogate var OxD800 = 0xD800|0; // 55296 var OxDBFF = 0xDBFF|0; // 56319 // Range for a low surrogate var OxDC00 = 0xDC00|0; // 56320 var OxDFFF = 0xDFFF|0; // 57343 // MAIN // /** * Returns a Unicode code point from a string at a specified position. * * ## Notes * * - UTF-16 encoding uses one 16-bit unit for non-surrogates (U+0000 to U+D7FF and U+E000 to U+FFFF). * - UTF-16 encoding uses two 16-bit units (surrogate pairs) for U+10000 to U+10FFFF and encodes U+10000-U+10FFFF by subtracting 0x10000 from the code point, expressing the result as a 20-bit binary, and splitting the 20 bits of 0x0-0xFFFFF as upper and lower 10-bits. The respective 10-bits are stored in two 16-bit words: a high and a low surrogate. * * * @param {string} str - input string * @param {NonNegativeInteger} idx - position * @param {boolean} [backward=false] - backward iteration for low surrogates * @throws {TypeError} first argument must be a string * @throws {TypeError} second argument must be a number having a nonnegative integer * @throws {TypeError} third argument must be a boolean * @throws {RangeError} position must be a valid index in string * @returns {NonNegativeInteger} code point * * @example * var out = codePointAt( 'last man standing', 4 ); * // returns 32 * * @example * var out = codePointAt( 'presidential election', 8, true ); * // returns 116 * * @example * var out = codePointAt( 'अनुच्छेद', 2 ); * // returns 2369 * * @example * var out = codePointAt( '🌷', 1, true ); * // returns 127799 */ function codePointAt( str, idx, backward ) { var code; var FLG; var low; var hi; if ( !isString( str ) ) { throw new TypeError( format( 'invalid argument. Must provide a string. Value: `%s`.', str ) ); } if ( !isNonNegativeInteger( idx ) ) { throw new TypeError( format( 'invalid argument. Must provide a valid position (i.e., a nonnegative integer). Value: `%s`.', idx ) ); } if ( idx >= str.length ) { throw new RangeError( format( 'invalid argument. Must provide a valid position (i.e., within string bounds). Value: `%u`.', idx ) ); } if ( arguments.length > 2 ) { if ( !isBoolean( backward ) ) { throw new TypeError( format( 'invalid argument. Third argument must be a boolean. Value: `%s`.', backward ) ); } FLG = backward; } else { FLG = false; } code = str.charCodeAt( idx ); // High surrogate if ( code >= OxD800 && code <= OxDBFF && idx < str.length - 1 ) { hi = code; low = str.charCodeAt( idx+1 ); if ( OxDC00 <= low && low <= OxDFFF ) { return ( ( hi - OxD800 ) * Ox400 ) + ( low - OxDC00 ) + Ox10000; } return hi; } // Low surrogate - support only if backward iteration is desired if ( FLG ) { if ( code >= OxDC00 && code <= OxDFFF && idx >= 1 ) { hi = str.charCodeAt( idx-1 ); low = code; if ( OxD800 <= hi && hi <= OxDBFF ) { return ( ( hi - OxD800 ) * Ox400 ) + ( low - OxDC00 ) + Ox10000; } return low; } } return code; } // EXPORTS // module.exports = codePointAt;