/**
* @namespace
* @name pc.string
* @description Extended String API
*/
pc.string = function () {
// ASCII alphabets; all three are re-exported on the object returned below.
var ASCII_LOWERCASE = "abcdefghijklmnopqrstuvwxyz";
var ASCII_UPPERCASE = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
var ASCII_LETTERS = ASCII_LOWERCASE + ASCII_UPPERCASE;
// UTF-16 surrogate code unit ranges. A high surrogate followed by a low
// surrogate is decoded into a single code point by getCodePointData.
var HIGH_SURROGATE_BEGIN = 0xD800;
var HIGH_SURROGATE_END = 0xDBFF;
var LOW_SURROGATE_BEGIN = 0xDC00;
var LOW_SURROGATE_END = 0xDFFF;
// Zero width joiner: when getSymbols finds it after a symbol, it keeps
// consuming so that the joined parts end up in one output entry.
var ZERO_WIDTH_JOINER = 0x200D;
// Regional indicator symbols; two in a row are taken together as one flag emoji
var REGIONAL_INDICATOR_BEGIN = 0x1F1E6;
var REGIONAL_INDICATOR_END = 0x1F1FF;
// Fitzpatrick skin tone modifiers, taken together with the emoji they follow
var FITZPATRICK_MODIFIER_BEGIN = 0x1F3FB;
var FITZPATRICK_MODIFIER_END = 0x1F3FF;
// Combining marks that getSymbols attaches to the preceding character.
// NOTE(review): 0x20D0-0x20FF is the "Combining Diacritical Marks for Symbols"
// block; the common accent block (0x0300-0x036F) is not covered - confirm
// whether that is intentional.
var DIACRITICAL_MARKS_BEGIN = 0x20D0;
var DIACRITICAL_MARKS_END = 0x20FF;
// Variation selectors (e.g. 0xFE0F requests emoji presentation), kept with the
// character they follow
var VARIATION_MODIFIER_BEGIN = 0xFE00;
var VARIATION_MODIFIER_END = 0xFE0F;
/**
 * Decode the code point that starts at a given UTF-16 index of a string.
 *
 * @private
 * @param {String} string The string to read from.
 * @param {Number} [i] Index of the code unit to start at. Falsy values are treated as 0.
 * @returns {Object|null} An object of the form { code, long }, where code is the decoded
 * code point and long is true when it was built from a surrogate pair (two code units).
 * Returns null when the index lies outside the string.
 */
function getCodePointData(string, i) {
    var length = string.length;
    var start = i || 0;

    // Nothing to decode outside the string
    if (start < 0 || start >= length) {
        return null;
    }

    var lead = string.charCodeAt(start);
    var isLead = lead >= HIGH_SURROGATE_BEGIN && lead <= HIGH_SURROGATE_END;

    if (length > 1 && isLead) {
        // Past the end of the string charCodeAt yields NaN, which fails the range test below
        var trail = string.charCodeAt(start + 1);
        var isTrail = trail >= LOW_SURROGATE_BEGIN && trail <= LOW_SURROGATE_END;

        if (isTrail) {
            // https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
            var combined = 0x10000 + (lead - HIGH_SURROGATE_BEGIN) * 0x400 + (trail - LOW_SURROGATE_BEGIN);
            return { code: combined, long: true };
        }
    }

    // BMP character, or a surrogate that is not part of a valid pair
    return { code: lead, long: false };
}
/**
 * Test whether the first code point of a string lies inside an inclusive range.
 *
 * @private
 * @param {String} string The string whose first code point is examined. Falsy values
 * (empty string, undefined, null) always yield false.
 * @param {Number} begin Lowest code point of the range (inclusive).
 * @param {Number} end Highest code point of the range (inclusive).
 * @returns {Boolean} True if the first code point is within [begin, end].
 */
function isCodeBetween(string, begin, end) {
    var data = string ? getCodePointData(string) : null;
    if (!data) {
        return false;
    }
    return data.code >= begin && data.code <= end;
}
/**
 * Work out how many UTF-16 code units, starting at index, should be taken as the
 * next symbol of a string.
 *
 * A surrogate pair counts as 2 units, plus 1 more when a variation selector follows,
 * or 4 in total when it is followed by a Fitzpatrick modifier or when it and the next
 * pair are both regional indicators (a flag). Any other code unit counts as 1, or 2
 * when a variation selector follows it. A high surrogate that is NOT followed by a low
 * surrogate is malformed input and is treated like any other single code unit, so it
 * never swallows the unrelated character(s) after it.
 *
 * @private
 * @param {String} string The string being split.
 * @param {Number} index Index of the first code unit of the next symbol.
 * @returns {Number} The number of code units to take.
 */
function numCharsToTakeForNextSymbol(string, index) {
    if (index === string.length - 1) {
        // Last character in the string, so we can only take 1
        return 1;
    }

    // Only a high surrogate that is really followed by a low surrogate starts a pair
    var startsSurrogatePair =
        isCodeBetween(string[index], HIGH_SURROGATE_BEGIN, HIGH_SURROGATE_END) &&
        isCodeBetween(string[index + 1], LOW_SURROGATE_BEGIN, LOW_SURROGATE_END);

    if (startsSurrogatePair) {
        var first = string.substring(index, index + 2);
        var second = string.substring(index + 2, index + 4);

        // check if second character is fitzpatrick (color) modifier
        // or if this is a pair of regional indicators (a flag)
        if (isCodeBetween(second, FITZPATRICK_MODIFIER_BEGIN, FITZPATRICK_MODIFIER_END) ||
            (isCodeBetween(first, REGIONAL_INDICATOR_BEGIN, REGIONAL_INDICATOR_END) &&
             isCodeBetween(second, REGIONAL_INDICATOR_BEGIN, REGIONAL_INDICATOR_END))
        ) {
            return 4;
        }

        // check if next character is a modifier, in which case we should return it
        if (isCodeBetween(second, VARIATION_MODIFIER_BEGIN, VARIATION_MODIFIER_END)) {
            return 3;
        }

        // return surrogate pair
        return 2;
    }

    // check if next character is the emoji modifier, in which case we should include it
    if (isCodeBetween(string[index + 1], VARIATION_MODIFIER_BEGIN, VARIATION_MODIFIER_END)) {
        return 2;
    }

    // just a regular character
    return 1;
}
return {
/**
* @name pc.string.ASCII_LOWERCASE
* @description All lowercase ASCII letters (a-z)
* @type String
*/
ASCII_LOWERCASE: ASCII_LOWERCASE,
/**
* @name pc.string.ASCII_UPPERCASE
* @description All uppercase ASCII letters (A-Z)
* @type String
*/
ASCII_UPPERCASE: ASCII_UPPERCASE,
/**
* @name pc.string.ASCII_LETTERS
* @description All ASCII letters: the lowercase letters followed by the uppercase letters
* @type String
*/
ASCII_LETTERS: ASCII_LETTERS,
/**
* @function
* @name pc.string.format
* @description Return a string with {n} replaced with the n-th argument
* @param {String} s The string to format
* @param {Object} [arguments] All other arguments are substituted into the string
* @returns {String} The formatted string
* @example
* var s = pc.string.format("Hello {0}", "world");
* console.log(s); // Prints "Hello world"
*/
format: function (s) {
var i = 0,
regexp,
args = pc.makeArray(arguments);
// drop first argument
args.shift();
for (i = 0; i < args.length; i++) {
regexp = new RegExp('\\{' + i + '\\}', 'gi');
s = s.replace(regexp, args[i]);
}
return s;
},
/**
* @private
* @function
* @name pc.string.startsWith
* @description Check if a string s starts with another string subs
* @param {String} s The string to look in
* @param {String} subs The string to look for
* @returns {Boolean} True if s starts with subs
* @deprecated
* @example
* var s = "abc";
* if (pc.string.startsWith(s, "a")) {
* console.log('Starts with a');
* }
*/
startsWith: function (s, subs) {
console.warn("WARNING: startsWith: Function is deprecated. Use String.startsWith instead.");
return s.startsWith(subs);
},
/**
* @private
* @function
* @name pc.string.endsWith
* @description Check if a string s ends with another string subs
* @param {String} s The string to look in
* @param {String} subs The string to look for
* @returns {Boolean} True if s ends with subs
* @deprecated
*/
endsWith: function (s, subs) {
console.warn("WARNING: endsWith: Function is deprecated. Use String.endsWith instead.");
return s.endsWith(subs);
},
/**
* @function
* @name pc.string.toBool
* @description Convert a string value to a boolean. In non-strict mode (the default), 'true' is converted to true, all other values
* are converted to false. In strict mode, 'true' is converted to true, 'false' is converted to false, all other values will throw
* an Exception.
* @param {String} s The string to convert
* @param {Boolean} [strict] In strict mode an Exception is thrown if s is not an accepted string value. Defaults to false
* @returns {Boolean} The converted value
*/
toBool: function (s, strict) {
if (s === 'true') {
return true;
}
if (strict) {
if (s === 'false') {
return false;
}
throw new TypeError('Not a boolean string');
}
return false;
},
/**
* @function
* @name pc.string.getCodePoint
* @description Get the code point number for a character in a string. Polyfill for
* [<code>codePointAt</code>]{@link https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/codePointAt}.
* @param {String} string The string to get the code point from
* @param {Number} [i] The index in the string
* @returns {Number} The code point value for the character in the string
*/
getCodePoint: function (string, i) {
var codePointData = getCodePointData(string, i);
return codePointData && codePointData.code;
},
/**
* @function
* @name pc.string.getCodePoints
* @description Gets an array of all code points in a string
* @param {String} string The string to get code points from
* @returns {Number[]} The code points in the string
*/
getCodePoints: function (string) {
if (typeof string !== 'string') {
throw new TypeError('Not a string');
}
var i = 0;
var arr = [];
var codePoint;
while (!!(codePoint = getCodePointData(string, i))) {
arr.push(codePoint.code);
i += codePoint.long ? 2 : 1;
}
return arr;
},
/**
* @function
* @name pc.string.getSymbols
* @description Gets an array of all grapheme clusters (visible symbols) in a string. This is needed because
* some symbols (such as emoji or accented characters) are actually made up of multiple character codes.
* @param {String} string The string to break into symbols
* @returns {String[]} The symbols in the string
* @see {@link https://mathiasbynens.be/notes/javascript-unicode Unicode strings in JavaScript}
*/
getSymbols: function (string) {
if (typeof string !== 'string') {
throw new TypeError('Not a string');
}
var index = 0;
var length = string.length;
var output = [];
var take = 0;
var ch;
while (index < length) {
take += numCharsToTakeForNextSymbol(string, index + take);
ch = string[index + take];
// Handle special cases
if (isCodeBetween(ch, DIACRITICAL_MARKS_BEGIN, DIACRITICAL_MARKS_END)) {
ch = string[index + (take++)];
}
if (isCodeBetween(ch, VARIATION_MODIFIER_BEGIN, VARIATION_MODIFIER_END)) {
ch = string[index + (take++)];
}
if (ch && ch.charCodeAt(0) === ZERO_WIDTH_JOINER) {
ch = string[index + (take++)];
// Not a complete char yet
continue;
}
var char = string.substring(index, index + take);
output.push(char);
index += take;
take = 0;
}
return output;
},
/**
* @function
* @name pc.string.fromCodePoint
* @description Get the string for a given code point or set of code points. Polyfill for
* [<code>fromCodePoint</code>]{@link https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/fromCodePoint}.
* @param {...Number} args The code points to convert to a string
* @returns {String} The converted string
*/
fromCodePoint: function (/* ...args */) {
var chars = [];
var current;
var codePoint;
var units;
for (var i = 0; i < arguments.length; ++i) {
current = Number(arguments[i]);
codePoint = current - 0x10000;
units = current > 0xFFFF ? [(codePoint >> 10) + 0xD800, (codePoint % 0x400) + 0xDC00] : [current];
chars.push(String.fromCharCode.apply(null, units));
}
return chars.join('');
}
};
}();