| /*! https://mths.be/utf8js v2.1.2 by @mathias */ |
| |
| var stringFromCharCode = String.fromCharCode; |
| |
| // Taken from https://mths.be/punycode |
| function ucs2decode(string) { |
| var output = []; |
| var counter = 0; |
| var length = string.length; |
| var value; |
| var extra; |
| while (counter < length) { |
| value = string.charCodeAt(counter++); |
| if (value >= 0xD800 && value <= 0xDBFF && counter < length) { |
| // high surrogate, and there is a next character |
| extra = string.charCodeAt(counter++); |
| if ((extra & 0xFC00) == 0xDC00) { // low surrogate |
| output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000); |
| } else { |
| // unmatched surrogate; only append this code unit, in case the next |
| // code unit is the high surrogate of a surrogate pair |
| output.push(value); |
| counter--; |
| } |
| } else { |
| output.push(value); |
| } |
| } |
| return output; |
| } |
| |
| // Taken from https://mths.be/punycode |
| function ucs2encode(array) { |
| var length = array.length; |
| var index = -1; |
| var value; |
| var output = ''; |
| while (++index < length) { |
| value = array[index]; |
| if (value > 0xFFFF) { |
| value -= 0x10000; |
| output += stringFromCharCode(value >>> 10 & 0x3FF | 0xD800); |
| value = 0xDC00 | value & 0x3FF; |
| } |
| output += stringFromCharCode(value); |
| } |
| return output; |
| } |
| |
| function checkScalarValue(codePoint, strict) { |
| if (codePoint >= 0xD800 && codePoint <= 0xDFFF) { |
| if (strict) { |
| throw Error( |
| 'Lone surrogate U+' + codePoint.toString(16).toUpperCase() + |
| ' is not a scalar value' |
| ); |
| } |
| return false; |
| } |
| return true; |
| } |
| /*--------------------------------------------------------------------------*/ |
| |
| function createByte(codePoint, shift) { |
| return stringFromCharCode(((codePoint >> shift) & 0x3F) | 0x80); |
| } |
| |
| function encodeCodePoint(codePoint, strict) { |
| if ((codePoint & 0xFFFFFF80) == 0) { // 1-byte sequence |
| return stringFromCharCode(codePoint); |
| } |
| var symbol = ''; |
| if ((codePoint & 0xFFFFF800) == 0) { // 2-byte sequence |
| symbol = stringFromCharCode(((codePoint >> 6) & 0x1F) | 0xC0); |
| } |
| else if ((codePoint & 0xFFFF0000) == 0) { // 3-byte sequence |
| if (!checkScalarValue(codePoint, strict)) { |
| codePoint = 0xFFFD; |
| } |
| symbol = stringFromCharCode(((codePoint >> 12) & 0x0F) | 0xE0); |
| symbol += createByte(codePoint, 6); |
| } |
| else if ((codePoint & 0xFFE00000) == 0) { // 4-byte sequence |
| symbol = stringFromCharCode(((codePoint >> 18) & 0x07) | 0xF0); |
| symbol += createByte(codePoint, 12); |
| symbol += createByte(codePoint, 6); |
| } |
| symbol += stringFromCharCode((codePoint & 0x3F) | 0x80); |
| return symbol; |
| } |
| |
| function utf8encode(string, opts) { |
| opts = opts || {}; |
| var strict = false !== opts.strict; |
| |
| var codePoints = ucs2decode(string); |
| var length = codePoints.length; |
| var index = -1; |
| var codePoint; |
| var byteString = ''; |
| while (++index < length) { |
| codePoint = codePoints[index]; |
| byteString += encodeCodePoint(codePoint, strict); |
| } |
| return byteString; |
| } |
| |
| /*--------------------------------------------------------------------------*/ |
| |
| function readContinuationByte() { |
| if (byteIndex >= byteCount) { |
| throw Error('Invalid byte index'); |
| } |
| |
| var continuationByte = byteArray[byteIndex] & 0xFF; |
| byteIndex++; |
| |
| if ((continuationByte & 0xC0) == 0x80) { |
| return continuationByte & 0x3F; |
| } |
| |
| // If we end up here, it’s not a continuation byte |
| throw Error('Invalid continuation byte'); |
| } |
| |
| function decodeSymbol(strict) { |
| var byte1; |
| var byte2; |
| var byte3; |
| var byte4; |
| var codePoint; |
| |
| if (byteIndex > byteCount) { |
| throw Error('Invalid byte index'); |
| } |
| |
| if (byteIndex == byteCount) { |
| return false; |
| } |
| |
| // Read first byte |
| byte1 = byteArray[byteIndex] & 0xFF; |
| byteIndex++; |
| |
| // 1-byte sequence (no continuation bytes) |
| if ((byte1 & 0x80) == 0) { |
| return byte1; |
| } |
| |
| // 2-byte sequence |
| if ((byte1 & 0xE0) == 0xC0) { |
| byte2 = readContinuationByte(); |
| codePoint = ((byte1 & 0x1F) << 6) | byte2; |
| if (codePoint >= 0x80) { |
| return codePoint; |
| } else { |
| throw Error('Invalid continuation byte'); |
| } |
| } |
| |
| // 3-byte sequence (may include unpaired surrogates) |
| if ((byte1 & 0xF0) == 0xE0) { |
| byte2 = readContinuationByte(); |
| byte3 = readContinuationByte(); |
| codePoint = ((byte1 & 0x0F) << 12) | (byte2 << 6) | byte3; |
| if (codePoint >= 0x0800) { |
| return checkScalarValue(codePoint, strict) ? codePoint : 0xFFFD; |
| } else { |
| throw Error('Invalid continuation byte'); |
| } |
| } |
| |
| // 4-byte sequence |
| if ((byte1 & 0xF8) == 0xF0) { |
| byte2 = readContinuationByte(); |
| byte3 = readContinuationByte(); |
| byte4 = readContinuationByte(); |
| codePoint = ((byte1 & 0x07) << 0x12) | (byte2 << 0x0C) | |
| (byte3 << 0x06) | byte4; |
| if (codePoint >= 0x010000 && codePoint <= 0x10FFFF) { |
| return codePoint; |
| } |
| } |
| |
| throw Error('Invalid UTF-8 detected'); |
| } |
| |
| var byteArray; |
| var byteCount; |
| var byteIndex; |
| function utf8decode(byteString, opts) { |
| opts = opts || {}; |
| var strict = false !== opts.strict; |
| |
| byteArray = ucs2decode(byteString); |
| byteCount = byteArray.length; |
| byteIndex = 0; |
| var codePoints = []; |
| var tmp; |
| while ((tmp = decodeSymbol(strict)) !== false) { |
| codePoints.push(tmp); |
| } |
| return ucs2encode(codePoints); |
| } |
| |
| module.exports = { |
| version: '2.1.2', |
| encode: utf8encode, |
| decode: utf8decode |
| }; |