/*! * Shim implementation of the TextEncoder, TextDecoder spec: * http://encoding.spec.whatwg.org/#interface-textencoder * * http://code.google.com/p/stringencoding/source/browse/encoding.js * 09b44d71759d on Sep 19, 2013 * Used under Apache License 2.0 - http://code.google.com/p/stringencoding/ * * Filer: modified to remove non-utf8 aspects, converted to CommonJS */ (function(global) { 'use strict'; // // Utilities // /** * @param {number} a The number to test. * @param {number} min The minimum value in the range, inclusive. * @param {number} max The maximum value in the range, inclusive. * @return {boolean} True if a >= min and a <= max. */ function inRange(a, min, max) { return min <= a && a <= max; } /** * @param {number} n The numerator. * @param {number} d The denominator. * @return {number} The result of the integer division of n by d. */ function div(n, d) { return Math.floor(n / d); } // // Implementation of Encoding specification // http://dvcs.w3.org/hg/encoding/raw-file/tip/Overview.html // // // 3. Terminology // // // 4. Encodings // /** @const */ var EOF_byte = -1; /** @const */ var EOF_code_point = -1; /** * @constructor * @param {Uint8Array} bytes Array of bytes that provide the stream. */ function ByteInputStream(bytes) { /** @type {number} */ var pos = 0; /** @return {number} Get the next byte from the stream. */ this.get = function() { return (pos >= bytes.length) ? EOF_byte : Number(bytes[pos]); }; /** @param {number} n Number (positive or negative) by which to * offset the byte pointer. */ this.offset = function(n) { pos += n; if (pos < 0) { throw new Error('Seeking past start of the buffer'); } if (pos > bytes.length) { throw new Error('Seeking past EOF'); } }; /** * @param {Array.} test Array of bytes to compare against. * @return {boolean} True if the start of the stream matches the test * bytes. */ this.match = function(test) { if (test.length > pos + bytes.length) { return false; } var i; for (i = 0; i < test.length; i += 1) { if (Number(bytes[pos + i]) !== test[i]) { return false; } } return true; }; } /** * @constructor * @param {Array.} bytes The array to write bytes into. */ function ByteOutputStream(bytes) { /** @type {number} */ var pos = 0; /** * @param {...number} var_args The byte or bytes to emit into the stream. * @return {number} The last byte emitted. */ this.emit = function(var_args) { /** @type {number} */ var last = EOF_byte; var i; for (i = 0; i < arguments.length; ++i) { last = Number(arguments[i]); bytes[pos++] = last; } return last; }; } /** * @constructor * @param {string} string The source of code units for the stream. */ function CodePointInputStream(string) { /** * @param {string} string Input string of UTF-16 code units. * @return {Array.} Code points. */ function stringToCodePoints(string) { /** @type {Array.} */ var cps = []; // Based on http://www.w3.org/TR/WebIDL/#idl-DOMString var i = 0, n = string.length; while (i < string.length) { var c = string.charCodeAt(i); if (!inRange(c, 0xD800, 0xDFFF)) { cps.push(c); } else if (inRange(c, 0xDC00, 0xDFFF)) { cps.push(0xFFFD); } else { // (inRange(cu, 0xD800, 0xDBFF)) if (i === n - 1) { cps.push(0xFFFD); } else { var d = string.charCodeAt(i + 1); if (inRange(d, 0xDC00, 0xDFFF)) { var a = c & 0x3FF; var b = d & 0x3FF; i += 1; cps.push(0x10000 + (a << 10) + b); } else { cps.push(0xFFFD); } } } i += 1; } return cps; } /** @type {number} */ var pos = 0; /** @type {Array.} */ var cps = stringToCodePoints(string); /** @param {number} n The number of bytes (positive or negative) * to advance the code point pointer by.*/ this.offset = function(n) { pos += n; if (pos < 0) { throw new Error('Seeking past start of the buffer'); } if (pos > cps.length) { throw new Error('Seeking past EOF'); } }; /** @return {number} Get the next code point from the stream. */ this.get = function() { if (pos >= cps.length) { return EOF_code_point; } return cps[pos]; }; } /** * @constructor */ function CodePointOutputStream() { /** @type {string} */ var string = ''; /** @return {string} The accumulated string. */ this.string = function() { return string; }; /** @param {number} c The code point to encode into the stream. */ this.emit = function(c) { if (c <= 0xFFFF) { string += String.fromCharCode(c); } else { c -= 0x10000; string += String.fromCharCode(0xD800 + ((c >> 10) & 0x3ff)); string += String.fromCharCode(0xDC00 + (c & 0x3ff)); } }; } /** * @constructor * @param {string} message Description of the error. */ function EncodingError(message) { this.name = 'EncodingError'; this.message = message; this.code = 0; } EncodingError.prototype = Error.prototype; /** * @param {boolean} fatal If true, decoding errors raise an exception. * @param {number=} opt_code_point Override the standard fallback code point. * @return {number} The code point to insert on a decoding error. */ function decoderError(fatal, opt_code_point) { if (fatal) { throw new EncodingError('Decoder error'); } return opt_code_point || 0xFFFD; } /** * @param {number} code_point The code point that could not be encoded. */ function encoderError(code_point) { throw new EncodingError('The code point ' + code_point + ' could not be encoded.'); } /** * @param {string} label The encoding label. * @return {?{name:string,labels:Array.}} */ function getEncoding(label) { label = String(label).trim().toLowerCase(); if (Object.prototype.hasOwnProperty.call(label_to_encoding, label)) { return label_to_encoding[label]; } return null; } /** @type {Array.<{encodings: Array.<{name:string,labels:Array.}>, * heading: string}>} */ var encodings = [ { "encodings": [ { "labels": [ "unicode-1-1-utf-8", "utf-8", "utf8" ], "name": "utf-8" } ], "heading": "The Encoding" } // XXXfiler - removed non-utf8 aspects ]; var name_to_encoding = {}; var label_to_encoding = {}; encodings.forEach(function(category) { category.encodings.forEach(function(encoding) { name_to_encoding[encoding.name] = encoding; encoding.labels.forEach(function(label) { label_to_encoding[label] = encoding; }); }); }); // // 7. The encoding // // 7.1 utf-8 /** * @constructor * @param {{fatal: boolean}} options */ function UTF8Decoder(options) { var fatal = options.fatal; var /** @type {number} */ utf8_code_point = 0, /** @type {number} */ utf8_bytes_needed = 0, /** @type {number} */ utf8_bytes_seen = 0, /** @type {number} */ utf8_lower_boundary = 0; /** * @param {ByteInputStream} byte_pointer The byte stream to decode. * @return {?number} The next code point decoded, or null if not enough * data exists in the input stream to decode a complete code point. */ this.decode = function(byte_pointer) { var bite = byte_pointer.get(); if (bite === EOF_byte) { if (utf8_bytes_needed !== 0) { return decoderError(fatal); } return EOF_code_point; } byte_pointer.offset(1); if (utf8_bytes_needed === 0) { if (inRange(bite, 0x00, 0x7F)) { return bite; } if (inRange(bite, 0xC2, 0xDF)) { utf8_bytes_needed = 1; utf8_lower_boundary = 0x80; utf8_code_point = bite - 0xC0; } else if (inRange(bite, 0xE0, 0xEF)) { utf8_bytes_needed = 2; utf8_lower_boundary = 0x800; utf8_code_point = bite - 0xE0; } else if (inRange(bite, 0xF0, 0xF4)) { utf8_bytes_needed = 3; utf8_lower_boundary = 0x10000; utf8_code_point = bite - 0xF0; } else { return decoderError(fatal); } utf8_code_point = utf8_code_point * Math.pow(64, utf8_bytes_needed); return null; } if (!inRange(bite, 0x80, 0xBF)) { utf8_code_point = 0; utf8_bytes_needed = 0; utf8_bytes_seen = 0; utf8_lower_boundary = 0; byte_pointer.offset(-1); return decoderError(fatal); } utf8_bytes_seen += 1; utf8_code_point = utf8_code_point + (bite - 0x80) * Math.pow(64, utf8_bytes_needed - utf8_bytes_seen); if (utf8_bytes_seen !== utf8_bytes_needed) { return null; } var code_point = utf8_code_point; var lower_boundary = utf8_lower_boundary; utf8_code_point = 0; utf8_bytes_needed = 0; utf8_bytes_seen = 0; utf8_lower_boundary = 0; if (inRange(code_point, lower_boundary, 0x10FFFF) && !inRange(code_point, 0xD800, 0xDFFF)) { return code_point; } return decoderError(fatal); }; } /** * @constructor * @param {{fatal: boolean}} options */ function UTF8Encoder(options) { var fatal = options.fatal; /** * @param {ByteOutputStream} output_byte_stream Output byte stream. * @param {CodePointInputStream} code_point_pointer Input stream. * @return {number} The last byte emitted. */ this.encode = function(output_byte_stream, code_point_pointer) { var code_point = code_point_pointer.get(); if (code_point === EOF_code_point) { return EOF_byte; } code_point_pointer.offset(1); if (inRange(code_point, 0xD800, 0xDFFF)) { return encoderError(code_point); } if (inRange(code_point, 0x0000, 0x007f)) { return output_byte_stream.emit(code_point); } var count, offset; if (inRange(code_point, 0x0080, 0x07FF)) { count = 1; offset = 0xC0; } else if (inRange(code_point, 0x0800, 0xFFFF)) { count = 2; offset = 0xE0; } else if (inRange(code_point, 0x10000, 0x10FFFF)) { count = 3; offset = 0xF0; } var result = output_byte_stream.emit( div(code_point, Math.pow(64, count)) + offset); while (count > 0) { var temp = div(code_point, Math.pow(64, count - 1)); result = output_byte_stream.emit(0x80 + (temp % 64)); count -= 1; } return result; }; } name_to_encoding['utf-8'].getEncoder = function(options) { return new UTF8Encoder(options); }; name_to_encoding['utf-8'].getDecoder = function(options) { return new UTF8Decoder(options); }; // // Implementation of Text Encoding Web API // /** @const */ var DEFAULT_ENCODING = 'utf-8'; /** * @constructor * @param {string=} opt_encoding The label of the encoding; * defaults to 'utf-8'. * @param {{fatal: boolean}=} options */ function TextEncoder(opt_encoding, options) { if (!(this instanceof TextEncoder)) { throw new TypeError('Constructor cannot be called as a function'); } opt_encoding = opt_encoding ? String(opt_encoding) : DEFAULT_ENCODING; options = Object(options); /** @private */ this._encoding = getEncoding(opt_encoding); if (this._encoding === null || (this._encoding.name !== 'utf-8' && this._encoding.name !== 'utf-16le' && this._encoding.name !== 'utf-16be')) throw new TypeError('Unknown encoding: ' + opt_encoding); /** @private @type {boolean} */ this._streaming = false; /** @private */ this._encoder = null; /** @private @type {{fatal: boolean}=} */ this._options = { fatal: Boolean(options.fatal) }; if (Object.defineProperty) { Object.defineProperty( this, 'encoding', { get: function() { return this._encoding.name; } }); } else { this.encoding = this._encoding.name; } return this; } TextEncoder.prototype = { /** * @param {string=} opt_string The string to encode. * @param {{stream: boolean}=} options */ encode: function encode(opt_string, options) { opt_string = opt_string ? String(opt_string) : ''; options = Object(options); // TODO: any options? if (!this._streaming) { this._encoder = this._encoding.getEncoder(this._options); } this._streaming = Boolean(options.stream); var bytes = []; var output_stream = new ByteOutputStream(bytes); var input_stream = new CodePointInputStream(opt_string); while (input_stream.get() !== EOF_code_point) { this._encoder.encode(output_stream, input_stream); } if (!this._streaming) { var last_byte; do { last_byte = this._encoder.encode(output_stream, input_stream); } while (last_byte !== EOF_byte); this._encoder = null; } return new Uint8Array(bytes); } }; /** * @constructor * @param {string=} opt_encoding The label of the encoding; * defaults to 'utf-8'. * @param {{fatal: boolean}=} options */ function TextDecoder(opt_encoding, options) { if (!(this instanceof TextDecoder)) { throw new TypeError('Constructor cannot be called as a function'); } opt_encoding = opt_encoding ? String(opt_encoding) : DEFAULT_ENCODING; options = Object(options); /** @private */ this._encoding = getEncoding(opt_encoding); if (this._encoding === null) throw new TypeError('Unknown encoding: ' + opt_encoding); /** @private @type {boolean} */ this._streaming = false; /** @private */ this._decoder = null; /** @private @type {{fatal: boolean}=} */ this._options = { fatal: Boolean(options.fatal) }; if (Object.defineProperty) { Object.defineProperty( this, 'encoding', { get: function() { return this._encoding.name; } }); } else { this.encoding = this._encoding.name; } return this; } // TODO: Issue if input byte stream is offset by decoder // TODO: BOM detection will not work if stream header spans multiple calls // (last N bytes of previous stream may need to be retained?) TextDecoder.prototype = { /** * @param {ArrayBufferView=} opt_view The buffer of bytes to decode. * @param {{stream: boolean}=} options */ decode: function decode(opt_view, options) { if (opt_view && !('buffer' in opt_view && 'byteOffset' in opt_view && 'byteLength' in opt_view)) { throw new TypeError('Expected ArrayBufferView'); } else if (!opt_view) { opt_view = new Uint8Array(0); } options = Object(options); if (!this._streaming) { this._decoder = this._encoding.getDecoder(this._options); this._BOMseen = false; } this._streaming = Boolean(options.stream); var bytes = new Uint8Array(opt_view.buffer, opt_view.byteOffset, opt_view.byteLength); var input_stream = new ByteInputStream(bytes); var output_stream = new CodePointOutputStream(), code_point; while (input_stream.get() !== EOF_byte) { code_point = this._decoder.decode(input_stream); if (code_point !== null && code_point !== EOF_code_point) { output_stream.emit(code_point); } } if (!this._streaming) { do { code_point = this._decoder.decode(input_stream); if (code_point !== null && code_point !== EOF_code_point) { output_stream.emit(code_point); } } while (code_point !== EOF_code_point && input_stream.get() != EOF_byte); this._decoder = null; } var result = output_stream.string(); if (!this._BOMseen && result.length) { this._BOMseen = true; if (['utf-8', 'utf-16le', 'utf-16be'].indexOf(this.encoding) !== -1 && result.charCodeAt(0) === 0xFEFF) { result = result.substring(1); } } return result; } }; // Prefer native impl if available module.exports = { TextEncoder: global['TextEncoder'] || TextEncoder, TextDecoder: global['TextDecoder'] || TextDecoder }; }(this));