| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128 |
- "use strict";
- Object.defineProperty(exports, "__esModule", { value: true });
- var encodings_1 = require("../../encoding/encodings");
- var finished_1 = require("../../encoding/finished");
- var terminology_1 = require("../../encoding/terminology");
- var utilities_1 = require("../../encoding/utilities");
/**
 * Incremental UTF-8 decoder: `handler` is fed one byte at a time and
 * emits a code point once a complete, well-formed sequence has been seen.
 *
 * Decoder state (all public fields, kept for backward compatibility):
 *  - utf8_code_point:     bits accumulated so far for the current sequence
 *  - utf8_bytes_seen:     continuation bytes consumed for the current sequence
 *  - utf8_bytes_needed:   continuation bytes the current lead byte requires
 *  - utf8_lower_boundary: smallest acceptable value for the next continuation byte
 *  - utf8_upper_boundary: largest acceptable value for the next continuation byte
 *
 * @constructor
 * @implements {Decoder}
 * @param {{fatal: boolean}} options `fatal` is forwarded verbatim to
 *     `decoderError` whenever malformed input is encountered.
 */
var UTF8Decoder = /** @class */ (function () {
    /**
     * Puts every piece of per-sequence state back to its initial value
     * (code point / bytes seen / bytes needed = 0, boundaries = 0x80..0xBF).
     *
     * Private to this closure so the public surface of UTF8Decoder is
     * unchanged.
     *
     * @param {UTF8Decoder} decoder The instance to reset.
     */
    function resetState(decoder) {
        /** @type {number} */ decoder.utf8_code_point = 0;
        /** @type {number} */ decoder.utf8_bytes_seen = 0;
        /** @type {number} */ decoder.utf8_bytes_needed = 0;
        /** @type {number} */ decoder.utf8_lower_boundary = 0x80;
        /** @type {number} */ decoder.utf8_upper_boundary = 0xBF;
    }
    function UTF8Decoder(options) {
        this.fatal = options.fatal;
        // utf-8's decoder's has an associated utf-8 code point, utf-8
        // bytes seen, and utf-8 bytes needed (all initially 0), a utf-8
        // lower boundary (initially 0x80), and a utf-8 upper boundary
        // (initially 0xBF).
        resetState(this);
    }
    /**
     * Consumes one byte (or the end-of-stream marker) and advances the
     * decoder state machine.
     *
     * @param {Stream} stream The stream of bytes being decoded. Only
     *     `stream.prepend(bite)` is called on it, to push back a byte that
     *     terminated a sequence prematurely so it can be re-read as the
     *     start of a new one.
     * @param {number} bite The next byte read from the stream, or
     *     `end_of_stream`.
     * @return {?(number|!Array.<number>)} The next code point(s)
     *     decoded, or null if not enough data exists in the input
     *     stream to decode a complete code point. On malformed input the
     *     result of `decoderError(this.fatal)` is returned; on a clean
     *     end-of-stream, `finished`.
     */
    UTF8Decoder.prototype.handler = function (stream, bite) {
        if (bite === terminology_1.end_of_stream) {
            // 2. If byte is end-of-stream (and nothing is pending),
            // return finished.
            if (this.utf8_bytes_needed === 0) {
                return finished_1.finished;
            }
            // 1. If byte is end-of-stream and utf-8 bytes needed is not 0,
            // set utf-8 bytes needed to 0 and return error.
            // The partial sequence is discarded entirely: bytes seen, the
            // accumulated code point and any narrowed boundaries are reset
            // too, so an instance that keeps receiving bytes after this
            // point starts from a clean state instead of mis-decoding.
            resetState(this);
            return encodings_1.decoderError(this.fatal);
        }
        // 3. If utf-8 bytes needed is 0, based on byte:
        if (this.utf8_bytes_needed === 0) {
            // 0x00 to 0x7F
            if (utilities_1.inRange(bite, 0x00, 0x7F)) {
                // Return a code point whose value is byte.
                return bite;
            }
            // 0xC2 to 0xDF (0xC0/0xC1 could only start overlong forms)
            if (utilities_1.inRange(bite, 0xC2, 0xDF)) {
                // 1. Set utf-8 bytes needed to 1.
                this.utf8_bytes_needed = 1;
                // 2. Set UTF-8 code point to byte & 0x1F.
                this.utf8_code_point = bite & 0x1F;
            }
            // 0xE0 to 0xEF
            else if (utilities_1.inRange(bite, 0xE0, 0xEF)) {
                // 1. If byte is 0xE0, set utf-8 lower boundary to 0xA0
                // (rejects overlong three-byte forms).
                if (bite === 0xE0) {
                    this.utf8_lower_boundary = 0xA0;
                }
                // 2. If byte is 0xED, set utf-8 upper boundary to 0x9F
                // (rejects the surrogate range U+D800..U+DFFF).
                if (bite === 0xED) {
                    this.utf8_upper_boundary = 0x9F;
                }
                // 3. Set utf-8 bytes needed to 2.
                this.utf8_bytes_needed = 2;
                // 4. Set UTF-8 code point to byte & 0xF.
                this.utf8_code_point = bite & 0xF;
            }
            // 0xF0 to 0xF4
            else if (utilities_1.inRange(bite, 0xF0, 0xF4)) {
                // 1. If byte is 0xF0, set utf-8 lower boundary to 0x90
                // (rejects overlong four-byte forms).
                if (bite === 0xF0) {
                    this.utf8_lower_boundary = 0x90;
                }
                // 2. If byte is 0xF4, set utf-8 upper boundary to 0x8F
                // (caps the result at U+10FFFF).
                if (bite === 0xF4) {
                    this.utf8_upper_boundary = 0x8F;
                }
                // 3. Set utf-8 bytes needed to 3.
                this.utf8_bytes_needed = 3;
                // 4. Set UTF-8 code point to byte & 0x7.
                this.utf8_code_point = bite & 0x7;
            }
            // Otherwise
            else {
                // Return error.
                return encodings_1.decoderError(this.fatal);
            }
            // Return continue.
            return null;
        }
        // 4. If byte is not in the range utf-8 lower boundary to utf-8
        // upper boundary, inclusive, run these substeps:
        if (!utilities_1.inRange(bite, this.utf8_lower_boundary, this.utf8_upper_boundary)) {
            // 1. Set utf-8 code point, utf-8 bytes needed, and utf-8
            // bytes seen to 0, set utf-8 lower boundary to 0x80, and set
            // utf-8 upper boundary to 0xBF.
            resetState(this);
            // 2. Prepend byte to stream.
            stream.prepend(bite);
            // 3. Return error.
            return encodings_1.decoderError(this.fatal);
        }
        // 5. Set utf-8 lower boundary to 0x80 and utf-8 upper boundary
        // to 0xBF. (Narrowed boundaries only apply to the first
        // continuation byte.)
        this.utf8_lower_boundary = 0x80;
        this.utf8_upper_boundary = 0xBF;
        // 6. Set UTF-8 code point to (UTF-8 code point << 6) | (byte &
        // 0x3F)
        this.utf8_code_point = (this.utf8_code_point << 6) | (bite & 0x3F);
        // 7. Increase utf-8 bytes seen by one.
        this.utf8_bytes_seen += 1;
        // 8. If utf-8 bytes seen is not equal to utf-8 bytes needed,
        // continue.
        if (this.utf8_bytes_seen !== this.utf8_bytes_needed) {
            return null;
        }
        // 9. Let code point be utf-8 code point.
        var code_point = this.utf8_code_point;
        // 10. Set utf-8 code point, utf-8 bytes needed, and utf-8 bytes
        // seen to 0. (Boundaries are already at their defaults here.)
        resetState(this);
        // 11. Return a code point whose value is code point.
        return code_point;
    };
    return UTF8Decoder;
}());
- exports.UTF8Decoder = UTF8Decoder;
- //# sourceMappingURL=UTF8Decoder.js.map
|