// UTF8Decoder.js (5.7 KB)
  1. "use strict";
  2. Object.defineProperty(exports, "__esModule", { value: true });
  3. var encodings_1 = require("../../encoding/encodings");
  4. var finished_1 = require("../../encoding/finished");
  5. var terminology_1 = require("../../encoding/terminology");
  6. var utilities_1 = require("../../encoding/utilities");
  7. /**
  8. * @constructor
  9. * @implements {Decoder}
  10. * @param {{fatal: boolean}} options
  11. */
  12. var UTF8Decoder = /** @class */ (function () {
  13. function UTF8Decoder(options) {
  14. this.fatal = options.fatal;
  15. // utf-8's decoder's has an associated utf-8 code point, utf-8
  16. // bytes seen, and utf-8 bytes needed (all initially 0), a utf-8
  17. // lower boundary (initially 0x80), and a utf-8 upper boundary
  18. // (initially 0xBF).
  19. /** @type {number} */ this.utf8_code_point = 0,
  20. /** @type {number} */ this.utf8_bytes_seen = 0,
  21. /** @type {number} */ this.utf8_bytes_needed = 0,
  22. /** @type {number} */ this.utf8_lower_boundary = 0x80,
  23. /** @type {number} */ this.utf8_upper_boundary = 0xBF;
  24. }
  25. /**
  26. * @param {Stream} stream The stream of bytes being decoded.
  27. * @param {number} bite The next byte read from the stream.
  28. * @return {?(number|!Array.<number>)} The next code point(s)
  29. * decoded, or null if not enough data exists in the input
  30. * stream to decode a complete code point.
  31. */
  32. UTF8Decoder.prototype.handler = function (stream, bite) {
  33. // 1. If byte is end-of-stream and utf-8 bytes needed is not 0,
  34. // set utf-8 bytes needed to 0 and return error.
  35. if (bite === terminology_1.end_of_stream && this.utf8_bytes_needed !== 0) {
  36. this.utf8_bytes_needed = 0;
  37. return encodings_1.decoderError(this.fatal);
  38. }
  39. // 2. If byte is end-of-stream, return finished.
  40. if (bite === terminology_1.end_of_stream)
  41. return finished_1.finished;
  42. // 3. If utf-8 bytes needed is 0, based on byte:
  43. if (this.utf8_bytes_needed === 0) {
  44. // 0x00 to 0x7F
  45. if (utilities_1.inRange(bite, 0x00, 0x7F)) {
  46. // Return a code point whose value is byte.
  47. return bite;
  48. }
  49. // 0xC2 to 0xDF
  50. else if (utilities_1.inRange(bite, 0xC2, 0xDF)) {
  51. // 1. Set utf-8 bytes needed to 1.
  52. this.utf8_bytes_needed = 1;
  53. // 2. Set UTF-8 code point to byte & 0x1F.
  54. this.utf8_code_point = bite & 0x1F;
  55. }
  56. // 0xE0 to 0xEF
  57. else if (utilities_1.inRange(bite, 0xE0, 0xEF)) {
  58. // 1. If byte is 0xE0, set utf-8 lower boundary to 0xA0.
  59. if (bite === 0xE0)
  60. this.utf8_lower_boundary = 0xA0;
  61. // 2. If byte is 0xED, set utf-8 upper boundary to 0x9F.
  62. if (bite === 0xED)
  63. this.utf8_upper_boundary = 0x9F;
  64. // 3. Set utf-8 bytes needed to 2.
  65. this.utf8_bytes_needed = 2;
  66. // 4. Set UTF-8 code point to byte & 0xF.
  67. this.utf8_code_point = bite & 0xF;
  68. }
  69. // 0xF0 to 0xF4
  70. else if (utilities_1.inRange(bite, 0xF0, 0xF4)) {
  71. // 1. If byte is 0xF0, set utf-8 lower boundary to 0x90.
  72. if (bite === 0xF0)
  73. this.utf8_lower_boundary = 0x90;
  74. // 2. If byte is 0xF4, set utf-8 upper boundary to 0x8F.
  75. if (bite === 0xF4)
  76. this.utf8_upper_boundary = 0x8F;
  77. // 3. Set utf-8 bytes needed to 3.
  78. this.utf8_bytes_needed = 3;
  79. // 4. Set UTF-8 code point to byte & 0x7.
  80. this.utf8_code_point = bite & 0x7;
  81. }
  82. // Otherwise
  83. else {
  84. // Return error.
  85. return encodings_1.decoderError(this.fatal);
  86. }
  87. // Return continue.
  88. return null;
  89. }
  90. // 4. If byte is not in the range utf-8 lower boundary to utf-8
  91. // upper boundary, inclusive, run these substeps:
  92. if (!utilities_1.inRange(bite, this.utf8_lower_boundary, this.utf8_upper_boundary)) {
  93. // 1. Set utf-8 code point, utf-8 bytes needed, and utf-8
  94. // bytes seen to 0, set utf-8 lower boundary to 0x80, and set
  95. // utf-8 upper boundary to 0xBF.
  96. this.utf8_code_point = this.utf8_bytes_needed = this.utf8_bytes_seen = 0;
  97. this.utf8_lower_boundary = 0x80;
  98. this.utf8_upper_boundary = 0xBF;
  99. // 2. Prepend byte to stream.
  100. stream.prepend(bite);
  101. // 3. Return error.
  102. return encodings_1.decoderError(this.fatal);
  103. }
  104. // 5. Set utf-8 lower boundary to 0x80 and utf-8 upper boundary
  105. // to 0xBF.
  106. this.utf8_lower_boundary = 0x80;
  107. this.utf8_upper_boundary = 0xBF;
  108. // 6. Set UTF-8 code point to (UTF-8 code point << 6) | (byte &
  109. // 0x3F)
  110. this.utf8_code_point = (this.utf8_code_point << 6) | (bite & 0x3F);
  111. // 7. Increase utf-8 bytes seen by one.
  112. this.utf8_bytes_seen += 1;
  113. // 8. If utf-8 bytes seen is not equal to utf-8 bytes needed,
  114. // continue.
  115. if (this.utf8_bytes_seen !== this.utf8_bytes_needed)
  116. return null;
  117. // 9. Let code point be utf-8 code point.
  118. var code_point = this.utf8_code_point;
  119. // 10. Set utf-8 code point, utf-8 bytes needed, and utf-8 bytes
  120. // seen to 0.
  121. this.utf8_code_point = this.utf8_bytes_needed = this.utf8_bytes_seen = 0;
  122. // 11. Return a code point whose value is code point.
  123. return code_point;
  124. };
  125. return UTF8Decoder;
  126. }());
  127. exports.UTF8Decoder = UTF8Decoder;
  128. //# sourceMappingURL=UTF8Decoder.js.map