GB18030Decoder.js 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
  1. import { decoderError } from "../../encoding/encodings";
  2. import { finished } from "../../encoding/finished";
  3. import { index, indexCodePointFor, indexGB18030RangesCodePointFor } from "../../encoding/indexes";
  4. import { end_of_stream, isASCIIByte } from "../../encoding/terminology";
  5. import { inRange } from "../../encoding/utilities";
  6. /**
  7. * @constructor
  8. * @implements {Decoder}
  9. * @param {{fatal: boolean}} options
  10. */
  11. export class GB18030Decoder {
  12. constructor(options) {
  13. this.fatal = options.fatal;
  14. // gb18030's decoder has an associated gb18030 first, gb18030
  15. // second, and gb18030 third (all initially 0x00).
  16. /** @type {number} */ this.gb18030_first = 0x00,
  17. /** @type {number} */ this.gb18030_second = 0x00,
  18. /** @type {number} */ this.gb18030_third = 0x00;
  19. }
  20. /**
  21. * @param {Stream} stream The stream of bytes being decoded.
  22. * @param {number} bite The next byte read from the stream.
  23. * @return {?(number|!Array.<number>)} The next code point(s)
  24. * decoded, or null if not enough data exists in the input
  25. * stream to decode a complete code point.
  26. */
  27. handler(stream, bite) {
  28. // 1. If byte is end-of-stream and gb18030 first, gb18030
  29. // second, and gb18030 third are 0x00, return finished.
  30. if (bite === end_of_stream && this.gb18030_first === 0x00 &&
  31. this.gb18030_second === 0x00 && this.gb18030_third === 0x00) {
  32. return finished;
  33. }
  34. // 2. If byte is end-of-stream, and gb18030 first, gb18030
  35. // second, or gb18030 third is not 0x00, set gb18030 first,
  36. // gb18030 second, and gb18030 third to 0x00, and return error.
  37. if (bite === end_of_stream &&
  38. (this.gb18030_first !== 0x00 || this.gb18030_second !== 0x00 ||
  39. this.gb18030_third !== 0x00)) {
  40. this.gb18030_first = 0x00;
  41. this.gb18030_second = 0x00;
  42. this.gb18030_third = 0x00;
  43. decoderError(this.fatal);
  44. }
  45. let code_point;
  46. // 3. If gb18030 third is not 0x00, run these substeps:
  47. if (this.gb18030_third !== 0x00) {
  48. // 1. Let code point be null.
  49. code_point = null;
  50. // 2. If byte is in the range 0x30 to 0x39, inclusive, set
  51. // code point to the index gb18030 ranges code point for
  52. // (((gb18030 first − 0x81) × 10 + gb18030 second − 0x30) ×
  53. // 126 + gb18030 third − 0x81) × 10 + byte − 0x30.
  54. if (inRange(bite, 0x30, 0x39)) {
  55. code_point = indexGB18030RangesCodePointFor((((this.gb18030_first - 0x81) * 10 + this.gb18030_second - 0x30) * 126 +
  56. this.gb18030_third - 0x81) * 10 + bite - 0x30);
  57. }
  58. // 3. Let buffer be a byte sequence consisting of gb18030
  59. // second, gb18030 third, and byte, in order.
  60. const buffer = [this.gb18030_second, this.gb18030_third, bite];
  61. // 4. Set gb18030 first, gb18030 second, and gb18030 third to
  62. // 0x00.
  63. this.gb18030_first = 0x00;
  64. this.gb18030_second = 0x00;
  65. this.gb18030_third = 0x00;
  66. // 5. If code point is null, prepend buffer to stream and
  67. // return error.
  68. if (code_point === null) {
  69. stream.prepend(buffer);
  70. return decoderError(this.fatal);
  71. }
  72. // 6. Return a code point whose value is code point.
  73. return code_point;
  74. }
  75. // 4. If gb18030 second is not 0x00, run these substeps:
  76. if (this.gb18030_second !== 0x00) {
  77. // 1. If byte is in the range 0x81 to 0xFE, inclusive, set
  78. // gb18030 third to byte and return continue.
  79. if (inRange(bite, 0x81, 0xFE)) {
  80. this.gb18030_third = bite;
  81. return null;
  82. }
  83. // 2. Prepend gb18030 second followed by byte to stream, set
  84. // gb18030 first and gb18030 second to 0x00, and return error.
  85. stream.prepend([this.gb18030_second, bite]);
  86. this.gb18030_first = 0x00;
  87. this.gb18030_second = 0x00;
  88. return decoderError(this.fatal);
  89. }
  90. // 5. If gb18030 first is not 0x00, run these substeps:
  91. if (this.gb18030_first !== 0x00) {
  92. // 1. If byte is in the range 0x30 to 0x39, inclusive, set
  93. // gb18030 second to byte and return continue.
  94. if (inRange(bite, 0x30, 0x39)) {
  95. this.gb18030_second = bite;
  96. return null;
  97. }
  98. // 2. Let lead be gb18030 first, let pointer be null, and set
  99. // gb18030 first to 0x00.
  100. const lead = this.gb18030_first;
  101. let pointer = null;
  102. this.gb18030_first = 0x00;
  103. // 3. Let offset be 0x40 if byte is less than 0x7F and 0x41
  104. // otherwise.
  105. const offset = bite < 0x7F ? 0x40 : 0x41;
  106. // 4. If byte is in the range 0x40 to 0x7E, inclusive, or 0x80
  107. // to 0xFE, inclusive, set pointer to (lead − 0x81) × 190 +
  108. // (byte − offset).
  109. if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0x80, 0xFE))
  110. pointer = (lead - 0x81) * 190 + (bite - offset);
  111. // 5. Let code point be null if pointer is null and the index
  112. // code point for pointer in index gb18030 otherwise.
  113. code_point = pointer === null ? null :
  114. indexCodePointFor(pointer, index('gb18030'));
  115. // 6. If code point is null and byte is an ASCII byte, prepend
  116. // byte to stream.
  117. if (code_point === null && isASCIIByte(bite))
  118. stream.prepend(bite);
  119. // 7. If code point is null, return error.
  120. if (code_point === null)
  121. return decoderError(this.fatal);
  122. // 8. Return a code point whose value is code point.
  123. return code_point;
  124. }
  125. // 6. If byte is an ASCII byte, return a code point whose value
  126. // is byte.
  127. if (isASCIIByte(bite))
  128. return bite;
  129. // 7. If byte is 0x80, return code point U+20AC.
  130. if (bite === 0x80)
  131. return 0x20AC;
  132. // 8. If byte is in the range 0x81 to 0xFE, inclusive, set
  133. // gb18030 first to byte and return continue.
  134. if (inRange(bite, 0x81, 0xFE)) {
  135. this.gb18030_first = bite;
  136. return null;
  137. }
  138. // 9. Return error.
  139. return decoderError(this.fatal);
  140. }
  141. }
  142. //# sourceMappingURL=GB18030Decoder.js.map