decode.d.ts 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  1. export declare enum DecodingMode {
  2. /** Legacy mode: entities in text nodes, which can end with any character. */
  3. Legacy = 0,
  4. /** Strict mode: only allow entities terminated with a semicolon. */
  5. Strict = 1,
  6. /** Attribute mode: entities in attributes, which have limitations on ending characters. */
  7. Attribute = 2
  8. }
  9. /**
  10. * Producers for character reference errors as defined in the HTML spec; an implementation can be passed as the optional `errors` argument of `EntityDecoder`.
  11. */
  12. export interface EntityErrorProducer {
  13. missingSemicolonAfterCharacterReference(): void;
  14. absenceOfDigitsInNumericCharacterReference(consumedCharacters: number): void;
  15. validateNumericCharacterReference(code: number): void;
  16. }
  17. /**
  18. * Token decoder with support for writing partial entities.
  19. */
  20. export declare class EntityDecoder {
  21. /** The tree used to decode entities. */
  22. private readonly decodeTree;
  23. /**
  24. * The function that is called when a codepoint is decoded.
  25. *
  26. * For multi-byte named entities, this will be called multiple times,
  27. * with the second codepoint, and the same `consumed` value.
  28. *
  29. * @param codepoint The decoded codepoint.
  30. * @param consumed The number of bytes consumed by the decoder.
  31. */
  32. private readonly emitCodePoint;
  33. /** An optional object that is used to produce errors. */
  34. private readonly errors?;
  35. constructor(
  36. /** The tree used to decode entities. */
  37. decodeTree: Uint16Array,
  38. /**
  39. * The function that is called when a codepoint is decoded.
  40. *
  41. * For multi-byte named entities, this will be called multiple times,
  42. * with the second codepoint, and the same `consumed` value.
  43. *
  44. * @param codepoint The decoded codepoint.
  45. * @param consumed The number of bytes consumed by the decoder.
  46. */
  47. emitCodePoint: (cp: number, consumed: number) => void,
  48. /** An optional object that is used to produce errors. */
  49. errors?: EntityErrorProducer | undefined);
  50. /** The current state of the decoder. */
  51. private state;
  52. /** Characters that were consumed while parsing an entity. */
  53. private consumed;
  54. /**
  55. * The result of the entity.
  56. *
  57. * Either the result index of a named entity, or the codepoint of a
  58. * numeric entity.
  59. */
  60. private result;
  61. /** The current index in the decode tree. */
  62. private treeIndex;
  63. /** The number of characters that were consumed in excess. */
  64. private excess;
  65. /** The mode in which the decoder is operating. */
  66. private decodeMode;
  67. /** The number of characters that have been consumed in the current run. */
  68. private runConsumed;
  69. /** Resets the instance to make it reusable. */
  70. startEntity(decodeMode: DecodingMode): void;
  71. /**
  72. * Write an entity to the decoder. This can be called multiple times with partial entities.
  73. * If the entity is incomplete, the decoder will return -1.
  74. *
  75. * Mirrors the implementation of `getDecoder`, but with the ability to stop decoding if the
  76. * entity is incomplete, and resume when the next string is written.
  77. *
  78. * @param input The string containing the entity (or a continuation of the entity).
  79. * @param offset The offset at which the entity begins. Should be 0 if this is not the first call.
  80. * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
  81. */
  82. write(input: string, offset: number): number;
  83. /**
  84. * Switches between the numeric decimal and hexadecimal states.
  85. *
  86. * Equivalent to the `Numeric character reference state` in the HTML spec.
  87. *
  88. * @param input The string containing the entity (or a continuation of the entity).
  89. * @param offset The current offset.
  90. * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
  91. */
  92. private stateNumericStart;
  93. /**
  94. * Parses a hexadecimal numeric entity.
  95. *
  96. * Equivalent to the `Hexadecimal character reference state` in the HTML spec.
  97. *
  98. * @param input The string containing the entity (or a continuation of the entity).
  99. * @param offset The current offset.
  100. * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
  101. */
  102. private stateNumericHex;
  103. /**
  104. * Parses a decimal numeric entity.
  105. *
  106. * Equivalent to the `Decimal character reference state` in the HTML spec.
  107. *
  108. * @param input The string containing the entity (or a continuation of the entity).
  109. * @param offset The current offset.
  110. * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
  111. */
  112. private stateNumericDecimal;
  113. /**
  114. * Validate and emit a numeric entity.
  115. *
  116. * Implements the logic from the `Hexadecimal character reference start
  117. * state` and `Numeric character reference end state` in the HTML spec.
  118. *
  119. * @param lastCp The last code point of the entity. Used to see if the
  120. * entity was terminated with a semicolon.
  121. * @param expectedLength The minimum number of characters that should be
  122. * consumed. Used to validate that at least one digit
  123. * was consumed.
  124. * @returns The number of characters that were consumed.
  125. */
  126. private emitNumericEntity;
  127. /**
  128. * Parses a named entity.
  129. *
  130. * Equivalent to the `Named character reference state` in the HTML spec.
  131. *
  132. * @param input The string containing the entity (or a continuation of the entity).
  133. * @param offset The current offset.
  134. * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
  135. */
  136. private stateNamedEntity;
  137. /**
  138. * Emit a named entity that was not terminated with a semicolon.
  139. *
  140. * @returns The number of characters consumed.
  141. */
  142. private emitNotTerminatedNamedEntity;
  143. /**
  144. * Emit a named entity.
  145. *
  146. * @param result The index of the entity in the decode tree.
  147. * @param valueLength The number of bytes in the entity.
  148. * @param consumed The number of characters consumed.
  149. *
  150. * @returns The number of characters consumed.
  151. */
  152. private emitNamedEntityData;
  153. /**
  154. * Signal to the parser that the end of the input was reached.
  155. *
  156. * Remaining data will be emitted and relevant errors will be produced.
  157. *
  158. * @returns The number of characters consumed.
  159. */
  160. end(): number;
  161. }
  162. /**
  163. * Determines the branch of the current node that is taken given the current
  164. * character. This function is used to traverse the trie.
  165. *
  166. * @param decodeTree The trie.
  167. * @param current The current node.
  168. * @param nodeIndex The index right after the current node and its value.
  169. * @param char The current character.
  170. * @returns The index of the next node, or -1 if no branch is taken.
  171. */
  172. export declare function determineBranch(decodeTree: Uint16Array, current: number, nodeIndex: number, char: number): number;
  173. /**
  174. * Decodes the entities in an HTML string.
  175. *
  176. * @param htmlString The string to decode.
  177. * @param mode The decoding mode (optional).
  178. * @returns The decoded string.
  179. */
  180. export declare function decodeHTML(htmlString: string, mode?: DecodingMode): string;
  181. /**
  182. * Decodes an HTML string that appears in an attribute.
  183. *
  184. * @param htmlAttribute The attribute string to decode.
  185. * @returns The decoded string.
  186. */
  187. export declare function decodeHTMLAttribute(htmlAttribute: string): string;
  188. /**
  189. * Decodes an HTML string, requiring every entity to be terminated by a semicolon.
  190. *
  191. * @param htmlString The HTML string to decode.
  192. * @returns The decoded string.
  193. */
  194. export declare function decodeHTMLStrict(htmlString: string): string;
  195. /**
  196. * Decodes an XML string, requiring every entity to be terminated by a semicolon.
  197. *
  198. * @param xmlString The XML string to decode.
  199. * @returns The decoded string.
  200. */
  201. export declare function decodeXML(xmlString: string): string;
  202. export { decodeCodePoint, fromCodePoint, replaceCodePoint, } from "./decode-codepoint.js";
  203. export { htmlDecodeTree } from "./generated/decode-data-html.js";
  204. export { xmlDecodeTree } from "./generated/decode-data-xml.js";
  205. //# sourceMappingURL=decode.d.ts.map