encode.js 3.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  1. "use strict";
  2. Object.defineProperty(exports, "__esModule", { value: true });
  3. exports.encodeHTML = encodeHTML;
  4. exports.encodeNonAsciiHTML = encodeNonAsciiHTML;
  5. const escape_js_1 = require("./escape.js");
  6. const encode_html_js_1 = require("./generated/encode-html.js");
  /**
   * Bitset of the ASCII code points that `encodeHTML` hands to the encoder.
   *
   * Each 32-bit word covers 32 consecutive code points: character `c` is
   * represented by bit `c & 31` of word `c >>> 5`, which keeps lookups cheap.
   */
  const HTML_BITSET = /* #__PURE__ */ new Uint32Array([
    0x00001600, // 0x00..0x1F -> 09, 0A, 0C
    0xfc00fffe, // 0x20..0x3F -> 21-2F (everything but the space), 3A-3F
    0xf8000001, // 0x40..0x5F -> 40, 5B-5F
    0x38000001, // 0x60..0x7F -> 60, 7B-7D
  ]);
  /**
   * Bitset passed to the encoder by `encodeNonAsciiHTML`. Only the word for
   * code points 32..63 is populated (with `XML_BITSET_VALUE` from
   * `./escape.js`); all other ASCII ranges are left clear.
   */
  const XML_BITSET = /* #__PURE__ */ new Uint32Array([
    0, // 0..31
    escape_js_1.XML_BITSET_VALUE, // 32..63
    0, // 64..95
    0, // 96..127
  ]);
  /**
   * Encodes `input` using HTML entities. Every non-ASCII character is
   * encoded, as is every ASCII character flagged in `HTML_BITSET` — this
   * includes characters that are valid ASCII characters in HTML documents,
   * such as `#`.
   *
   * To get a more compact output, consider using the `encodeNonAsciiHTML`
   * function, which will only encode characters that are not valid in HTML
   * documents, as well as non-ASCII characters.
   *
   * If a character has no equivalent entity, a numeric hexadecimal reference
   * (eg. `&#xfc;`) will be used.
   *
   * @param {string} input Text to encode.
   * @returns {string} The encoded text (the input itself when nothing needed encoding).
   */
  function encodeHTML(input) {
    return encodeHTMLTrieRe(HTML_BITSET, input);
  }
  /**
   * Encodes all non-ASCII characters, as well as characters not valid in HTML
   * documents (the ASCII characters flagged in `XML_BITSET`), using HTML
   * entities. This function will not encode characters that are valid in HTML
   * documents, such as `#`.
   *
   * If a character has no equivalent entity, a numeric hexadecimal reference
   * (eg. `&#xfc;`) will be used.
   *
   * @param {string} input Text to encode.
   * @returns {string} The encoded text (the input itself when nothing needed encoding).
   */
  function encodeNonAsciiHTML(input) {
    return encodeHTMLTrieRe(XML_BITSET, input);
  }
  /**
   * Core encoder shared by `encodeHTML` and `encodeNonAsciiHTML`.
   *
   * Walks `input` one UTF-16 code unit at a time. ASCII code units whose bit
   * is clear in `bitset` are left untouched. Every other code unit is replaced
   * by the entity stored for it in `htmlTrie`, or by a hexadecimal numeric
   * reference (`&#x…;`) when the trie has no entry. Untouched runs are copied
   * over in slices, and if nothing needs encoding the original string is
   * returned unchanged.
   *
   * @param {Uint32Array} bitset Four 32-bit words; bit `c & 31` of word
   *   `c >>> 5` is set when ASCII code `c` must be encoded.
   * @param {string} input Text to encode.
   * @returns {string} The encoded text.
   */
  function encodeHTMLTrieRe(bitset, input) {
    let out;
    let last = 0; // Start of the next untouched slice.
    const { length } = input;
    for (let index = 0; index < length; index++) {
      const char = input.charCodeAt(index);
      // Skip ASCII characters that don't need encoding. `>>>` only uses the
      // low five bits of its shift count, so `>>> char` selects bit `char & 31`.
      if (char < 0x80 && !((bitset[char >>> 5] >>> char) & 1)) {
        continue;
      }
      // Flush the untouched run that precedes this character. `out` is only
      // created once the first character needing encoding is found.
      if (out === undefined) {
        out = input.substring(0, index);
      } else if (last !== index) {
        out += input.substring(last, index);
      }
      let node = encode_html_js_1.htmlTrie.get(char);
      if (typeof node === "object") {
        // Object nodes may hold an entity for this code unit plus the next
        // one; prefer that longer match. `next` is either a single code unit
        // (paired with `nextValue`) or a map keyed by the following code unit.
        if (index + 1 < length) {
          const nextChar = input.charCodeAt(index + 1);
          let value;
          if (typeof node.next === "number") {
            value = node.next === nextChar ? node.nextValue : undefined;
          } else {
            value = node.next.get(nextChar);
          }
          if (value !== undefined) {
            out += value;
            index++; // The entity consumed the following code unit as well.
            last = index + 1;
            continue;
          }
        }
        // No two-unit match: fall back to the single-unit entity, if any.
        node = node.value;
      }
      if (node === undefined) {
        // No entity available: emit a hexadecimal numeric reference.
        const cp = (0, escape_js_1.getCodePoint)(input, index);
        out += `&#x${cp.toString(16)};`;
        // A code point differing from the code unit means getCodePoint also
        // read the next unit (presumably a surrogate pair), so skip it.
        if (cp !== char) {
          index++;
        }
      } else {
        out += node;
      }
      last = index + 1;
    }
    if (out === undefined) {
      return input;
    }
    // Append whatever untouched text follows the last encoded character.
    if (last < length) {
      out += input.substring(last);
    }
    return out;
  }
  92. //# sourceMappingURL=encode.js.map