// sax.js
  1. ;(function (sax) {
  // Factory wrapper, mainly for non-node environments, so callers can
  // obtain a parser with a plain function call instead of `new`.
  sax.parser = function (strict, opt) {
    return new SAXParser(strict, opt)
  }

  // Public constructors and the stream factory.
  sax.SAXParser = SAXParser
  sax.SAXStream = SAXStream
  sax.createStream = createStream
  // Buffer overrun checks begin once the parse position passes
  // MAX_BUFFER_LENGTH. After each check, the next one is scheduled for
  // MAX_BUFFER_LENGTH - (length of the longest buffer), which is the earliest
  // point at which an overrun could happen. Checks are therefore as rare as
  // possible while still never letting a buffer cross the bound.
  // Buffers are tested at most once per write(), so handing write() a very
  // large string may have undesirable effects; that is left to the caller to
  // manage. In the extreme case a single write() may create at most one
  // complete copy of the string passed in.
  // Set to Infinity to have unlimited buffers.
  sax.MAX_BUFFER_LENGTH = 64 * 1024

  // Names of the parser properties that accumulate characters; these are the
  // fields reset by clearBuffers() and measured by checkBufferLength().
  var buffers = [
    'comment', 'sgmlDecl', 'textNode', 'tagName',
    'doctype', 'procInstName', 'procInstBody', 'entity',
    'attribName', 'attribValue', 'cdata', 'script',
  ]
  // Every event name the parser can emit (handlers are looked up as
  // 'on' + name on the parser object).
  sax.EVENTS = [
    'text', 'processinginstruction', 'sgmldeclaration', 'doctype', 'comment',
    'opentagstart', 'attribute', 'opentag', 'closetag',
    'opencdata', 'cdata', 'closecdata',
    'error', 'end', 'ready', 'script',
    'opennamespace', 'closenamespace',
  ]
  /**
   * Parser constructor; also used by end() to reset an existing parser.
   * May be called with or without `new`.
   *
   * @param {boolean} strict - truthy for strict parsing
   * @param {object} [opt] - options object; note that defaults are written
   *   back onto this same object
   */
  function SAXParser(strict, opt) {
    if (!(this instanceof SAXParser)) {
      return new SAXParser(strict, opt)
    }

    var self = this
    var options = opt || {}

    clearBuffers(self)
    self.c = ''
    self.q = ''
    self.bufferCheckPosition = sax.MAX_BUFFER_LENGTH

    self.opt = options
    options.lowercase = options.lowercase || options.lowercasetags
    // name of the String method used to normalize names in non-strict mode
    self.looseCase = options.lowercase ? 'toLowerCase' : 'toUpperCase'
    options.maxEntityCount = options.maxEntityCount || 512
    options.maxEntityDepth = options.maxEntityDepth || 4
    self.entityDepth = 0
    self.entityCount = 0

    self.tags = []
    self.sawRoot = false
    self.closedRoot = false
    self.closed = false
    self.error = null
    self.tag = null
    self.strict = !!strict
    self.noscript = !!(strict || options.noscript)
    self.state = S.BEGIN

    // per-parser entity table, inheriting from the shared one
    self.strictEntities = options.strictEntities
    self.ENTITIES = Object.create(
      self.strictEntities ? sax.XML_ENTITIES : sax.ENTITIES
    )
    self.attribList = []

    // namespaces form a prototype chain: parser.ns is the root, and each
    // tag's ns object protos to its parent tag's.
    if (options.xmlns) {
      self.ns = Object.create(rootNS)
    }

    // unless configured explicitly, unquoted attribute values are
    // allowed only outside strict mode
    if (options.unquotedAttributeValues === undefined) {
      options.unquotedAttributeValues = !strict
    }

    // position tracking is mostly just for error reporting
    self.trackPosition = options.position !== false
    if (self.trackPosition) {
      self.column = 0
      self.line = 0
      self.position = 0
    }

    emit(self, 'onready')
  }
  // Minimal shims for engines lacking Object.create / Object.keys.
  // Both are skipped when the native function already exists.
  if (!Object.create) {
    Object.create = function (o) {
      function F() {}
      F.prototype = o
      return new F()
    }
  }

  if (!Object.keys) {
    Object.keys = function (o) {
      var names = []
      for (var key in o) {
        if (o.hasOwnProperty(key)) {
          names.push(key)
        }
      }
      return names
    }
  }
  /**
   * Measures every parser buffer against the configured maximum, deals with
   * any that are too long, and schedules the next check by updating
   * parser.bufferCheckPosition.
   */
  function checkBufferLength(parser) {
    var limit = Math.max(sax.MAX_BUFFER_LENGTH, 10)
    var longest = 0

    for (var idx = 0, total = buffers.length; idx < total; idx++) {
      var name = buffers[idx]
      var size = parser[name].length

      if (size > limit) {
        // Text/cdata/script nodes can legitimately get big, and since they
        // are buffered we can get here under normal conditions. Emit what
        // has accumulated so the buffer stops growing; for any other
        // buffer an overrun is reported as an error.
        if (name === 'textNode') {
          closeText(parser)
        } else if (name === 'cdata') {
          emitNode(parser, 'oncdata', parser.cdata)
          parser.cdata = ''
        } else if (name === 'script') {
          emitNode(parser, 'onscript', parser.script)
          parser.script = ''
        } else {
          error(parser, 'Max buffer length exceeded: ' + name)
        }
      }

      // uses the length measured before any flush above
      longest = Math.max(longest, size)
    }

    // schedule the next check for the earliest possible buffer overrun.
    parser.bufferCheckPosition =
      sax.MAX_BUFFER_LENGTH - longest + parser.position
  }
  // Resets every buffer property listed in `buffers` to the empty string.
  function clearBuffers(parser) {
    buffers.forEach(function (name) {
      parser[name] = ''
    })
  }
  // Emits whatever text, cdata and script content is currently buffered
  // (in that order) and empties the cdata/script buffers afterwards.
  function flushBuffers(parser) {
    closeText(parser)

    var pending = [
      ['cdata', 'oncdata'],
      ['script', 'onscript'],
    ]
    pending.forEach(function (pair) {
      var field = pair[0]
      if (parser[field] !== '') {
        emitNode(parser, pair[1], parser[field])
        parser[field] = ''
      }
    })
  }
  // Public parser methods; each delegates to a module-level function.
  SAXParser.prototype = {
    // finish parsing (no return value)
    end: function () {
      end(this)
    },

    write: write,

    // clear the stored error so writing can continue; chainable
    resume: function () {
      this.error = null
      return this
    },

    // equivalent to write(null)
    close: function () {
      return this.write(null)
    },

    // emit any buffered text/cdata/script content now
    flush: function () {
      flushBuffers(this)
    },
  }
  // Base class for SAXStream: node's Stream when `require('stream')` works,
  // otherwise an empty constructor so the rest of the file still loads.
  var Stream
  try {
    Stream = require('stream').Stream
  } catch (ex) {
    Stream = function () {}
  }
  if (!Stream) {
    Stream = function () {}
  }
  // Events forwarded from the parser to the stream through the generic
  // wrapper; 'error' and 'end' are wired up separately in SAXStream.
  var streamWraps = sax.EVENTS.filter(function (ev) {
    return !(ev === 'error' || ev === 'end')
  })
  // Factory form of `new SAXStream(strict, opt)`.
  function createStream(strict, opt) {
    var stream = new SAXStream(strict, opt)
    return stream
  }
  /**
   * Stream wrapper around a SAXParser. May be called with or without `new`.
   * Parser 'end' and 'error' callbacks are re-emitted as stream events, and
   * an `on<event>` accessor is defined for every name in streamWraps.
   */
  function SAXStream(strict, opt) {
    if (!(this instanceof SAXStream)) {
      return new SAXStream(strict, opt)
    }

    Stream.apply(this)

    var me = this
    var parser = new SAXParser(strict, opt)
    this._parser = parser
    this.writable = true
    this.readable = true

    parser.onend = function () {
      me.emit('end')
    }

    parser.onerror = function (er) {
      me.emit('error', er)
      // if didn't throw, then means error was handled.
      // go ahead and clear error, so we can write again.
      me._parser.error = null
    }

    // created lazily by write() when the first Buffer arrives
    this._decoder = null

    streamWraps.forEach(function (ev) {
      var slot = 'on' + ev
      Object.defineProperty(me, slot, {
        enumerable: true,
        configurable: false,
        get: function () {
          return me._parser[slot]
        },
        set: function (h) {
          if (h) {
            // a real handler is registered as an ordinary listener
            me.on(ev, h)
            return
          }
          // a falsy value removes all listeners and clears the parser hook
          me.removeAllListeners(ev)
          me._parser[slot] = h
          return h
        },
      })
    })
  }
  // Inherit from Stream and restore `constructor` as a non-enumerable,
  // non-writable, non-configurable property.
  SAXStream.prototype = Object.create(Stream.prototype)
  Object.defineProperty(SAXStream.prototype, 'constructor', {
    value: SAXStream,
  })
  /**
   * Feeds a chunk to the parser and re-emits it as a 'data' event.
   * Buffer chunks are decoded as UTF-8 with a streaming TextDecoder, so a
   * multi-byte sequence split across chunks is decoded correctly.
   *
   * @returns {boolean} always true
   */
  SAXStream.prototype.write = function (data) {
    var canDetectBuffers =
      typeof Buffer === 'function' && typeof Buffer.isBuffer === 'function'

    if (canDetectBuffers && Buffer.isBuffer(data)) {
      if (!this._decoder) {
        this._decoder = new TextDecoder('utf8')
      }
      data = this._decoder.decode(data, { stream: true })
    }

    var text = data.toString()
    this._parser.write(text)
    this.emit('data', data)
    return true
  }
  /**
   * Optionally writes a final chunk, flushes anything still held by the
   * TextDecoder, then ends the parser.
   *
   * @returns {boolean} always true
   */
  SAXStream.prototype.end = function (chunk) {
    var hasChunk = chunk && chunk.length
    if (hasChunk) {
      this.write(chunk)
    }

    // Flush any remaining decoded data from the TextDecoder
    var decoder = this._decoder
    if (decoder) {
      var tail = decoder.decode()
      if (tail) {
        this._parser.write(tail)
        this.emit('data', tail)
      }
    }

    this._parser.end()
    return true
  }
  /**
   * Registers a listener. The first time a wrapped parser event is listened
   * for, a forwarding hook is installed on the parser that re-emits the
   * event (with all of its arguments) on the stream.
   */
  SAXStream.prototype.on = function (ev, handler) {
    var me = this
    var slot = 'on' + ev
    var isWrapped = streamWraps.indexOf(ev) !== -1

    if (!me._parser[slot] && isWrapped) {
      me._parser[slot] = function () {
        // copy the arguments and put the event name in front
        var args = Array.prototype.slice.call(arguments)
        args.unshift(ev)
        me.emit.apply(me, args)
      }
    }

    return Stream.prototype.on.call(me, ev, handler)
  }
  // this really needs to be replaced with character classes.
  // XML allows all manner of ridiculous numbers and digits.

  // Literal markers that follow "<!" in a CDATA section / DOCTYPE declaration.
  var CDATA = '[CDATA['
  var DOCTYPE = 'DOCTYPE'

  // Namespace URIs of the reserved `xml` and `xmlns` prefixes, and the root
  // binding table that every parser's namespace chain inherits from.
  var XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
  var XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/'
  var rootNS = {
    xml: XML_NAMESPACE,
    xmlns: XMLNS_NAMESPACE,
  }
  // http://www.w3.org/TR/REC-xml/#NT-NameStartChar
  // This implementation works on strings, a single character at a time
  // as such, it cannot ever support astral-plane characters (10000-EFFFF)
  // without a significant breaking change to either this parser, or the
  // JavaScript language. Implementation of an emoji-capable xml parser
  // is left as an exercise for the reader.

  // Character-class contents shared by the four single-character matchers.
  var NAME_START_CHARS =
    ':_A-Za-z' +
    '\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u02FF' +
    '\\u0370-\\u037D\\u037F-\\u1FFF\\u200C-\\u200D' +
    '\\u2070-\\u218F\\u2C00-\\u2FEF\\u3001-\\uD7FF' +
    '\\uF900-\\uFDCF\\uFDF0-\\uFFFD'
  // additional characters allowed after the first one
  var NAME_BODY_EXTRA_CHARS = '\\u00B7\\u0300-\\u036F\\u203F-\\u2040.\\d-'

  var nameStart = new RegExp('[' + NAME_START_CHARS + ']')
  var nameBody = new RegExp(
    '[' + NAME_START_CHARS + NAME_BODY_EXTRA_CHARS + ']'
  )
  // entity names additionally accept '#' (numeric character references)
  var entityStart = new RegExp('[#' + NAME_START_CHARS + ']')
  var entityBody = new RegExp(
    '[#' + NAME_START_CHARS + NAME_BODY_EXTRA_CHARS + ']'
  )
  // True when c is exactly a space, newline, carriage return or tab.
  function isWhitespace(c) {
    switch (c) {
      case ' ':
      case '\n':
      case '\r':
      case '\t':
        return true
      default:
        return false
    }
  }
  // True when c is a double or single quote character.
  function isQuote(c) {
    if (c === '"') {
      return true
    }
    return c === "'"
  }
  // True when c is '>' or whitespace.
  function isAttribEnd(c) {
    if (c === '>') {
      return true
    }
    return isWhitespace(c)
  }
  // Tests c against regex and returns the boolean result.
  function isMatch(regex, c) {
    var matched = regex.test(c)
    return matched
  }
  // Negation of isMatch(regex, c).
  function notMatch(regex, c) {
    var matched = isMatch(regex, c)
    return !matched
  }
  // Parser states. Each name receives the next integer id, in listing order;
  // S is the running counter while the table is being built.
  var S = 0
  sax.STATE = {}
  ;[
    'BEGIN', // leading byte order mark or whitespace
    'BEGIN_WHITESPACE', // leading whitespace
    'TEXT', // general stuff
    'TEXT_ENTITY', // &amp and such.
    'OPEN_WAKA', // <
    'SGML_DECL', // <!BLARG
    'SGML_DECL_QUOTED', // <!BLARG foo "bar
    'DOCTYPE', // <!DOCTYPE
    'DOCTYPE_QUOTED', // <!DOCTYPE "//blah
    'DOCTYPE_DTD', // <!DOCTYPE "//blah" [ ...
    'DOCTYPE_DTD_QUOTED', // <!DOCTYPE "//blah" [ "foo
    'COMMENT_STARTING', // <!-
    'COMMENT', // <!--
    'COMMENT_ENDING', // <!-- blah -
    'COMMENT_ENDED', // <!-- blah --
    'CDATA', // <![CDATA[ something
    'CDATA_ENDING', // ]
    'CDATA_ENDING_2', // ]]
    'PROC_INST', // <?hi
    'PROC_INST_BODY', // <?hi there
    'PROC_INST_ENDING', // <?hi "there" ?
    'OPEN_TAG', // <strong
    'OPEN_TAG_SLASH', // <strong /
    'ATTRIB', // <a
    'ATTRIB_NAME', // <a foo
    'ATTRIB_NAME_SAW_WHITE', // <a foo _
    'ATTRIB_VALUE', // <a foo=
    'ATTRIB_VALUE_QUOTED', // <a foo="bar
    'ATTRIB_VALUE_CLOSED', // <a foo="bar"
    'ATTRIB_VALUE_UNQUOTED', // <a foo=bar
    'ATTRIB_VALUE_ENTITY_Q', // <foo bar="&quot;"
    'ATTRIB_VALUE_ENTITY_U', // <foo bar=&quot
    'CLOSE_TAG', // </a
    'CLOSE_TAG_SAW_WHITE', // </a >
    'SCRIPT', // <script> ...
    'SCRIPT_ENDING', // <script> ... <
  ].forEach(function (name) {
    sax.STATE[name] = S++
  })
  // The five entities predefined by XML; used as the entity table when the
  // `strictEntities` option is set.
  sax.XML_ENTITIES = { amp: '&', gt: '>', lt: '<', quot: '"', apos: "'" }
  // Default (non-strict) entity table. Entries are written either as the
  // replacement string itself or as a numeric character code; the loop
  // after the table converts every numeric entry to its one-character string.
  sax.ENTITIES = {
    amp: '&', gt: '>', lt: '<', quot: '"', apos: "'",

    AElig: 198, Aacute: 193, Acirc: 194, Agrave: 192, Aring: 197,
    Atilde: 195, Auml: 196, Ccedil: 199, ETH: 208, Eacute: 201,
    Ecirc: 202, Egrave: 200, Euml: 203, Iacute: 205, Icirc: 206,
    Igrave: 204, Iuml: 207, Ntilde: 209, Oacute: 211, Ocirc: 212,
    Ograve: 210, Oslash: 216, Otilde: 213, Ouml: 214, THORN: 222,
    Uacute: 218, Ucirc: 219, Ugrave: 217, Uuml: 220, Yacute: 221,

    aacute: 225, acirc: 226, aelig: 230, agrave: 224, aring: 229,
    atilde: 227, auml: 228, ccedil: 231, eacute: 233, ecirc: 234,
    egrave: 232, eth: 240, euml: 235, iacute: 237, icirc: 238,
    igrave: 236, iuml: 239, ntilde: 241, oacute: 243, ocirc: 244,
    ograve: 242, oslash: 248, otilde: 245, ouml: 246, szlig: 223,
    thorn: 254, uacute: 250, ucirc: 251, ugrave: 249, uuml: 252,
    yacute: 253, yuml: 255,

    copy: 169, reg: 174, nbsp: 160, iexcl: 161, cent: 162,
    pound: 163, curren: 164, yen: 165, brvbar: 166, sect: 167,
    uml: 168, ordf: 170, laquo: 171, not: 172, shy: 173,
    macr: 175, deg: 176, plusmn: 177, sup1: 185, sup2: 178,
    sup3: 179, acute: 180, micro: 181, para: 182, middot: 183,
    cedil: 184, ordm: 186, raquo: 187, frac14: 188, frac12: 189,
    frac34: 190, iquest: 191, times: 215, divide: 247,

    OElig: 338, oelig: 339, Scaron: 352, scaron: 353, Yuml: 376,
    fnof: 402, circ: 710, tilde: 732,

    Alpha: 913, Beta: 914, Gamma: 915, Delta: 916, Epsilon: 917,
    Zeta: 918, Eta: 919, Theta: 920, Iota: 921, Kappa: 922,
    Lambda: 923, Mu: 924, Nu: 925, Xi: 926, Omicron: 927,
    Pi: 928, Rho: 929, Sigma: 931, Tau: 932, Upsilon: 933,
    Phi: 934, Chi: 935, Psi: 936, Omega: 937,

    alpha: 945, beta: 946, gamma: 947, delta: 948, epsilon: 949,
    zeta: 950, eta: 951, theta: 952, iota: 953, kappa: 954,
    lambda: 955, mu: 956, nu: 957, xi: 958, omicron: 959,
    pi: 960, rho: 961, sigmaf: 962, sigma: 963, tau: 964,
    upsilon: 965, phi: 966, chi: 967, psi: 968, omega: 969,
    thetasym: 977, upsih: 978, piv: 982,

    ensp: 8194, emsp: 8195, thinsp: 8201, zwnj: 8204, zwj: 8205,
    lrm: 8206, rlm: 8207, ndash: 8211, mdash: 8212, lsquo: 8216,
    rsquo: 8217, sbquo: 8218, ldquo: 8220, rdquo: 8221, bdquo: 8222,
    dagger: 8224, Dagger: 8225, bull: 8226, hellip: 8230, permil: 8240,
    prime: 8242, Prime: 8243, lsaquo: 8249, rsaquo: 8250, oline: 8254,
    frasl: 8260, euro: 8364, image: 8465, weierp: 8472, real: 8476,
    trade: 8482, alefsym: 8501,

    larr: 8592, uarr: 8593, rarr: 8594, darr: 8595, harr: 8596,
    crarr: 8629, lArr: 8656, uArr: 8657, rArr: 8658, dArr: 8659,
    hArr: 8660,

    forall: 8704, part: 8706, exist: 8707, empty: 8709, nabla: 8711,
    isin: 8712, notin: 8713, ni: 8715, prod: 8719, sum: 8721,
    minus: 8722, lowast: 8727, radic: 8730, prop: 8733, infin: 8734,
    ang: 8736, and: 8743, or: 8744, cap: 8745, cup: 8746,
    int: 8747, there4: 8756, sim: 8764, cong: 8773, asymp: 8776,
    ne: 8800, equiv: 8801, le: 8804, ge: 8805, sub: 8834,
    sup: 8835, nsub: 8836, sube: 8838, supe: 8839, oplus: 8853,
    otimes: 8855, perp: 8869, sdot: 8901, lceil: 8968, rceil: 8969,
    lfloor: 8970, rfloor: 8971, lang: 9001, rang: 9002, loz: 9674,
    spades: 9824, clubs: 9827, hearts: 9829, diams: 9830,
  }

  // Replace each numeric code with the character it denotes; string entries
  // are left untouched.
  Object.keys(sax.ENTITIES).forEach(function (name) {
    var value = sax.ENTITIES[name]
    if (typeof value === 'number') {
      sax.ENTITIES[name] = String.fromCharCode(value)
    } else {
      sax.ENTITIES[name] = value
    }
  })
  // Add the reverse mapping (numeric id -> state name) next to the forward
  // one. The names are captured up front so the numeric keys added during
  // the loop are never iterated themselves.
  var stateNames = Object.keys(sax.STATE)
  for (var k = 0; k < stateNames.length; k++) {
    var s = stateNames[k]
    sax.STATE[sax.STATE[s]] = s
  }

  // shorthand: from here on S refers to the state table itself
  S = sax.STATE
  // Calls the handler stored at parser[event] with `data`, if one is set.
  function emit(parser, event, data) {
    if (parser[event]) {
      parser[event](data)
    }
  }
  // Emits a node event, first flushing any buffered text so that the text
  // event is delivered before the node that follows it.
  function emitNode(parser, nodeType, data) {
    var hasPendingText = !!parser.textNode
    if (hasPendingText) {
      closeText(parser)
    }
    emit(parser, nodeType, data)
  }
  // Applies the text options to the buffered text, emits it as 'ontext'
  // when anything is left, and then empties the text buffer.
  function closeText(parser) {
    var text = textopts(parser.opt, parser.textNode)
    parser.textNode = text
    if (text) {
      emit(parser, 'ontext', text)
    }
    parser.textNode = ''
  }
  // Applies the `trim` and `normalize` options (in that order) to a text
  // node and returns the result. `normalize` collapses each whitespace run
  // into a single space.
  function textopts(opt, text) {
    var result = text
    if (opt.trim) {
      result = result.trim()
    }
    if (opt.normalize) {
      result = result.replace(/\s+/g, ' ')
    }
    return result
  }
  /**
   * Flushes pending text, wraps the message in an Error (with line, column
   * and current character appended when position tracking is on), stores it
   * on parser.error and emits 'onerror'.
   *
   * @returns the parser
   */
  function error(parser, er) {
    closeText(parser)

    var message = er
    if (parser.trackPosition) {
      message =
        message +
        '\nLine: ' + parser.line +
        '\nColumn: ' + parser.column +
        '\nChar: ' + parser.c
    }

    var failure = new Error(message)
    parser.error = failure
    emit(parser, 'onerror', failure)
    return parser
  }
  /**
   * Finishes a parse: reports an unclosed root (strict mode only) or an end
   * in the middle of a construct, flushes text, emits 'onend', and then
   * re-runs the constructor on the parser to reset it.
   *
   * @returns the parser
   */
  function end(parser) {
    var rootLeftOpen = parser.sawRoot && !parser.closedRoot
    if (rootLeftOpen) {
      strictFail(parser, 'Unclosed root tag')
    }

    // only these states are acceptable places for the input to stop
    var restingStates = [S.BEGIN, S.BEGIN_WHITESPACE, S.TEXT]
    if (restingStates.indexOf(parser.state) === -1) {
      error(parser, 'Unexpected end')
    }

    closeText(parser)
    parser.c = ''
    parser.closed = true
    emit(parser, 'onend')

    // reinitialize with the same mode and options
    SAXParser.call(parser, parser.strict, parser.opt)
    return parser
  }
  /**
   * Reports `message` as an error, but only when the parser is strict.
   *
   * @throws {Error} when the first argument is not a SAXParser instance
   */
  function strictFail(parser, message) {
    var isParser = typeof parser === 'object' && parser instanceof SAXParser
    if (!isParser) {
      throw new Error('bad call to strictFail')
    }
    if (!parser.strict) {
      return
    }
    error(parser, message)
  }
  /**
   * Starts a new tag from the buffered tag name: normalizes the name's case
   * in non-strict mode, creates parser.tag, resets the pending attribute
   * list and emits 'onopentagstart'.
   */
  function newTag(parser) {
    if (!parser.strict) {
      parser.tagName = parser.tagName[parser.looseCase]()
    }

    // the innermost open tag, or the parser itself at the document root
    var enclosing = parser.tags[parser.tags.length - 1] || parser
    var tag = { name: parser.tagName, attributes: {} }
    parser.tag = tag

    // will be overridden if tag contains an xmlns="foo" or xmlns:foo="bar"
    if (parser.opt.xmlns) {
      tag.ns = enclosing.ns
    }

    parser.attribList.length = 0
    emitNode(parser, 'onopentagstart', tag)
  }
  /**
   * Splits a qualified name into { prefix, local }.
   * A name without ':' gets an empty prefix; for names with more than one
   * ':' only the first two segments are used. An attribute named exactly
   * "xmlns" is reported as prefix "xmlns" with an empty local part.
   */
  function qname(name, attribute) {
    // <x "xmlns"="http://foo">
    if (attribute && name === 'xmlns') {
      return { prefix: 'xmlns', local: '' }
    }

    var parts = name.indexOf(':') < 0 ? ['', name] : name.split(':')
    return { prefix: parts[0], local: parts[1] }
  }
  /**
   * Commits the attribute currently buffered in parser.attribName /
   * parser.attribValue to the tag being opened, then clears both buffers.
   *
   * - Non-strict mode normalizes the attribute name's case first.
   * - A repeated attribute name is dropped: the first occurrence wins.
   * - In xmlns mode, `xmlns` / `xmlns:*` attributes update the tag's
   *   namespace bindings immediately, and the attribute itself is queued on
   *   parser.attribList so openTag() can emit it once all bindings are known.
   * - Otherwise the attribute is stored on the tag and emitted right away.
   */
  function attrib(parser) {
    if (!parser.strict) {
      parser.attribName = parser.attribName[parser.looseCase]()
    }

    var attribName = parser.attribName

    // attribList holds [name, value] pairs, so compare against the name
    // slot; searching the list for the bare name string can never match.
    var alreadyQueued = parser.attribList.some(function (pair) {
      return pair[0] === attribName
    })
    // Go through Object.prototype: an attribute literally named
    // "hasOwnProperty" would otherwise shadow the method on this object.
    var alreadyStored = Object.prototype.hasOwnProperty.call(
      parser.tag.attributes,
      attribName
    )
    if (alreadyQueued || alreadyStored) {
      parser.attribName = parser.attribValue = ''
      return
    }

    if (parser.opt.xmlns) {
      var qn = qname(parser.attribName, true)
      var prefix = qn.prefix
      var local = qn.local

      if (prefix === 'xmlns') {
        // namespace binding attribute. push the binding into scope
        if (local === 'xml' && parser.attribValue !== XML_NAMESPACE) {
          strictFail(
            parser,
            'xml: prefix must be bound to ' +
              XML_NAMESPACE +
              '\n' +
              'Actual: ' +
              parser.attribValue
          )
        } else if (
          local === 'xmlns' &&
          parser.attribValue !== XMLNS_NAMESPACE
        ) {
          strictFail(
            parser,
            'xmlns: prefix must be bound to ' +
              XMLNS_NAMESPACE +
              '\n' +
              'Actual: ' +
              parser.attribValue
          )
        } else {
          var tag = parser.tag
          var parent = parser.tags[parser.tags.length - 1] || parser
          // first binding on this tag: give it its own scope object that
          // inherits from the parent's instead of mutating the shared one
          if (tag.ns === parent.ns) {
            tag.ns = Object.create(parent.ns)
          }
          tag.ns[local] = parser.attribValue
        }
      }

      // defer onattribute events until all attributes have been seen
      // so any new bindings can take effect. preserve attribute order
      // so deferred events can be emitted in document order
      parser.attribList.push([parser.attribName, parser.attribValue])
    } else {
      // in non-xmlns mode, we can emit the event right away
      parser.tag.attributes[parser.attribName] = parser.attribValue
      emitNode(parser, 'onattribute', {
        name: parser.attribName,
        value: parser.attribValue,
      })
    }

    parser.attribName = parser.attribValue = ''
  }
  /**
   * Completes the tag currently being opened: in xmlns mode resolves the
   * tag's namespace and emits the deferred namespace/attribute events, then
   * pushes the tag on the stack and emits 'onopentag'.
   *
   * @param parser
   * @param selfClosing - truthy for `<tag/>`; the tag is then left in
   *   parser.tag and the state is not changed here
   */
  function openTag(parser, selfClosing) {
    if (parser.opt.xmlns) {
      var tag = parser.tag

      // add namespace info to tag
      var tagQName = qname(parser.tagName)
      tag.prefix = tagQName.prefix
      tag.local = tagQName.local
      tag.uri = tag.ns[tagQName.prefix] || ''
      if (tag.prefix && !tag.uri) {
        strictFail(
          parser,
          'Unbound namespace prefix: ' + JSON.stringify(parser.tagName)
        )
        // fall back to using the prefix itself as the uri
        tag.uri = tagQName.prefix
      }

      // emit namespace binding events for bindings introduced by this tag
      var enclosing = parser.tags[parser.tags.length - 1] || parser
      if (tag.ns && enclosing.ns !== tag.ns) {
        Object.keys(tag.ns).forEach(function (boundPrefix) {
          emitNode(parser, 'onopennamespace', {
            prefix: boundPrefix,
            uri: tag.ns[boundPrefix],
          })
        })
      }

      // handle deferred onattribute events
      // Note: do not apply default ns to attributes:
      // http://www.w3.org/TR/REC-xml-names/#defaulting
      for (var idx = 0, count = parser.attribList.length; idx < count; idx++) {
        var pair = parser.attribList[idx]
        var attrName = pair[0]
        var attrQName = qname(attrName, true)
        var attrPrefix = attrQName.prefix
        var attrUri = ''
        if (attrPrefix !== '') {
          attrUri = tag.ns[attrPrefix] || ''
        }
        var attribute = {
          name: attrName,
          value: pair[1],
          prefix: attrPrefix,
          local: attrQName.local,
          uri: attrUri,
        }

        // if there's any attributes with an undefined namespace,
        // then fail on them now.
        if (attrPrefix && attrPrefix !== 'xmlns' && !attrUri) {
          strictFail(
            parser,
            'Unbound namespace prefix: ' + JSON.stringify(attrPrefix)
          )
          attribute.uri = attrPrefix
        }

        parser.tag.attributes[attrName] = attribute
        emitNode(parser, 'onattribute', attribute)
      }
      parser.attribList.length = 0
    }

    parser.tag.isSelfClosing = !!selfClosing

    // process the tag
    parser.sawRoot = true
    parser.tags.push(parser.tag)
    emitNode(parser, 'onopentag', parser.tag)

    if (!selfClosing) {
      // special case for <script> in non-strict mode.
      var entersScript =
        !parser.noscript && parser.tagName.toLowerCase() === 'script'
      parser.state = entersScript ? S.SCRIPT : S.TEXT
      parser.tag = null
      parser.tagName = ''
    }

    parser.attribName = parser.attribValue = ''
    parser.attribList.length = 0
  }
  /**
   * Handles a completed closing tag whose name is in parser.tagName.
   *
   * - An empty name is reported (strict mode) and kept as literal text.
   * - While script content is buffered, any close tag other than `script`
   *   is appended to the script text; `</script>` emits the script.
   * - Otherwise the tag stack is searched from the innermost tag outwards;
   *   every tag down to and including the match is popped and reported via
   *   'onclosetag', followed by 'onclosenamespace' events in xmlns mode.
   * - A close tag with no matching open tag is kept as literal text.
   */
  function closeTag(parser) {
    if (!parser.tagName) {
      strictFail(parser, 'Weird empty close tag.')
      parser.textNode += '</>'
      parser.state = S.TEXT
      return
    }

    if (parser.script) {
      if (parser.tagName !== 'script') {
        parser.script += '</' + parser.tagName + '>'
        parser.tagName = ''
        parser.state = S.SCRIPT
        return
      }
      emitNode(parser, 'onscript', parser.script)
      parser.script = ''
    }

    // first make sure that the closing tag actually exists.
    // <a><b></c></b></a> will close everything, otherwise.
    var t = parser.tags.length
    var tagName = parser.tagName
    if (!parser.strict) {
      tagName = tagName[parser.looseCase]()
    }
    var closeTo = tagName
    while (t--) {
      var close = parser.tags[t]
      if (close.name !== closeTo) {
        // fail the first time in strict mode
        strictFail(parser, 'Unexpected close tag')
      } else {
        break
      }
    }

    // didn't find it. we already failed for strict, so just abort.
    if (t < 0) {
      strictFail(parser, 'Unmatched closing tag: ' + parser.tagName)
      parser.textNode += '</' + parser.tagName + '>'
      parser.state = S.TEXT
      return
    }

    parser.tagName = tagName
    var s = parser.tags.length
    // pop every tag from the top of the stack down to index t
    while (s-- > t) {
      var tag = (parser.tag = parser.tags.pop())
      parser.tagName = parser.tag.name
      emitNode(parser, 'onclosetag', parser.tagName)

      var parent = parser.tags[parser.tags.length - 1] || parser
      if (parser.opt.xmlns && tag.ns !== parent.ns) {
        // remove namespace bindings introduced by tag
        Object.keys(tag.ns).forEach(function (p) {
          var n = tag.ns[p]
          emitNode(parser, 'onclosenamespace', { prefix: p, uri: n })
        })
      }
    }

    // index 0 is the root element
    if (t === 0) parser.closedRoot = true
    parser.tagName = parser.attribValue = parser.attribName = ''
    parser.attribList.length = 0
    parser.state = S.TEXT
  }
  /**
   * Resolve the entity name currently buffered in `parser.entity`.
   *
   * Resolution order:
   *  1. a named entity in `parser.ENTITIES`, first by exact name and then
   *     by lower-cased name;
   *  2. a numeric character reference (`#NNN` decimal or `#xHHH` hex,
   *     matched case-insensitively).
   *
   * Only non-empty string values count as named entities. `ENTITIES` may
   * inherit from `Object.prototype`, so a plain truthiness test would let
   * names such as `constructor`, `toString` or `__proto__` resolve to
   * functions/objects instead of replacement text.
   *
   * @param {Object} parser parser state; reads `entity` and `ENTITIES`.
   * @returns {string} the replacement text, or the original `&name;`
   *   source text (after a strictFail) when the entity is not valid.
   */
  function parseEntity(parser) {
    var entity = parser.entity
    var entityLC = entity.toLowerCase()
    var num
    var numStr = ''

    var named = parser.ENTITIES[entity]
    if (typeof named === 'string' && named) {
      return named
    }
    named = parser.ENTITIES[entityLC]
    if (typeof named === 'string' && named) {
      return named
    }

    entity = entityLC
    if (entity.charAt(0) === '#') {
      if (entity.charAt(1) === 'x') {
        entity = entity.slice(2)
        num = parseInt(entity, 16)
        numStr = num.toString(16)
      } else {
        entity = entity.slice(1)
        num = parseInt(entity, 10)
        numStr = num.toString(10)
      }
    }

    // Drop leading zeros so the digits can be compared with the re-rendered
    // number; a mismatch means parseInt ignored trailing junk.
    entity = entity.replace(/^0+/, '')
    if (
      isNaN(num) ||
      numStr.toLowerCase() !== entity ||
      num < 0 ||
      num > 0x10ffff
    ) {
      strictFail(parser, 'Invalid character entity')
      return '&' + parser.entity + ';'
    }

    return String.fromCodePoint(num)
  }
  /**
   * Process one character seen before the first tag of the document.
   *
   * `<` starts a tag and records where it began; whitespace is ignored;
   * any other character is reported via strictFail and becomes the start
   * of a text node.
   *
   * @param {Object} parser parser state; mutated in place.
   * @param {string} c the character being processed.
   */
  function beginWhiteSpace(parser, c) {
    if (c === '<') {
      parser.state = S.OPEN_WAKA
      parser.startTagPosition = parser.position
      return
    }

    if (isWhitespace(c)) {
      return
    }

    // have to process this as a text node.
    // weird, but happens.
    strictFail(parser, 'Non-whitespace before first tag.')
    parser.textNode = c
    parser.state = S.TEXT
  }
  /**
   * Return the character of `chunk` at index `i`, or the empty string when
   * `i` is at or beyond the end of `chunk`.
   *
   * @param {string} chunk the text being scanned.
   * @param {number} i zero-based index.
   * @returns {string} a single character, or ''.
   */
  function charAt(chunk, i) {
    return i < chunk.length ? chunk.charAt(i) : ''
  }
  /**
   * Feed a chunk of text to the parser (invoked with the parser as `this`).
   *
   * The chunk is consumed one character at a time by a state machine keyed
   * on `parser.state`; parser buffers are updated and events are emitted
   * through emitNode as constructs complete. State persists on the parser
   * between calls, so a document may be split across chunks at any point.
   *
   * - If `this.error` is set, it is thrown.
   * - If the parser is closed, the result of error(...) is returned.
   * - A `null` chunk ends the parse and returns the result of end(parser).
   * - An object chunk is converted with `toString()` first.
   *
   * @param {string|Object|null} chunk the text to parse.
   * @returns {Object} the parser, except in the closed/null cases above.
   * @throws {Error} when `parser.state` is not a known state.
   */
  function write(chunk) {
    var parser = this
    if (this.error) {
      throw this.error
    }
    if (parser.closed) {
      return error(
        parser,
        'Cannot write after close. Assign an onready handler.'
      )
    }
    if (chunk === null) {
      return end(parser)
    }
    if (typeof chunk === 'object') {
      chunk = chunk.toString()
    }
    var i = 0
    var c = ''
    while (true) {
      c = charAt(chunk, i++)
      parser.c = c

      // charAt yields '' past the end of the chunk.
      if (!c) {
        break
      }

      if (parser.trackPosition) {
        parser.position++
        if (c === '\n') {
          parser.line++
          parser.column = 0
        } else {
          parser.column++
        }
      }

      switch (parser.state) {
        case S.BEGIN:
          parser.state = S.BEGIN_WHITESPACE
          // Skip a leading byte-order mark.
          if (c === '\uFEFF') {
            continue
          }
          beginWhiteSpace(parser, c)
          continue

        case S.BEGIN_WHITESPACE:
          beginWhiteSpace(parser, c)
          continue

        case S.TEXT:
          if (parser.sawRoot && !parser.closedRoot) {
            // Fast path: scan ahead to the next '<' or '&' (or the end of
            // the chunk) and append the whole run in one go.
            var starti = i - 1
            while (c && c !== '<' && c !== '&') {
              c = charAt(chunk, i++)
              if (c && parser.trackPosition) {
                parser.position++
                if (c === '\n') {
                  parser.line++
                  parser.column = 0
                } else {
                  parser.column++
                }
              }
            }
            parser.textNode += chunk.substring(starti, i - 1)
          }
          if (
            c === '<' &&
            !(parser.sawRoot && parser.closedRoot && !parser.strict)
          ) {
            parser.state = S.OPEN_WAKA
            parser.startTagPosition = parser.position
          } else {
            if (
              !isWhitespace(c) &&
              (!parser.sawRoot || parser.closedRoot)
            ) {
              strictFail(parser, 'Text data outside of root node.')
            }
            if (c === '&') {
              parser.state = S.TEXT_ENTITY
            } else {
              parser.textNode += c
            }
          }
          continue

        case S.SCRIPT:
          // only non-strict
          if (c === '<') {
            parser.state = S.SCRIPT_ENDING
          } else {
            parser.script += c
          }
          continue

        case S.SCRIPT_ENDING:
          if (c === '/') {
            parser.state = S.CLOSE_TAG
          } else {
            parser.script += '<' + c
            parser.state = S.SCRIPT
          }
          continue

        case S.OPEN_WAKA:
          // either a /, ?, !, or text is coming next.
          if (c === '!') {
            parser.state = S.SGML_DECL
            parser.sgmlDecl = ''
          } else if (isWhitespace(c)) {
            // wait for it...
          } else if (isMatch(nameStart, c)) {
            parser.state = S.OPEN_TAG
            parser.tagName = c
          } else if (c === '/') {
            parser.state = S.CLOSE_TAG
            parser.tagName = ''
          } else if (c === '?') {
            parser.state = S.PROC_INST
            parser.procInstName = parser.procInstBody = ''
          } else {
            strictFail(parser, 'Unencoded <')
            // if there was some whitespace, then add that in.
            if (parser.startTagPosition + 1 < parser.position) {
              var pad = parser.position - parser.startTagPosition
              c = new Array(pad).join(' ') + c
            }
            parser.textNode += '<' + c
            parser.state = S.TEXT
          }
          continue

        case S.SGML_DECL:
          if (parser.sgmlDecl + c === '--') {
            parser.state = S.COMMENT
            parser.comment = ''
            parser.sgmlDecl = ''
            continue
          }

          if (
            parser.doctype &&
            parser.doctype !== true &&
            parser.sgmlDecl
          ) {
            // A declaration inside an open doctype: fold it back into the
            // doctype text.
            parser.state = S.DOCTYPE_DTD
            parser.doctype += '<!' + parser.sgmlDecl + c
            parser.sgmlDecl = ''
          } else if ((parser.sgmlDecl + c).toUpperCase() === CDATA) {
            emitNode(parser, 'onopencdata')
            parser.state = S.CDATA
            parser.sgmlDecl = ''
            parser.cdata = ''
          } else if ((parser.sgmlDecl + c).toUpperCase() === DOCTYPE) {
            parser.state = S.DOCTYPE
            if (parser.doctype || parser.sawRoot) {
              strictFail(
                parser,
                'Inappropriately located doctype declaration'
              )
            }
            parser.doctype = ''
            parser.sgmlDecl = ''
          } else if (c === '>') {
            emitNode(parser, 'onsgmldeclaration', parser.sgmlDecl)
            parser.sgmlDecl = ''
            parser.state = S.TEXT
          } else if (isQuote(c)) {
            parser.state = S.SGML_DECL_QUOTED
            // Remember the opening quote so SGML_DECL_QUOTED can find the
            // matching one; without this the quoted state never ends.
            parser.q = c
            parser.sgmlDecl += c
          } else {
            parser.sgmlDecl += c
          }
          continue

        case S.SGML_DECL_QUOTED:
          if (c === parser.q) {
            parser.state = S.SGML_DECL
            parser.q = ''
          }
          parser.sgmlDecl += c
          continue

        case S.DOCTYPE:
          if (c === '>') {
            parser.state = S.TEXT
            emitNode(parser, 'ondoctype', parser.doctype)
            parser.doctype = true // just remember that we saw it.
          } else {
            parser.doctype += c
            if (c === '[') {
              parser.state = S.DOCTYPE_DTD
            } else if (isQuote(c)) {
              parser.state = S.DOCTYPE_QUOTED
              parser.q = c
            }
          }
          continue

        case S.DOCTYPE_QUOTED:
          parser.doctype += c
          if (c === parser.q) {
            parser.q = ''
            parser.state = S.DOCTYPE
          }
          continue

        case S.DOCTYPE_DTD:
          if (c === ']') {
            parser.doctype += c
            parser.state = S.DOCTYPE
          } else if (c === '<') {
            parser.state = S.OPEN_WAKA
            parser.startTagPosition = parser.position
          } else if (isQuote(c)) {
            parser.doctype += c
            parser.state = S.DOCTYPE_DTD_QUOTED
            parser.q = c
          } else {
            parser.doctype += c
          }
          continue

        case S.DOCTYPE_DTD_QUOTED:
          parser.doctype += c
          if (c === parser.q) {
            parser.state = S.DOCTYPE_DTD
            parser.q = ''
          }
          continue

        case S.COMMENT:
          if (c === '-') {
            parser.state = S.COMMENT_ENDING
          } else {
            parser.comment += c
          }
          continue

        case S.COMMENT_ENDING:
          if (c === '-') {
            parser.state = S.COMMENT_ENDED
            parser.comment = textopts(parser.opt, parser.comment)
            if (parser.comment) {
              emitNode(parser, 'oncomment', parser.comment)
            }
            parser.comment = ''
          } else {
            parser.comment += '-' + c
            parser.state = S.COMMENT
          }
          continue

        case S.COMMENT_ENDED:
          if (c !== '>') {
            strictFail(parser, 'Malformed comment')
            // allow <!-- blah -- bloo --> in non-strict mode,
            // which is a comment of " blah -- bloo "
            parser.comment += '--' + c
            parser.state = S.COMMENT
          } else if (parser.doctype && parser.doctype !== true) {
            parser.state = S.DOCTYPE_DTD
          } else {
            parser.state = S.TEXT
          }
          continue

        case S.CDATA:
          // Fast path: scan ahead to the next ']' (or the end of the chunk)
          // and append the whole run in one go.
          var starti = i - 1
          while (c && c !== ']') {
            c = charAt(chunk, i++)
            if (c && parser.trackPosition) {
              parser.position++
              if (c === '\n') {
                parser.line++
                parser.column = 0
              } else {
                parser.column++
              }
            }
          }
          parser.cdata += chunk.substring(starti, i - 1)
          if (c === ']') {
            parser.state = S.CDATA_ENDING
          }
          continue

        case S.CDATA_ENDING:
          if (c === ']') {
            parser.state = S.CDATA_ENDING_2
          } else {
            parser.cdata += ']' + c
            parser.state = S.CDATA
          }
          continue

        case S.CDATA_ENDING_2:
          if (c === '>') {
            if (parser.cdata) {
              emitNode(parser, 'oncdata', parser.cdata)
            }
            emitNode(parser, 'onclosecdata')
            parser.cdata = ''
            parser.state = S.TEXT
          } else if (c === ']') {
            // "]]]": the first ']' is data, the last two may still close.
            parser.cdata += ']'
          } else {
            parser.cdata += ']]' + c
            parser.state = S.CDATA
          }
          continue

        case S.PROC_INST:
          if (c === '?') {
            parser.state = S.PROC_INST_ENDING
          } else if (isWhitespace(c)) {
            parser.state = S.PROC_INST_BODY
          } else {
            parser.procInstName += c
          }
          continue

        case S.PROC_INST_BODY:
          if (!parser.procInstBody && isWhitespace(c)) {
            // Skip whitespace between the name and the body.
            continue
          } else if (c === '?') {
            parser.state = S.PROC_INST_ENDING
          } else {
            parser.procInstBody += c
          }
          continue

        case S.PROC_INST_ENDING:
          if (c === '>') {
            emitNode(parser, 'onprocessinginstruction', {
              name: parser.procInstName,
              body: parser.procInstBody,
            })
            parser.procInstName = parser.procInstBody = ''
            parser.state = S.TEXT
          } else {
            parser.procInstBody += '?' + c
            parser.state = S.PROC_INST_BODY
          }
          continue

        case S.OPEN_TAG:
          if (isMatch(nameBody, c)) {
            parser.tagName += c
          } else {
            newTag(parser)
            if (c === '>') {
              openTag(parser)
            } else if (c === '/') {
              parser.state = S.OPEN_TAG_SLASH
            } else {
              if (!isWhitespace(c)) {
                strictFail(parser, 'Invalid character in tag name')
              }
              parser.state = S.ATTRIB
            }
          }
          continue

        case S.OPEN_TAG_SLASH:
          if (c === '>') {
            openTag(parser, true)
            closeTag(parser)
          } else {
            strictFail(
              parser,
              'Forward-slash in opening tag not followed by >'
            )
            parser.state = S.ATTRIB
          }
          continue

        case S.ATTRIB:
          // haven't read the attribute name yet.
          if (isWhitespace(c)) {
            continue
          } else if (c === '>') {
            openTag(parser)
          } else if (c === '/') {
            parser.state = S.OPEN_TAG_SLASH
          } else if (isMatch(nameStart, c)) {
            parser.attribName = c
            parser.attribValue = ''
            parser.state = S.ATTRIB_NAME
          } else {
            strictFail(parser, 'Invalid attribute name')
          }
          continue

        case S.ATTRIB_NAME:
          if (c === '=') {
            parser.state = S.ATTRIB_VALUE
          } else if (c === '>') {
            strictFail(parser, 'Attribute without value')
            // A valueless attribute takes its own name as its value.
            parser.attribValue = parser.attribName
            attrib(parser)
            openTag(parser)
          } else if (isWhitespace(c)) {
            parser.state = S.ATTRIB_NAME_SAW_WHITE
          } else if (isMatch(nameBody, c)) {
            parser.attribName += c
          } else {
            strictFail(parser, 'Invalid attribute name')
          }
          continue

        case S.ATTRIB_NAME_SAW_WHITE:
          if (c === '=') {
            parser.state = S.ATTRIB_VALUE
          } else if (isWhitespace(c)) {
            continue
          } else {
            strictFail(parser, 'Attribute without value')
            parser.tag.attributes[parser.attribName] = ''
            parser.attribValue = ''
            emitNode(parser, 'onattribute', {
              name: parser.attribName,
              value: '',
            })
            parser.attribName = ''
            if (c === '>') {
              openTag(parser)
            } else if (isMatch(nameStart, c)) {
              parser.attribName = c
              parser.state = S.ATTRIB_NAME
            } else {
              strictFail(parser, 'Invalid attribute name')
              parser.state = S.ATTRIB
            }
          }
          continue

        case S.ATTRIB_VALUE:
          if (isWhitespace(c)) {
            continue
          } else if (isQuote(c)) {
            parser.q = c
            parser.state = S.ATTRIB_VALUE_QUOTED
          } else {
            if (!parser.opt.unquotedAttributeValues) {
              error(parser, 'Unquoted attribute value')
            }
            parser.state = S.ATTRIB_VALUE_UNQUOTED
            parser.attribValue = c
          }
          continue

        case S.ATTRIB_VALUE_QUOTED:
          if (c !== parser.q) {
            if (c === '&') {
              parser.state = S.ATTRIB_VALUE_ENTITY_Q
            } else {
              parser.attribValue += c
            }
            continue
          }
          attrib(parser)
          parser.q = ''
          parser.state = S.ATTRIB_VALUE_CLOSED
          continue

        case S.ATTRIB_VALUE_CLOSED:
          if (isWhitespace(c)) {
            parser.state = S.ATTRIB
          } else if (c === '>') {
            openTag(parser)
          } else if (c === '/') {
            parser.state = S.OPEN_TAG_SLASH
          } else if (isMatch(nameStart, c)) {
            strictFail(parser, 'No whitespace between attributes')
            parser.attribName = c
            parser.attribValue = ''
            parser.state = S.ATTRIB_NAME
          } else {
            strictFail(parser, 'Invalid attribute name')
          }
          continue

        case S.ATTRIB_VALUE_UNQUOTED:
          if (!isAttribEnd(c)) {
            if (c === '&') {
              parser.state = S.ATTRIB_VALUE_ENTITY_U
            } else {
              parser.attribValue += c
            }
            continue
          }
          attrib(parser)
          if (c === '>') {
            openTag(parser)
          } else {
            parser.state = S.ATTRIB
          }
          continue

        case S.CLOSE_TAG:
          if (!parser.tagName) {
            if (isWhitespace(c)) {
              continue
            } else if (notMatch(nameStart, c)) {
              if (parser.script) {
                parser.script += '</' + c
                parser.state = S.SCRIPT
              } else {
                strictFail(parser, 'Invalid tagname in closing tag.')
              }
            } else {
              parser.tagName = c
            }
          } else if (c === '>') {
            closeTag(parser)
          } else if (isMatch(nameBody, c)) {
            parser.tagName += c
          } else if (parser.script) {
            parser.script += '</' + parser.tagName + c
            parser.tagName = ''
            parser.state = S.SCRIPT
          } else {
            if (!isWhitespace(c)) {
              strictFail(parser, 'Invalid tagname in closing tag')
            }
            parser.state = S.CLOSE_TAG_SAW_WHITE
          }
          continue

        case S.CLOSE_TAG_SAW_WHITE:
          if (isWhitespace(c)) {
            continue
          }
          if (c === '>') {
            closeTag(parser)
          } else {
            strictFail(parser, 'Invalid characters in closing tag')
          }
          continue

        case S.TEXT_ENTITY:
        case S.ATTRIB_VALUE_ENTITY_Q:
        case S.ATTRIB_VALUE_ENTITY_U:
          // The three entity states differ only in where the result is
          // appended and which state resumes afterwards.
          var returnState
          var buffer
          switch (parser.state) {
            case S.TEXT_ENTITY:
              returnState = S.TEXT
              buffer = 'textNode'
              break

            case S.ATTRIB_VALUE_ENTITY_Q:
              returnState = S.ATTRIB_VALUE_QUOTED
              buffer = 'attribValue'
              break

            case S.ATTRIB_VALUE_ENTITY_U:
              returnState = S.ATTRIB_VALUE_UNQUOTED
              buffer = 'attribValue'
              break
          }

          if (c === ';') {
            var parsedEntity = parseEntity(parser)
            if (
              parser.opt.unparsedEntities &&
              !Object.values(sax.XML_ENTITIES).includes(parsedEntity)
            ) {
              // The replacement text is itself parsed as markup by
              // re-entering write(); the counters bound that expansion.
              if ((parser.entityCount += 1) > parser.opt.maxEntityCount) {
                error(
                  parser,
                  'Parsed entity count exceeds max entity count'
                )
              }

              if ((parser.entityDepth += 1) > parser.opt.maxEntityDepth) {
                error(
                  parser,
                  'Parsed entity depth exceeds max entity depth'
                )
              }

              parser.entity = ''
              parser.state = returnState
              parser.write(parsedEntity)
              parser.entityDepth -= 1
            } else {
              parser[buffer] += parsedEntity
              parser.entity = ''
              parser.state = returnState
            }
          } else if (
            isMatch(parser.entity.length ? entityBody : entityStart, c)
          ) {
            parser.entity += c
          } else {
            strictFail(parser, 'Invalid character in entity name')
            parser[buffer] += '&' + parser.entity + c
            parser.entity = ''
            parser.state = returnState
          }

          continue

        default: /* istanbul ignore next */ {
          // Error takes the message as its first argument.
          throw new Error('Unknown state: ' + parser.state)
        }
      }
    } // while

    if (parser.position >= parser.bufferCheckPosition) {
      checkBufferLength(parser)
    }
    return parser
  }
  /*! http://mths.be/fromcodepoint v0.1.0 by @mathias */
  /* istanbul ignore next */
  if (!String.fromCodePoint) {
    // Install a String.fromCodePoint fallback for engines that lack it.
    ;(function () {
      var toChars = String.fromCharCode
      var roundDown = Math.floor

      // Build a string from any number of code point arguments. Code units
      // are flushed in batches so a single apply() call never receives an
      // unbounded argument list.
      var fromCodePoint = function () {
        var BATCH_LIMIT = 0x4000
        var pending = []
        var lead
        var trail
        var count = arguments.length
        if (!count) {
          return ''
        }
        var output = ''
        for (var k = 0; k < count; k++) {
          var codePoint = Number(arguments[k])
          var isValid =
            isFinite(codePoint) && // rejects `NaN` and both infinities
            codePoint >= 0 && // lower bound of the code point range
            codePoint <= 0x10ffff && // upper bound of the code point range
            roundDown(codePoint) === codePoint // must be an integer
          if (!isValid) {
            throw RangeError('Invalid code point: ' + codePoint)
          }
          if (codePoint > 0xffff) {
            // Astral code point; split in surrogate halves
            // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
            codePoint -= 0x10000
            lead = (codePoint >> 10) + 0xd800
            trail = (codePoint % 0x400) + 0xdc00
            pending.push(lead, trail)
          } else {
            // BMP code point
            pending.push(codePoint)
          }
          if (k + 1 === count || pending.length > BATCH_LIMIT) {
            output += toChars.apply(null, pending)
            pending.length = 0
          }
        }
        return output
      }
      /* istanbul ignore next */
      if (Object.defineProperty) {
        Object.defineProperty(String, 'fromCodePoint', {
          value: fromCodePoint,
          configurable: true,
          writable: true,
        })
      } else {
        String.fromCodePoint = fromCodePoint
      }
    })()
  }
  1589. })(typeof exports === 'undefined' ? (this.sax = {}) : exports)