// sax.js (49 KB)
  1. ;(function (sax) {
  2. // wrapper for non-node envs
  // Convenience factory: sax.parser(strict, opt) builds a SAXParser so callers
  // do not have to invoke the constructor themselves.
  sax.parser = function (strict, opt) {
    var instance = new SAXParser(strict, opt)
    return instance
  }
  // Public constructors and the stream factory.
  sax.SAXParser = SAXParser
  sax.SAXStream = SAXStream
  sax.createStream = createStream
  // Buffer-overrun checks begin once the parse position passes MAX_BUFFER_LENGTH.
  // Every check schedules the next one for MAX_BUFFER_LENGTH - (longest buffer),
  // which is the earliest position at which an overrun could occur, so checks
  // are as rare as possible yet frequent enough to never cross the bound.
  // Buffers are tested at most once per write(); a very large string passed to
  // write() may therefore have undesirable effects, but the caller controls
  // that, so it is assumed to be safe. In the extreme case a write() may create
  // at most one complete copy of the string passed in.
  // Set to Infinity to have unlimited buffers.
  sax.MAX_BUFFER_LENGTH = 64 * 1024

  // Names of the parser properties that are used as string buffers. They are
  // reset by clearBuffers() and measured by checkBufferLength().
  var buffers = [
    'comment', 'sgmlDecl', 'textNode', 'tagName',
    'doctype', 'procInstName', 'procInstBody', 'entity',
    'attribName', 'attribValue', 'cdata', 'script',
  ]
  // Event names, in their published order. SAXStream wraps every name except
  // 'error' and 'end' as an "on<name>" handler (see streamWraps).
  sax.EVENTS = [
    'text', 'processinginstruction', 'sgmldeclaration', 'doctype', 'comment',
    'opentagstart', 'attribute', 'opentag', 'closetag',
    'opencdata', 'cdata', 'closecdata',
    'error', 'end', 'ready',
    'script', 'opennamespace', 'closenamespace',
  ]
  /**
   * Parser state container. May be called with or without `new`.
   *
   * It is also re-invoked on an existing instance by end() to reset it, so
   * every piece of per-document state is (re)initialised here.
   *
   * @param {boolean} strict  truthy enables strict mode (and disables script handling)
   * @param {object} [opt]    options object; it is stored on the parser and
   *                          defaults are written back into it
   */
  function SAXParser(strict, opt) {
    if (!(this instanceof SAXParser)) {
      return new SAXParser(strict, opt)
    }

    var parser = this
    var options = opt || {}

    clearBuffers(parser)
    parser.c = ''
    parser.q = ''
    parser.bufferCheckPosition = sax.MAX_BUFFER_LENGTH
    parser.encoding = null

    parser.opt = options
    options.lowercase = options.lowercase || options.lowercasetags
    // name of the String method used to fold tag/attribute case in loose mode
    parser.looseCase = options.lowercase ? 'toLowerCase' : 'toUpperCase'
    options.maxEntityCount = options.maxEntityCount || 512
    options.maxEntityDepth = options.maxEntityDepth || 4
    parser.entityDepth = 0
    parser.entityCount = 0

    parser.tags = []
    parser.sawRoot = false
    parser.closedRoot = false
    parser.closed = false
    parser.error = null
    parser.tag = null
    parser.strict = !!strict
    parser.noscript = !!(strict || options.noscript)
    parser.state = S.BEGIN

    parser.strictEntities = options.strictEntities
    // Each parser gets its own table object that inherits the shared entities.
    var sharedEntities = parser.strictEntities ? sax.XML_ENTITIES : sax.ENTITIES
    parser.ENTITIES = Object.create(sharedEntities)
    parser.attribList = []

    // Namespaces form a prototype chain: `ns` always belongs to the current
    // tag, whose prototype is the parent tag's namespace object.
    if (options.xmlns) {
      parser.ns = Object.create(rootNS)
    }

    // Unless configured explicitly, unquoted attribute values are allowed
    // only when strict mode is off.
    if (options.unquotedAttributeValues === undefined) {
      options.unquotedAttributeValues = !strict
    }

    // Position tracking is mostly useful for error reporting.
    parser.trackPosition = options.position !== false
    if (parser.trackPosition) {
      parser.column = 0
      parser.line = 0
      parser.position = 0
    }

    emit(parser, 'onready')
  }
  // Fallback for engines without Object.create: returns a new object whose
  // prototype is `o` (property descriptors are not supported).
  if (!Object.create) {
    Object.create = function (o) {
      var Ctor = function () {}
      Ctor.prototype = o
      return new Ctor()
    }
  }
  // Fallback for engines without Object.keys: collects own enumerable keys.
  if (!Object.keys) {
    Object.keys = function (o) {
      var names = []
      for (var key in o) {
        if (o.hasOwnProperty(key)) {
          names.push(key)
        }
      }
      return names
    }
  }
  /**
   * Inspects every parser buffer, relieves or reports the ones that exceed
   * the limit, and schedules the position of the next check.
   *
   * @param {SAXParser} parser
   */
  function checkBufferLength(parser) {
    var limit = Math.max(sax.MAX_BUFFER_LENGTH, 10)
    var longest = 0

    buffers.forEach(function (name) {
      var size = parser[name].length
      if (size > limit) {
        // Text, cdata and script content can legitimately grow large because
        // it is buffered, so emit it now to stop it growing further. Any
        // other oversized buffer is an error.
        if (name === 'textNode') {
          closeText(parser)
        } else if (name === 'cdata') {
          emitNode(parser, 'oncdata', parser.cdata)
          parser.cdata = ''
        } else if (name === 'script') {
          emitNode(parser, 'onscript', parser.script)
          parser.script = ''
        } else {
          error(parser, 'Max buffer length exceeded: ' + name)
        }
      }
      // uses the size measured before any flush above
      longest = Math.max(longest, size)
    })

    // Next check: the earliest position at which a buffer could overrun.
    var headroom = sax.MAX_BUFFER_LENGTH - longest
    parser.bufferCheckPosition = headroom + parser.position
  }
  // Resets every buffer property listed in `buffers` to the empty string.
  function clearBuffers(parser) {
    buffers.forEach(function (name) {
      parser[name] = ''
    })
  }
  // Emits whatever is pending in the text, cdata and script buffers and
  // empties them. Text is always passed through closeText(); cdata and script
  // are emitted only when non-empty.
  function flushBuffers(parser) {
    closeText(parser)

    var pendingCdata = parser.cdata
    if (pendingCdata !== '') {
      emitNode(parser, 'oncdata', pendingCdata)
      parser.cdata = ''
    }

    var pendingScript = parser.script
    if (pendingScript !== '') {
      emitNode(parser, 'onscript', pendingScript)
      parser.script = ''
    }
  }
  // Public parser methods. The prototype object is replaced wholesale.
  SAXParser.prototype = {
    // Finish the document; delegates to the module-level end().
    end: function () {
      end(this)
    },

    // Feed a chunk to the parser (module-level write function).
    write: write,

    // Clear the stored error so parsing can continue; chainable.
    resume: function () {
      this.error = null
      return this
    },

    // Shorthand for write(null).
    close: function () {
      var result = this.write(null)
      return result
    },

    // Emit any buffered text / cdata / script content immediately.
    flush: function () {
      flushBuffers(this)
    },
  }
  // Resolve the Stream base class. When require() is unavailable, throws, or
  // yields nothing usable, fall back to an empty constructor.
  var Stream
  try {
    Stream = require('stream').Stream
  } catch (ex) {
    // leave Stream unset; the fallback below takes over
  }
  if (!Stream) {
    Stream = function () {}
  }
  // Every event except 'error' and 'end'; SAXStream handles those two itself.
  var streamWraps = sax.EVENTS.filter(function (name) {
    return !(name === 'error' || name === 'end')
  })
  // Factory for SAXStream; takes the same arguments as the constructor.
  function createStream(strict, opt) {
    var stream = new SAXStream(strict, opt)
    return stream
  }
  /**
   * Guesses the text encoding of a byte chunk from its leading bytes.
   *
   * @param {ArrayLike<number>} data  indexable bytes with a `length`
   * @param {boolean} isEnd           true when no further bytes will arrive
   * @returns {?string} 'utf-16le', 'utf-16be' or 'utf8'; null when fewer than
   *   four bytes are available, no BOM matched, and more input may follow
   */
  function determineBufferEncoding(data, isEnd) {
    var size = data.length

    // True when `data` begins with exactly the given byte sequence.
    function startsWith(bytes) {
      if (size < bytes.length) {
        return false
      }
      for (var i = 0; i < bytes.length; i++) {
        if (data[i] !== bytes[i]) {
          return false
        }
      }
      return true
    }

    // A byte order mark is the most reliable signal when present.
    if (startsWith([0xff, 0xfe])) return 'utf-16le'
    if (startsWith([0xfe, 0xff])) return 'utf-16be'
    if (startsWith([0xef, 0xbb, 0xbf])) return 'utf8'

    if (size < 4) {
      // Not enough bytes to decide; only commit to a default at end of input.
      return isEnd ? 'utf8' : null
    }

    // Without a BOM, the position of the zero bytes in a leading "<?"
    // separates UTF-16LE / UTF-16BE from UTF-8.
    if (startsWith([0x3c, 0x00, 0x3f, 0x00])) return 'utf-16le'
    if (startsWith([0x00, 0x3c, 0x00, 0x3f])) return 'utf-16be'
    return 'utf8'
  }
  /**
   * Stream wrapper around a SAXParser. May be called with or without `new`.
   *
   * Parser 'end' and 'error' callbacks are forwarded as stream events, and an
   * "on<event>" accessor is defined for each name in streamWraps.
   *
   * @param {boolean} strict  passed through to SAXParser
   * @param {object} [opt]    passed through to SAXParser
   */
  function SAXStream(strict, opt) {
    if (!(this instanceof SAXStream)) {
      return new SAXStream(strict, opt)
    }
    Stream.apply(this)

    var self = this
    this._parser = new SAXParser(strict, opt)
    this.writable = true
    this.readable = true

    this._parser.onend = function () {
      self.emit('end')
    }
    this._parser.onerror = function (er) {
      self.emit('error', er)
      // If emit() did not throw, the error was handled; clear it so that
      // the stream can be written to again.
      self._parser.error = null
    }

    // Decoder state for Buffer input; populated by _decodeBuffer().
    this._decoder = null
    this._decoderBuffer = null

    streamWraps.forEach(function (ev) {
      var handlerKey = 'on' + ev
      Object.defineProperty(self, handlerKey, {
        get: function () {
          return self._parser[handlerKey]
        },
        set: function (h) {
          if (h) {
            // a real handler is registered as an ordinary listener
            self.on(ev, h)
            return
          }
          // a falsy value removes all listeners and is stored on the parser
          self.removeAllListeners(ev)
          self._parser[handlerKey] = h
          return h
        },
        enumerable: true,
        configurable: false,
      })
    })
  }
  // SAXStream inherits from Stream; `constructor` is re-pointed at SAXStream
  // through a property descriptor passed to Object.create.
  SAXStream.prototype = Object.create(Stream.prototype, {
    constructor: {
      value: SAXStream,
    },
  })
  /**
   * Decodes a Buffer chunk to a string, detecting the encoding on first use.
   *
   * @param {Buffer} data     bytes to decode
   * @param {boolean} isEnd   true when this is the final chunk
   * @returns {string} decoded text, or '' while detection is still deferred
   */
  SAXStream.prototype._decodeBuffer = function (data, isEnd) {
    var held = this._decoderBuffer
    if (held) {
      // Bytes held back from an earlier call are decoded together with the
      // new chunk once there is enough data to infer the encoding.
      data = Buffer.concat([held, data])
      this._decoderBuffer = null
    }

    if (!this._decoder) {
      var detected = determineBufferEncoding(data, isEnd)
      if (!detected) {
        // Too few bytes to decide yet: keep them for the next write/end call.
        this._decoderBuffer = data
        return ''
      }
      // Record the detected transport encoding on the parser so that strict
      // mode can compare it with the encoding declared in the XML prolog.
      this._parser.encoding = detected
      this._decoder = new TextDecoder(detected)
    }

    var streaming = !isEnd
    return this._decoder.decode(data, { stream: streaming })
  }
  /**
   * Writes a chunk to the underlying parser and emits it as a 'data' event.
   * Buffers are decoded first; other values are passed through toString().
   *
   * @param {Buffer|string} data
   * @returns {boolean} always true
   */
  SAXStream.prototype.write = function (data) {
    var isBufferChunk =
      typeof Buffer === 'function' &&
      typeof Buffer.isBuffer === 'function' &&
      Buffer.isBuffer(data)

    if (isBufferChunk) {
      data = this._decodeBuffer(data, false)
    } else if (this._decoderBuffer) {
      // A non-Buffer chunk arrived while bytes are still held back for
      // encoding detection: flush that prefix first. This only matters when
      // the caller mixes Buffer and string writes.
      var flushed = this._decodeBuffer(Buffer.alloc(0), true)
      if (flushed) {
        this._parser.write(flushed)
        this.emit('data', flushed)
      }
    }

    this._parser.write(data.toString())
    this.emit('data', data)
    return true
  }
  /**
   * Ends the stream: writes the optional final chunk, flushes whatever the
   * decoder still holds, then ends the underlying parser.
   *
   * @param {Buffer|string} [chunk]  final chunk; ignored when empty
   * @returns {boolean} always true
   */
  SAXStream.prototype.end = function (chunk) {
    if (chunk && chunk.length) {
      this.write(chunk)
    }

    // Collect any text that has not been decoded yet.
    var tail = ''
    if (this._decoderBuffer) {
      // bytes still waiting for encoding detection
      tail = this._decodeBuffer(Buffer.alloc(0), true)
    } else if (this._decoder) {
      // bytes buffered inside the TextDecoder
      tail = this._decoder.decode()
    }
    if (tail) {
      this._parser.write(tail)
      this.emit('data', tail)
    }

    this._parser.end()
    return true
  }
  /**
   * Registers a listener. The first time a wrapped parser event is listened
   * for, a parser callback is installed that re-emits it on the stream.
   *
   * @param {string} ev
   * @param {Function} handler
   * @returns whatever Stream.prototype.on returns
   */
  SAXStream.prototype.on = function (ev, handler) {
    var stream = this
    var handlerKey = 'on' + ev
    var isWrapped = streamWraps.indexOf(ev) !== -1

    if (!stream._parser[handlerKey] && isWrapped) {
      stream._parser[handlerKey] = function () {
        // forward as emit(ev, ...parserArguments)
        var forwarded = [ev]
        for (var i = 0; i < arguments.length; i++) {
          forwarded.push(arguments[i])
        }
        stream.emit.apply(stream, forwarded)
      }
    }

    return Stream.prototype.on.call(stream, ev, handler)
  }
  // Literal markers and the two namespaces that are always bound.
  var CDATA = '[CDATA['
  var DOCTYPE = 'DOCTYPE'
  var XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
  var XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/'
  var rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE }

  // Name character classes, see http://www.w3.org/TR/REC-xml/#NT-NameStartChar
  // These really ought to be replaced with proper character classes; XML
  // allows all manner of ridiculous numbers and digits.
  // The parser works on strings one character at a time, so astral-plane
  // characters (10000-EFFFF) cannot be supported without a significant
  // breaking change to either this parser or the JavaScript language.
  // An emoji-capable XML parser is left as an exercise for the reader.
  //
  // The four patterns share two fragments; entity names additionally allow '#'.
  var NAME_START_CHARS =
    ':_A-Za-z\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u02FF\\u0370-\\u037D' +
    '\\u037F-\\u1FFF\\u200C-\\u200D\\u2070-\\u218F\\u2C00-\\u2FEF' +
    '\\u3001-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFFD'
  var NAME_BODY_EXTRA_CHARS = '\\u00B7\\u0300-\\u036F\\u203F-\\u2040.\\d-'

  var nameStart = new RegExp('[' + NAME_START_CHARS + ']')
  var nameBody = new RegExp('[' + NAME_START_CHARS + NAME_BODY_EXTRA_CHARS + ']')
  var entityStart = new RegExp('[#' + NAME_START_CHARS + ']')
  var entityBody = new RegExp(
    '[#' + NAME_START_CHARS + NAME_BODY_EXTRA_CHARS + ']'
  )
  // True for exactly one space, newline, carriage return or tab character.
  function isWhitespace(c) {
    switch (c) {
      case ' ':
      case '\n':
      case '\r':
      case '\t':
        return true
      default:
        return false
    }
  }
  // True when c is a single or a double quote character.
  function isQuote(c) {
    if (c === "'") {
      return true
    }
    return c === '"'
  }
  // True when c ends an attribute value: '>' or whitespace.
  function isAttribEnd(c) {
    if (c === '>') {
      return true
    }
    return isWhitespace(c)
  }
  // Tests character c against one of the character-class patterns.
  function isMatch(regex, c) {
    var matched = regex.test(c)
    return matched
  }
  // Negation of isMatch().
  function notMatch(regex, c) {
    var matched = isMatch(regex, c)
    return !matched
  }
  // Parser states, numbered consecutively from 0 in the order listed.
  // S serves as the counter here and is later re-pointed at sax.STATE.
  var S = 0
  sax.STATE = [
    'BEGIN', // leading byte order mark or whitespace
    'BEGIN_WHITESPACE', // leading whitespace
    'TEXT', // general stuff
    'TEXT_ENTITY', // &amp and such.
    'OPEN_WAKA', // <
    'SGML_DECL', // <!BLARG
    'SGML_DECL_QUOTED', // <!BLARG foo "bar
    'DOCTYPE', // <!DOCTYPE
    'DOCTYPE_QUOTED', // <!DOCTYPE "//blah
    'DOCTYPE_DTD', // <!DOCTYPE "//blah" [ ...
    'DOCTYPE_DTD_QUOTED', // <!DOCTYPE "//blah" [ "foo
    'COMMENT_STARTING', // <!-
    'COMMENT', // <!--
    'COMMENT_ENDING', // <!-- blah -
    'COMMENT_ENDED', // <!-- blah --
    'CDATA', // <![CDATA[ something
    'CDATA_ENDING', // ]
    'CDATA_ENDING_2', // ]]
    'PROC_INST', // <?hi
    'PROC_INST_BODY', // <?hi there
    'PROC_INST_ENDING', // <?hi "there" ?
    'OPEN_TAG', // <strong
    'OPEN_TAG_SLASH', // <strong /
    'ATTRIB', // <a
    'ATTRIB_NAME', // <a foo
    'ATTRIB_NAME_SAW_WHITE', // <a foo _
    'ATTRIB_VALUE', // <a foo=
    'ATTRIB_VALUE_QUOTED', // <a foo="bar
    'ATTRIB_VALUE_CLOSED', // <a foo="bar"
    'ATTRIB_VALUE_UNQUOTED', // <a foo=bar
    'ATTRIB_VALUE_ENTITY_Q', // <foo bar="&quot;"
    'ATTRIB_VALUE_ENTITY_U', // <foo bar=&quot
    'CLOSE_TAG', // </a
    'CLOSE_TAG_SAW_WHITE', // </a >
    'SCRIPT', // <script> ...
    'SCRIPT_ENDING', // <script> ... <
  ].reduce(function (states, name) {
    states[name] = S++
    return states
  }, {})
  // The five entities predefined by XML; used alone when opt.strictEntities is set.
  sax.XML_ENTITIES = { amp: '&', gt: '>', lt: '<', quot: '"', apos: "'" }

  // Default entity table: the XML entities plus named character entities.
  // Numeric values are code points; they are converted to one-character
  // strings right after this table is defined.
  sax.ENTITIES = {
    amp: '&', gt: '>', lt: '<', quot: '"', apos: "'",

    // code points 192-222 (uppercase letters)
    AElig: 198, Aacute: 193, Acirc: 194, Agrave: 192, Aring: 197, Atilde: 195,
    Auml: 196, Ccedil: 199, ETH: 208, Eacute: 201, Ecirc: 202, Egrave: 200,
    Euml: 203, Iacute: 205, Icirc: 206, Igrave: 204, Iuml: 207, Ntilde: 209,
    Oacute: 211, Ocirc: 212, Ograve: 210, Oslash: 216, Otilde: 213, Ouml: 214,
    THORN: 222, Uacute: 218, Ucirc: 219, Ugrave: 217, Uuml: 220, Yacute: 221,

    // code points 223-255 (lowercase letters)
    aacute: 225, acirc: 226, aelig: 230, agrave: 224, aring: 229, atilde: 227,
    auml: 228, ccedil: 231, eacute: 233, ecirc: 234, egrave: 232, eth: 240,
    euml: 235, iacute: 237, icirc: 238, igrave: 236, iuml: 239, ntilde: 241,
    oacute: 243, ocirc: 244, ograve: 242, oslash: 248, otilde: 245, ouml: 246,
    szlig: 223, thorn: 254, uacute: 250, ucirc: 251, ugrave: 249, uuml: 252,
    yacute: 253, yuml: 255,

    // code points 160-191, 215 and 247 (symbols and punctuation)
    copy: 169, reg: 174, nbsp: 160, iexcl: 161, cent: 162, pound: 163,
    curren: 164, yen: 165, brvbar: 166, sect: 167, uml: 168, ordf: 170,
    laquo: 171, not: 172, shy: 173, macr: 175, deg: 176, plusmn: 177,
    sup1: 185, sup2: 178, sup3: 179, acute: 180, micro: 181, para: 182,
    middot: 183, cedil: 184, ordm: 186, raquo: 187, frac14: 188, frac12: 189,
    frac34: 190, iquest: 191, times: 215, divide: 247,

    // code points 338-732
    OElig: 338, oelig: 339, Scaron: 352, scaron: 353, Yuml: 376, fnof: 402,
    circ: 710, tilde: 732,

    // code points 913-937 (uppercase Greek)
    Alpha: 913, Beta: 914, Gamma: 915, Delta: 916, Epsilon: 917, Zeta: 918,
    Eta: 919, Theta: 920, Iota: 921, Kappa: 922, Lambda: 923, Mu: 924,
    Nu: 925, Xi: 926, Omicron: 927, Pi: 928, Rho: 929, Sigma: 931,
    Tau: 932, Upsilon: 933, Phi: 934, Chi: 935, Psi: 936, Omega: 937,

    // code points 945-982 (lowercase Greek and variants)
    alpha: 945, beta: 946, gamma: 947, delta: 948, epsilon: 949, zeta: 950,
    eta: 951, theta: 952, iota: 953, kappa: 954, lambda: 955, mu: 956,
    nu: 957, xi: 958, omicron: 959, pi: 960, rho: 961, sigmaf: 962,
    sigma: 963, tau: 964, upsilon: 965, phi: 966, chi: 967, psi: 968,
    omega: 969, thetasym: 977, upsih: 978, piv: 982,

    // code points 8194-8501 (spaces, dashes, quotes and letter-like symbols)
    ensp: 8194, emsp: 8195, thinsp: 8201, zwnj: 8204, zwj: 8205, lrm: 8206,
    rlm: 8207, ndash: 8211, mdash: 8212, lsquo: 8216, rsquo: 8217, sbquo: 8218,
    ldquo: 8220, rdquo: 8221, bdquo: 8222, dagger: 8224, Dagger: 8225,
    bull: 8226, hellip: 8230, permil: 8240, prime: 8242, Prime: 8243,
    lsaquo: 8249, rsaquo: 8250, oline: 8254, frasl: 8260, euro: 8364,
    image: 8465, weierp: 8472, real: 8476, trade: 8482, alefsym: 8501,

    // code points 8592-8660 (arrows)
    larr: 8592, uarr: 8593, rarr: 8594, darr: 8595, harr: 8596, crarr: 8629,
    lArr: 8656, uArr: 8657, rArr: 8658, dArr: 8659, hArr: 8660,

    // code points 8704-8901 (mathematical operators)
    forall: 8704, part: 8706, exist: 8707, empty: 8709, nabla: 8711,
    isin: 8712, notin: 8713, ni: 8715, prod: 8719, sum: 8721, minus: 8722,
    lowast: 8727, radic: 8730, prop: 8733, infin: 8734, ang: 8736, and: 8743,
    or: 8744, cap: 8745, cup: 8746, int: 8747, there4: 8756, sim: 8764,
    cong: 8773, asymp: 8776, ne: 8800, equiv: 8801, le: 8804, ge: 8805,
    sub: 8834, sup: 8835, nsub: 8836, sube: 8838, supe: 8839, oplus: 8853,
    otimes: 8855, perp: 8869, sdot: 8901,

    // code points 8968-9830
    lceil: 8968, rceil: 8969, lfloor: 8970, rfloor: 8971, lang: 9001,
    rang: 9002, loz: 9674, spades: 9824, clubs: 9827, hearts: 9829,
    diams: 9830,
  }
  // Convert numeric code points in the entity table to the characters they
  // denote; entries that are already strings stay as they are.
  Object.keys(sax.ENTITIES).forEach(function (name) {
    var value = sax.ENTITIES[name]
    if (typeof value === 'number') {
      sax.ENTITIES[name] = String.fromCharCode(value)
    }
  })

  // Add the reverse mapping (state number -> state name) to sax.STATE.
  for (var s in sax.STATE) {
    var stateNumber = sax.STATE[s]
    sax.STATE[stateNumber] = s
  }

  // From here on S is shorthand for the state table.
  S = sax.STATE
  // Calls parser[event](data) when such a handler is set; otherwise does nothing.
  function emit(parser, event, data) {
    if (parser[event]) {
      parser[event](data)
    }
  }
  /**
   * Extracts the value of the encoding pseudo-attribute from the body of an
   * XML declaration.
   *
   * @param {string} body  e.g. 'version="1.0" encoding="UTF-8"'
   * @returns {?string} the declared encoding as written, or null when absent
   */
  function getDeclaredEncoding(body) {
    // The value must be closed by the same quote character that opened it.
    var pattern = /(?:^|\s)encoding\s*=\s*(['"])([^'"]+)\1/i
    var found = body && body.match(pattern)
    if (!found) {
      return null
    }
    return found[2]
  }
  // Canonical form used for comparing encoding names: lower-cased with
  // everything except a-z and 0-9 removed. Falsy input yields null.
  function normalizeEncodingName(encoding) {
    if (encoding) {
      var lowered = encoding.toLowerCase()
      return lowered.replace(/[^a-z0-9]/g, '')
    }
    return null
  }
  /**
   * Tells whether a declared encoding is compatible with the detected one.
   * Names are compared in normalized form; a missing name on either side
   * counts as a match.
   *
   * @param {?string} detectedEncoding
   * @param {?string} declaredEncoding
   * @returns {boolean}
   */
  function encodingsMatch(detectedEncoding, declaredEncoding) {
    var detected = normalizeEncodingName(detectedEncoding)
    var declared = normalizeEncodingName(declaredEncoding)

    if (!detected || !declared) {
      return true
    }
    // A bare "UTF-16" declaration is satisfied by either byte order.
    if (declared === 'utf16') {
      return detected === 'utf16le' || detected === 'utf16be'
    }
    return detected === declared
  }
  /**
   * In strict mode, reports an error when the encoding declared in the
   * <?xml ...?> declaration disagrees with the encoding detected for the
   * stream. Does nothing when no encoding was detected or none is declared.
   *
   * @param {SAXParser} parser
   * @param {{name: string, body: string}} data  processing-instruction node
   */
  function validateXmlDeclarationEncoding(parser, data) {
    var applies =
      parser.strict && parser.encoding && data && data.name === 'xml'
    if (!applies) {
      return
    }

    var declaredEncoding = getDeclaredEncoding(data.body)
    if (!declaredEncoding) {
      return
    }
    if (encodingsMatch(parser.encoding, declaredEncoding)) {
      return
    }

    var message =
      'XML declaration encoding ' +
      declaredEncoding +
      ' does not match detected stream encoding ' +
      parser.encoding.toUpperCase()
    strictFail(parser, message)
  }
  // Emits a node event, first flushing pending text so that events are
  // delivered in the order the content was buffered.
  function emitNode(parser, nodeType, data) {
    if (parser.textNode) {
      closeText(parser)
    }
    emit(parser, nodeType, data)
  }
  // Applies the text options to the buffered text, emits it as 'ontext' when
  // anything is left, and empties the buffer.
  function closeText(parser) {
    var text = textopts(parser.opt, parser.textNode)
    parser.textNode = text
    if (text) {
      emit(parser, 'ontext', text)
    }
    parser.textNode = ''
  }
  /**
   * Applies the `trim` and `normalize` options to a piece of text.
   *
   * @param {{trim?: boolean, normalize?: boolean}} opt
   * @param {string} text
   * @returns {string} trimmed first (if enabled), then with every whitespace
   *   run collapsed to a single space (if enabled)
   */
  function textopts(opt, text) {
    var result = text
    if (opt.trim) {
      result = result.trim()
    }
    if (opt.normalize) {
      result = result.replace(/\s+/g, ' ')
    }
    return result
  }
  /**
   * Records and reports a parse error. Pending text is flushed first; when
   * position tracking is on, line, column and current character are appended
   * to the message.
   *
   * @param {SAXParser} parser
   * @param {string} er  error message
   * @returns {SAXParser} the parser
   */
  function error(parser, er) {
    closeText(parser)

    var message = er
    if (parser.trackPosition) {
      var where =
        '\nLine: ' +
        parser.line +
        '\nColumn: ' +
        parser.column +
        '\nChar: ' +
        parser.c
      message += where
    }

    var failure = new Error(message)
    parser.error = failure
    emit(parser, 'onerror', failure)
    return parser
  }
  /**
   * Finishes parsing: reports an unclosed root (strict mode only) or an
   * unexpected end, flushes text, emits 'onend', then re-initialises the
   * parser with the same strictness and options.
   *
   * @param {SAXParser} parser
   * @returns {SAXParser} the parser
   */
  function end(parser) {
    if (parser.sawRoot && !parser.closedRoot) {
      strictFail(parser, 'Unclosed root tag')
    }

    // The document may only end before any content, or in plain text.
    var endableStates = [S.BEGIN, S.BEGIN_WHITESPACE, S.TEXT]
    if (endableStates.indexOf(parser.state) === -1) {
      error(parser, 'Unexpected end')
    }

    closeText(parser)
    parser.c = ''
    parser.closed = true
    emit(parser, 'onend')

    // Run the constructor again on the same object to reset its state.
    SAXParser.call(parser, parser.strict, parser.opt)
    return parser
  }
  // Reports `message` through error() only when the parser is in strict mode.
  // Throws when the first argument is not a SAXParser instance.
  function strictFail(parser, message) {
    var isParser = typeof parser === 'object' && parser instanceof SAXParser
    if (!isParser) {
      throw new Error('bad call to strictFail')
    }
    if (!parser.strict) {
      return
    }
    error(parser, message)
  }
  // Starts a new tag from the buffered tag name and emits 'onopentagstart'.
  function newTag(parser) {
    if (!parser.strict) {
      // loose mode folds the tag name to a single case
      parser.tagName = parser.tagName[parser.looseCase]()
    }

    var tag = { name: parser.tagName, attributes: {} }
    parser.tag = tag

    if (parser.opt.xmlns) {
      // Start from the enclosing scope's namespaces; this is overridden if
      // the tag contains an xmlns="foo" or xmlns:foo="bar" attribute.
      var enclosing = parser.tags[parser.tags.length - 1] || parser
      tag.ns = enclosing.ns
    }

    parser.attribList.length = 0
    emitNode(parser, 'onopentagstart', tag)
  }
  /**
   * Splits a qualified name into its namespace prefix and local part.
   *
   * The name is split at the FIRST colon only, so anything after a second
   * colon stays in the local part rather than being silently discarded.
   *
   * @param {string} name         tag or attribute name, e.g. 'svg:rect'
   * @param {boolean} [attribute] true when `name` is an attribute name
   * @returns {{prefix: string, local: string}} prefix is '' when there is
   *   no colon
   */
  function qname(name, attribute) {
    // <x "xmlns"="http://foo">: a bare xmlns attribute declares the default
    // namespace, so it is reported as prefix 'xmlns' with an empty local name.
    if (attribute && name === 'xmlns') {
      return { prefix: 'xmlns', local: '' }
    }

    var colon = name.indexOf(':')
    if (colon < 0) {
      return { prefix: '', local: name }
    }
    return {
      prefix: name.slice(0, colon),
      local: name.slice(colon + 1),
    }
  }
  /**
   * Commits the attribute currently held in parser.attribName /
   * parser.attribValue to the tag being opened, then clears both buffers.
   *
   * A repeated attribute name on the same tag is dropped; the first
   * occurrence wins. Without xmlns the attribute is stored and emitted at
   * once. With xmlns it is queued on parser.attribList so that namespace
   * bindings declared later on the same tag can still take effect.
   *
   * @param {SAXParser} parser
   */
  function attrib(parser) {
    if (!parser.strict) {
      parser.attribName = parser.attribName[parser.looseCase]()
    }

    // Duplicate detection:
    //  - hasOwnProperty is taken from Object.prototype because a document
    //    may contain an attribute literally named "hasOwnProperty", which
    //    would shadow the method on the attributes object;
    //  - attribList holds [name, value] pairs, so names are compared
    //    against each pair's first element.
    var alreadySeen =
      Object.prototype.hasOwnProperty.call(
        parser.tag.attributes,
        parser.attribName
      ) ||
      parser.attribList.some(function (pair) {
        return pair[0] === parser.attribName
      })
    if (alreadySeen) {
      parser.attribName = parser.attribValue = ''
      return
    }

    if (parser.opt.xmlns) {
      var qn = qname(parser.attribName, true)
      var prefix = qn.prefix
      var local = qn.local

      if (prefix === 'xmlns') {
        // namespace binding attribute. push the binding into scope
        if (local === 'xml' && parser.attribValue !== XML_NAMESPACE) {
          strictFail(
            parser,
            'xml: prefix must be bound to ' +
              XML_NAMESPACE +
              '\n' +
              'Actual: ' +
              parser.attribValue
          )
        } else if (
          local === 'xmlns' &&
          parser.attribValue !== XMLNS_NAMESPACE
        ) {
          strictFail(
            parser,
            'xmlns: prefix must be bound to ' +
              XMLNS_NAMESPACE +
              '\n' +
              'Actual: ' +
              parser.attribValue
          )
        } else {
          var tag = parser.tag
          var parent = parser.tags[parser.tags.length - 1] || parser
          // Give the tag its own namespace object the first time it
          // declares a binding; until then it shares its parent's.
          if (tag.ns === parent.ns) {
            tag.ns = Object.create(parent.ns)
          }
          tag.ns[local] = parser.attribValue
        }
      }

      // defer onattribute events until all attributes have been seen
      // so any new bindings can take effect. preserve attribute order
      // so deferred events can be emitted in document order
      parser.attribList.push([parser.attribName, parser.attribValue])
    } else {
      // in non-xmlns mode, we can emit the event right away
      parser.tag.attributes[parser.attribName] = parser.attribValue
      emitNode(parser, 'onattribute', {
        name: parser.attribName,
        value: parser.attribValue,
      })
    }

    parser.attribName = parser.attribValue = ''
  }
  /**
   * Complete the open tag held in `parser.tag`: resolve namespace data
   * (only with `opt.xmlns`), flush the queued attributes, push the tag on
   * `parser.tags`, emit `onopentag` and pick the next parser state.
   *
   * @param {object} parser parser state object
   * @param {boolean} [selfClosing] truthy when the tag ended with "/>"; in
   *   that case `parser.tag`, `parser.tagName` and `parser.state` are left
   *   untouched by this function.
   */
  function openTag(parser, selfClosing) {
    if (parser.opt.xmlns) {
      var tag = parser.tag

      // Split the element name and look its prefix up in the tag's scope.
      var tagQName = qname(parser.tagName)
      tag.prefix = tagQName.prefix
      tag.local = tagQName.local
      tag.uri = tag.ns[tagQName.prefix] || ''

      if (tag.prefix && !tag.uri) {
        strictFail(
          parser,
          'Unbound namespace prefix: ' + JSON.stringify(parser.tagName)
        )
        // fall back to using the prefix itself as the uri
        tag.uri = tagQName.prefix
      }

      // A scope object different from the parent's means this tag declared
      // bindings of its own: announce each own binding.
      var enclosing = parser.tags[parser.tags.length - 1] || parser
      if (tag.ns && enclosing.ns !== tag.ns) {
        var ownPrefixes = Object.keys(tag.ns)
        for (var k = 0; k < ownPrefixes.length; k++) {
          emitNode(parser, 'onopennamespace', {
            prefix: ownPrefixes[k],
            uri: tag.ns[ownPrefixes[k]],
          })
        }
      }

      // Emit the onattribute events that were deferred while the tag was
      // being read. The default namespace is not applied to attributes:
      // http://www.w3.org/TR/REC-xml-names/#defaulting
      var pending = parser.attribList
      for (var idx = 0, count = pending.length; idx < count; idx++) {
        var attrName = pending[idx][0]
        var attrValue = pending[idx][1]
        var attrQName = qname(attrName, true)
        var attrPrefix = attrQName.prefix
        var attrUri = ''
        if (attrPrefix !== '') {
          attrUri = tag.ns[attrPrefix] || ''
        }
        var attribute = {
          name: attrName,
          value: attrValue,
          prefix: attrPrefix,
          local: attrQName.local,
          uri: attrUri,
        }

        // A prefixed attribute (other than xmlns) whose prefix has no
        // binding is reported, then given its prefix as uri.
        if (attrPrefix && attrPrefix !== 'xmlns' && !attrUri) {
          strictFail(
            parser,
            'Unbound namespace prefix: ' + JSON.stringify(attrPrefix)
          )
          attribute.uri = attrPrefix
        }

        parser.tag.attributes[attrName] = attribute
        emitNode(parser, 'onattribute', attribute)
      }
      parser.attribList.length = 0
    }

    parser.tag.isSelfClosing = Boolean(selfClosing)

    // process the tag
    parser.sawRoot = true
    parser.tags.push(parser.tag)
    emitNode(parser, 'onopentag', parser.tag)

    if (!selfClosing) {
      // special case for <script> in non-strict mode.
      var entersScript =
        !parser.noscript && parser.tagName.toLowerCase() === 'script'
      parser.state = entersScript ? S.SCRIPT : S.TEXT
      parser.tag = null
      parser.tagName = ''
    }

    parser.attribName = parser.attribValue = ''
    parser.attribList.length = 0
  }
  /**
   * Handle a complete closing tag whose name is buffered in `parser.tagName`.
   *
   * - An empty name is reported and the literal "</>" is appended to the
   *   text node.
   * - While script content is being collected (`parser.script` non-empty),
   *   any close tag other than "script" (compared case-insensitively, the
   *   same way the open tag is recognised) is appended to the script text;
   *   a script close tag emits `onscript` first.
   * - Otherwise the open-tag stack is searched from the top for a matching
   *   name. If none is found the tag text is appended to the text node; if
   *   one is found every tag down to and including the match is popped,
   *   emitting `onclosetag` (and, with `opt.xmlns`, `onclosenamespace` for
   *   bindings that tag introduced).
   *
   * @param {object} parser parser state object
   */
  function closeTag(parser) {
    if (!parser.tagName) {
      strictFail(parser, 'Weird empty close tag.')
      parser.textNode += '</>'
      parser.state = S.TEXT
      return
    }

    if (parser.script) {
      // Script detection on open is case-insensitive, so the close check
      // must be as well; otherwise </SCRIPT> would never end the script.
      if (parser.tagName.toLowerCase() !== 'script') {
        parser.script += '</' + parser.tagName + '>'
        parser.tagName = ''
        parser.state = S.SCRIPT
        return
      }
      emitNode(parser, 'onscript', parser.script)
      parser.script = ''
    }

    // first make sure that the closing tag actually exists.
    // <a><b></c></b></a> will close everything, otherwise.
    var t = parser.tags.length
    var tagName = parser.tagName
    if (!parser.strict) {
      tagName = tagName[parser.looseCase]()
    }
    var closeTo = tagName
    while (t--) {
      var close = parser.tags[t]
      if (close.name !== closeTo) {
        // fail the first time in strict mode
        strictFail(parser, 'Unexpected close tag')
      } else {
        break
      }
    }

    // didn't find it. we already failed for strict, so just abort.
    if (t < 0) {
      strictFail(parser, 'Unmatched closing tag: ' + parser.tagName)
      parser.textNode += '</' + parser.tagName + '>'
      parser.state = S.TEXT
      return
    }
    parser.tagName = tagName

    // Pop every tag above the match, then the match itself (index t).
    var s = parser.tags.length
    while (s-- > t) {
      var tag = (parser.tag = parser.tags.pop())
      parser.tagName = parser.tag.name
      emitNode(parser, 'onclosetag', parser.tagName)

      var parent = parser.tags[parser.tags.length - 1] || parser
      if (parser.opt.xmlns && tag.ns !== parent.ns) {
        // remove namespace bindings introduced by tag
        Object.keys(tag.ns).forEach(function (p) {
          var n = tag.ns[p]
          emitNode(parser, 'onclosenamespace', { prefix: p, uri: n })
        })
      }
    }

    // index 0 is the bottom of the stack: the root element was just closed
    if (t === 0) parser.closedRoot = true
    parser.tagName = parser.attribValue = parser.attribName = ''
    parser.attribList.length = 0
    parser.state = S.TEXT
  }
  /**
   * Resolve the entity name buffered in `parser.entity` (the text between
   * "&" and ";") to its replacement text.
   *
   * Lookup order: exact name in `parser.ENTITIES`, lower-cased name in
   * `parser.ENTITIES`, then a numeric character reference ("#123" decimal
   * or "#x7b" hexadecimal, matched after lower-casing).
   *
   * @param {object} parser parser state object
   * @returns {string} the replacement text, or the original "&name;" text
   *   (after reporting 'Invalid character entity') when the entity is
   *   unknown or the numeric reference is malformed / out of range.
   */
  function parseEntity(parser) {
    var entity = parser.entity
    var entityLC = entity.toLowerCase()
    var num
    var numStr = ''

    // The lookup deliberately follows the prototype chain, so members
    // inherited from Object.prototype ("constructor", "toString",
    // "__proto__", ...) are reachable by name. Those are functions/objects,
    // never replacement text: accept primitive string/number values only.
    function lookup(name) {
      var value = parser.ENTITIES[name]
      var kind = typeof value
      return value && (kind === 'string' || kind === 'number')
        ? value
        : undefined
    }

    var named = lookup(entity)
    if (named) {
      return named
    }
    named = lookup(entityLC)
    if (named) {
      return named
    }

    entity = entityLC
    if (entity.charAt(0) === '#') {
      if (entity.charAt(1) === 'x') {
        entity = entity.slice(2)
        num = parseInt(entity, 16)
        numStr = num.toString(16)
      } else {
        entity = entity.slice(1)
        num = parseInt(entity, 10)
        numStr = num.toString(10)
      }
    }

    // Round-trip check: after dropping leading zeros the digits must equal
    // the re-serialised number, which rejects trailing garbage that parseInt
    // would silently ignore (e.g. "12abc") and an all-zero reference.
    entity = entity.replace(/^0+/, '')
    if (
      isNaN(num) || // also true when num was never assigned (not a "#" entity)
      numStr.toLowerCase() !== entity ||
      num < 0 ||
      num > 0x10ffff
    ) {
      strictFail(parser, 'Invalid character entity')
      return '&' + parser.entity + ';'
    }

    return String.fromCodePoint(num)
  }
  /**
   * Handle one character seen before the first tag of the document.
   * "<" starts a tag, whitespace is ignored, and anything else is reported
   * and becomes the start of a text node.
   *
   * @param {object} parser parser state object
   * @param {string} c the current character
   */
  function beginWhiteSpace(parser, c) {
    if (c === '<') {
      parser.state = S.OPEN_WAKA
      parser.startTagPosition = parser.position
      return
    }

    if (isWhitespace(c)) {
      return
    }

    // have to process this as a text node.
    // weird, but happens.
    strictFail(parser, 'Non-whitespace before first tag.')
    parser.textNode = c
    parser.state = S.TEXT
  }
  /**
   * Bounds-checked character access.
   *
   * @param {string} chunk text being read
   * @param {number} i index into `chunk`
   * @returns {string} the character at `i`, or '' when `i` is not below
   *   `chunk.length`
   */
  function charAt(chunk, i) {
    return i < chunk.length ? chunk.charAt(i) : ''
  }
  /**
   * Feed a chunk of text to the parser (called with the parser as `this`).
   *
   * The chunk is consumed one character at a time by a state machine keyed
   * on `parser.state`; handlers update the parser's buffers and emit events
   * through `emitNode`.
   *
   * @param {string|object|null} chunk text to parse. `null` ends the parse
   *   (delegates to `end(parser)`); any other object is converted with
   *   `toString()`.
   * @returns {object} the parser (or whatever `error` / `end` return on the
   *   early-exit paths)
   * @throws the stored `this.error` if one is pending, or an Error when
   *   `parser.state` is not a known state.
   */
  function write(chunk) {
    var parser = this
    if (this.error) {
      throw this.error
    }
    if (parser.closed) {
      return error(
        parser,
        'Cannot write after close. Assign an onready handler.'
      )
    }
    if (chunk === null) {
      return end(parser)
    }
    if (typeof chunk === 'object') {
      chunk = chunk.toString()
    }
    var i = 0
    var c = ''
    while (true) {
      c = charAt(chunk, i++)
      parser.c = c

      // charAt yields '' past the end of the chunk
      if (!c) {
        break
      }

      if (parser.trackPosition) {
        parser.position++
        if (c === '\n') {
          parser.line++
          parser.column = 0
        } else {
          parser.column++
        }
      }

      switch (parser.state) {
        case S.BEGIN:
          parser.state = S.BEGIN_WHITESPACE
          // skip a leading byte-order mark
          if (c === '\uFEFF') {
            continue
          }
          beginWhiteSpace(parser, c)
          continue

        case S.BEGIN_WHITESPACE:
          beginWhiteSpace(parser, c)
          continue

        case S.TEXT:
          if (parser.sawRoot && !parser.closedRoot) {
            // Fast path inside the root element: scan ahead to the next
            // '<' or '&' and append the whole run in one go.
            var starti = i - 1
            while (c && c !== '<' && c !== '&') {
              c = charAt(chunk, i++)
              if (c && parser.trackPosition) {
                parser.position++
                if (c === '\n') {
                  parser.line++
                  parser.column = 0
                } else {
                  parser.column++
                }
              }
            }
            parser.textNode += chunk.substring(starti, i - 1)
          }
          if (
            c === '<' &&
            !(parser.sawRoot && parser.closedRoot && !parser.strict)
          ) {
            parser.state = S.OPEN_WAKA
            parser.startTagPosition = parser.position
          } else {
            if (
              !isWhitespace(c) &&
              (!parser.sawRoot || parser.closedRoot)
            ) {
              strictFail(parser, 'Text data outside of root node.')
            }
            if (c === '&') {
              parser.state = S.TEXT_ENTITY
            } else {
              parser.textNode += c
            }
          }
          continue

        case S.SCRIPT:
          // only non-strict
          if (c === '<') {
            parser.state = S.SCRIPT_ENDING
          } else {
            parser.script += c
          }
          continue

        case S.SCRIPT_ENDING:
          if (c === '/') {
            parser.state = S.CLOSE_TAG
          } else {
            parser.script += '<' + c
            parser.state = S.SCRIPT
          }
          continue

        case S.OPEN_WAKA:
          // either a /, ?, !, or text is coming next.
          if (c === '!') {
            parser.state = S.SGML_DECL
            parser.sgmlDecl = ''
          } else if (isWhitespace(c)) {
            // wait for it...
          } else if (isMatch(nameStart, c)) {
            parser.state = S.OPEN_TAG
            parser.tagName = c
          } else if (c === '/') {
            parser.state = S.CLOSE_TAG
            parser.tagName = ''
          } else if (c === '?') {
            parser.state = S.PROC_INST
            parser.procInstName = parser.procInstBody = ''
          } else {
            strictFail(parser, 'Unencoded <')
            // if there was some whitespace, then add that in.
            if (parser.startTagPosition + 1 < parser.position) {
              var pad = parser.position - parser.startTagPosition
              c = new Array(pad).join(' ') + c
            }
            parser.textNode += '<' + c
            parser.state = S.TEXT
          }
          continue

        case S.SGML_DECL:
          if (parser.sgmlDecl + c === '--') {
            parser.state = S.COMMENT
            parser.comment = ''
            parser.sgmlDecl = ''
            continue
          }

          // doctype is a string while a DOCTYPE is being collected and
          // `true` once one has been emitted
          if (
            parser.doctype &&
            parser.doctype !== true &&
            parser.sgmlDecl
          ) {
            parser.state = S.DOCTYPE_DTD
            parser.doctype += '<!' + parser.sgmlDecl + c
            parser.sgmlDecl = ''
          } else if ((parser.sgmlDecl + c).toUpperCase() === CDATA) {
            emitNode(parser, 'onopencdata')
            parser.state = S.CDATA
            parser.sgmlDecl = ''
            parser.cdata = ''
          } else if ((parser.sgmlDecl + c).toUpperCase() === DOCTYPE) {
            parser.state = S.DOCTYPE
            if (parser.doctype || parser.sawRoot) {
              strictFail(
                parser,
                'Inappropriately located doctype declaration'
              )
            }
            parser.doctype = ''
            parser.sgmlDecl = ''
          } else if (c === '>') {
            emitNode(parser, 'onsgmldeclaration', parser.sgmlDecl)
            parser.sgmlDecl = ''
            parser.state = S.TEXT
          } else if (isQuote(c)) {
            parser.state = S.SGML_DECL_QUOTED
            // remember the opening quote; SGML_DECL_QUOTED compares against
            // it to find the end of the quoted section
            parser.q = c
            parser.sgmlDecl += c
          } else {
            parser.sgmlDecl += c
          }
          continue

        case S.SGML_DECL_QUOTED:
          if (c === parser.q) {
            parser.state = S.SGML_DECL
            parser.q = ''
          }
          parser.sgmlDecl += c
          continue

        case S.DOCTYPE:
          if (c === '>') {
            parser.state = S.TEXT
            emitNode(parser, 'ondoctype', parser.doctype)
            parser.doctype = true // just remember that we saw it.
          } else {
            parser.doctype += c
            if (c === '[') {
              parser.state = S.DOCTYPE_DTD
            } else if (isQuote(c)) {
              parser.state = S.DOCTYPE_QUOTED
              parser.q = c
            }
          }
          continue

        case S.DOCTYPE_QUOTED:
          parser.doctype += c
          if (c === parser.q) {
            parser.q = ''
            parser.state = S.DOCTYPE
          }
          continue

        case S.DOCTYPE_DTD:
          if (c === ']') {
            parser.doctype += c
            parser.state = S.DOCTYPE
          } else if (c === '<') {
            parser.state = S.OPEN_WAKA
            parser.startTagPosition = parser.position
          } else if (isQuote(c)) {
            parser.doctype += c
            parser.state = S.DOCTYPE_DTD_QUOTED
            parser.q = c
          } else {
            parser.doctype += c
          }
          continue

        case S.DOCTYPE_DTD_QUOTED:
          parser.doctype += c
          if (c === parser.q) {
            parser.state = S.DOCTYPE_DTD
            parser.q = ''
          }
          continue

        case S.COMMENT:
          if (c === '-') {
            parser.state = S.COMMENT_ENDING
          } else {
            parser.comment += c
          }
          continue

        case S.COMMENT_ENDING:
          if (c === '-') {
            parser.state = S.COMMENT_ENDED
            parser.comment = textopts(parser.opt, parser.comment)
            if (parser.comment) {
              emitNode(parser, 'oncomment', parser.comment)
            }
            parser.comment = ''
          } else {
            parser.comment += '-' + c
            parser.state = S.COMMENT
          }
          continue

        case S.COMMENT_ENDED:
          if (c !== '>') {
            strictFail(parser, 'Malformed comment')
            // allow <!-- blah -- bloo --> in non-strict mode,
            // which is a comment of " blah -- bloo "
            parser.comment += '--' + c
            parser.state = S.COMMENT
          } else if (parser.doctype && parser.doctype !== true) {
            // the comment was inside a DOCTYPE that is still being collected
            parser.state = S.DOCTYPE_DTD
          } else {
            parser.state = S.TEXT
          }
          continue

        case S.CDATA:
          // scan ahead to the next ']' and append the run in one go
          var starti = i - 1
          while (c && c !== ']') {
            c = charAt(chunk, i++)
            if (c && parser.trackPosition) {
              parser.position++
              if (c === '\n') {
                parser.line++
                parser.column = 0
              } else {
                parser.column++
              }
            }
          }
          parser.cdata += chunk.substring(starti, i - 1)
          if (c === ']') {
            parser.state = S.CDATA_ENDING
          }
          continue

        case S.CDATA_ENDING:
          if (c === ']') {
            parser.state = S.CDATA_ENDING_2
          } else {
            parser.cdata += ']' + c
            parser.state = S.CDATA
          }
          continue

        case S.CDATA_ENDING_2:
          if (c === '>') {
            if (parser.cdata) {
              emitNode(parser, 'oncdata', parser.cdata)
            }
            emitNode(parser, 'onclosecdata')
            parser.cdata = ''
            parser.state = S.TEXT
          } else if (c === ']') {
            // "]]]": the earliest ']' is content, keep waiting for '>'
            parser.cdata += ']'
          } else {
            parser.cdata += ']]' + c
            parser.state = S.CDATA
          }
          continue

        case S.PROC_INST:
          if (c === '?') {
            parser.state = S.PROC_INST_ENDING
          } else if (isWhitespace(c)) {
            parser.state = S.PROC_INST_BODY
          } else {
            parser.procInstName += c
          }
          continue

        case S.PROC_INST_BODY:
          // skip whitespace before the body starts
          if (!parser.procInstBody && isWhitespace(c)) {
            continue
          } else if (c === '?') {
            parser.state = S.PROC_INST_ENDING
          } else {
            parser.procInstBody += c
          }
          continue

        case S.PROC_INST_ENDING:
          if (c === '>') {
            const procInstEndData = {
              name: parser.procInstName,
              body: parser.procInstBody,
            }
            validateXmlDeclarationEncoding(parser, procInstEndData)
            emitNode(parser, 'onprocessinginstruction', procInstEndData)
            parser.procInstName = parser.procInstBody = ''
            parser.state = S.TEXT
          } else {
            parser.procInstBody += '?' + c
            parser.state = S.PROC_INST_BODY
          }
          continue

        case S.OPEN_TAG:
          if (isMatch(nameBody, c)) {
            parser.tagName += c
          } else {
            newTag(parser)
            if (c === '>') {
              openTag(parser)
            } else if (c === '/') {
              parser.state = S.OPEN_TAG_SLASH
            } else {
              if (!isWhitespace(c)) {
                strictFail(parser, 'Invalid character in tag name')
              }
              parser.state = S.ATTRIB
            }
          }
          continue

        case S.OPEN_TAG_SLASH:
          if (c === '>') {
            openTag(parser, true)
            closeTag(parser)
          } else {
            strictFail(
              parser,
              'Forward-slash in opening tag not followed by >'
            )
            parser.state = S.ATTRIB
          }
          continue

        case S.ATTRIB:
          // haven't read the attribute name yet.
          if (isWhitespace(c)) {
            continue
          } else if (c === '>') {
            openTag(parser)
          } else if (c === '/') {
            parser.state = S.OPEN_TAG_SLASH
          } else if (isMatch(nameStart, c)) {
            parser.attribName = c
            parser.attribValue = ''
            parser.state = S.ATTRIB_NAME
          } else {
            strictFail(parser, 'Invalid attribute name')
          }
          continue

        case S.ATTRIB_NAME:
          if (c === '=') {
            parser.state = S.ATTRIB_VALUE
          } else if (c === '>') {
            strictFail(parser, 'Attribute without value')
            // valueless attribute: use the name as the value
            parser.attribValue = parser.attribName
            attrib(parser)
            openTag(parser)
          } else if (isWhitespace(c)) {
            parser.state = S.ATTRIB_NAME_SAW_WHITE
          } else if (isMatch(nameBody, c)) {
            parser.attribName += c
          } else {
            strictFail(parser, 'Invalid attribute name')
          }
          continue

        case S.ATTRIB_NAME_SAW_WHITE:
          if (c === '=') {
            parser.state = S.ATTRIB_VALUE
          } else if (isWhitespace(c)) {
            continue
          } else {
            strictFail(parser, 'Attribute without value')
            parser.tag.attributes[parser.attribName] = ''
            parser.attribValue = ''
            emitNode(parser, 'onattribute', {
              name: parser.attribName,
              value: '',
            })
            parser.attribName = ''
            if (c === '>') {
              openTag(parser)
            } else if (isMatch(nameStart, c)) {
              parser.attribName = c
              parser.state = S.ATTRIB_NAME
            } else {
              strictFail(parser, 'Invalid attribute name')
              parser.state = S.ATTRIB
            }
          }
          continue

        case S.ATTRIB_VALUE:
          if (isWhitespace(c)) {
            continue
          } else if (isQuote(c)) {
            parser.q = c
            parser.state = S.ATTRIB_VALUE_QUOTED
          } else {
            if (!parser.opt.unquotedAttributeValues) {
              error(parser, 'Unquoted attribute value')
            }
            parser.state = S.ATTRIB_VALUE_UNQUOTED
            parser.attribValue = c
          }
          continue

        case S.ATTRIB_VALUE_QUOTED:
          if (c !== parser.q) {
            if (c === '&') {
              parser.state = S.ATTRIB_VALUE_ENTITY_Q
            } else {
              parser.attribValue += c
            }
            continue
          }
          attrib(parser)
          parser.q = ''
          parser.state = S.ATTRIB_VALUE_CLOSED
          continue

        case S.ATTRIB_VALUE_CLOSED:
          if (isWhitespace(c)) {
            parser.state = S.ATTRIB
          } else if (c === '>') {
            openTag(parser)
          } else if (c === '/') {
            parser.state = S.OPEN_TAG_SLASH
          } else if (isMatch(nameStart, c)) {
            strictFail(parser, 'No whitespace between attributes')
            parser.attribName = c
            parser.attribValue = ''
            parser.state = S.ATTRIB_NAME
          } else {
            strictFail(parser, 'Invalid attribute name')
          }
          continue

        case S.ATTRIB_VALUE_UNQUOTED:
          if (!isAttribEnd(c)) {
            if (c === '&') {
              parser.state = S.ATTRIB_VALUE_ENTITY_U
            } else {
              parser.attribValue += c
            }
            continue
          }
          attrib(parser)
          if (c === '>') {
            openTag(parser)
          } else {
            parser.state = S.ATTRIB
          }
          continue

        case S.CLOSE_TAG:
          if (!parser.tagName) {
            if (isWhitespace(c)) {
              continue
            } else if (notMatch(nameStart, c)) {
              if (parser.script) {
                parser.script += '</' + c
                parser.state = S.SCRIPT
              } else {
                strictFail(parser, 'Invalid tagname in closing tag.')
              }
            } else {
              parser.tagName = c
            }
          } else if (c === '>') {
            closeTag(parser)
          } else if (isMatch(nameBody, c)) {
            parser.tagName += c
          } else if (parser.script) {
            parser.script += '</' + parser.tagName + c
            parser.tagName = ''
            parser.state = S.SCRIPT
          } else {
            if (!isWhitespace(c)) {
              strictFail(parser, 'Invalid tagname in closing tag')
            }
            parser.state = S.CLOSE_TAG_SAW_WHITE
          }
          continue

        case S.CLOSE_TAG_SAW_WHITE:
          if (isWhitespace(c)) {
            continue
          }
          if (c === '>') {
            closeTag(parser)
          } else {
            strictFail(parser, 'Invalid characters in closing tag')
          }
          continue

        case S.TEXT_ENTITY:
        case S.ATTRIB_VALUE_ENTITY_Q:
        case S.ATTRIB_VALUE_ENTITY_U:
          // The three entity states share one handler; they differ only in
          // the state to return to and the buffer that receives the text.
          var returnState
          var buffer
          switch (parser.state) {
            case S.TEXT_ENTITY:
              returnState = S.TEXT
              buffer = 'textNode'
              break

            case S.ATTRIB_VALUE_ENTITY_Q:
              returnState = S.ATTRIB_VALUE_QUOTED
              buffer = 'attribValue'
              break

            case S.ATTRIB_VALUE_ENTITY_U:
              returnState = S.ATTRIB_VALUE_UNQUOTED
              buffer = 'attribValue'
              break
          }

          if (c === ';') {
            var parsedEntity = parseEntity(parser)
            if (
              parser.opt.unparsedEntities &&
              !Object.values(sax.XML_ENTITIES).includes(parsedEntity)
            ) {
              // The replacement text is fed back through write(); the
              // count and depth limits bound that recursion.
              if ((parser.entityCount += 1) > parser.opt.maxEntityCount) {
                error(
                  parser,
                  'Parsed entity count exceeds max entity count'
                )
              }

              if ((parser.entityDepth += 1) > parser.opt.maxEntityDepth) {
                error(
                  parser,
                  'Parsed entity depth exceeds max entity depth'
                )
              }

              parser.entity = ''
              parser.state = returnState
              parser.write(parsedEntity)
              parser.entityDepth -= 1
            } else {
              parser[buffer] += parsedEntity
              parser.entity = ''
              parser.state = returnState
            }
          } else if (
            isMatch(parser.entity.length ? entityBody : entityStart, c)
          ) {
            parser.entity += c
          } else {
            strictFail(parser, 'Invalid character in entity name')
            parser[buffer] += '&' + parser.entity + c
            parser.entity = ''
            parser.state = returnState
          }

          continue

        default: /* istanbul ignore next */ {
          // Error takes the message as its first argument
          throw new Error('Unknown state: ' + parser.state)
        }
      }
    } // while

    if (parser.position >= parser.bufferCheckPosition) {
      checkBufferLength(parser)
    }
    return parser
  }
  /*! http://mths.be/fromcodepoint v0.1.0 by @mathias */
  /* istanbul ignore next */
  // Install String.fromCodePoint only when the runtime does not provide it.
  if (!String.fromCodePoint) {
    ;(function () {
      var charsFromUnits = String.fromCharCode
      var roundDown = Math.floor

      // Builds a string from any number of code points, throwing a
      // RangeError for a value that is not an integer in [0, 0x10ffff].
      var fromCodePoint = function () {
        // flush the pending code units once more than this many are queued
        var FLUSH_LIMIT = 0x4000
        var pending = []
        var total = arguments.length
        if (total === 0) {
          return ''
        }

        var out = ''
        for (var pos = 0; pos < total; pos++) {
          var cp = Number(arguments[pos])
          var isValid =
            isFinite(cp) && // rejects `NaN`, `+Infinity`, and `-Infinity`
            cp >= 0 &&
            cp <= 0x10ffff &&
            roundDown(cp) === cp // must be an integer
          if (!isValid) {
            throw RangeError('Invalid code point: ' + cp)
          }

          if (cp > 0xffff) {
            // Astral code point; split in surrogate halves
            // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
            var offset = cp - 0x10000
            var high = (offset >> 10) + 0xd800
            var low = (offset % 0x400) + 0xdc00
            pending.push(high, low)
          } else {
            // BMP code point
            pending.push(cp)
          }

          var isLast = pos + 1 === total
          if (isLast || pending.length > FLUSH_LIMIT) {
            out += charsFromUnits.apply(null, pending)
            pending.length = 0
          }
        }
        return out
      }

      /* istanbul ignore next */
      if (Object.defineProperty) {
        Object.defineProperty(String, 'fromCodePoint', {
          value: fromCodePoint,
          configurable: true,
          writable: true,
        })
      } else {
        String.fromCodePoint = fromCodePoint
      }
    })()
  }
  1696. })(typeof exports === 'undefined' ? (this.sax = {}) : exports)