htmlparser.js 3.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  // ---------------------------------------------------------------------------
  // Patterns and lookup tables shared by HTMLParser.
  // ---------------------------------------------------------------------------

  // Opening tag at the start of the input.
  // Captures: 1 = tag name, 2 = raw attribute text, 3 = "/" when self-closed.
  var startTag = /^<([-A-Za-z0-9_]+)((?:\s+[a-zA-Z_:][-a-zA-Z0-9_:.]*(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)>/;

  // Closing tag at the start of the input. Captures: 1 = tag name.
  var endTag = /^<\/([-A-Za-z0-9_]+)[^>]*>/;

  // One attribute inside a tag (global, so String#replace visits each one).
  // Captures: 1 = name, 2 = double-quoted value, 3 = single-quoted value,
  // 4 = unquoted value.
  var attr = /([a-zA-Z_:][-a-zA-Z0-9_:.]*)(?:\s*=\s*(?:(?:"((?:\\.|[^"])*)")|(?:'((?:\\.|[^'])*)')|([^>\s]+)))?/g;

  // Elements reported as unary and never pushed on the open-element stack.
  var empty = makeMap("area,base,basefont,br,col,frame,hr,img,input,link,meta,param,embed,command,keygen,source,track,wbr");

  // Opening one of these first closes any inline elements on top of the stack.
  var block = makeMap("a,address,code,article,applet,aside,audio,blockquote,button,canvas,center,dd,del,dir,div,dl,dt,fieldset,figcaption,figure,footer,form,frameset,h1,h2,h3,h4,h5,h6,header,hgroup,hr,iframe,ins,isindex,li,map,menu,noframes,noscript,object,ol,output,p,pre,section,script,table,tbody,td,tfoot,th,thead,tr,ul,video");

  // Elements that get closed automatically when a block element opens.
  var inline = makeMap("abbr,acronym,applet,b,basefont,bdo,big,br,button,cite,del,dfn,em,font,i,iframe,img,input,ins,kbd,label,map,object,q,s,samp,script,select,small,span,strike,strong,sub,sup,textarea,tt,u,var");

  // Elements whose end tag is implied when the same element opens again.
  // "option" is the real HTML element; the historical "options" key is kept
  // only so existing behaviour for that (non-standard) name does not change.
  var closeSelf = makeMap("colgroup,dd,dt,li,option,options,p,td,tfoot,th,thead,tr");

  // Attributes that, when written without a value, take their own name as value.
  var fillAttrs = makeMap("checked,compact,declare,defer,disabled,ismap,multiple,nohref,noresize,noshade,nowrap,readonly,selected");

  // Elements whose content is passed through as raw text up to their end tag.
  var special = makeMap("wxxxcode-style,script,style,view,scroll-view,block");
  /**
   * Walks an HTML string from left to right and reports what it finds through
   * the callbacks on `handler`.
   *
   * Uses the tables declared next to it in this file: the `startTag`, `endTag`
   * and `attr` patterns and the `empty`, `block`, `inline`, `closeSelf`,
   * `fillAttrs` and `special` lookup maps.
   *
   * @param {string} html - Markup to parse.
   * @param {Object} handler - Callback container; every callback is optional.
   * @param {Function} [handler.start] - Called as start(tagName, attrs, unary).
   *   `tagName` is lower-cased, `attrs` is an array of
   *   `{ name, value, escaped }`, and `unary` is true for elements listed in
   *   `empty` or written as `<x/>`.
   * @param {Function} [handler.end] - Called as end(tagName) for each element
   *   popped off the open-element stack.
   * @param {Function} [handler.chars] - Called as chars(text) for text runs and
   *   for the raw content of `special` elements.
   * @param {Function} [handler.comment] - Called as comment(text) with the text
   *   between `<!--` and `-->`.
   * @throws {string} "Parse Error: " followed by the remaining input when a
   *   pass over the input consumes nothing (for example a `special` element
   *   that has no closing tag). A string is thrown, as before, so existing
   *   callers' catch logic keeps working.
   */
  function HTMLParser(html, handler) {
    var stack = []; // names of the currently open elements, innermost last
    var last = html; // input as it was at the end of the previous pass
    var index;
    var chars;
    var match;
    var text;

    stack.last = function () {
      return this[this.length - 1];
    };

    while (html) {
      chars = true;

      if (stack.last() && special[stack.last()]) {
        // Raw-text element: everything up to its closing tag is character
        // data. Stack entries are lower-cased, so the closing tag has to be
        // matched case-insensitively or "<SCRIPT>..</SCRIPT>" never terminates.
        html = html.replace(
          new RegExp("([\\s\\S]*?)</" + stack.last() + "[^>]*>", "i"),
          function (all, raw) {
            // Unwrap comment / CDATA markers but keep what they contain.
            raw = raw.replace(/<!--([\s\S]*?)-->|<!\[CDATA\[([\s\S]*?)]]>/g, "$1$2");
            if (handler.chars) {
              handler.chars(raw);
            }
            return "";
          }
        );
        parseEndTag("", stack.last());
      } else {
        if (html.indexOf("<!--") === 0) {
          index = html.indexOf("-->");
          if (index >= 0) {
            if (handler.comment) {
              handler.comment(html.substring(4, index));
            }
            html = html.substring(index + 3);
            chars = false;
          }
        } else if (html.indexOf("</") === 0) {
          match = html.match(endTag);
          if (match) {
            html = html.substring(match[0].length);
            // replace() is used only to hand the capture groups to the callback.
            match[0].replace(endTag, parseEndTag);
            chars = false;
          }
        } else if (html.indexOf("<") === 0) {
          match = html.match(startTag);
          if (match) {
            html = html.substring(match[0].length);
            match[0].replace(startTag, parseStartTag);
            chars = false;
          }
        }

        if (chars) {
          index = html.indexOf("<");
          text = "";
          // A leading "<" that opened nothing recognisable is literal text;
          // swallowing it here guarantees this branch always makes progress.
          while (index === 0) {
            text += "<";
            html = html.substring(1);
            index = html.indexOf("<");
          }
          if (index < 0) {
            text += html;
            html = "";
          } else {
            text += html.substring(0, index);
            html = html.substring(index);
          }
          if (handler.chars) {
            handler.chars(text);
          }
        }
      }

      // Nothing was consumed during this pass: bail out instead of spinning.
      if (html === last) {
        throw "Parse Error: " + html;
      }
      last = html;
    }

    // Close whatever is still open once the input is exhausted.
    parseEndTag();

    /**
     * Handles one opening tag: applies the implied-close rules, updates the
     * stack and reports the element together with its attributes.
     */
    function parseStartTag(tag, tagName, rest, unary) {
      var attrs;

      tagName = tagName.toLowerCase();

      // A block element implicitly closes inline elements left open before it.
      if (block[tagName]) {
        while (stack.last() && inline[stack.last()]) {
          parseEndTag("", stack.last());
        }
      }

      // e.g. a second <li> closes the <li> that is still open.
      if (closeSelf[tagName] && stack.last() === tagName) {
        parseEndTag("", tagName);
      }

      unary = empty[tagName] || !!unary;
      if (!unary) {
        stack.push(tagName);
      }

      if (handler.start) {
        attrs = [];
        rest.replace(attr, function (all, name, doubleQuoted, singleQuoted, bare) {
          // First non-empty capture wins; a valueless boolean attribute
          // (listed in fillAttrs) falls back to its own name.
          var value = doubleQuoted || singleQuoted || bare || (fillAttrs[name] ? name : "");
          attrs.push({
            name: name,
            value: value,
            escaped: value.replace(/(^|[^\\])"/g, '$1\\"')
          });
        });
        handler.start(tagName, attrs, unary);
      }
    }

    /**
     * Closes `tagName` and everything opened after it. Called without a tag
     * name it closes every open element. An end tag with no matching open
     * element is ignored.
     */
    function parseEndTag(tag, tagName) {
      var pos;
      var i;

      if (tagName) {
        tagName = tagName.toLowerCase();
        // Find the closest open element of the same name.
        for (pos = stack.length - 1; pos >= 0; pos--) {
          if (stack[pos] === tagName) {
            break;
          }
        }
      } else {
        pos = 0;
      }

      if (pos >= 0) {
        for (i = stack.length - 1; i >= pos; i--) {
          if (handler.end) {
            handler.end(stack[i]);
          }
        }
        stack.length = pos;
      }
    }
  }
  /**
   * Turns a comma-separated list into a lookup table whose keys are the list
   * items and whose values are all `true`.
   *
   * The table has no prototype, so a name that was not listed can never look
   * truthy because of an inherited member (with a plain `{}`, looking up
   * "constructor" or "__proto__" would succeed).
   *
   * @param {string} e - Comma-separated keys, e.g. "br,hr,img".
   * @returns {Object} Prototype-less object mapping each key to `true`.
   */
  function makeMap(e) {
    var map = Object.create(null);
    var keys = e.split(",");
    for (var i = 0; i < keys.length; i++) {
      map[keys[i]] = true;
    }
    return map;
  }
  52. module.exports = HTMLParser;