HtmlToMarkdown.js 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913
  1. /**
  2. * https://unpkg.com/turndown@5.0.3/dist/turndown.js
  3. * https://github.com/domchristie/turndown
  4. * @type {TurndownService}
  5. */
  6. var TurndownService = (function () {
  7. 'use strict';
  /**
   * Copies the own enumerable properties of each source object onto
   * `destination`. Sources are applied left to right, so later ones win.
   *
   * @param {Object} destination The object that receives the properties
   * @param {...Object} sources Objects to copy from (read via `arguments`)
   * @returns {Object} The same `destination` object
   */
  function extend (destination) {
    // Borrow the method from Object.prototype: a source created with
    // Object.create(null), or one that shadows `hasOwnProperty`, would
    // otherwise throw or give a wrong answer.
    var hasOwn = Object.prototype.hasOwnProperty;
    for (var i = 1; i < arguments.length; i++) {
      var source = arguments[i];
      for (var key in source) {
        if (hasOwn.call(source, key)) destination[key] = source[key];
      }
    }
    return destination
  }
  /**
   * Builds a string made of `count` copies of `character`.
   *
   * @param {String} character The text to repeat
   * @param {Number} count How many copies to produce
   * @returns {String} The repeated text
   */
  function repeat (character, count) {
    // An array with count + 1 empty slots has exactly `count` separators.
    var slots = new Array(count + 1);
    return slots.join(character)
  }
  20. var blockElements = [
  21. 'address', 'article', 'aside', 'audio', 'blockquote', 'body', 'canvas',
  22. 'center', 'dd', 'dir', 'div', 'dl', 'dt', 'fieldset', 'figcaption',
  23. 'figure', 'footer', 'form', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
  24. 'header', 'hgroup', 'hr', 'html', 'isindex', 'li', 'main', 'menu', 'nav',
  25. 'noframes', 'noscript', 'ol', 'output', 'p', 'pre', 'section', 'table',
  26. 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'ul'
  27. ];
  28. function isBlock (node) {
  29. return blockElements.indexOf(node.nodeName.toLowerCase()) !== -1
  30. }
  // Lower-case tag names of elements that cannot have content.
  var voidElements = [
    'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img',
    'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr'
  ];

  /**
   * Tells whether a node's tag name is in the void-element list.
   *
   * @param {Node} node The node to test (only `nodeName` is read)
   * @returns {Boolean} true when the node is a void element
   */
  function isVoid (node) {
    var tag = node.nodeName.toLowerCase();
    return voidElements.indexOf(tag) >= 0
  }

  // Comma-separated selector list matching any void element.
  var voidSelector = voidElements.join(',');

  /**
   * Looks for a void element among the node's descendants.
   *
   * @param {Node} node The node to search
   * @returns {*} The first matching element, `null` when none is found, or
   * the falsy `querySelector` value when the node cannot be queried
   */
  function hasVoid (node) {
    var query = node.querySelector;
    if (!query) return query
    return node.querySelector(voidSelector)
  }
  // Registry of the built-in conversion rules, keyed by rule name.
  var rules = {};

  // Paragraphs: surround the converted content with blank lines.
  rules.paragraph = {
    filter: 'p',
    replacement: function (content) {
      var gap = '\n\n';
      return gap + content + gap
    }
  };
  // Line breaks: emit the configured `br` marker followed by a newline.
  rules.lineBreak = {
    filter: 'br',
    replacement: function (content, node, options) {
      var marker = options.br;
      return marker + '\n'
    }
  };
  // Headings: setext underlines for levels 1-2 when requested, ATX hashes otherwise.
  rules.heading = {
    filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'],
    replacement: function (content, node, options) {
      // The level is the digit in the tag name (H1..H6).
      var level = Number(node.nodeName.charAt(1));
      var useSetext = options.headingStyle === 'setext' && level < 3;

      if (!useSetext) {
        return '\n\n' + repeat('#', level) + ' ' + content + '\n\n'
      }

      // The underline is as long as the heading text itself.
      var underline = repeat(level === 1 ? '=' : '-', content.length);
      return '\n\n' + content + '\n' + underline + '\n\n'
    }
  };
  // Block quotes: trim surrounding newlines, then prefix every line with "> ".
  rules.blockquote = {
    filter: 'blockquote',
    replacement: function (content) {
      var trimmed = content.replace(/^\n+|\n+$/g, '');
      var quoted = trimmed.replace(/^/gm, '> ');
      return '\n\n' + quoted + '\n\n'
    }
  };
  // Lists: a list that is the last element of an <li> is attached with a
  // single newline; any other list is set off by blank lines.
  rules.list = {
    filter: ['ul', 'ol'],
    replacement: function (content, node) {
      var owner = node.parentNode;
      var closesListItem = owner.nodeName === 'LI' && owner.lastElementChild === node;
      return closesListItem ? '\n' + content : '\n\n' + content + '\n\n'
    }
  };
  // List items: prefix the content with a bullet or ordinal marker and indent
  // continuation lines so that nested blocks remain part of the item.
  rules.listItem = {
    filter: 'li',
    replacement: function (content, node, options) {
      content = content
        .replace(/^\n+/, '') // remove leading newlines
        .replace(/\n+$/, '\n') // replace trailing newlines with just a single one
        // Indent by four spaces. A one-space indent is narrower than the
        // content column of "* " or "1. ", so nested lists and follow-on
        // paragraphs would be parsed as siblings of the item instead.
        .replace(/\n/gm, '\n    ');
      var prefix = options.bulletListMarker + ' ';
      var parent = node.parentNode;
      if (parent.nodeName === 'OL') {
        // Ordered items count from the list's `start` attribute when present.
        var start = parent.getAttribute('start');
        var index = Array.prototype.indexOf.call(parent.children, node);
        prefix = (start ? Number(start) + index : index + 1) + '. ';
      }
      return (
        // Separate from the next sibling unless the content already ends a line.
        prefix + content + (node.nextSibling && !/\n$/.test(content) ? '\n' : '')
      )
    }
  };
  // Indented code blocks: <pre><code> rendered as a four-space indented block
  // when `codeBlockStyle` is 'indented'.
  rules.indentedCodeBlock = {
    filter: function (node, options) {
      return (
        options.codeBlockStyle === 'indented' &&
        node.nodeName === 'PRE' &&
        node.firstChild &&
        node.firstChild.nodeName === 'CODE'
      )
    },
    replacement: function (content, node, options) {
      // Markdown only recognises an indented code block when every line is
      // indented by at least four spaces; a single space yields a paragraph.
      // The raw text of the <code> child is used, not the converted content.
      return (
        '\n\n    ' +
        node.firstChild.textContent.replace(/\n/g, '\n    ') +
        '\n\n'
      )
    }
  };
  // Fenced code blocks: <pre><code> wrapped in the configured fence when
  // `codeBlockStyle` is 'fenced'.
  rules.fencedCodeBlock = {
    filter: function (node, options) {
      var child = node.firstChild;
      return (
        options.codeBlockStyle === 'fenced' &&
        node.nodeName === 'PRE' &&
        child &&
        child.nodeName === 'CODE'
      )
    },
    replacement: function (content, node, options) {
      var code = node.firstChild;
      // A `language-xxx` class on the <code> element becomes the info string.
      var found = (code.className || '').match(/language-(\S+)/);
      var language = found ? found[1] : '';
      var fence = options.fence;
      return '\n\n' + fence + language + '\n' + code.textContent + '\n' + fence + '\n\n'
    }
  };
  // Horizontal rules: the configured `hr` marker on its own block.
  rules.horizontalRule = {
    filter: 'hr',
    replacement: function (content, node, options) {
      var gap = '\n\n';
      return gap + options.hr + gap
    }
  };
  // Inline links: [text](href "title") for anchors that have an href, when
  // `linkStyle` is 'inlined'.
  rules.inlineLink = {
    filter: function (node, options) {
      if (options.linkStyle !== 'inlined') return false
      if (node.nodeName !== 'A') return false
      return node.getAttribute('href')
    },
    replacement: function (content, node) {
      var target = node.getAttribute('href');
      var titlePart = node.title ? ' "' + node.title + '"' : '';
      return '[' + content + '](' + target + titlePart + ')'
    }
  };
  // Reference links: the link text stays inline while the destination is
  // collected in `references` and emitted later by `append`.
  rules.referenceLink = {
    filter: function (node, options) {
      if (options.linkStyle !== 'referenced') return false
      if (node.nodeName !== 'A') return false
      return node.getAttribute('href')
    },
    replacement: function (content, node, options) {
      var href = node.getAttribute('href');
      var title = node.title ? ' "' + node.title + '"' : '';
      var label = '[' + content + ']';
      var style = options.linkReferenceStyle;
      var replacement;
      var reference;

      if (style === 'collapsed') {
        replacement = label + '[]';
        reference = label + ': ' + href + title;
      } else if (style === 'shortcut') {
        replacement = label;
        reference = label + ': ' + href + title;
      } else {
        // Any other style: number the reference by its position in the list.
        var id = this.references.length + 1;
        replacement = label + '[' + id + ']';
        reference = '[' + id + ']: ' + href + title;
      }

      this.references.push(reference);
      return replacement
    },
    // Definitions gathered since the last `append` call.
    references: [],
    append: function (options) {
      if (!this.references.length) return ''
      var block = '\n\n' + this.references.join('\n') + '\n\n';
      this.references = []; // Reset references
      return block
    }
  };
  // Emphasis: wrap non-blank content in the configured delimiter.
  rules.emphasis = {
    filter: ['em', 'i'],
    replacement: function (content, node, options) {
      var mark = options.emDelimiter;
      return content.trim() ? mark + content + mark : ''
    }
  };
  // Strong emphasis: wrap non-blank content in the configured delimiter.
  rules.strong = {
    filter: ['strong', 'b'],
    replacement: function (content, node, options) {
      var mark = options.strongDelimiter;
      return content.trim() ? mark + content + mark : ''
    }
  };
  // Inline code: every <code> element except one that is the sole child of a
  // <pre>, which the code-block rules handle.
  rules.code = {
    filter: function (node) {
      var isOnlyChild = !(node.previousSibling || node.nextSibling);
      var insidePre = node.parentNode.nodeName === 'PRE';
      return node.nodeName === 'CODE' && !(insidePre && isOnlyChild)
    },
    replacement: function (content) {
      if (!content.trim()) return ''

      var fence = '`';
      var padStart = '';
      var padEnd = '';
      var backtickRuns = content.match(/`+/gm);

      if (backtickRuns) {
        // Keep a backtick at either edge from merging with the delimiter.
        if (content.charAt(0) === '`') padStart = ' ';
        if (content.charAt(content.length - 1) === '`') padEnd = ' ';
        // Lengthen the delimiter until it differs from every run in the content.
        while (backtickRuns.indexOf(fence) !== -1) fence = fence + '`';
      }

      return fence + padStart + content + padEnd + fence
    }
  };
  // Images: ![alt](src "title"); an image without a src produces nothing.
  rules.image = {
    filter: 'img',
    replacement: function (content, node) {
      var alt = node.alt || '';
      var src = node.getAttribute('src') || '';
      if (!src) return ''

      var title = node.title || '';
      var titlePart = title ? ' "' + title + '"' : '';
      return '![' + alt + '](' + src + titlePart + ')'
    }
  };
  /**
   * Holds the rule collections used to convert HTML to Markdown: the ordered
   * conversion rules plus the "keep" and "remove" lists.
   *
   * @param {Object} options Converter options; reads `rules`,
   * `blankReplacement`, `keepReplacement` and `defaultReplacement`
   */
  function Rules (options) {
    this.options = options;
    this._keep = [];
    this._remove = [];
    this.blankRule = { replacement: options.blankReplacement };
    this.keepReplacement = options.keepReplacement;
    this.defaultRule = { replacement: options.defaultReplacement };

    // Flatten the keyed rule registry into an ordered list.
    this.array = [];
    var registry = options.rules;
    for (var name in registry) {
      this.array.push(registry[name]);
    }
  }

  Rules.prototype = {
    // Puts `rule` at the front of the list; `key` is not used here.
    add: function (key, rule) {
      this.array.unshift(rule);
    },

    // Registers a filter whose matches use the keep replacement.
    keep: function (filter) {
      var entry = { filter: filter, replacement: this.keepReplacement };
      this._keep.unshift(entry);
    },

    // Registers a filter whose matches are replaced by an empty string.
    remove: function (filter) {
      var entry = {
        filter: filter,
        replacement: function () {
          return ''
        }
      };
      this._remove.unshift(entry);
    },

    // Picks the rule for `node`: blank rule first, then conversion rules,
    // keep rules, remove rules, and finally the default rule.
    forNode: function (node) {
      if (node.isBlank) return this.blankRule

      var pools = [this.array, this._keep, this._remove];
      for (var i = 0; i < pools.length; i++) {
        var match = findRule(pools[i], node, this.options);
        if (match) return match
      }
      return this.defaultRule
    },

    // Calls fn(rule, index) for every conversion rule, in order.
    forEach: function (fn) {
      var index = 0;
      while (index < this.array.length) {
        fn(this.array[index], index);
        index++;
      }
    }
  };
  /**
   * Returns the first rule in `rules` whose filter matches `node`.
   *
   * @param {Array} rules Candidate rules, in priority order
   * @param {Node} node The node being converted
   * @param {Object} options Converter options passed on to the filter
   * @returns {Object|undefined} The matching rule, or undefined when none matches
   */
  function findRule (rules, node, options) {
    var index = 0;
    while (index < rules.length) {
      var candidate = rules[index];
      if (filterValue(candidate, node, options)) return candidate
      index++;
    }
    return undefined
  }
  /**
   * Tests a rule's filter against a node. A string filter is compared with
   * the lower-cased tag name, an array filter is searched for it, and a
   * function filter is called with the rule as `this`.
   *
   * @param {Object} rule The rule whose `filter` is evaluated
   * @param {Node} node The node being tested
   * @param {Object} options Converter options, passed to function filters
   * @returns {Boolean|undefined} true on a match, undefined otherwise
   * @throws {TypeError} When the filter is not a string, array, or function
   */
  function filterValue (rule, node, options) {
    var filter = rule.filter;
    var matched;

    if (typeof filter === 'string') {
      matched = filter === node.nodeName.toLowerCase();
    } else if (Array.isArray(filter)) {
      matched = filter.indexOf(node.nodeName.toLowerCase()) > -1;
    } else if (typeof filter === 'function') {
      matched = filter.call(rule, node, options);
    } else {
      throw new TypeError('`filter` needs to be a string, array, or function')
    }

    // A non-match deliberately falls through and yields undefined.
    if (matched) return true
  }
  313. /**
  314. * The collapseWhitespace function is adapted from collapse-whitespace
  315. * by Luc Thevenard.
  316. *
  317. * The MIT License (MIT)
  318. *
  319. * Copyright (c) 2014 Luc Thevenard <lucthevenard@gmail.com>
  320. *
  321. * Permission is hereby granted, free of charge, to any person obtaining a copy
  322. * of this software and associated documentation files (the "Software"), to deal
  323. * in the Software without restriction, including without limitation the rights
  324. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  325. * copies of the Software, and to permit persons to whom the Software is
  326. * furnished to do so, subject to the following conditions:
  327. *
  328. * The above copyright notice and this permission notice shall be included in
  329. * all copies or substantial portions of the Software.
  330. *
  331. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  332. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  333. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  334. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  335. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  336. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  337. * THE SOFTWARE.
  338. */
  /**
   * collapseWhitespace(options) removes extraneous whitespace from the given
   * element. Runs of spaces, tabs and line breaks in text nodes collapse to a
   * single space, spaces next to block elements and <br> are trimmed, and
   * nodes that are neither text, CDATA nor elements are removed. Subtrees for
   * which `isPre` returns true are left untouched.
   *
   * @param {Object} options
   * @param {Node} options.element The root element to clean up, in place
   * @param {Function} options.isBlock Predicate for block-level nodes
   * @param {Function} options.isVoid Predicate for void elements
   * @param {Function} [options.isPre] Predicate for preformatted nodes;
   * defaults to a check for the PRE tag name
   */
  function collapseWhitespace (options) {
    var element = options.element;
    var isBlockNode = options.isBlock;
    var isVoidNode = options.isVoid;
    var isPre = options.isPre || function (node) {
      return node.nodeName === 'PRE'
    };

    if (!element.firstChild || isPre(element)) return

    var lastText = null; // most recent text node kept on the current line
    var afterVoid = false; // true right after an inline void element
    var previous = null;
    var current = next(previous, element, isPre);

    // The walk is over once it climbs back up to the root element.
    while (current !== element) {
      var type = current.nodeType;

      if (type === 3 || type === 4) { // Node.TEXT_NODE or Node.CDATA_SECTION_NODE
        var collapsed = current.data.replace(/[ \r\n\t]+/g, ' ');
        var followsSpace = !lastText || / $/.test(lastText.data);

        // Drop a leading space that would double up with the preceding one.
        if (followsSpace && !afterVoid && collapsed.charAt(0) === ' ') {
          collapsed = collapsed.slice(1);
        }

        // `collapsed` might be empty at this point.
        if (!collapsed) {
          current = remove(current);
          continue
        }

        current.data = collapsed;
        lastText = current;
      } else if (type === 1) { // Node.ELEMENT_NODE
        if (isBlockNode(current) || current.nodeName === 'BR') {
          if (lastText) {
            lastText.data = lastText.data.replace(/ $/, '');
          }
          lastText = null;
          afterVoid = false;
        } else if (isVoidNode(current)) {
          // Avoid trimming space around non-block, non-BR void elements.
          lastText = null;
          afterVoid = true;
        }
      } else {
        current = remove(current);
        continue
      }

      var upcoming = next(previous, current, isPre);
      previous = current;
      current = upcoming;
    }

    // Trim the trailing space of the last text node, removing it if emptied.
    if (lastText) {
      lastText.data = lastText.data.replace(/ $/, '');
      if (!lastText.data) {
        remove(lastText);
      }
    }
  }
  /**
   * remove(node) detaches the given node from its parent and returns the
   * node the traversal should visit next: the following sibling when there
   * is one, otherwise the parent.
   *
   * @param {Node} node
   * @return {Node} node
   */
  function remove (node) {
    var parent = node.parentNode;
    // Work out the successor before detaching the node.
    var successor = node.nextSibling || parent;
    parent.removeChild(node);
    return successor
  }
  /**
   * next(prev, current, isPre) returns the next node in the sequence, given
   * the current and previous nodes. Children are visited first, unless the
   * walk has just come back up from a child or `current` is preformatted;
   * then the next sibling, then the parent.
   *
   * @param {Node} prev
   * @param {Node} current
   * @param {Function} isPre
   * @return {Node}
   */
  function next (prev, current, isPre) {
    var cameFromChild = prev && prev.parentNode === current;
    var skipChildren = cameFromChild || isPre(current);
    var onward = current.nextSibling || current.parentNode;
    return skipChildren ? onward : (current.firstChild || onward)
  }
  /*
   * Set up window for Node.js: fall back to an empty object when there is
   * no global `window`.
   */
  var root = typeof window === 'undefined' ? {} : window;

  /*
   * Parsing HTML strings
   */

  /**
   * Feature-tests whether `root.DOMParser` can parse the 'text/html' type.
   * Adapted from https://gist.github.com/1129031
   *
   * @returns {Boolean} true when parsing an empty HTML string yields a document
   */
  function canParseHTMLNatively () {
    var Parser = root.DOMParser;
    try {
      // WebKit returns null on unsupported types
      return new Parser().parseFromString('', 'text/html') ? true : false
    } catch (e) {
      // Firefox/Opera/IE throw errors on unsupported types; a missing
      // DOMParser ends up here as well. Either way: no native support.
      return false
    }
  }
  /**
   * Builds a stand-in parser constructor for environments where DOMParser
   * cannot handle 'text/html'. Its `parseFromString` writes the markup into
   * a fresh document and returns that document.
   *
   * @returns {Function} A constructor whose instances expose `parseFromString(string)`
   */
  function createHTMLParser () {
    var Parser = function () {};

    // Variant that uses an ActiveX 'htmlfile' document.
    var parseWithActiveX = function (string) {
      var doc = new window.ActiveXObject('htmlfile');
      doc.designMode = 'on'; // disable on-page scripts
      doc.open();
      doc.write(string);
      doc.close();
      return doc
    };

    // Variant that uses a detached HTML document.
    var parseWithDocument = function (string) {
      var doc = document.implementation.createHTMLDocument('');
      doc.open();
      doc.write(string);
      doc.close();
      return doc
    };

    Parser.prototype.parseFromString = shouldUseActiveX()
      ? parseWithActiveX
      : parseWithDocument;

    return Parser
  }
  /**
   * Decides whether the fallback parser must use ActiveX: that is the case
   * when opening a freshly created HTML document throws and an
   * `ActiveXObject` constructor is available on `window`.
   *
   * @returns {Boolean} true when the ActiveX 'htmlfile' parser should be used
   */
  function shouldUseActiveX () {
    var useActiveX = false;
    try {
      document.implementation.createHTMLDocument('').open();
    } catch (e) {
      // The probe also throws when there is no DOM at all. Guard the
      // `window` lookup so a ReferenceError cannot escape from this handler
      // and abort loading of the whole module.
      if (typeof window !== 'undefined' && window.ActiveXObject) useActiveX = true;
    }
    return useActiveX
  }
  // Parser constructor: the native DOMParser when it handles 'text/html',
  // otherwise the document-writing fallback.
  var HTMLParser;
  if (canParseHTMLNatively()) {
    HTMLParser = root.DOMParser;
  } else {
    HTMLParser = createHTMLParser();
  }
  /**
   * Produces the whitespace-collapsed root node for a conversion. A string
   * is parsed into a wrapper element; a DOM node is deep-cloned.
   *
   * @param {String|Node} input HTML markup or a DOM node
   * @returns {Node} The root element to convert
   */
  function RootNode (input) {
    var root;

    if (typeof input !== 'string') {
      root = input.cloneNode(true);
    } else {
      // DOM parsers arrange elements in the <head> and <body>.
      // Wrapping in a custom element ensures elements are reliably arranged in
      // a single element.
      var markup = '<x-turndown id="turndown-root">' + input + '</x-turndown>';
      var doc = htmlParser().parseFromString(markup, 'text/html');
      root = doc.getElementById('turndown-root');
    }

    collapseWhitespace({ element: root, isBlock: isBlock, isVoid: isVoid });
    return root
  }
  // Parser instance, created the first time it is needed and then reused.
  var _htmlParser;

  /**
   * Returns the shared HTML parser, constructing it on first use.
   *
   * @returns {Object} An HTMLParser instance
   */
  function htmlParser () {
    if (!_htmlParser) {
      _htmlParser = new HTMLParser();
    }
    return _htmlParser
  }
  /**
   * Annotates a DOM node, in place, with the flags the conversion relies on:
   * `isBlock`, `isCode`, `isBlank` and `flankingWhitespace`.
   *
   * @param {Node} node The DOM node to annotate
   * @returns {Node} The same node
   */
  function Node (node) {
    node.isBlock = isBlock(node);

    // A node counts as code if it is a <code> element or its parent was
    // already flagged as code.
    var isCodeElement = node.nodeName.toLowerCase() === 'code';
    node.isCode = isCodeElement || node.parentNode.isCode;

    node.isBlank = isBlank(node);
    node.flankingWhitespace = flankingWhitespace(node);
    return node
  }
  /**
   * Tells whether a node is blank: its text is only whitespace, it is not
   * one of the tags exempted below, and it neither is nor contains a void
   * element.
   *
   * @param {Node} node The node to test
   * @returns {Boolean} true when the node is blank
   */
  function isBlank (node) {
    var neverBlank = ['A', 'TH', 'TD', 'IFRAME', 'SCRIPT', 'AUDIO', 'VIDEO'];
    if (neverBlank.indexOf(node.nodeName) !== -1) return false
    if (!/^\s*$/i.test(node.textContent)) return false
    if (isVoid(node)) return false
    return !hasVoid(node)
  }
  /**
   * Works out the single spaces to put back around an inline node whose
   * text starts or ends with whitespace that is not already supplied by the
   * neighbouring sibling. Block nodes never get flanking whitespace.
   *
   * @param {Node} node A node carrying the `isBlock` flag
   * @returns {{leading: String, trailing: String}} '' or ' ' for each side
   */
  function flankingWhitespace (node) {
    var edges = { leading: '', trailing: '' };
    if (node.isBlock) return edges

    var text = node.textContent;
    if (/^[ \r\n\t]/.test(text) && !isFlankedByWhitespace('left', node)) {
      edges.leading = ' ';
    }
    if (/[ \r\n\t]$/.test(text) && !isFlankedByWhitespace('right', node)) {
      edges.trailing = ' ';
    }
    return edges
  }
  /**
   * Checks whether the sibling on the given side already provides a space
   * next to `node`: a trailing space for the left sibling, a leading space
   * for the right one. Only text nodes and non-block elements are inspected.
   *
   * @param {String} side 'left' for the previous sibling; anything else
   * selects the next sibling
   * @param {Node} node The node whose neighbour is inspected
   * @returns {Boolean|undefined} The test result, or undefined when there is
   * no sibling or the sibling is neither text nor an inline element
   */
  function isFlankedByWhitespace (side, node) {
    var onLeft = side === 'left';
    var sibling = onLeft ? node.previousSibling : node.nextSibling;
    var edge = onLeft ? / $/ : /^ /;

    if (!sibling) return undefined
    if (sibling.nodeType === 3) return edge.test(sibling.nodeValue)
    if (sibling.nodeType === 1 && !isBlock(sibling)) {
      return edge.test(sibling.textContent)
    }
    return undefined
  }
  // Borrowed so array-like collections such as childNodes can be reduced.
  var reduce = Array.prototype.reduce;

  // Newline runs at the start and at the end of a string.
  var leadingNewLinesRegExp = /^\n*/;
  var trailingNewLinesRegExp = /\n*$/;

  // [pattern, replacement] pairs, applied in this order when escaping text.
  var escapes = [];

  // The backslash goes first so the escapes added below are not escaped again.
  escapes.push([/\\/g, '\\\\']);
  escapes.push([/\*/g, '\\*']);

  // Characters that only need escaping at the start of the string.
  escapes.push([/^-/g, '\\-']);
  escapes.push([/^\+ /g, '\\+ ']);
  escapes.push([/^(=+)/g, '\\$1']);
  escapes.push([/^(#{1,6}) /g, '\\$1 ']);

  escapes.push([/`/g, '\\`']);
  escapes.push([/^~~~/g, '\\~~~']);
  escapes.push([/\[/g, '\\[']);
  escapes.push([/\]/g, '\\]']);
  escapes.push([/^>/g, '\\>']);
  escapes.push([/_/g, '\\_']);

  // A leading "1. " style ordinal.
  escapes.push([/^(\d+)\. /g, '$1\\. ']);
  /**
   * Creates an HTML-to-Markdown converter. May be called with or without
   * `new`. The given options are merged over the defaults below.
   *
   * @param {Object} [options] Overrides for any of the default options
   */
  function TurndownService (options) {
    if (!(this instanceof TurndownService)) return new TurndownService(options)
    var defaults = {
      rules: rules,
      headingStyle: 'setext',
      hr: '* * *',
      bulletListMarker: '*',
      codeBlockStyle: 'indented',
      fence: '```',
      emDelimiter: '_',
      strongDelimiter: '**',
      linkStyle: 'inlined',
      linkReferenceStyle: 'full',
      // Two trailing spaces: Markdown needs at least two before the newline
      // for a hard line break; a single space is only a soft break.
      br: '  ',
      // Blank nodes: block-level ones keep their paragraph gap, inline ones vanish.
      blankReplacement: function (content, node) {
        return node.isBlock ? '\n\n' : ''
      },
      // Kept nodes are emitted as their original HTML.
      keepReplacement: function (content, node) {
        return node.isBlock ? '\n\n' + node.outerHTML + '\n\n' : node.outerHTML
      },
      // Nodes without a rule pass their converted content through.
      defaultReplacement: function (content, node) {
        return node.isBlock ? '\n\n' + content + '\n\n' : content
      }
    };
    this.options = extend({}, defaults, options);
    this.rules = new Rules(this.options);
  }
  TurndownService.prototype = {
    /**
     * The entry point for converting a string or DOM node to Markdown
     * @public
     * @param {String|HTMLElement} input The string or DOM node to convert
     * @returns A Markdown representation of the input
     * @type String
     */
    turndown: function (input) {
      if (!canConvert(input)) {
        var message = input + ' is not a string, or an element/document/fragment node.';
        throw new TypeError(message)
      }
      if (input === '') return ''

      var rootNode = new RootNode(input);
      var markdown = process.call(this, rootNode);
      return postProcess.call(this, markdown)
    },

    /**
     * Add one or more plugins
     * @public
     * @param {Function|Array} plugin The plugin or array of plugins to add
     * @returns The Turndown instance for chaining
     * @type Object
     */
    use: function (plugin) {
      if (typeof plugin === 'function') {
        plugin(this);
        return this
      }
      if (!Array.isArray(plugin)) {
        throw new TypeError('plugin must be a Function or an Array of Functions')
      }
      // Arrays are applied element by element, so they may nest.
      for (var i = 0; i < plugin.length; i++) {
        this.use(plugin[i]);
      }
      return this
    },

    /**
     * Adds a rule
     * @public
     * @param {String} key The unique key of the rule
     * @param {Object} rule The rule
     * @returns The Turndown instance for chaining
     * @type Object
     */
    addRule: function (key, rule) {
      this.rules.add(key, rule);
      return this
    },

    /**
     * Keep a node (as HTML) that matches the filter
     * @public
     * @param {String|Array|Function} filter Selects the nodes to keep
     * @returns The Turndown instance for chaining
     * @type Object
     */
    keep: function (filter) {
      this.rules.keep(filter);
      return this
    },

    /**
     * Remove a node that matches the filter
     * @public
     * @param {String|Array|Function} filter Selects the nodes to remove
     * @returns The Turndown instance for chaining
     * @type Object
     */
    remove: function (filter) {
      this.rules.remove(filter);
      return this
    },

    /**
     * Escapes Markdown syntax
     * @public
     * @param {String} string The string to escape
     * @returns A string with Markdown syntax escaped
     * @type String
     */
    escape: function (string) {
      var escaped = string;
      // Apply each [pattern, replacement] pair in table order.
      for (var i = 0; i < escapes.length; i++) {
        escaped = escaped.replace(escapes[i][0], escapes[i][1]);
      }
      return escaped
    }
  };
  /**
   * Reduces a DOM node down to its Markdown string equivalent by converting
   * each child in turn and joining the results. Must be called with a
   * TurndownService instance as `this`.
   * @private
   * @param {HTMLElement} parentNode The node to convert
   * @returns A Markdown representation of the node
   * @type String
   */
  function process (parentNode) {
    var service = this;

    function appendChild (output, child) {
      var node = new Node(child);
      var replacement = '';

      if (node.nodeType === 1) {
        replacement = replacementForNode.call(service, node);
      } else if (node.nodeType === 3) {
        // Text inside code is emitted verbatim; everything else is escaped.
        replacement = node.isCode ? node.nodeValue : service.escape(node.nodeValue);
      }

      return join(output, replacement)
    }

    return reduce.call(parentNode.childNodes, appendChild, '')
  }
  /**
   * Appends strings as each rule requires and trims the output. Must be
   * called with a TurndownService instance as `this`.
   * @private
   * @param {String} output The conversion output
   * @returns A trimmed version of the output
   * @type String
   */
  function postProcess (output) {
    var service = this;

    service.rules.forEach(function (rule) {
      // Only rules that define `append` contribute trailing text.
      if (typeof rule.append !== 'function') return
      output = join(output, rule.append(service.options));
    });

    var withoutLeading = output.replace(/^[\t\r\n]+/, '');
    return withoutLeading.replace(/[\t\r\n\s]+$/, '')
  }
  /**
   * Converts an element node to its Markdown equivalent using the rule that
   * matches it. Must be called with a TurndownService instance as `this`.
   * @private
   * @param {HTMLElement} node The node to convert
   * @returns A Markdown representation of the node
   * @type String
   */
  function replacementForNode (node) {
    var rule = this.rules.forNode(node);
    var content = process.call(this, node);
    var flank = node.flankingWhitespace;

    // Edge whitespace is re-added outside the replacement, so strip it first.
    if (flank.leading || flank.trailing) {
      content = content.trim();
    }

    var converted = rule.replacement(content, node, this.options);
    return flank.leading + converted + flank.trailing
  }
  /**
   * Determines the new lines between the current output and the replacement:
   * the longer of the output's trailing run and the replacement's leading
   * run, capped at two newlines.
   * @private
   * @param {String} output The current conversion output
   * @param {String} replacement The string to append to the output
   * @returns The whitespace to separate the current output and the replacement
   * @type String
   */
  function separatingNewlines (output, replacement) {
    var trailing = output.match(trailingNewLinesRegExp)[0];
    var leading = replacement.match(leadingNewLinesRegExp)[0];

    // Both runs contain only '\n', so the longer one is simply the maximum.
    var longest = leading.length > trailing.length ? leading : trailing;
    return longest.length < 2 ? longest : '\n\n'
  }
  /**
   * Concatenates two converted fragments, replacing the newlines where they
   * meet with a single separator computed by separatingNewlines.
   * @private
   * @param {String} string1 The text so far
   * @param {String} string2 The text to append
   * @returns The joined text
   * @type String
   */
  function join (string1, string2) {
    var separator = separatingNewlines(string1, string2);
    // Remove trailing/leading newlines and replace with separator
    var head = string1.replace(trailingNewLinesRegExp, '');
    var tail = string2.replace(leadingNewLinesRegExp, '');
    return head + separator + tail
  }
  /**
   * Determines whether an input can be converted: it must be a string, or an
   * object whose `nodeType` is 1 (element), 9 (document) or 11 (fragment).
   * @private
   * @param {String|HTMLElement} input The candidate input
   * @returns A truthy value when the input is convertible; otherwise a falsy
   * value (false, or the falsy `nodeType` itself)
   * @type Boolean
   */
  function canConvert (input) {
    if (input == null) return false
    if (typeof input === 'string') return true

    var type = input.nodeType;
    return type && (type === 1 || type === 9 || type === 11)
  }
  775. return TurndownService;
  776. }());