mirror of
				https://github.com/zadam/trilium.git
				synced 2025-11-03 20:06:08 +01:00 
			
		
		
		
	fixes and library updates
This commit is contained in:
		@@ -1,5 +1,3 @@
 | 
			
		||||
// https://github.com/mozilla/readability/tree/814f0a3884350b6f1adfdebb79ca3599e9806605
 | 
			
		||||
 | 
			
		||||
/*eslint-env es6:false*/
 | 
			
		||||
/*
 | 
			
		||||
 * Copyright (c) 2010 Arc90 Inc
 | 
			
		||||
@@ -38,6 +36,7 @@ function Readability(doc, options) {
 | 
			
		||||
  options = options || {};
 | 
			
		||||
 | 
			
		||||
  this._doc = doc;
 | 
			
		||||
  this._docJSDOMParser = this._doc.firstChild.__JSDOMParser__;
 | 
			
		||||
  this._articleTitle = null;
 | 
			
		||||
  this._articleByline = null;
 | 
			
		||||
  this._articleDir = null;
 | 
			
		||||
@@ -50,11 +49,12 @@ function Readability(doc, options) {
 | 
			
		||||
  this._nbTopCandidates = options.nbTopCandidates || this.DEFAULT_N_TOP_CANDIDATES;
 | 
			
		||||
  this._charThreshold = options.charThreshold || this.DEFAULT_CHAR_THRESHOLD;
 | 
			
		||||
  this._classesToPreserve = this.CLASSES_TO_PRESERVE.concat(options.classesToPreserve || []);
 | 
			
		||||
  this._keepClasses = !!options.keepClasses;
 | 
			
		||||
 | 
			
		||||
  // Start with all flags set
 | 
			
		||||
  this._flags = this.FLAG_STRIP_UNLIKELYS |
 | 
			
		||||
                this.FLAG_WEIGHT_CLASSES |
 | 
			
		||||
                this.FLAG_CLEAN_CONDITIONALLY;
 | 
			
		||||
      this.FLAG_WEIGHT_CLASSES |
 | 
			
		||||
      this.FLAG_CLEAN_CONDITIONALLY;
 | 
			
		||||
 | 
			
		||||
  var logEl;
 | 
			
		||||
 | 
			
		||||
@@ -116,8 +116,8 @@ Readability.prototype = {
 | 
			
		||||
  REGEXPS: {
 | 
			
		||||
    // NOTE: These two regular expressions are duplicated in
 | 
			
		||||
    // Readability-readerable.js. Please keep both copies in sync.
 | 
			
		||||
    unlikelyCandidates: /-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|foot|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i,
 | 
			
		||||
    okMaybeItsACandidate: /and|article|body|column|main|shadow/i,
 | 
			
		||||
    unlikelyCandidates: /-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i,
 | 
			
		||||
    okMaybeItsACandidate: /and|article|body|column|content|main|shadow/i,
 | 
			
		||||
 | 
			
		||||
    positive: /article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i,
 | 
			
		||||
    negative: /hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|gdpr|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i,
 | 
			
		||||
@@ -126,6 +126,7 @@ Readability.prototype = {
 | 
			
		||||
    replaceFonts: /<(\/?)font[^>]*>/gi,
 | 
			
		||||
    normalize: /\s{2,}/g,
 | 
			
		||||
    videos: /\/\/(www\.)?((dailymotion|youtube|youtube-nocookie|player\.vimeo|v\.qq)\.com|(archive|upload\.wikimedia)\.org|player\.twitch\.tv)/i,
 | 
			
		||||
    shareElements: /(\b|_)(share|sharedaddy)(\b|_)/i,
 | 
			
		||||
    nextLink: /(next|weiter|continue|>([^\|]|$)|»([^\|]|$))/i,
 | 
			
		||||
    prevLink: /(prev|earl|old|new|<|«)/i,
 | 
			
		||||
    whitespace: /^\s*$/,
 | 
			
		||||
@@ -159,13 +160,15 @@ Readability.prototype = {
 | 
			
		||||
   *
 | 
			
		||||
   * @param Element
 | 
			
		||||
   * @return void
 | 
			
		||||
  **/
 | 
			
		||||
   **/
 | 
			
		||||
  _postProcessContent: function(articleContent) {
 | 
			
		||||
    // Readability cannot open relative uris so we convert them to absolute uris.
 | 
			
		||||
    this._fixRelativeUris(articleContent);
 | 
			
		||||
 | 
			
		||||
    // Remove classes.
 | 
			
		||||
    this._cleanClasses(articleContent);
 | 
			
		||||
    if (!this._keepClasses) {
 | 
			
		||||
      // Remove classes.
 | 
			
		||||
      this._cleanClasses(articleContent);
 | 
			
		||||
    }
 | 
			
		||||
  },
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
@@ -179,6 +182,10 @@ Readability.prototype = {
 | 
			
		||||
   * @return void
 | 
			
		||||
   */
 | 
			
		||||
  _removeNodes: function(nodeList, filterFn) {
 | 
			
		||||
    // Avoid ever operating on live node lists.
 | 
			
		||||
    if (this._docJSDOMParser && nodeList._isLiveNodeList) {
 | 
			
		||||
      throw new Error("Do not pass live node lists to _removeNodes");
 | 
			
		||||
    }
 | 
			
		||||
    for (var i = nodeList.length - 1; i >= 0; i--) {
 | 
			
		||||
      var node = nodeList[i];
 | 
			
		||||
      var parentNode = node.parentNode;
 | 
			
		||||
@@ -198,6 +205,10 @@ Readability.prototype = {
 | 
			
		||||
   * @return void
 | 
			
		||||
   */
 | 
			
		||||
  _replaceNodeTags: function(nodeList, newTagName) {
 | 
			
		||||
    // Avoid ever operating on live node lists.
 | 
			
		||||
    if (this._docJSDOMParser && nodeList._isLiveNodeList) {
 | 
			
		||||
      throw new Error("Do not pass live node lists to _replaceNodeTags");
 | 
			
		||||
    }
 | 
			
		||||
    for (var i = nodeList.length - 1; i >= 0; i--) {
 | 
			
		||||
      var node = nodeList[i];
 | 
			
		||||
      this._setNodeTag(node, newTagName);
 | 
			
		||||
@@ -285,11 +296,11 @@ Readability.prototype = {
 | 
			
		||||
  _cleanClasses: function(node) {
 | 
			
		||||
    var classesToPreserve = this._classesToPreserve;
 | 
			
		||||
    var className = (node.getAttribute("class") || "")
 | 
			
		||||
      .split(/\s+/)
 | 
			
		||||
      .filter(function(cls) {
 | 
			
		||||
        return classesToPreserve.indexOf(cls) != -1;
 | 
			
		||||
      })
 | 
			
		||||
      .join(" ");
 | 
			
		||||
        .split(/\s+/)
 | 
			
		||||
        .filter(function(cls) {
 | 
			
		||||
          return classesToPreserve.indexOf(cls) != -1;
 | 
			
		||||
        })
 | 
			
		||||
        .join(" ");
 | 
			
		||||
 | 
			
		||||
    if (className) {
 | 
			
		||||
      node.setAttribute("class", className);
 | 
			
		||||
@@ -330,11 +341,21 @@ Readability.prototype = {
 | 
			
		||||
    this._forEachNode(links, function(link) {
 | 
			
		||||
      var href = link.getAttribute("href");
 | 
			
		||||
      if (href) {
 | 
			
		||||
        // Replace links with javascript: URIs with text content, since
 | 
			
		||||
        // Remove links with javascript: URIs, since
 | 
			
		||||
        // they won't work after scripts have been removed from the page.
 | 
			
		||||
        if (href.indexOf("javascript:") === 0) {
 | 
			
		||||
          var text = this._doc.createTextNode(link.textContent);
 | 
			
		||||
          link.parentNode.replaceChild(text, link);
 | 
			
		||||
          // if the link only contains simple text content, it can be converted to a text node
 | 
			
		||||
          if (link.childNodes.length === 1 && link.childNodes[0].nodeType === this.TEXT_NODE) {
 | 
			
		||||
            var text = this._doc.createTextNode(link.textContent);
 | 
			
		||||
            link.parentNode.replaceChild(text, link);
 | 
			
		||||
          } else {
 | 
			
		||||
            // if the link has multiple children, they should all be preserved
 | 
			
		||||
            var container = this._doc.createElement("span");
 | 
			
		||||
            while (link.childNodes.length > 0) {
 | 
			
		||||
              container.appendChild(link.childNodes[0]);
 | 
			
		||||
            }
 | 
			
		||||
            link.parentNode.replaceChild(container, link);
 | 
			
		||||
          }
 | 
			
		||||
        } else {
 | 
			
		||||
          link.setAttribute("href", toAbsoluteURI(href));
 | 
			
		||||
        }
 | 
			
		||||
@@ -386,8 +407,8 @@ Readability.prototype = {
 | 
			
		||||
      // Check if we have a heading containing this exact string, so we
 | 
			
		||||
      // could assume it's the full title.
 | 
			
		||||
      var headings = this._concatNodeLists(
 | 
			
		||||
        doc.getElementsByTagName("h1"),
 | 
			
		||||
        doc.getElementsByTagName("h2")
 | 
			
		||||
          doc.getElementsByTagName("h1"),
 | 
			
		||||
          doc.getElementsByTagName("h2")
 | 
			
		||||
      );
 | 
			
		||||
      var trimmedTitle = curTitle.trim();
 | 
			
		||||
      var match = this._someNode(headings, function(heading) {
 | 
			
		||||
@@ -422,7 +443,7 @@ Readability.prototype = {
 | 
			
		||||
    var curTitleWordCount = wordCount(curTitle);
 | 
			
		||||
    if (curTitleWordCount <= 4 &&
 | 
			
		||||
        (!titleHadHierarchicalSeparators ||
 | 
			
		||||
         curTitleWordCount != wordCount(origTitle.replace(/[\|\-\\\/>»]+/g, "")) - 1)) {
 | 
			
		||||
            curTitleWordCount != wordCount(origTitle.replace(/[\|\-\\\/>»]+/g, "")) - 1)) {
 | 
			
		||||
      curTitle = origTitle;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -439,13 +460,13 @@ Readability.prototype = {
 | 
			
		||||
    var doc = this._doc;
 | 
			
		||||
 | 
			
		||||
    // Remove all style tags in head
 | 
			
		||||
    this._removeNodes(doc.getElementsByTagName("style"));
 | 
			
		||||
    this._removeNodes(this._getAllNodesWithTag(doc, ["style"]));
 | 
			
		||||
 | 
			
		||||
    if (doc.body) {
 | 
			
		||||
      this._replaceBrs(doc.body);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    this._replaceNodeTags(doc.getElementsByTagName("font"), "SPAN");
 | 
			
		||||
    this._replaceNodeTags(this._getAllNodesWithTag(doc, ["font"]), "SPAN");
 | 
			
		||||
  },
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
@@ -456,8 +477,8 @@ Readability.prototype = {
 | 
			
		||||
  _nextElement: function (node) {
 | 
			
		||||
    var next = node;
 | 
			
		||||
    while (next
 | 
			
		||||
        && (next.nodeType != this.ELEMENT_NODE)
 | 
			
		||||
        && this.REGEXPS.whitespace.test(next.textContent)) {
 | 
			
		||||
    && (next.nodeType != this.ELEMENT_NODE)
 | 
			
		||||
    && this.REGEXPS.whitespace.test(next.textContent)) {
 | 
			
		||||
      next = next.nextSibling;
 | 
			
		||||
    }
 | 
			
		||||
    return next;
 | 
			
		||||
@@ -525,7 +546,7 @@ Readability.prototype = {
 | 
			
		||||
 | 
			
		||||
  _setNodeTag: function (node, tag) {
 | 
			
		||||
    this.log("_setNodeTag", node, tag);
 | 
			
		||||
    if (node.__JSDOMParser__) {
 | 
			
		||||
    if (this._docJSDOMParser) {
 | 
			
		||||
      node.localName = tag.toLowerCase();
 | 
			
		||||
      node.tagName = tag.toUpperCase();
 | 
			
		||||
      return node;
 | 
			
		||||
@@ -588,7 +609,7 @@ Readability.prototype = {
 | 
			
		||||
 | 
			
		||||
    this._forEachNode(articleContent.children, function (topCandidate) {
 | 
			
		||||
      this._cleanMatchedNodes(topCandidate, function (node, matchString) {
 | 
			
		||||
        return /share/.test(matchString) && node.textContent.length < shareElementThreshold;
 | 
			
		||||
        return this.REGEXPS.shareElements.test(matchString) && node.textContent.length < shareElementThreshold;
 | 
			
		||||
      });
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
@@ -625,7 +646,7 @@ Readability.prototype = {
 | 
			
		||||
    this._cleanConditionally(articleContent, "div");
 | 
			
		||||
 | 
			
		||||
    // Remove extra paragraphs
 | 
			
		||||
    this._removeNodes(articleContent.getElementsByTagName("p"), function (paragraph) {
 | 
			
		||||
    this._removeNodes(this._getAllNodesWithTag(articleContent, ["p"]), function (paragraph) {
 | 
			
		||||
      var imgCount = paragraph.getElementsByTagName("img").length;
 | 
			
		||||
      var embedCount = paragraph.getElementsByTagName("embed").length;
 | 
			
		||||
      var objectCount = paragraph.getElementsByTagName("object").length;
 | 
			
		||||
@@ -662,7 +683,7 @@ Readability.prototype = {
 | 
			
		||||
   *
 | 
			
		||||
   * @param Element
 | 
			
		||||
   * @return void
 | 
			
		||||
  **/
 | 
			
		||||
   **/
 | 
			
		||||
  _initializeNode: function(node) {
 | 
			
		||||
    node.readability = {"contentScore": 0};
 | 
			
		||||
 | 
			
		||||
@@ -769,7 +790,7 @@ Readability.prototype = {
 | 
			
		||||
   *
 | 
			
		||||
   * @param page a document to run upon. Needs to be a full document, complete with body.
 | 
			
		||||
   * @return Element
 | 
			
		||||
  **/
 | 
			
		||||
   **/
 | 
			
		||||
  _grabArticle: function (page) {
 | 
			
		||||
    this.log("**** grabArticle ****");
 | 
			
		||||
    var doc = this._doc;
 | 
			
		||||
@@ -823,8 +844,8 @@ Readability.prototype = {
 | 
			
		||||
 | 
			
		||||
        // Remove DIV, SECTION, HEADER, and H1-H6 nodes without any content (e.g. text, image, video, or iframe).
 | 
			
		||||
        if ((node.tagName === "DIV" || node.tagName === "SECTION" || node.tagName === "HEADER" ||
 | 
			
		||||
             node.tagName === "H1" || node.tagName === "H2" || node.tagName === "H3" ||
 | 
			
		||||
             node.tagName === "H4" || node.tagName === "H5" || node.tagName === "H6") &&
 | 
			
		||||
            node.tagName === "H1" || node.tagName === "H2" || node.tagName === "H3" ||
 | 
			
		||||
            node.tagName === "H4" || node.tagName === "H5" || node.tagName === "H6") &&
 | 
			
		||||
            this._isElementWithoutContent(node)) {
 | 
			
		||||
          node = this._removeAndGetNext(node);
 | 
			
		||||
          continue;
 | 
			
		||||
@@ -880,7 +901,7 @@ Readability.prototype = {
 | 
			
		||||
       * Then add their score to their parent node.
 | 
			
		||||
       *
 | 
			
		||||
       * A score is determined by things like number of commas, class names, etc. Maybe eventually link density.
 | 
			
		||||
      **/
 | 
			
		||||
       **/
 | 
			
		||||
      var candidates = [];
 | 
			
		||||
      this._forEachNode(elementsToScore, function(elementToScore) {
 | 
			
		||||
        if (!elementToScore.parentNode || typeof(elementToScore.parentNode.tagName) === "undefined")
 | 
			
		||||
@@ -1085,7 +1106,7 @@ Readability.prototype = {
 | 
			
		||||
            if (nodeLength > 80 && linkDensity < 0.25) {
 | 
			
		||||
              append = true;
 | 
			
		||||
            } else if (nodeLength < 80 && nodeLength > 0 && linkDensity === 0 &&
 | 
			
		||||
                       nodeContent.search(/\.( |$)/) !== -1) {
 | 
			
		||||
                nodeContent.search(/\.( |$)/) !== -1) {
 | 
			
		||||
              append = true;
 | 
			
		||||
            }
 | 
			
		||||
          }
 | 
			
		||||
@@ -1264,12 +1285,12 @@ Readability.prototype = {
 | 
			
		||||
 | 
			
		||||
    // get title
 | 
			
		||||
    metadata.title = values["dc:title"] ||
 | 
			
		||||
                     values["dcterm:title"] ||
 | 
			
		||||
                     values["og:title"] ||
 | 
			
		||||
                     values["weibo:article:title"] ||
 | 
			
		||||
                     values["weibo:webpage:title"] ||
 | 
			
		||||
                     values["title"] ||
 | 
			
		||||
                     values["twitter:title"];
 | 
			
		||||
        values["dcterm:title"] ||
 | 
			
		||||
        values["og:title"] ||
 | 
			
		||||
        values["weibo:article:title"] ||
 | 
			
		||||
        values["weibo:webpage:title"] ||
 | 
			
		||||
        values["title"] ||
 | 
			
		||||
        values["twitter:title"];
 | 
			
		||||
 | 
			
		||||
    if (!metadata.title) {
 | 
			
		||||
      metadata.title = this._getArticleTitle();
 | 
			
		||||
@@ -1277,17 +1298,17 @@ Readability.prototype = {
 | 
			
		||||
 | 
			
		||||
    // get author
 | 
			
		||||
    metadata.byline = values["dc:creator"] ||
 | 
			
		||||
                      values["dcterm:creator"] ||
 | 
			
		||||
                      values["author"];
 | 
			
		||||
        values["dcterm:creator"] ||
 | 
			
		||||
        values["author"];
 | 
			
		||||
 | 
			
		||||
    // get description
 | 
			
		||||
    metadata.excerpt = values["dc:description"] ||
 | 
			
		||||
                       values["dcterm:description"] ||
 | 
			
		||||
                       values["og:description"] ||
 | 
			
		||||
                       values["weibo:article:description"] ||
 | 
			
		||||
                       values["weibo:webpage:description"] ||
 | 
			
		||||
                       values["description"] ||
 | 
			
		||||
                       values["twitter:description"];
 | 
			
		||||
        values["dcterm:description"] ||
 | 
			
		||||
        values["og:description"] ||
 | 
			
		||||
        values["weibo:article:description"] ||
 | 
			
		||||
        values["weibo:webpage:description"] ||
 | 
			
		||||
        values["description"] ||
 | 
			
		||||
        values["twitter:description"];
 | 
			
		||||
 | 
			
		||||
    // get site name
 | 
			
		||||
    metadata.siteName = values["og:site_name"];
 | 
			
		||||
@@ -1299,14 +1320,14 @@ Readability.prototype = {
 | 
			
		||||
   * Removes script tags from the document.
 | 
			
		||||
   *
 | 
			
		||||
   * @param Element
 | 
			
		||||
  **/
 | 
			
		||||
   **/
 | 
			
		||||
  _removeScripts: function(doc) {
 | 
			
		||||
    this._removeNodes(doc.getElementsByTagName("script"), function(scriptNode) {
 | 
			
		||||
    this._removeNodes(this._getAllNodesWithTag(doc, ["script"]), function(scriptNode) {
 | 
			
		||||
      scriptNode.nodeValue = "";
 | 
			
		||||
      scriptNode.removeAttribute("src");
 | 
			
		||||
      return true;
 | 
			
		||||
    });
 | 
			
		||||
    this._removeNodes(doc.getElementsByTagName("noscript"));
 | 
			
		||||
    this._removeNodes(this._getAllNodesWithTag(doc, ["noscript"]));
 | 
			
		||||
  },
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
@@ -1316,7 +1337,7 @@ Readability.prototype = {
 | 
			
		||||
   *
 | 
			
		||||
   * @param Element
 | 
			
		||||
   * @param string tag of child element
 | 
			
		||||
  **/
 | 
			
		||||
   **/
 | 
			
		||||
  _hasSingleTagInsideElement: function(element, tag) {
 | 
			
		||||
    // There should be exactly 1 element child with given tag
 | 
			
		||||
    if (element.children.length != 1 || element.children[0].tagName !== tag) {
 | 
			
		||||
@@ -1326,15 +1347,15 @@ Readability.prototype = {
 | 
			
		||||
    // And there should be no text nodes with real content
 | 
			
		||||
    return !this._someNode(element.childNodes, function(node) {
 | 
			
		||||
      return node.nodeType === this.TEXT_NODE &&
 | 
			
		||||
             this.REGEXPS.hasContent.test(node.textContent);
 | 
			
		||||
          this.REGEXPS.hasContent.test(node.textContent);
 | 
			
		||||
    });
 | 
			
		||||
  },
 | 
			
		||||
 | 
			
		||||
  _isElementWithoutContent: function(node) {
 | 
			
		||||
    return node.nodeType === this.ELEMENT_NODE &&
 | 
			
		||||
      node.textContent.trim().length == 0 &&
 | 
			
		||||
      (node.children.length == 0 ||
 | 
			
		||||
       node.children.length == node.getElementsByTagName("br").length + node.getElementsByTagName("hr").length);
 | 
			
		||||
        node.textContent.trim().length == 0 &&
 | 
			
		||||
        (node.children.length == 0 ||
 | 
			
		||||
            node.children.length == node.getElementsByTagName("br").length + node.getElementsByTagName("hr").length);
 | 
			
		||||
  },
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
@@ -1345,23 +1366,23 @@ Readability.prototype = {
 | 
			
		||||
  _hasChildBlockElement: function (element) {
 | 
			
		||||
    return this._someNode(element.childNodes, function(node) {
 | 
			
		||||
      return this.DIV_TO_P_ELEMS.indexOf(node.tagName) !== -1 ||
 | 
			
		||||
             this._hasChildBlockElement(node);
 | 
			
		||||
          this._hasChildBlockElement(node);
 | 
			
		||||
    });
 | 
			
		||||
  },
 | 
			
		||||
 | 
			
		||||
  /***
 | 
			
		||||
   * Determine if a node qualifies as phrasing content.
 | 
			
		||||
   * https://developer.mozilla.org/en-US/docs/Web/Guide/HTML/Content_categories#Phrasing_content
 | 
			
		||||
  **/
 | 
			
		||||
   **/
 | 
			
		||||
  _isPhrasingContent: function(node) {
 | 
			
		||||
    return node.nodeType === this.TEXT_NODE || this.PHRASING_ELEMS.indexOf(node.tagName) !== -1 ||
 | 
			
		||||
      ((node.tagName === "A" || node.tagName === "DEL" || node.tagName === "INS") &&
 | 
			
		||||
        this._everyNode(node.childNodes, this._isPhrasingContent));
 | 
			
		||||
        ((node.tagName === "A" || node.tagName === "DEL" || node.tagName === "INS") &&
 | 
			
		||||
            this._everyNode(node.childNodes, this._isPhrasingContent));
 | 
			
		||||
  },
 | 
			
		||||
 | 
			
		||||
  _isWhitespace: function(node) {
 | 
			
		||||
    return (node.nodeType === this.TEXT_NODE && node.textContent.trim().length === 0) ||
 | 
			
		||||
           (node.nodeType === this.ELEMENT_NODE && node.tagName === "BR");
 | 
			
		||||
        (node.nodeType === this.ELEMENT_NODE && node.tagName === "BR");
 | 
			
		||||
  },
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
@@ -1371,7 +1392,7 @@ Readability.prototype = {
 | 
			
		||||
   * @param Element
 | 
			
		||||
   * @param Boolean normalizeSpaces (default: true)
 | 
			
		||||
   * @return string
 | 
			
		||||
  **/
 | 
			
		||||
   **/
 | 
			
		||||
  _getInnerText: function(e, normalizeSpaces) {
 | 
			
		||||
    normalizeSpaces = (typeof normalizeSpaces === "undefined") ? true : normalizeSpaces;
 | 
			
		||||
    var textContent = e.textContent.trim();
 | 
			
		||||
@@ -1388,7 +1409,7 @@ Readability.prototype = {
 | 
			
		||||
   * @param Element
 | 
			
		||||
   * @param string - what to split on. Default is ","
 | 
			
		||||
   * @return number (integer)
 | 
			
		||||
  **/
 | 
			
		||||
   **/
 | 
			
		||||
  _getCharCount: function(e, s) {
 | 
			
		||||
    s = s || ",";
 | 
			
		||||
    return this._getInnerText(e).split(s).length - 1;
 | 
			
		||||
@@ -1400,7 +1421,7 @@ Readability.prototype = {
 | 
			
		||||
   *
 | 
			
		||||
   * @param Element
 | 
			
		||||
   * @return void
 | 
			
		||||
  **/
 | 
			
		||||
   **/
 | 
			
		||||
  _cleanStyles: function(e) {
 | 
			
		||||
    if (!e || e.tagName.toLowerCase() === "svg")
 | 
			
		||||
      return;
 | 
			
		||||
@@ -1428,7 +1449,7 @@ Readability.prototype = {
 | 
			
		||||
   *
 | 
			
		||||
   * @param Element
 | 
			
		||||
   * @return number (float)
 | 
			
		||||
  **/
 | 
			
		||||
   **/
 | 
			
		||||
  _getLinkDensity: function(element) {
 | 
			
		||||
    var textLength = this._getInnerText(element).length;
 | 
			
		||||
    if (textLength === 0)
 | 
			
		||||
@@ -1450,7 +1471,7 @@ Readability.prototype = {
 | 
			
		||||
   *
 | 
			
		||||
   * @param Element
 | 
			
		||||
   * @return number (Integer)
 | 
			
		||||
  **/
 | 
			
		||||
   **/
 | 
			
		||||
  _getClassWeight: function(e) {
 | 
			
		||||
    if (!this._flagIsActive(this.FLAG_WEIGHT_CLASSES))
 | 
			
		||||
      return 0;
 | 
			
		||||
@@ -1489,7 +1510,7 @@ Readability.prototype = {
 | 
			
		||||
  _clean: function(e, tag) {
 | 
			
		||||
    var isEmbed = ["object", "embed", "iframe"].indexOf(tag) !== -1;
 | 
			
		||||
 | 
			
		||||
    this._removeNodes(e.getElementsByTagName(tag), function(element) {
 | 
			
		||||
    this._removeNodes(this._getAllNodesWithTag(e, [tag]), function(element) {
 | 
			
		||||
      // Allow youtube and vimeo videos through as people usually want to see those.
 | 
			
		||||
      if (isEmbed) {
 | 
			
		||||
        // First, check the element's attributes to see if any of them contain youtube or vimeo
 | 
			
		||||
@@ -1670,7 +1691,7 @@ Readability.prototype = {
 | 
			
		||||
    // without affecting the traversal.
 | 
			
		||||
    //
 | 
			
		||||
    // TODO: Consider taking into account original contentScore here.
 | 
			
		||||
    this._removeNodes(e.getElementsByTagName(tag), function(node) {
 | 
			
		||||
    this._removeNodes(this._getAllNodesWithTag(e, [tag]), function(node) {
 | 
			
		||||
      // First check if this node IS data table, in which case don't remove it.
 | 
			
		||||
      var isDataTable = function(t) {
 | 
			
		||||
        return t._readabilityDataTable;
 | 
			
		||||
@@ -1704,10 +1725,7 @@ Readability.prototype = {
 | 
			
		||||
        var input = node.getElementsByTagName("input").length;
 | 
			
		||||
 | 
			
		||||
        var embedCount = 0;
 | 
			
		||||
        var embeds = this._concatNodeLists(
 | 
			
		||||
          node.getElementsByTagName("object"),
 | 
			
		||||
          node.getElementsByTagName("embed"),
 | 
			
		||||
          node.getElementsByTagName("iframe"));
 | 
			
		||||
        var embeds = this._getAllNodesWithTag(node, ["object", "embed", "iframe"]);
 | 
			
		||||
 | 
			
		||||
        for (var i = 0; i < embeds.length; i++) {
 | 
			
		||||
          // If this embed has attribute that matches video regex, don't delete it.
 | 
			
		||||
@@ -1729,13 +1747,13 @@ Readability.prototype = {
 | 
			
		||||
        var contentLength = this._getInnerText(node).length;
 | 
			
		||||
 | 
			
		||||
        var haveToRemove =
 | 
			
		||||
          (img > 1 && p / img < 0.5 && !this._hasAncestorTag(node, "figure")) ||
 | 
			
		||||
          (!isList && li > p) ||
 | 
			
		||||
          (input > Math.floor(p/3)) ||
 | 
			
		||||
          (!isList && contentLength < 25 && (img === 0 || img > 2) && !this._hasAncestorTag(node, "figure")) ||
 | 
			
		||||
          (!isList && weight < 25 && linkDensity > 0.2) ||
 | 
			
		||||
          (weight >= 25 && linkDensity > 0.5) ||
 | 
			
		||||
          ((embedCount === 1 && contentLength < 75) || embedCount > 1);
 | 
			
		||||
            (img > 1 && p / img < 0.5 && !this._hasAncestorTag(node, "figure")) ||
 | 
			
		||||
            (!isList && li > p) ||
 | 
			
		||||
            (input > Math.floor(p/3)) ||
 | 
			
		||||
            (!isList && contentLength < 25 && (img === 0 || img > 2) && !this._hasAncestorTag(node, "figure")) ||
 | 
			
		||||
            (!isList && weight < 25 && linkDensity > 0.2) ||
 | 
			
		||||
            (weight >= 25 && linkDensity > 0.5) ||
 | 
			
		||||
            ((embedCount === 1 && contentLength < 75) || embedCount > 1);
 | 
			
		||||
        return haveToRemove;
 | 
			
		||||
      }
 | 
			
		||||
      return false;
 | 
			
		||||
@@ -1753,7 +1771,7 @@ Readability.prototype = {
 | 
			
		||||
    var endOfSearchMarkerNode = this._getNextNode(e, true);
 | 
			
		||||
    var next = this._getNextNode(e);
 | 
			
		||||
    while (next && next != endOfSearchMarkerNode) {
 | 
			
		||||
      if (filter(next, next.className + " " + next.id)) {
 | 
			
		||||
      if (filter.call(this, next, next.className + " " + next.id)) {
 | 
			
		||||
        next = this._removeAndGetNext(next);
 | 
			
		||||
      } else {
 | 
			
		||||
        next = this._getNextNode(next);
 | 
			
		||||
@@ -1766,13 +1784,11 @@ Readability.prototype = {
 | 
			
		||||
   *
 | 
			
		||||
   * @param Element
 | 
			
		||||
   * @return void
 | 
			
		||||
  **/
 | 
			
		||||
   **/
 | 
			
		||||
  _cleanHeaders: function(e) {
 | 
			
		||||
    for (var headerIndex = 1; headerIndex < 3; headerIndex += 1) {
 | 
			
		||||
      this._removeNodes(e.getElementsByTagName("h" + headerIndex), function (header) {
 | 
			
		||||
        return this._getClassWeight(header) < 0;
 | 
			
		||||
      });
 | 
			
		||||
    }
 | 
			
		||||
    this._removeNodes(this._getAllNodesWithTag(e, ["h1", "h2"]), function (header) {
 | 
			
		||||
      return this._getClassWeight(header) < 0;
 | 
			
		||||
    });
 | 
			
		||||
  },
 | 
			
		||||
 | 
			
		||||
  _flagIsActive: function(flag) {
 | 
			
		||||
@@ -1784,7 +1800,11 @@ Readability.prototype = {
 | 
			
		||||
  },
 | 
			
		||||
 | 
			
		||||
  _isProbablyVisible: function(node) {
 | 
			
		||||
    return (!node.style || node.style.display != "none") && !node.hasAttribute("hidden");
 | 
			
		||||
    // Have to null-check node.style and node.className.indexOf to deal with SVG and MathML nodes.
 | 
			
		||||
    return (!node.style || node.style.display != "none")
 | 
			
		||||
        && !node.hasAttribute("hidden")
 | 
			
		||||
        // Check for "fallback-image" so that Wikimedia math images are displayed.
 | 
			
		||||
        && (!node.hasAttribute("aria-hidden") || node.getAttribute("aria-hidden") != "true" || (node.className && node.className.indexOf && node.className.indexOf("fallback-image") !== -1));
 | 
			
		||||
  },
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user