From d01eb2ac1e7b3335713e51cb43d3217e1703f765 Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Mon, 14 Sep 2015 21:49:40 +0200 Subject: [PATCH] Use class instead of id to avoid error It generates error like `ID XXX already defined` --- src/Readability.php | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/Readability.php b/src/Readability.php index 42613ad..9cd8239 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -195,8 +195,10 @@ public function __construct($html, $url = null, $parser = 'libxml', $use_tidy = if (!($parser == 'html5lib' && ($this->dom = \HTML5_Parser::parse($html)))) { libxml_use_internal_errors(true); + $this->dom = new \DOMDocument(); $this->dom->preserveWhiteSpace = false; + $this->dom->formatOutput = true; if (PHP_VERSION_ID >= 50400) { $this->dom->loadHTML($html, LIBXML_NOBLANKS | LIBXML_COMPACT | LIBXML_NOERROR); @@ -292,11 +294,11 @@ public function init() if (!$articleContent) { $this->success = false; $articleContent = $this->dom->createElement('div'); - $articleContent->setAttribute('id', 'readability-content'); + $articleContent->setAttribute('class', 'readability-content'); $articleContent->innerHTML = '

Sorry, Readability was unable to parse this page for content.

'; } - $overlay->setAttribute('id', 'readOverlay'); - $innerDiv->setAttribute('id', 'readInner'); + $overlay->setAttribute('class', 'readOverlay'); + $innerDiv->setAttribute('class', 'readInner'); // Glue the structure of our document together. $innerDiv->appendChild($articleTitle); $innerDiv->appendChild($articleContent); @@ -403,7 +405,7 @@ protected function prepDocument() $this->body = $this->dom->createElement('body'); $this->dom->documentElement->appendChild($this->body); } - $this->body->setAttribute('id', 'readabilityBody'); + $this->body->setAttribute('class', 'readabilityBody'); // Remove all style tags in head. $styleTags = $this->dom->getElementsByTagName('style'); for ($i = $styleTags->length - 1; $i >= 0; --$i) { @@ -423,10 +425,10 @@ protected function prepDocument() public function addFootnotes($articleContent) { $footnotesWrapper = $this->dom->createElement('footer'); - $footnotesWrapper->setAttribute('id', 'readability-footnotes'); + $footnotesWrapper->setAttribute('class', 'readability-footnotes'); $footnotesWrapper->innerHTML = '

References

'; $articleFootnotes = $this->dom->createElement('ol'); - $articleFootnotes->setAttribute('id', 'readability-footnotes-list'); + $articleFootnotes->setAttribute('class', 'readability-footnotes-list'); $footnotesWrapper->appendChild($articleFootnotes); $articleLinks = $articleContent->getElementsByTagName('a'); $linkCount = 0; @@ -842,7 +844,7 @@ protected function grabArticle($page = null) * Things like preambles, content split by ads that we removed, etc. */ $articleContent = $this->dom->createElement('div'); - $articleContent->setAttribute('id', 'readability-content'); + $articleContent->setAttribute('class', 'readability-content'); $siblingScoreThreshold = max(10, ((int) $topCandidate->getAttribute('readability')) * 0.2); $siblingNodes = $topCandidate->parentNode->childNodes; if (!isset($siblingNodes)) { @@ -884,7 +886,10 @@ protected function grabArticle($page = null) $this->dbg('Altering siblingNode '.$siblingNodeName.' to div.'); $nodeToAppend = $this->dom->createElement('div'); try { - $nodeToAppend->setAttribute('id', $siblingNode->getAttribute('id')); + if ($siblingNode->getAttribute('id')) { + $nodeToAppend->setAttribute('id', $siblingNode->getAttribute('id')); + } + $nodeToAppend->setAttribute('alt', $siblingNodeName); $nodeToAppend->innerHTML = $siblingNode->innerHTML; } catch (Exception $e) {