diff --git a/.gitignore b/.gitignore
index 52b9f38..69c5f83 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,5 @@
vendor
composer.lock
/test.*
-/test/changed/
\ No newline at end of file
+/test/changed/
+.phpunit.result.cache
diff --git a/composer.json b/composer.json
index aba02cf..bca6442 100644
--- a/composer.json
+++ b/composer.json
@@ -32,7 +32,7 @@
"ext-xml": "*",
"ext-mbstring": "*",
"psr/log": "^1.0",
- "masterminds/html5": "^2.0",
+ "mensbeam/html-parser": "^1.2.0",
"league/uri": "^6.4"
},
"require-dev": {
diff --git a/src/Nodes/DOM/DOMDocument.php b/src/Nodes/DOM/DOMDocument.php
index d912338..49a9b5d 100644
--- a/src/Nodes/DOM/DOMDocument.php
+++ b/src/Nodes/DOM/DOMDocument.php
@@ -8,7 +8,7 @@ class DOMDocument extends \DOMDocument
{
use NodeTrait;
- public function __construct($version, $encoding)
+ public function __construct($version = "1.0", $encoding = "")
{
parent::__construct($version, $encoding);
diff --git a/src/Readability.php b/src/Readability.php
index 5c8fb84..6d89d66 100644
--- a/src/Readability.php
+++ b/src/Readability.php
@@ -8,9 +8,10 @@
use fivefilters\Readability\Nodes\DOM\DOMText;
use fivefilters\Readability\Nodes\NodeUtility;
use Psr\Log\LoggerInterface;
-use \Masterminds\HTML5;
use League\Uri\Http;
use League\Uri\UriResolver;
+use MensBeam\HTML\Parser;
+use MensBeam\HTML\Parser\Config as ParserConfig;
/**
* Class Readability.
@@ -286,48 +287,52 @@ private function loadHTML($html)
{
$this->logger->debug('[Loading] Loading HTML...');
- // To avoid throwing a gazillion of errors on malformed HTMLs
- libxml_use_internal_errors(true);
-
//$html = preg_replace('/(<br[^>]*>[ \n\r\t]*){2,}/i', '</p><p>', $html);
if ($this->configuration->getParser() === 'html5') {
$this->logger->debug('[Loading] Using HTML5 parser...');
- $html5 = new HTML5(['disable_html_ns' => true, 'target_document' => new DOMDocument('1.0', 'utf-8')]);
- $dom = $html5->loadHTML($html);
+ $config = new ParserConfig();
+ $config->documentClass = DOMDocument::class;
+ $config->encodingFallback = "UTF-8";
+ $dom = Parser::parse($html, "", $config)->document;
//TODO: Improve this so it looks inside