diff --git a/reader.js b/reader.js
index 604182e..5cf0b93 100644
--- a/reader.js
+++ b/reader.js
@@ -1,18 +1,38 @@
+/* global DOMParser, document */
 const renderPage = require('./template.js')
-const {Readability} = require('@mozilla/readability')
+const { Readability } = require('@mozilla/readability')
 const article = new Readability(document).parse()
 const { title, content, byline } = article
+let cleanedContent = content
+
+// Narrow the article to the inner HTML of the element holding its first <p>.
+// cleanedContent keeps the unmodified `content` whenever that is not possible.
+try {
+  const parsed = (new DOMParser()).parseFromString(content, 'text/html')
+
+  // Find the first paragraph
+  const firstP = parsed.querySelector('p')
+  // querySelector returns null when the article has no <p>; skip cleaning
+  // instead of letting a TypeError reach the catch and log a bogus failure.
+  const parent = firstP && firstP.parentElement
+  if (parent) {
+    cleanedContent = parent.innerHTML
+  }
+} catch (e) {
+  console.error('Unable to clean article', e)
+}
+
 const finalContent = `
-
${byline}
` : ''}