Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve parser performance by 50% #79

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 30 additions & 1 deletion src/from_dom.ts
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ export class DOMParser {
/// Parse a document from the content of a DOM node. A fresh
/// `ParseContext` is created for the call, selector matchers are
/// generated for this parser's tag rules, and the content of `dom`
/// between `options.from` and `options.to` is added before the
/// context is finished.
parse(dom: DOMNode, options: ParseOptions = {}): Node {
  const ctx = new ParseContext(this, options, false)
  const {from, to} = options
  // Build the per-selector matchers up front so `matchTag` can use them.
  ctx.generateMatchers(dom as HTMLElement, this.tags)
  ctx.addAll(dom, from, to)
  const result = ctx.finish()
  return result as Node
}
Expand All @@ -208,6 +209,7 @@ export class DOMParser {
/// the left of the input and the end of nodes at the end.
parseSlice(dom: DOMNode, options: ParseOptions = {}) {
  // Same steps as `parse`, but the context is created with `true` as
  // its third argument and the finished fragment is wrapped in a
  // maximally open slice.
  const ctx = new ParseContext(this, options, true)
  const {from, to} = options
  ctx.generateMatchers(dom as HTMLElement, this.tags)
  ctx.addAll(dom, from, to)
  const fragment = ctx.finish() as Fragment
  return Slice.maxOpen(fragment)
}
Expand All @@ -216,7 +218,7 @@ export class DOMParser {
matchTag(dom: DOMNode, context: ParseContext, after?: ParseRule) {
for (let i = after ? this.tags.indexOf(after) + 1 : 0; i < this.tags.length; i++) {
let rule = this.tags[i]
if (matches(dom, rule.tag!) &&
if (context.matchesNode(dom, rule.tag!) &&
(rule.namespace === undefined || (dom as HTMLElement).namespaceURI == rule.namespace) &&
(!rule.context || context.matchesContext(rule.context))) {
if (rule.getAttrs) {
Expand Down Expand Up @@ -398,6 +400,7 @@ class ParseContext {
find: {node: DOMNode, offset: number, pos?: number}[] | undefined
needsBlock: boolean
nodes: NodeContext[]
// Per-selector predicates, keyed by the rule's `tag` selector string.
// Filled by `generateMatchers`, consulted by `matchesNode`, and reset
// to an empty object by `finish`.
matchers: Record<string, (node: HTMLElement) => boolean> = {};

constructor(
// The parser we are using.
Expand Down Expand Up @@ -675,6 +678,7 @@ class ParseContext {
}

finish() {
this.matchers = {}
this.open = 0
this.closeExtra(this.isOpen)
return this.nodes[0].finish(this.isOpen || this.options.topOpen)
Expand Down Expand Up @@ -795,6 +799,31 @@ class ParseContext {
if (level == upto) break
}
}

/// Match a node against a CSS selector. When a matcher was generated
/// for `selector` (see `generateMatchers`) it is used; otherwise this
/// falls back to the generic `matches` helper.
matchesNode(node: DOMNode, selector: string): boolean {
  // `matchers` is a plain object, so a bare `this.matchers[selector]`
  // would also find inherited members for selectors like "constructor"
  // or "toString" and call them as if they were matchers. Only accept
  // own properties.
  const cached = Object.prototype.hasOwnProperty.call(this.matchers, selector)
    ? this.matchers[selector]
    : undefined
  return cached ? cached(node as HTMLElement) : matches(node, selector)
}

/// Generates matchers based on the given parse rules, so that
/// `matchesNode` can answer from a precomputed predicate instead of
/// running a selector match per node.
///
/// `dom` is the root whose descendants are about to be parsed and
/// `rules` are the parse rules whose `tag` selectors should be
/// prepared. Rules without a `tag` are ignored. Selectors for which no
/// matcher can be built are left out of `this.matchers`, so
/// `matchesNode` falls back to `matches` for them.
generateMatchers(dom: HTMLElement, rules: ParseRule[]) {
  // Callers cast an arbitrary DOM node to HTMLElement, so the root may
  // be a node type (e.g. a text node) without `querySelectorAll`.
  const canQuery = typeof dom.querySelectorAll === "function"

  for (const rule of rules) {
    const selector = rule.tag
    if (!selector) continue
    // Several rules can share one selector; build its matcher once
    // rather than re-running `querySelectorAll` for each of them.
    if (Object.prototype.hasOwnProperty.call(this.matchers, selector)) continue

    if (blockTags[selector] || listTags[selector]) {
      // For simple selectors like li, p etc. a tag name check is enough.
      // NOTE(review): `tagName` is only upper-cased for HTML elements in
      // HTML documents — confirm XML/XHTML input is not expected here.
      const upperCaseTag = selector.toUpperCase()
      this.matchers[selector] = (node) => node.tagName === upperCaseTag
    } else if (canQuery) {
      // For more complex selectors, collect all matching descendants
      // once instead of calling `matches` for every node.
      const found = new Set<Node>(dom.querySelectorAll(selector))
      // The snapshot only covers descendants of `dom`. The root itself
      // and nodes outside its subtree are checked with `matches`, so
      // they are not reported as non-matching by default.
      this.matchers[selector] = (node) =>
        found.has(node) ||
        ((node === dom || !dom.contains(node)) && matches(node, selector))
    }
  }
}
}

// Kludge to work around directly nested list nodes produced by some
Expand Down