Skip to content

Commit

Permalink
Fix GH-17802: \Dom\HTMLDocument querySelector attribute name is case sensitive in HTML
Browse files Browse the repository at this point in the history

According to https://html.spec.whatwg.org/#case-sensitivity-of-selectors,
the CSS selector attribute name must be converted to lowercase for HTML elements,
and then compared case-sensitively against the attribute name in the element.
We implement this not by doing the explicit conversion, but by a manual
loop using a function that first converts the rhs characters to
lowercase and keeps the lhs characters the same, achieving the same
effect.
  • Loading branch information
nielsdos committed Feb 15, 2025
1 parent 8f8d4be commit f32da80
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 1 deletion.
16 changes: 15 additions & 1 deletion ext/dom/lexbor/lexbor/selectors-adapted/selectors.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,21 @@ static zend_always_inline bool lxb_selectors_adapted_cmp_local_name_id(const xml

static zend_always_inline const xmlAttr *lxb_selectors_adapted_attr(const xmlNode *node, const lxb_char_t *name)
{
const xmlAttr *attr = xmlHasProp(node, (const xmlChar *) name);
const xmlAttr *attr = NULL;
ZEND_ASSERT(node->doc != NULL);
if (php_dom_ns_is_html_and_document_is_html(node)) {
/* No need to handle DTD entities as we're in HTML. */
size_t name_bound = strlen((const char *) name) + 1;
for (const xmlAttr *cur = node->properties; cur != NULL; cur = cur->next) {
if (lexbor_str_data_nlocmp_right(cur->name, name, name_bound)) {
attr = cur;
break;
}
}
} else {
attr = xmlHasProp(node, (const xmlChar *) name);
}

if (attr != NULL && attr->ns != NULL) {
return NULL;
}
Expand Down
47 changes: 47 additions & 0 deletions ext/dom/tests/modern/css_selectors/gh17802.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
--TEST--
GH-17802 (\Dom\HTMLDocument querySelector attribute name is case sensitive in HTML)
--EXTENSIONS--
dom
--FILE--
<?php

/*
 * Regression test for GH-17802: in an HTML document, the attribute name in a
 * CSS attribute selector is lowercased before being compared against the
 * attribute names of HTML elements, so `[charseT]` must match `charset`.
 */
$text = <<<TEXT
<html>
<head>
<meta charset="Windows-1252">
</head>
<body>
</body>
</html>
TEXT;

$dom = \Dom\HTMLDocument::createFromString($text, options: LIBXML_NOERROR);
// Add a second <meta> element that lives in a non-HTML namespace ('urn:x') but
// carries the same `charset` attribute, to contrast it with the HTML element.
$meta2 = $dom->head->appendChild($dom->createElementNS('urn:x', 'meta'));
$meta2->setAttribute('charset', 'x');
echo $dom->saveHtml(), "\n";

echo "--- charseT ---\n";

// Mixed-case attribute name in the selector: per the expected output, only the
// HTML-namespace <meta> matches; the 'urn:x' element is compared without
// lowercasing and therefore does not match.
foreach ($dom->querySelectorAll('meta[charseT]') as $entry) {
var_dump($dom->saveHtml($entry));
}

echo "--- charset ---\n";

// Exact-case attribute name: per the expected output, both <meta> elements match.
foreach ($dom->querySelectorAll('meta[charset]') as $entry) {
var_dump($dom->saveHtml($entry));
}

?>
--EXPECT--
<html><head>
<meta charset="Windows-1252">
<meta charset="x"></meta></head>
<body>

</body></html>
--- charseT ---
string(29) "<meta charset="Windows-1252">"
--- charset ---
string(29) "<meta charset="Windows-1252">"
string(25) "<meta charset="x"></meta>"

0 comments on commit f32da80

Please sign in to comment.