Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add percent encoding of URLs in imagesrcset param of Link response header #1866

Merged
merged 1 commit into from
Feb 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 43 additions & 11 deletions plugins/optimization-detective/class-od-link-collection.php
Original file line number Diff line number Diff line change
Expand Up @@ -267,22 +267,32 @@ public function get_response_header(): ?string {

foreach ( $this->get_prepared_links() as $link ) {
if ( isset( $link['href'] ) ) {
$decoded_url = urldecode( $link['href'] );

// Encode characters not allowed in a URL per RFC 3986 (anything that is not among the reserved and unreserved characters).
$encoded_url = preg_replace_callback(
'/[^A-Za-z0-9\-._~:\/?#\[\]@!$&\'()*+,;=]/',
static function ( $matches ) {
return rawurlencode( $matches[0] );
},
$decoded_url
);
$link['href'] = esc_url_raw( $encoded_url ?? '' );
$link['href'] = $this->encode_url_for_response_header( $link['href'] );
} else {
// The about:blank is present since a Link without a reference-uri is invalid so any imagesrcset would otherwise not get downloaded.
$link['href'] = 'about:blank';
}

// Encode the URLs in the srcset.
if ( isset( $link['imagesrcset'] ) ) {
$link['imagesrcset'] = join(
', ',
array_map(
function ( $image_candidate ) {
// Parse out the URL to separate it from the descriptor.
$image_candidate_parts = (array) preg_split( '/\s+/', (string) $image_candidate, 2 );

// Encode the URL.
$image_candidate_parts[0] = $this->encode_url_for_response_header( (string) $image_candidate_parts[0] );

// Re-join the URL with the descriptor.
return implode( ' ', $image_candidate_parts );
},
(array) preg_split( '/\s*,\s*/', $link['imagesrcset'] )
)
);
}

$link_header = '<' . $link['href'] . '>';
unset( $link['href'] );
foreach ( $link as $name => $value ) {
Expand Down Expand Up @@ -310,6 +320,28 @@ static function ( $matches ) {
return 'Link: ' . implode( ', ', $link_headers );
}

/**
* Encodes a URL for serving in an HTTP response header.
*
* The URL is first percent-decoded and then every character outside the RFC 3986
* reserved and unreserved sets is percent-encoded again, so input that is already
* encoded is not double-encoded. The result is finally passed through esc_url_raw().
*
* NOTE(review): because of the decode/encode round trip, a percent-encoded reserved
* character in the input (for example %2F) comes back as its literal form, since the
* literal is in the allowed set below — confirm callers never rely on such encodings.
*
* @since n.e.x.t
*
* @param string $url URL to percent encode. Any existing percent encodings will first be decoded.
* @return string Percent-encoded URL.
*/
private function encode_url_for_response_header( string $url ): string {
// NOTE(review): urldecode() also turns a literal '+' into a space, which is then emitted as %20 below;
// rawurldecode() would leave '+' untouched — confirm which behavior is intended.
$decoded_url = urldecode( $url );

// Encode characters not allowed in a URL per RFC 3986 (anything that is not among the reserved and unreserved characters).
// The pattern has no `u` modifier, so multibyte UTF-8 characters are matched and encoded one byte at a time.
// preg_replace_callback() returns null on failure; the (string) cast turns that into an empty string.
$encoded_url = (string) preg_replace_callback(
'/[^A-Za-z0-9\-._~:\/?#\[\]@!$&\'()*+,;=]/',
static function ( $matches ) {
return rawurlencode( $matches[0] );
},
$decoded_url
);
return esc_url_raw( $encoded_url );
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: This was originally esc_url_raw( $encoded_url ?? '' ). The null coalescing appears to have been a workaround for the fact that preg_replace_callback() can technically return null on error, such as when an invalid pattern is provided. Since the tests confirm that the pattern is valid, a (string) cast seems more appropriate.

}

/**
* Counts the links.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,25 @@ public function data_provider_to_test_add_link(): array {
'expected_html' => '
<link data-od-added-tag rel="preload" href="https://example.com/bar.jpg" as="image" fetchpriority="high" imagesrcset="https://example.com/&quot;bar&quot;-480w.jpg 480w, https://example.com/&quot;bar&quot;-800w.jpg 800w" imagesizes="(max-width: 600px) 480px, 800px" crossorigin="anonymous">
',
'expected_header' => 'Link: <https://example.com/bar.jpg>; rel="preload"; as="image"; fetchpriority="high"; imagesrcset="https://example.com/\"bar\"-480w.jpg 480w, https://example.com/\"bar\"-800w.jpg 800w"; imagesizes="(max-width: 600px) 480px, 800px"; crossorigin="anonymous"',
'expected_header' => 'Link: <https://example.com/bar.jpg>; rel="preload"; as="image"; fetchpriority="high"; imagesrcset="https://example.com/%22bar%22-480w.jpg 480w, https://example.com/%22bar%22-800w.jpg 800w"; imagesizes="(max-width: 600px) 480px, 800px"; crossorigin="anonymous"',
'expected_count' => 1,
'error' => '',
),
'preload_mime_with_quotes' => array(
'links_args' => array(
array(
array(
'rel' => 'preload',
'href' => 'https://example.com/bar.webm',
'as' => 'video',
'type' => 'video/webm; codecs="vp8, vorbis"',
),
),
),
'expected_html' => '
<link data-od-added-tag rel="preload" href="https://example.com/bar.webm" as="video" type="video/webm; codecs=&quot;vp8, vorbis&quot;">
',
'expected_header' => 'Link: <https://example.com/bar.webm>; rel="preload"; as="video"; type="video/webm; codecs=\"vp8, vorbis\""',
'expected_count' => 1,
'error' => '',
),
Expand Down Expand Up @@ -422,6 +440,25 @@ public function data_provider_to_test_add_link(): array {
'expected_count' => 1,
'error' => '',
),
'non_ascii_srcset' => array(
'links_args' => array(
array(
array(
'href' => 'https://example.com/wp-content/uploads/2025/02/البيسون-1024x668-jpg.webp',
'rel' => 'preload',
'as' => 'image',
'imagesizes' => '(width <= 480px) 316px, (480px < width <= 600px) 489px, (600px < width <= 782px) 644px, (782px < width) 644px',
'imagesrcset' => 'https://example.com/wp-content/uploads/2025/02/البيسون-1024x668-jpg.webp 1024w, https://example.com/wp-content/uploads/2025/02/البيسون-300x196-jpg.webp 300w, https://example.com/wp-content/uploads/2025/02/البيسون-768x501-jpg.webp 768w, https://example.com/wp-content/uploads/2025/02/البيسون-1536x1002-jpg.webp 1536w, https://example.com/wp-content/uploads/2025/02/البيسون-2048x1336-jpg.webp 2048w',
),
),
),
'expected_html' => '
<link data-od-added-tag href="https://example.com/wp-content/uploads/2025/02/البيسون-1024x668-jpg.webp" rel="preload" as="image" imagesizes="(width &lt;= 480px) 316px, (480px &lt; width &lt;= 600px) 489px, (600px &lt; width &lt;= 782px) 644px, (782px &lt; width) 644px" imagesrcset="https://example.com/wp-content/uploads/2025/02/البيسون-1024x668-jpg.webp 1024w, https://example.com/wp-content/uploads/2025/02/البيسون-300x196-jpg.webp 300w, https://example.com/wp-content/uploads/2025/02/البيسون-768x501-jpg.webp 768w, https://example.com/wp-content/uploads/2025/02/البيسون-1536x1002-jpg.webp 1536w, https://example.com/wp-content/uploads/2025/02/البيسون-2048x1336-jpg.webp 2048w">
',
'expected_header' => 'Link: <https://example.com/wp-content/uploads/2025/02/%D8%A7%D9%84%D8%A8%D9%8A%D8%B3%D9%88%D9%86-1024x668-jpg.webp>; rel="preload"; as="image"; imagesizes="(width <= 480px) 316px, (480px < width <= 600px) 489px, (600px < width <= 782px) 644px, (782px < width) 644px"; imagesrcset="https://example.com/wp-content/uploads/2025/02/%D8%A7%D9%84%D8%A8%D9%8A%D8%B3%D9%88%D9%86-1024x668-jpg.webp 1024w, https://example.com/wp-content/uploads/2025/02/%D8%A7%D9%84%D8%A8%D9%8A%D8%B3%D9%88%D9%86-300x196-jpg.webp 300w, https://example.com/wp-content/uploads/2025/02/%D8%A7%D9%84%D8%A8%D9%8A%D8%B3%D9%88%D9%86-768x501-jpg.webp 768w, https://example.com/wp-content/uploads/2025/02/%D8%A7%D9%84%D8%A8%D9%8A%D8%B3%D9%88%D9%86-1536x1002-jpg.webp 1536w, https://example.com/wp-content/uploads/2025/02/%D8%A7%D9%84%D8%A8%D9%8A%D8%B3%D9%88%D9%86-2048x1336-jpg.webp 2048w"',
'expected_count' => 1,
'error' => '',
),
'percent-in-path' => array(
'links_args' => array(
array(
Expand Down Expand Up @@ -467,6 +504,7 @@ public function data_provider_to_test_add_link(): array {
* @covers ::get_prepared_links
* @covers ::merge_consecutive_links
* @covers ::get_response_header
* @covers ::encode_url_for_response_header
* @covers ::count
*
* @dataProvider data_provider_to_test_add_link
Expand Down