Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: $removeparam #4528

Open
wants to merge 15 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 64 additions & 18 deletions packages/adblocker/src/engine/engine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -779,7 +779,7 @@ export default class FilterEngine extends EventEmitter<EngineEventHandlers> {
exceptions.push(filter);
} else if (filter.isImportant()) {
importants.push(filter);
} else if (filter.isRedirect()) {
} else if (filter.isRedirectable()) {
redirects.push(filter);
} else {
filters.push(filter);
Expand Down Expand Up @@ -1443,29 +1443,70 @@ export default class FilterEngine extends EventEmitter<EngineEventHandlers> {
if (request.isSupported) {
// Check the filters in the following order:
// 1. $important (not subject to exceptions)
// 2. redirection ($redirect=resource)
// 2. redirection ($removeparam and $redirect=resource)
// 3. normal filters
// 4. exceptions
result.filter = this.importants.match(request, this.isFilterExcluded.bind(this));

let redirectNone: NetworkFilter | undefined;
let redirectRule: NetworkFilter | undefined;
let redirectUrl: string | undefined;

// If `result.filter` is `undefined`, it means there was no $important
// filter found so far. We look for a $redirect filter. There is some
// extra logic to handle special cases like redirect-rule and
// redirect=none.
//
// * If redirect=none is found, then cancel all redirects.
// * Else if redirect-rule is found, only redirect if request would be blocked.
// * Else if redirect is found, redirect.
// filter found so far. We look for $removeparam and $redirect filter.
if (result.filter === undefined) {
const redirects = this.redirects
.matchAll(request, this.isFilterExcluded.bind(this))
// highest priorty wins
.sort((a, b) => b.getRedirectPriority() - a.getRedirectPriority());
const redirectFilters = this.redirects.matchAll(request, this.isFilterExcluded.bind(this));
const resourceRedirects: NetworkFilter[] = [];
const requestRedirects: NetworkFilter[] = [];
for (const filter of redirectFilters) {
if (filter.isRemoveParam()) {
requestRedirects.push(filter);
} else {
resourceRedirects.push(filter);
}
}

// We don't need to match `removeparam` at all if:
// * `removeparam` filters not matched
// * URL doesn't have search parameter
if (requestRedirects.length !== 0) {
const url = new URL(request.url);
if (url.searchParams.size !== 0) {
for (const filter of requestRedirects) {
const key = filter.removeparam!;

// Remove all params in case of option value is empty
if (key === '') {
result.filter = filter;
redirectUrl = request.url.slice(0, request.url.indexOf('?'));
seia-soto marked this conversation as resolved.
Show resolved Hide resolved

break;
}

if (url.searchParams.has(key)) {
url.searchParams.delete(key);

result.filter = filter;
redirectUrl = url.toString();

break;
}
}
}
}

// If `result.filter` is still remains `undefined`, there
// is some extra logic to handle special cases like
// redirect-rule and redirect=none.
//
// * If redirect=none is found, then cancel all redirects.
// * Else if redirect-rule is found, only redirect if request would be blocked.
// * Else if redirect is found, redirect.
if (result.filter === undefined && resourceRedirects.length !== 0) {
const redirects = resourceRedirects
// highest priorty wins
.sort((a, b) => b.getRedirectPriority() - a.getRedirectPriority());

if (redirects.length !== 0) {
for (const filter of redirects) {
if (filter.getRedirectResource() === 'none') {
redirectNone = filter;
Expand Down Expand Up @@ -1508,12 +1549,17 @@ export default class FilterEngine extends EventEmitter<EngineEventHandlers> {
if (
result.filter !== undefined &&
result.exception === undefined &&
result.filter.isRedirect()
result.filter.isRedirectable()
) {
if (redirectNone !== undefined) {
result.exception = redirectNone;
// Prioritize if `redirectUrl` is set.
if (redirectUrl !== undefined) {
result.redirect = { body: '', contentType: 'text/plain', dataUrl: redirectUrl };
} else {
result.redirect = this.resources.getResource(result.filter.getRedirectResource());
if (redirectNone !== undefined) {
result.exception = redirectNone;
} else {
result.redirect = this.resources.getResource(result.filter.getRedirectResource());
}
}
}
}
Expand Down
35 changes: 35 additions & 0 deletions packages/adblocker/src/filters/network.ts
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ export const enum NETWORK_FILTER_MASK {
isHostnameAnchor = 1 << 28,
isRedirectRule = 1 << 29,
isRedirect = 1 << 30,
isRemoveParam = 1 << 31,
// IMPORTANT: the mask is now full, no more options can be added
// Consider creating a separate fitler type for isReplace if a new
// network filter option is needed.
Expand Down Expand Up @@ -887,6 +888,16 @@ export default class NetworkFilter implements IFilter {
mask = setBit(mask, NETWORK_FILTER_MASK.isReplace);
optionValue = value;

break;
case 'removeparam':
// TODO: Support regex
if (negation || value.startsWith('/')) {
seia-soto marked this conversation as resolved.
Show resolved Hide resolved
return null;
}

mask = setBit(mask, NETWORK_FILTER_MASK.isRemoveParam);
optionValue = value;

break;
default: {
// Handle content type options separatly
Expand Down Expand Up @@ -1265,6 +1276,14 @@ export default class NetworkFilter implements IFilter {
return this.optionValue;
}

public get removeparam(): string | undefined {
if (!this.isRemoveParam()) {
return undefined;
}

return this.optionValue;
}

public isCosmeticFilter(): this is CosmeticFilter {
return false;
}
Expand Down Expand Up @@ -1535,6 +1554,14 @@ export default class NetworkFilter implements IFilter {
options.push('badfilter');
}

if (this.isRemoveParam()) {
if (this.removeparam!.length > 0) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wont this throw if this.removeparam === undefined ?

Copy link
Member Author

@seia-soto seia-soto Jan 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

removeparam is always string here, as we always set string in parse time. I'll add some comments or improve the type clarity.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or just change it if (!this.removeparam) { or typeof this.removeparam === 'string' && this.removeparam.length > 0

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • Other methods like get csp() has a same shape.
  • We use an empty value for the full removal of parameters.

options.push(`removeparam=${this.optionValue}`);
} else {
options.push('removeparam');
seia-soto marked this conversation as resolved.
Show resolved Hide resolved
}
}

if (options.length > 0) {
if (typeof modifierReplacer === 'function') {
filter += `$${options.map(modifierReplacer).join(',')}`;
Expand Down Expand Up @@ -1608,6 +1635,14 @@ export default class NetworkFilter implements IFilter {
return getBit(this.getMask(), NETWORK_FILTER_MASK.isReplace);
}

public isRemoveParam(): boolean {
return getBit(this.getMask(), NETWORK_FILTER_MASK.isRemoveParam);
}

public isRedirectable(): boolean {
return this.isRedirect() || this.isRemoveParam();
}

// Expected to be called only with `$replace` modifiers
public getHtmlModifier(): HTMLModifier | null {
// Empty `$replace` modifier is to disable all replace modifiers on exception
Expand Down
100 changes: 95 additions & 5 deletions packages/adblocker/test/engine/engine.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,14 @@ function test({
expect(importants).to.include(result.filter.rawLine);

// Handle case where important filter is also a redirect
if (filter.isRedirect()) {
if (filter.isRedirectable()) {
expect(redirects).to.include(result.filter.rawLine);
}
}

expect(result.exception).to.be.undefined;

if (!filter.isRedirect()) {
if (!filter.isRedirectable()) {
expect(result.redirect).to.be.undefined;
}

Expand All @@ -108,7 +108,7 @@ function test({
expect(result.filter).not.to.be.undefined;
expect(result.redirect).to.be.undefined;
expect(result.match).to.be.false;
} else if (filter.isRedirect() && exceptions.length === 0 && importants.length === 0) {
} else if (filter.isRedirectable() && exceptions.length === 0 && importants.length === 0) {
const result = engine.match(request);
expect(result.filter).not.to.be.undefined;
if (
Expand Down Expand Up @@ -600,6 +600,96 @@ $csp=baz,domain=bar.com
expect((filter as NetworkFilter).toString()).to.equal('||foo.com$image,redirect=foo.js');
expect(redirect).to.be.undefined;
});

context('removeparam', () => {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please add some tests for exceptions

function urlToDocumentRequest(url: string) {
return Request.fromRawDetails({
sourceUrl: 'https://foo.com',
url,
type: 'document',
});
}

const requests = [
'https://foo.com?utm',
'https://foo.com?utm=',
'https://foo.com?utm=a',
'https://foo.com?utm=a&utm_source=organic',
'https://foo.com?utm_source=organic&utm=a',
].map(urlToDocumentRequest);

describe('removes all parameters', () => {
const engine = createEngine('||foo.com$removeparam');
seia-soto marked this conversation as resolved.
Show resolved Hide resolved
for (const { match, redirect, request } of requests.map((request) => ({
...engine.match(request),
request,
}))) {
it(`removes all params from "${request.url}"`, () => {
expect(match).to.be.true;
expect(redirect).not.to.be.undefined;
expect(redirect!.body).to.be.eql('');
expect(redirect!.contentType).to.be.eql('text/plain');
expect(redirect!.dataUrl).to.be.eql('https://foo.com');
});
}
});

describe('removes specific parameter', () => {
const engine = createEngine('||foo.com$removeparam=utm');
for (const { match, redirect, request } of requests.map((request) => ({
...engine.match(request),
request,
}))) {
it(`removes "utm" from "${request.url}"`, () => {
expect(match).to.be.true;
expect(redirect).not.to.be.undefined;
expect(redirect!.body).to.be.eql('');
expect(redirect!.contentType).to.be.eql('text/plain');
expect(redirect!.dataUrl).not.to.include('?utm=');
expect(redirect!.dataUrl).not.to.include('&utm=');
});
}
});

describe('removes specific parameter regardless of ordering', () => {
const engine = createEngine('||foo.com$removeparam=utm');
for (const { match, redirect, request } of [
// First
'https://foo.com?utm=a&utm_source=organic&utm_event=b',
// Middle
'https://foo.com?utm_source=organic&utm=a&utm_event=b',
// Last
'https://foo.com?utm_source=organic&utm_event=b&utm=a',
]
.map(urlToDocumentRequest)
.map((request) => ({
...engine.match(request),
request,
}))) {
it(`removeparam "utm" from "${request.url}"`, () => {
expect(match).to.be.true;
expect(redirect).not.to.be.undefined;
expect(redirect!.dataUrl).to.be.eql('https://foo.com/?utm_source=organic&utm_event=b');
});
}
});

it('removes parameter sequentially', () => {
const params = ['utm', 'utm_source', 'utm_event'];
let request = urlToDocumentRequest('https://foo.com?utm_source=organic&utm_event=b&utm=a');
const engine = createEngine(
params.map((param) => `||foo.com$removeparam=${param}`).join('\n'),
);
for (let i = 0; i < params.length; i++) {
const { redirect } = engine.match(request);
expect(redirect).not.to.be.undefined;
request = urlToDocumentRequest(redirect!.dataUrl);
}
for (const param of params) {
expect(request.url).not.to.include(param);
}
});
});
});

describe('network filters', () => {
Expand Down Expand Up @@ -661,11 +751,11 @@ $csp=baz,domain=bar.com
importants.push(filter);
}

if (parsed.isRedirect()) {
if (parsed.isRedirectable()) {
redirects.push(filter);
}

if (!parsed.isRedirect() && !parsed.isException() && !parsed.isImportant()) {
if (!parsed.isRedirectable() && !parsed.isException() && !parsed.isImportant()) {
normalFilters.push(filter);
}
}
Expand Down
31 changes: 31 additions & 0 deletions packages/adblocker/test/parsing.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ function network(filter: string, expected: any) {
denyallow: parsed.denyallow,
domains: parsed.domains,
redirect: parsed.getRedirect(),
removeparam: parsed.removeparam,

// Filter type
isBadFilter: parsed.isBadFilter(),
Expand All @@ -48,6 +49,8 @@ function network(filter: string, expected: any) {
isPlain: parsed.isPlain(),
isRedirect: parsed.isRedirect(),
isRedirectRule: parsed.isRedirectRule(),
isRemoveParam: parsed.isRemoveParam(),
isRedirectable: parsed.isRedirectable(),
isRegex: parsed.isRegex(),
isRightAnchor: parsed.isRightAnchor(),

Expand Down Expand Up @@ -101,6 +104,8 @@ const DEFAULT_NETWORK_FILTER = {
isPlain: false,
isRedirect: false,
isRedirectRule: false,
isRemoveParam: false,
isRedirectable: false,
isRegex: false,
isRightAnchor: false,

Expand Down Expand Up @@ -845,11 +850,13 @@ describe('Network filters', () => {
network('||foo.com$redirect-rule=bar.js', {
isRedirect: true,
isRedirectRule: true,
isRedirectable: true,
redirect: 'bar.js',
});
network('$redirect-rule=bar.js', {
isRedirect: true,
isRedirectRule: true,
isRedirectable: true,
redirect: 'bar.js',
});
});
Expand All @@ -876,6 +883,30 @@ describe('Network filters', () => {
});
});

describe('removeparam', () => {
it('parses ~removeparam', () => {
// ~removeparam is not a valid option
network('||foo.com$~removeparam=utm', null);
network('||fo.com$~removeparam', null);
});

it('parses removeparam', () => {
network('||foo.com$removeparam=utm', {
removeparam: 'utm',
isRemoveParam: true,
isRedirectable: true,
});
});

it('parses removeparam without a value', () => {
seia-soto marked this conversation as resolved.
Show resolved Hide resolved
network('||foo.com$removeparam', {
removeparam: '',
isRemoveParam: true,
isRedirectable: true,
});
});
});

describe('match-case', () => {
it('parses match-case', () => {
network('||foo.com$match-case', {});
Expand Down
Loading