Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🚫 Prevent email popups from mailto: and tel: links #70

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
4 changes: 3 additions & 1 deletion harambe/contrib/playwright/harness.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from playwright_stealth import stealth_async

from harambe.contrib.playwright.impl import PlaywrightPage
from harambe.handlers import UnnecessaryResourceHandler
from harambe.handlers import UnnecessaryResourceHandler, MailtoTelBlockerHandler
from harambe.proxy import proxy_from_url
from harambe.types import SetCookieParam, BrowserType, LocalStorage
from harambe.user_agent import random_user_agent, compute_user_agent, UserAgentFactory
Expand Down Expand Up @@ -112,9 +112,11 @@ async def playwright_harness(

if abort_unnecessary_requests:
await ctx.route("**/*", UnnecessaryResourceHandler().handle)
await ctx.route("**/*", MailtoTelBlockerHandler().handle)

async def page_factory(*_: Any, **__: Any) -> PlaywrightPage:
page = await ctx.new_page()
page.on("dialog", lambda dialog: dialog.dismiss())
if on_new_page:
await on_new_page(page)
if stealth:
Expand Down
11 changes: 10 additions & 1 deletion harambe/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,9 +406,18 @@ async def run(
)
if setup:
await setup(sdk)

if not harness_options.get("disable_go_to_url", False):
await page.goto(url)
if harness == playwright_harness:
await page.wait_for_load_state("domcontentloaded")
await page.evaluate("""
document.addEventListener('click', (event) => {
const target = event.target.closest('a[href^="mailto:"], a[href^="tel:"]');
if (target) {
event.preventDefault();
}
});
Comment on lines +411 to +419
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why do we need this if we have the route handler already?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because the route handler only prevents the request from being sent; it does not prevent the page's JS code from being executed, so the popup window would still appear. This click listener prevents the JS code itself from opening a new window for any link whose href starts with mailto: or tel:. @asim-shrestha

""")
elif isinstance(page, SoupPage):
page.url = url
await scraper(sdk, url, context)
Expand Down
10 changes: 10 additions & 0 deletions harambe/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,13 @@ async def handle(self, route: Route) -> None:
return

await route.fallback()


class MailtoTelBlockerHandler:
    """Route handler that aborts requests whose URL uses the mailto: or tel: scheme.

    Every other request is passed on with ``route.fallback()`` so that any
    other handler registered for the same URL pattern still gets a chance to
    inspect (and possibly abort) it.
    """

    # URL prefixes that must never be followed by the browser.
    BLOCKED_SCHEMES = ("mailto:", "tel:")

    async def handle(self, route: Route) -> None:
        """Abort ``mailto:``/``tel:`` requests and defer everything else.

        :param route: the intercepted route; only ``route.request.url`` is read.
        """
        request_url = route.request.url

        # URL schemes are case-insensitive, so compare on a lowercased copy.
        if request_url.lower().startswith(self.BLOCKED_SCHEMES):
            await route.abort("blockedbyclient")
            return

        # fallback() instead of continue_(): continue_() sends the request to
        # the network immediately and skips the remaining matching handlers,
        # which would silently disable them for every non-mailto/tel request.
        await route.fallback()
23 changes: 23 additions & 0 deletions test/mock_html/emails.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Mock Page for Testing Mailto and Tel Blocking</title>
</head>
<body>
<!-- Mock page with three anchors: one mailto: link, one tel: link, and one
     ordinary https link that serves as a non-blocked control. -->
<h1>Test Mailto and Tel Links</h1>

<p>
<a href="mailto:[email protected]">Email Us</a>
</p>

<p>
<a href="tel:+1234567890">Call Us</a>
</p>

<p>
<a href="https://example.com">Go to Example</a>
</p>
</body>
</html>
32 changes: 32 additions & 0 deletions test/test_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,3 +534,35 @@ async def scraper(sdk: SDK, current_url: str, *args, **kwargs):

assert len(observer.data) == 1
assert observer.data[0]["local_storage"] == {}


@pytest.mark.parametrize("harness", [playwright_harness])
async def test_email_popup_prevention(server, observer, harness):
    """Clicking a ``mailto:`` link must neither navigate the page nor open a dialog."""

    @SDK.scraper("test", "detail", observer=observer)
    async def scraper(sdk: SDK, *args, **kwargs) -> None:
        page = sdk.page
        dialog_opened = False

        def set_dialog_flag() -> None:
            nonlocal dialog_opened
            dialog_opened = True

        # Only record that a dialog fired. The previous handler evaluated
        # `dialog.dismiss() and set_dialog_flag()`, which created a dismiss()
        # coroutine that was discarded instead of returned, so it was never
        # awaited. Dismissal is left to the harness's own "dialog" listener
        # (NOTE(review): confirm the harness under test registers one).
        page.on("dialog", lambda _dialog: set_dialog_flag())

        mailto_link = await page.query_selector('a[href^="mailto:"]')
        # query_selector returns None when nothing matches; fail with a clear
        # message instead of an AttributeError on `.click()`.
        assert mailto_link is not None, "Mock page is missing its mailto link."
        await mailto_link.click()
        current_url = page.url

        assert not current_url.startswith(
            "mailto:"
        ), "Popup prevention failed, mailto link triggered."
        assert (
            not dialog_opened
        ), "A dialog (popup) was opened, but it should have been prevented."

    await SDK.run(
        scraper=scraper,
        url=f"{server}/emails",
        schema={},
        headless=True,
        harness=harness,
    )
Loading