-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #28 from dreadnode/loaders/selenium-website-loader
feat: selenium website loader
- Loading branch information
Showing
5 changed files
with
203 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
dyana.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
FROM python:3.10-alpine | ||
|
||
# install chromedriver | ||
RUN apk update | ||
RUN apk add chromium chromium-chromedriver | ||
|
||
ENV PYTHONUNBUFFERED 1 | ||
RUN pip install --upgrade pip | ||
RUN pip install selenium | ||
|
||
WORKDIR /app | ||
COPY dyana.py . | ||
COPY main.py . | ||
|
||
ENV DISPLAY=:99 | ||
|
||
ENTRYPOINT ["python3", "-W", "ignore", "main.py"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
all: | ||
docker build -t dyana-website-loader . |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,168 @@ | ||
import argparse | ||
import json | ||
import shutil | ||
import time | ||
import typing | ||
from typing import Any | ||
|
||
from selenium import webdriver | ||
from selenium.common.exceptions import TimeoutException | ||
from selenium.webdriver.chrome.service import Service | ||
from selenium.webdriver.common.by import By | ||
from selenium.webdriver.support import expected_conditions as EC | ||
from selenium.webdriver.support.ui import WebDriverWait | ||
|
||
from dyana import Profiler # type: ignore[attr-defined] | ||
|
||
|
||
def collect_performance_metrics(driver: webdriver.Chrome) -> dict[str, Any]: | ||
"""Collect detailed performance metrics using Chrome DevTools Protocol.""" | ||
metrics = {} | ||
|
||
# Navigation Timing API metrics | ||
navigation_timing = driver.execute_script(""" | ||
const performance = window.performance; | ||
const timing = performance.timing; | ||
return { | ||
'navigationStart': timing.navigationStart, | ||
'responseEnd': timing.responseEnd, | ||
'domComplete': timing.domComplete, | ||
'loadEventEnd': timing.loadEventEnd, | ||
'pageLoadTime': timing.loadEventEnd - timing.navigationStart, | ||
'dnsLookupTime': timing.domainLookupEnd - timing.domainLookupStart, | ||
'tcpConnectTime': timing.connectEnd - timing.connectStart, | ||
'serverResponseTime': timing.responseEnd - timing.requestStart, | ||
'domProcessingTime': timing.domComplete - timing.domLoading | ||
}; | ||
""") | ||
metrics["timing"] = navigation_timing | ||
|
||
# Memory info | ||
memory_info = driver.execute_script(""" | ||
return { | ||
'jsHeapSizeLimit': window.performance.memory.jsHeapSizeLimit, | ||
'totalJSHeapSize': window.performance.memory.totalJSHeapSize, | ||
'usedJSHeapSize': window.performance.memory.usedJSHeapSize | ||
}; | ||
""") | ||
metrics["memory"] = memory_info | ||
|
||
# Resource timing data | ||
resource_timing = driver.execute_script(""" | ||
return performance.getEntriesByType('resource').map(entry => ({ | ||
name: entry.name, | ||
entryType: entry.entryType, | ||
startTime: entry.startTime, | ||
duration: entry.duration, | ||
initiatorType: entry.initiatorType | ||
})); | ||
""") | ||
metrics["resources"] = resource_timing | ||
|
||
return metrics | ||
|
||
|
||
def analyze_page_content(driver: webdriver.Chrome) -> dict[str, Any]: | ||
"""Analyze page content and structure.""" | ||
return typing.cast( | ||
dict[str, Any], | ||
driver.execute_script(""" | ||
return { | ||
'elements': document.getElementsByTagName('*').length, | ||
'images': document.getElementsByTagName('img').length, | ||
'links': document.getElementsByTagName('a').length, | ||
'scripts': document.getElementsByTagName('script').length, | ||
'styles': document.getElementsByTagName('link').length, | ||
'iframes': document.getElementsByTagName('iframe').length, | ||
'documentSize': document.documentElement.innerHTML.length, | ||
}; | ||
"""), | ||
) | ||
|
||
|
||
if __name__ == "__main__": | ||
parser = argparse.ArgumentParser(description="Profile website performance") | ||
parser.add_argument("--url", help="URL to open", required=True) | ||
parser.add_argument("--wait-for", help="CSS selector to wait for", default=None) | ||
parser.add_argument("--timeout", help="Timeout in seconds", type=int, default=30) | ||
args = parser.parse_args() | ||
|
||
# Normalize URL by adding https:// if protocol is missing | ||
if "://" not in args.url: | ||
args.url = f"https://{args.url}" | ||
|
||
profiler: Profiler = Profiler() | ||
|
||
try: | ||
chrome_options = webdriver.ChromeOptions() | ||
chrome_options.add_argument("--no-sandbox") | ||
chrome_options.add_argument("--headless=new") | ||
chrome_options.add_argument("--disable-dev-shm-usage") | ||
chrome_options.add_argument("--window-size=1920,1080") | ||
# Enable performance logging | ||
chrome_options.set_capability("goog:loggingPrefs", {"performance": "ALL", "browser": "ALL"}) | ||
|
||
driver = webdriver.Chrome(options=chrome_options, service=Service(shutil.which("chromedriver"))) | ||
driver.implicitly_wait(10) | ||
|
||
profiler.track_memory("before_load") | ||
|
||
start_time = time.time() | ||
driver.get(args.url) | ||
|
||
# Wait for specific element if requested | ||
if args.wait_for: | ||
try: | ||
WebDriverWait(driver, args.timeout).until( | ||
EC.presence_of_element_located((By.CSS_SELECTOR, args.wait_for)) | ||
) | ||
except TimeoutException: | ||
profiler.track_error("wait", f"Timeout waiting for element: {args.wait_for}") | ||
|
||
load_time = time.time() - start_time | ||
profiler.track_memory("after_load") | ||
|
||
# Collect performance metrics | ||
try: | ||
metrics = collect_performance_metrics(driver) | ||
content_analysis = analyze_page_content(driver) | ||
|
||
# Get console logs | ||
console_logs = driver.get_log("browser") | ||
|
||
# Get network logs | ||
network_logs = driver.get_log("performance") | ||
|
||
# Add all metrics to profiler | ||
profiler.extra = { | ||
"load_time": load_time, | ||
"performance_metrics": metrics, | ||
"content_analysis": content_analysis, | ||
"console_logs": console_logs, | ||
"network_logs": network_logs, | ||
"title": driver.title, | ||
"url": driver.current_url, | ||
"status_code": driver.execute_script("return window.performance.getEntries()[0].responseStatus"), | ||
} | ||
except Exception as e: | ||
profiler.track_error("metrics", str(e)) | ||
|
||
# Take screenshot | ||
try: | ||
screenshot = driver.get_screenshot_as_base64() | ||
profiler.extra["screenshot"] = screenshot | ||
except Exception as e: | ||
profiler.track_error("screenshot", str(e)) | ||
|
||
profiler.track_memory("after_profiling") | ||
|
||
except Exception as e: | ||
profiler.track_error("chrome", str(e)) | ||
finally: | ||
try: | ||
driver.quit() | ||
profiler.track_memory("after_quit") | ||
except Exception: | ||
pass | ||
|
||
print(json.dumps(profiler.as_dict())) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
description: Opens a website in a headless browser and profiles its performance. | ||
network: true | ||
|
||
args: | ||
- name: url | ||
description: URL to open. | ||
required: true | ||
- name: wait-for | ||
description: CSS selector to wait for before profiling. | ||
required: false | ||
- name: timeout | ||
description: Timeout in seconds for page load and element wait. | ||
required: false | ||
type: int | ||
default: "60" |