Skip to content

Commit

Permalink
Feature: minimal addition to run remote webdriver (#55)
Browse files Browse the repository at this point in the history
* Add remote Selenium option

* Add info about remote driver to readme

* Import webdriver properly
  • Loading branch information
tiefling-cat authored Feb 28, 2020
1 parent 3adfa0f commit 2e557f6
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 9 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ Optionally, set the path to the browser executable:
SELENIUM_BROWSER_EXECUTABLE_PATH = which('firefox')
```

To use a remote Selenium driver, specify `SELENIUM_COMMAND_EXECUTOR` instead of `SELENIUM_DRIVER_EXECUTABLE_PATH` (if both are set, the local driver executable takes precedence):
```python
SELENIUM_COMMAND_EXECUTOR = 'http://localhost:4444/wd/hub'
```

2. Add the `SeleniumMiddleware` to the downloader middlewares:
```python
DOWNLOADER_MIDDLEWARES = {
Expand Down
37 changes: 28 additions & 9 deletions scrapy_selenium/middlewares.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
class SeleniumMiddleware:
"""Scrapy middleware handling the requests using selenium"""

def __init__(self, driver_name, driver_executable_path, driver_arguments,
browser_executable_path):
def __init__(self, driver_name, driver_executable_path,
browser_executable_path, command_executor, driver_arguments):
"""Initialize the selenium webdriver
Parameters
Expand All @@ -27,6 +27,8 @@ def __init__(self, driver_name, driver_executable_path, driver_arguments,
A list of arguments to initialize the driver
browser_executable_path: str
The path of the executable binary of the browser
command_executor: str
Selenium remote server endpoint
"""

webdriver_base_path = f'selenium.webdriver.{driver_name}'
Expand All @@ -38,6 +40,7 @@ def __init__(self, driver_name, driver_executable_path, driver_arguments,
driver_options_klass = getattr(driver_options_module, 'Options')

driver_options = driver_options_klass()

if browser_executable_path:
driver_options.binary_location = browser_executable_path
for argument in driver_arguments:
Expand All @@ -48,7 +51,19 @@ def __init__(self, driver_name, driver_executable_path, driver_arguments,
f'{driver_name}_options': driver_options
}

self.driver = driver_klass(**driver_kwargs)
# locally installed driver
if driver_executable_path is not None:
driver_kwargs = {
'executable_path': driver_executable_path,
f'{driver_name}_options': driver_options
}
self.driver = driver_klass(**driver_kwargs)
# remote driver
elif command_executor is not None:
from selenium import webdriver
capabilities = driver_options.to_capabilities()
self.driver = webdriver.Remote(command_executor=command_executor,
desired_capabilities=capabilities)

@classmethod
def from_crawler(cls, crawler):
Expand All @@ -57,18 +72,22 @@ def from_crawler(cls, crawler):
driver_name = crawler.settings.get('SELENIUM_DRIVER_NAME')
driver_executable_path = crawler.settings.get('SELENIUM_DRIVER_EXECUTABLE_PATH')
browser_executable_path = crawler.settings.get('SELENIUM_BROWSER_EXECUTABLE_PATH')
command_executor = crawler.settings.get('SELENIUM_COMMAND_EXECUTOR')
driver_arguments = crawler.settings.get('SELENIUM_DRIVER_ARGUMENTS')

if not driver_name or not driver_executable_path:
raise NotConfigured(
'SELENIUM_DRIVER_NAME and SELENIUM_DRIVER_EXECUTABLE_PATH must be set'
)
if driver_name is None:
raise NotConfigured('SELENIUM_DRIVER_NAME must be set')

if driver_executable_path is None and command_executor is None:
raise NotConfigured('Either SELENIUM_DRIVER_EXECUTABLE_PATH '
'or SELENIUM_COMMAND_EXECUTOR must be set')

middleware = cls(
driver_name=driver_name,
driver_executable_path=driver_executable_path,
driver_arguments=driver_arguments,
browser_executable_path=browser_executable_path
browser_executable_path=browser_executable_path,
command_executor=command_executor,
driver_arguments=driver_arguments
)

crawler.signals.connect(middleware.spider_closed, signals.spider_closed)
Expand Down

1 comment on commit 2e557f6

@ezedonovan
Copy link

@ezedonovan ezedonovan commented on 2e557f6 Jan 4, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey guys, FYI: this change is not included in the version installed via pip, so the README information is misleading.

Please sign in to comment.