Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update kwargs to Services #137

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 16 additions & 29 deletions scrapy_selenium/middlewares.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
class SeleniumMiddleware:
"""Scrapy middleware handling the requests using selenium"""

def __init__(self, driver_name, driver_executable_path,
browser_executable_path, command_executor, driver_arguments):
def __init__(self, driver_name, driver_executable_path, driver_arguments,
browser_executable_path):
"""Initialize the selenium webdriver

Parameters
Expand All @@ -27,8 +27,6 @@ def __init__(self, driver_name, driver_executable_path,
A list of arguments to initialize the driver
browser_executable_path: str
The path of the executable binary of the browser
command_executor: str
Selenium remote server endpoint
"""

webdriver_base_path = f'selenium.webdriver.{driver_name}'
Expand All @@ -40,30 +38,23 @@ def __init__(self, driver_name, driver_executable_path,
driver_options_klass = getattr(driver_options_module, 'Options')

driver_options = driver_options_klass()

if browser_executable_path:
driver_options.binary_location = browser_executable_path
for argument in driver_arguments:
driver_options.add_argument(argument)

driver_kwargs = {
'executable_path': driver_executable_path,
f'{driver_name}_options': driver_options
}

# locally installed driver
if driver_executable_path is not None:
driver_kwargs = {
service_module = import_module(f'{webdriver_base_path}.service')
service_klass = getattr(service_module, 'Service')
service_kwargs = {
'executable_path': driver_executable_path,
f'{driver_name}_options': driver_options
}
service = service_klass(**service_kwargs)
driver_kwargs = {
'service': service,
'options': driver_options
}
self.driver = driver_klass(**driver_kwargs)
# remote driver
elif command_executor is not None:
from selenium import webdriver
capabilities = driver_options.to_capabilities()
self.driver = webdriver.Remote(command_executor=command_executor,
desired_capabilities=capabilities)

@classmethod
def from_crawler(cls, crawler):
Expand All @@ -72,22 +63,18 @@ def from_crawler(cls, crawler):
driver_name = crawler.settings.get('SELENIUM_DRIVER_NAME')
driver_executable_path = crawler.settings.get('SELENIUM_DRIVER_EXECUTABLE_PATH')
browser_executable_path = crawler.settings.get('SELENIUM_BROWSER_EXECUTABLE_PATH')
command_executor = crawler.settings.get('SELENIUM_COMMAND_EXECUTOR')
driver_arguments = crawler.settings.get('SELENIUM_DRIVER_ARGUMENTS')

if driver_name is None:
raise NotConfigured('SELENIUM_DRIVER_NAME must be set')

if driver_executable_path is None and command_executor is None:
raise NotConfigured('Either SELENIUM_DRIVER_EXECUTABLE_PATH '
'or SELENIUM_COMMAND_EXECUTOR must be set')
if not driver_name or not driver_executable_path:
raise NotConfigured(
'SELENIUM_DRIVER_NAME and SELENIUM_DRIVER_EXECUTABLE_PATH must be set'
)

middleware = cls(
driver_name=driver_name,
driver_executable_path=driver_executable_path,
browser_executable_path=browser_executable_path,
command_executor=command_executor,
driver_arguments=driver_arguments
driver_arguments=driver_arguments,
browser_executable_path=browser_executable_path
)

crawler.signals.connect(middleware.spider_closed, signals.spider_closed)
Expand Down