Skip to content

Commit

Permalink
Python templates: code finetuning, improve docs and comments
Browse files Browse the repository at this point in the history
Closes #235
  • Loading branch information
vdusek committed Nov 24, 2023
1 parent f34cc03 commit 8b1d335
Show file tree
Hide file tree
Showing 21 changed files with 146 additions and 27 deletions.
9 changes: 5 additions & 4 deletions templates/python-beautifulsoup/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Add your dependencies here.
# See https://pip.pypa.io/en/latest/reference/requirements-file-format/
# for how to format them
# Define your Python dependencies below. For formatting guidelines, refer to:
# https://pip.pypa.io/en/latest/reference/requirements-file-format/

apify ~= 1.3.0
beautifulsoup4 ~= 4.12.2
httpx ~= 0.25.0
httpx ~= 0.25.2
types-beautifulsoup4 ~= 4.12.0.7
Empty file.
9 changes: 9 additions & 0 deletions templates/python-beautifulsoup/src/__main__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
"""
This module serves as the entry point for executing the Apify Actor. It handles the configuration of logging
settings. The `main()` coroutine is then executed using `asyncio.run()`.
Feel free to modify this file to suit your specific needs.
"""

import asyncio
import logging

from apify.log import ActorLogFormatter

from .main import main

# Configure loggers
handler = logging.StreamHandler()
handler.setFormatter(ActorLogFormatter())

Expand All @@ -16,4 +24,5 @@
apify_logger.setLevel(logging.DEBUG)
apify_logger.addHandler(handler)

# Execute the Actor main coroutine
asyncio.run(main())
16 changes: 15 additions & 1 deletion templates/python-beautifulsoup/src/main.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
"""
This module defines the `main()` coroutine for the Apify Actor, executed from the `__main__.py` file.
Feel free to modify this file to suit your specific needs.
To build Apify Actors, use the Apify SDK toolkit. Read more in the official documentation:
https://docs.apify.com/sdk/python
"""

from urllib.parse import urljoin

from bs4 import BeautifulSoup
Expand All @@ -6,7 +15,12 @@
from apify import Actor


async def main():
async def main() -> None:
"""
The main coroutine is executed using `asyncio.run()`, so do not attempt to turn it into a regular function;
that will not work. Asynchronous execution is required for communication with the Apify platform,
and it also significantly improves web scraping performance.
"""
async with Actor:
# Read the Actor input
actor_input = await Actor.get_input() or {}
Expand Down
3 changes: 3 additions & 0 deletions templates/python-empty/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
# Define your Python dependencies below. For formatting guidelines, refer to:
# https://pip.pypa.io/en/latest/reference/requirements-file-format/

apify ~= 1.3.0
10 changes: 9 additions & 1 deletion templates/python-empty/src/__main__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
"""
This module serves as the entry point for executing the Apify Actor. It handles the configuration of logging
settings. The `main()` coroutine is then executed using `asyncio.run()`.
Feel free to modify this file to suit your specific needs.
"""

import asyncio
import logging

from apify.log import ActorLogFormatter

from .main import main

# Set up logging of messages from the Apify SDK
# Configure loggers
handler = logging.StreamHandler()
handler.setFormatter(ActorLogFormatter())

Expand All @@ -17,4 +24,5 @@
apify_logger.setLevel(logging.DEBUG)
apify_logger.addHandler(handler)

# Execute the Actor main coroutine
asyncio.run(main())
17 changes: 15 additions & 2 deletions templates/python-empty/src/main.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,21 @@
# Apify SDK - toolkit for building Apify Actors (Read more at https://docs.apify.com/sdk/python)
"""
This module defines the `main()` coroutine for the Apify Actor, executed from the `__main__.py` file.
Feel free to modify this file to suit your specific needs.
To build Apify Actors, use the Apify SDK toolkit. Read more in the official documentation:
https://docs.apify.com/sdk/python
"""

from apify import Actor


async def main():
async def main() -> None:
"""
The main coroutine is executed using `asyncio.run()`, so do not attempt to turn it into a regular function;
that will not work. Asynchronous execution is required for communication with the Apify platform,
and it also significantly improves web scraping performance.
"""
async with Actor:
Actor.log.info('Hello from the Actor!')
# Write your code here
6 changes: 3 additions & 3 deletions templates/python-playwright/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Add your dependencies here.
# See https://pip.pypa.io/en/latest/reference/requirements-file-format/
# for how to format them
# Define your Python dependencies below. For formatting guidelines, refer to:
# https://pip.pypa.io/en/latest/reference/requirements-file-format/

apify ~= 1.3.0
playwright ~= 1.39.0
Empty file.
9 changes: 9 additions & 0 deletions templates/python-playwright/src/__main__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
"""
This module serves as the entry point for executing the Apify Actor. It handles the configuration of logging
settings. The `main()` coroutine is then executed using `asyncio.run()`.
Feel free to modify this file to suit your specific needs.
"""

import asyncio
import logging

from apify.log import ActorLogFormatter

from .main import main

# Configure loggers
handler = logging.StreamHandler()
handler.setFormatter(ActorLogFormatter())

Expand All @@ -16,4 +24,5 @@
apify_logger.setLevel(logging.DEBUG)
apify_logger.addHandler(handler)

# Execute the Actor main coroutine
asyncio.run(main())
16 changes: 15 additions & 1 deletion templates/python-playwright/src/main.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
"""
This module defines the `main()` coroutine for the Apify Actor, executed from the `__main__.py` file.
Feel free to modify this file to suit your specific needs.
To build Apify Actors, use the Apify SDK toolkit. Read more in the official documentation:
https://docs.apify.com/sdk/python
"""

from urllib.parse import urljoin

from playwright.async_api import async_playwright
Expand All @@ -9,7 +18,12 @@
# When running on the Apify platform, they are already included in the Actor's Docker image.


async def main():
async def main() -> None:
"""
The main coroutine is executed using `asyncio.run()`, so do not attempt to turn it into a regular function;
that will not work. Asynchronous execution is required for communication with the Apify platform,
and it also significantly improves web scraping performance.
"""
async with Actor:
# Read the Actor input
actor_input = await Actor.get_input() or {}
Expand Down
4 changes: 2 additions & 2 deletions templates/python-scrapy/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# You can add your dependencies here.
# See https://pip.pypa.io/en/latest/reference/requirements-file-format/ for how to format them.
# Define your Python dependencies below. For formatting guidelines, refer to:
# https://pip.pypa.io/en/latest/reference/requirements-file-format/

apify[scrapy] ~= 1.3.0
nest-asyncio ~= 1.5.8
Expand Down
Empty file.
6 changes: 3 additions & 3 deletions templates/python-selenium/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Add your dependencies here.
# See https://pip.pypa.io/en/latest/reference/requirements-file-format/
# for how to format them
# Define your Python dependencies below. For formatting guidelines, refer to:
# https://pip.pypa.io/en/latest/reference/requirements-file-format/

apify ~= 1.3.0
selenium ~= 4.14.0
Empty file.
9 changes: 9 additions & 0 deletions templates/python-selenium/src/__main__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
"""
This module serves as the entry point for executing the Apify Actor. It handles the configuration of logging
settings. The `main()` coroutine is then executed using `asyncio.run()`.
Feel free to modify this file to suit your specific needs.
"""

import asyncio
import logging

from apify.log import ActorLogFormatter

from .main import main

# Configure loggers
handler = logging.StreamHandler()
handler.setFormatter(ActorLogFormatter())

Expand All @@ -16,4 +24,5 @@
apify_logger.setLevel(logging.DEBUG)
apify_logger.addHandler(handler)

# Execute the Actor main coroutine
asyncio.run(main())
16 changes: 15 additions & 1 deletion templates/python-selenium/src/main.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
"""
This module defines the `main()` coroutine for the Apify Actor, executed from the `__main__.py` file.
Feel free to modify this file to suit your specific needs.
To build Apify Actors, use the Apify SDK toolkit. Read more in the official documentation:
https://docs.apify.com/sdk/python
"""

from urllib.parse import urljoin

from selenium import webdriver
Expand All @@ -11,7 +20,12 @@
# When running on the Apify platform, it is already included in the Actor's Docker image.


async def main():
async def main() -> None:
"""
The main coroutine is executed using `asyncio.run()`, so do not attempt to turn it into a regular function;
that will not work. Asynchronous execution is required for communication with the Apify platform,
and it also significantly improves web scraping performance.
"""
async with Actor:
# Read the Actor input
actor_input = await Actor.get_input() or {}
Expand Down
9 changes: 5 additions & 4 deletions templates/python-start/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Add your dependencies here.
# See https://pip.pypa.io/en/latest/reference/requirements-file-format/
# for how to format them
# Define your Python dependencies below. For formatting guidelines, refer to:
# https://pip.pypa.io/en/latest/reference/requirements-file-format/

apify ~= 1.3.0
beautifulsoup4 ~= 4.12.2
httpx ~= 0.25.0
httpx ~= 0.25.2
types-beautifulsoup4 ~= 4.12.0.7
Empty file.
10 changes: 9 additions & 1 deletion templates/python-start/src/__main__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
"""
This module serves as the entry point for executing the Apify Actor. It handles the configuration of logging
settings. The `main()` coroutine is then executed using `asyncio.run()`.
Feel free to modify this file to suit your specific needs.
"""

import asyncio
import logging

from apify.log import ActorLogFormatter

from .main import main

# Set up logging of messages from the Apify SDK
# Configure loggers
handler = logging.StreamHandler()
handler.setFormatter(ActorLogFormatter())

Expand All @@ -17,4 +24,5 @@
apify_logger.setLevel(logging.DEBUG)
apify_logger.addHandler(handler)

# Execute the Actor main coroutine
asyncio.run(main())
24 changes: 20 additions & 4 deletions templates/python-start/src/main.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,29 @@
# Beautiful Soup - library for pulling data out of HTML and XML files (Read more at https://www.crummy.com/software/BeautifulSoup/bs4/doc)
"""
This module defines the `main()` coroutine for the Apify Actor, executed from the `__main__.py` file.
Feel free to modify this file to suit your specific needs.
To build Apify Actors, use the Apify SDK toolkit. Read more in the official documentation:
https://docs.apify.com/sdk/python
"""

# Beautiful Soup - library for pulling data out of HTML and XML files, read more at
# https://www.crummy.com/software/BeautifulSoup/bs4/doc
from bs4 import BeautifulSoup
# HTTPX - library for making asynchronous HTTP requests in Python (Read more at https://www.python-httpx.org/)

# HTTPX - library for making asynchronous HTTP requests in Python, read more at https://www.python-httpx.org/
from httpx import AsyncClient

# Apify SDK - toolkit for building Apify Actors (Read more at https://docs.apify.com/sdk/python)
# Apify SDK - toolkit for building Apify Actors, read more at https://docs.apify.com/sdk/python
from apify import Actor


async def main():
async def main() -> None:
"""
The main coroutine is executed using `asyncio.run()`, so do not attempt to turn it into a regular function;
that will not work. Asynchronous execution is required for communication with the Apify platform,
and it also significantly improves web scraping performance.
"""
async with Actor:
# Structure of input is defined in input_schema.json
actor_input = await Actor.get_input() or {}
Expand Down

0 comments on commit 8b1d335

Please sign in to comment.