API features test public actor
Under maintenance
Rating: 0.0 (0)
Pricing: Pay per usage
Last modified: 2 years ago
# First, specify the base Docker image.
# You can see the Docker images from Apify at https://hub.docker.com/r/apify/.
# You can also use any other image from Docker Hub.
FROM apify/actor-python:3.11

# Second, copy just requirements.txt into the actor image,
# since it should be the only file that affects the dependency install in the next step,
# in order to speed up the build
COPY requirements.txt ./

# Install the packages specified in requirements.txt,
# print the installed Python version, pip version
# and all installed packages with their versions for debugging
RUN echo "Python version:" \
 && python --version \
 && echo "Pip version:" \
 && pip --version \
 && echo "Installing dependencies:" \
 && pip install -r requirements.txt \
 && echo "All installed Python packages:" \
 && pip freeze

# Next, copy the remaining files and directories with the source code.
# Since we do this after installing the dependencies, quick build will be really fast
# for most source file changes.
COPY . ./

# Specify how to launch the source code of your actor.
# By default, the "python3 -m src" command is run
CMD ["python3", "-m", "src"]
{ "actorSpecification": 1, "name": "my-actor-3", "title": "Scrape single page in Python", "description": "Scrape data from single page with provided URL.", "version": "0.0", "meta": { "templateId": "python-start" }, "input": "./input_schema.json", "dockerfile": "./Dockerfile"}
{ "title": "Scrape data from a web page", "type": "object", "schemaVersion": 1, "properties": { "url": { "title": "URL of the page", "type": "string", "description": "The URL of website you want to get the data from.", "editor": "textfield", "prefill": "https://www.apify.com/" } }}
import asyncio
import logging

from apify.log import ActorLogFormatter

from .main import main

# Set up logging of messages from the Apify SDK
handler = logging.StreamHandler()
handler.setFormatter(ActorLogFormatter())

apify_client_logger = logging.getLogger('apify_client')
apify_client_logger.setLevel(logging.INFO)
apify_client_logger.addHandler(handler)

apify_logger = logging.getLogger('apify')
apify_logger.setLevel(logging.DEBUG)
apify_logger.addHandler(handler)

asyncio.run(main())
# Apify SDK - toolkit for building Apify Actors (Read more at https://docs.apify.com/sdk/python).
from apify import Actor


async def main():
    async with Actor:
        # Structure of input is defined in input_schema.json
        actor_input = await Actor.get_input() or {}

        # Read the identifiers of this Actor, build, run, task and user from the environment
        actor_env = Actor.get_env()
        actor_id = actor_env['actor_id']
        actor_build_id = actor_env['actor_build_id']
        actor_run_id = actor_env['actor_run_id']
        actor_task_id = actor_env['actor_task_id']
        user_id = actor_env['user_id']

        # Exercise the Apify API client against the resources of this run
        client = Actor.new_client()
        await client.actor(actor_id).get()
        await client.build(actor_build_id).get()
        await client.run(actor_run_id).get()
        await client.log(actor_run_id).get()
        if actor_task_id:
            await client.task(actor_task_id).get()

        await client.user(user_id).get()

        # Exercise the storage APIs: request queue, key-value store and dataset
        rq = await Actor.open_request_queue()
        await rq.add_request({'uniqueKey': 'https://apify.com', 'url': 'https://apify.com'})
        await rq.fetch_next_request()
        await Actor.set_value('test', {'test': 123})
        await Actor.push_data({'abc': 456})
        dataset = await Actor.open_dataset()
        async for item in dataset.iterate_items():
            pass
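After a run of the main() coroutine above finishes, the record stored under the 'test' key and the pushed dataset item can be read back from outside the run. A minimal sketch, again assuming the apify-client package, an APIFY_TOKEN environment variable and a placeholder Actor ID:

import os

from apify_client import ApifyClient

client = ApifyClient(os.environ['APIFY_TOKEN'])  # assumed token source

# Look up the most recent run of the Actor (ID is a placeholder).
run = client.actor('username/my-actor-3').last_run().get()

# The {'test': 123} value stored by Actor.set_value('test', ...)
record = client.key_value_store(run['defaultKeyValueStoreId']).get_record('test')
print(record['value'])

# The {'abc': 456} item pushed by Actor.push_data(...)
items = client.dataset(run['defaultDatasetId']).list_items().items
print(items)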
# configurations
.idea

# crawlee and apify storage folders
apify_storage
crawlee_storage
storage

# installed files
.venv

# git folder
.git
root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf
# This file tells Git which files shouldn't be added to source control
.idea
.DS_Store

apify_storage
storage/*
!storage/key_value_stores
storage/key_value_stores/*
!storage/key_value_stores/default
storage/key_value_stores/default/*
!storage/key_value_stores/default/INPUT.json

.venv/
.env/
__pypackages__
dist/
build/
*.egg-info/
*.egg

__pycache__

.mypy_cache
.dmypy.json
dmypy.json
.pytest_cache

.scrapy
*.log
# Add your dependencies here.
# See https://pip.pypa.io/en/latest/reference/requirements-file-format/
# for how to format them
apify ~= 1.1.1
beautifulsoup4 ~= 4.12.0
requests ~= 2.31.0
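The pinned beautifulsoup4 and requests packages match the template's stated purpose of scraping a single page, although the main() shown above does not use them. A minimal sketch of how the url input could be fetched and parsed with these two dependencies (not the Actor's actual code):

import requests
from bs4 import BeautifulSoup

from apify import Actor


async def main() -> None:
    async with Actor:
        actor_input = await Actor.get_input() or {}
        url = actor_input.get('url', 'https://www.apify.com/')

        # Fetch the page and parse it with BeautifulSoup.
        response = requests.get(url, timeout=30)
        soup = BeautifulSoup(response.content, 'html.parser')

        # Push the page title and heading texts to the default dataset.
        await Actor.push_data({
            'url': url,
            'title': soup.title.string if soup.title else None,
            'h1': [h.get_text(strip=True) for h in soup.find_all('h1')],
        })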