API features test public actor avatar
API features test public actor

Under maintenance

Pricing

Pay per usage

Go to Store
API features test public actor

API features test public actor

Under maintenance

Developed by

Jiri Moravcik

Jiri Moravcik

Maintained by Community

API features test public actor

0.0 (0)

Pricing

Pay per usage

3

Total users

1

Monthly users

1

Runs succeeded

>99%

Last modified

2 years ago

.actor/Dockerfile

# First, specify the base Docker image.
# The Docker images published by Apify are listed at https://hub.docker.com/r/apify/.
# Any other image from Docker Hub can be used instead.
FROM apify/actor-python:3.11
# Second, copy only requirements.txt into the actor image.
# It should be the only file that affects the dependency install in the next step,
# so copying it on its own helps to speed up the build.
COPY requirements.txt ./
# Install the packages specified in requirements.txt.
# The Python version, the pip version and the full list of installed packages
# (with their versions) are printed as well, to help with debugging.
RUN echo "Python version:" \
&& python --version \
&& echo "Pip version:" \
&& pip --version \
&& echo "Installing dependencies:" \
&& pip install -r requirements.txt \
&& echo "All installed Python packages:" \
&& pip freeze
# Next, copy the remaining files and directories with the source code.
# Because this happens after the dependencies are installed, a rebuild is quick
# for most changes that only touch source files.
COPY . ./
# Specify how to launch the source code of your actor.
# The default command is "python3 -m src", i.e. the "src" package is run as a module.
CMD ["python3", "-m", "src"]

.actor/actor.json

{
"actorSpecification": 1,
"name": "my-actor-3",
"title": "Scrape single page in Python",
"description": "Scrape data from single page with provided URL.",
"version": "0.0",
"meta": {
"templateId": "python-start"
},
"input": "./input_schema.json",
"dockerfile": "./Dockerfile"
}

.actor/input_schema.json

{
"title": "Scrape data from a web page",
"type": "object",
"schemaVersion": 1,
"properties": {
"url": {
"title": "URL of the page",
"type": "string",
"description": "The URL of the website you want to get the data from.",
"editor": "textfield",
"prefill": "https://www.apify.com/"
}
}
}

src/__init__.py

1

src/__main__.py

import asyncio
import logging

from apify.log import ActorLogFormatter

from .main import main

# One stream handler, formatted by the Apify SDK formatter, is shared by both
# loggers configured below.
handler = logging.StreamHandler()
handler.setFormatter(ActorLogFormatter())

apify_client_logger = logging.getLogger('apify_client')
apify_logger = logging.getLogger('apify')

# The API client logger is set to INFO, the SDK logger to DEBUG.
for sdk_logger, level in (
    (apify_client_logger, logging.INFO),
    (apify_logger, logging.DEBUG),
):
    sdk_logger.setLevel(level)
    sdk_logger.addHandler(handler)

# Run the actor's main coroutine to completion.
asyncio.run(main())

src/main.py

# Apify SDK - toolkit for building Apify Actors (Read more at https://docs.apify.com/sdk/python).
from apify import Actor


async def main():
    """Call a series of Apify API and storage operations from inside an Actor run.

    Inside the ``Actor`` context the coroutine reads the input, looks up the
    actor, build, run, task and user IDs from the Actor environment, fetches
    the matching records (and the run log) through a new API client, and then
    touches a request queue, the key-value store and a dataset. The results of
    the calls are discarded; nothing is returned.
    """
    async with Actor:
        # Structure of input is defined in input_schema.json.
        # The value is read but not used further in this function.
        run_input = await Actor.get_input() or {}

        # Read every identifier up front, before the first API call is made.
        env = Actor.get_env()
        actor_id, build_id, run_id, task_id, user_id = (
            env[key]
            for key in (
                'actor_id',
                'actor_build_id',
                'actor_run_id',
                'actor_task_id',
                'user_id',
            )
        )

        api = Actor.new_client()
        await api.actor(actor_id).get()
        await api.build(build_id).get()
        await api.run(run_id).get()
        await api.log(run_id).get()
        # The task record is only requested when a task ID is present.
        if task_id:
            await api.task(task_id).get()

        await api.user(user_id).get()

        # Request queue: add one request, then fetch the next one.
        queue = await Actor.open_request_queue()
        target_url = 'https://apify.com'
        await queue.add_request({'uniqueKey': target_url, 'url': target_url})
        await queue.fetch_next_request()

        # Key-value store and dataset writes.
        await Actor.set_value('test', {'test': 123})
        await Actor.push_data({'abc': 456})

        # Walk through the dataset items without using them.
        items_store = await Actor.open_dataset()
        async for _ in items_store.iterate_items():
            pass

.dockerignore

# configurations
.idea
# crawlee and apify storage folders
apify_storage
crawlee_storage
storage
# installed files
.venv
# git folder
.git

.editorconfig

root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf

.gitignore

# This file tells Git which files shouldn't be added to source control
.idea
.DS_Store
apify_storage
storage/*
!storage/key_value_stores
storage/key_value_stores/*
!storage/key_value_stores/default
storage/key_value_stores/default/*
!storage/key_value_stores/default/INPUT.json
.venv/
.env/
__pypackages__
dist/
build/
*.egg-info/
*.egg
__pycache__
.mypy_cache
.dmypy.json
dmypy.json
.pytest_cache
.scrapy
*.log

requirements.txt

1# Add your dependencies here.
2# See https://pip.pypa.io/en/latest/reference/requirements-file-format/
3# for how to format them
4apify ~= 1.1.1
5beautifulsoup4 ~= 4.12.0
6requests ~= 2.31.0