API features test public actor
Under maintenance
Pricing
Pay per usage
Go to Store
API features test public actor
Under maintenance
API features test public actor
0.0 (0)
Pricing
Pay per usage
3
Total users
1
Monthly users
1
Runs succeeded
>99%
Last modified
2 years ago
.actor/Dockerfile
# First, specify the base Docker image.
# You can see the Docker images from Apify at https://hub.docker.com/r/apify/.
# You can also use any other image from Docker Hub.
FROM apify/actor-python:3.11

# Second, copy just requirements.txt into the actor image,
# since it should be the only file that affects the dependency install in the next step,
# in order to speed up the build
COPY requirements.txt ./

# Install the packages specified in requirements.txt,
# Print the installed Python version, pip version
# and all installed packages with their versions for debugging
RUN echo "Python version:" \
 && python --version \
 && echo "Pip version:" \
 && pip --version \
 && echo "Installing dependencies:" \
 && pip install -r requirements.txt \
 && echo "All installed Python packages:" \
 && pip freeze

# Next, copy the remaining files and directories with the source code.
# Since we do this after installing the dependencies, quick build will be really fast
# for most source file changes.
COPY . ./

# Specify how to launch the source code of your actor.
# By default, the "python3 -m src" command is run
CMD ["python3", "-m", "src"]
.actor/actor.json
{
    "actorSpecification": 1,
    "name": "my-actor-3",
    "title": "Scrape single page in Python",
    "description": "Scrape data from single page with provided URL.",
    "version": "0.0",
    "meta": {
        "templateId": "python-start"
    },
    "input": "./input_schema.json",
    "dockerfile": "./Dockerfile"
}
.actor/input_schema.json
{
    "title": "Scrape data from a web page",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "url": {
            "title": "URL of the page",
            "type": "string",
            "description": "The URL of the website you want to get the data from.",
            "editor": "textfield",
            "prefill": "https://www.apify.com/"
        }
    }
}
src/__init__.py
1
src/__main__.py
"""Entry point of the actor package, executed via ``python3 -m src``.

Configures logging for the Apify SDK and client, then runs the actor's
``main`` coroutine.
"""
import asyncio
import logging

from apify.log import ActorLogFormatter

from .main import main

# Set up logging of messages from the Apify SDK: one stream handler with the
# Apify log format is shared by both the client and the SDK loggers.
handler = logging.StreamHandler()
handler.setFormatter(ActorLogFormatter())

apify_client_logger = logging.getLogger('apify_client')
apify_client_logger.setLevel(logging.INFO)
apify_client_logger.addHandler(handler)

apify_logger = logging.getLogger('apify')
apify_logger.setLevel(logging.DEBUG)
apify_logger.addHandler(handler)

# Guard the entry point so that merely importing src.__main__ does not start
# the event loop; `python -m src` still runs it, since the module is then
# executed with __name__ == '__main__'.
if __name__ == '__main__':
    asyncio.run(main())
src/main.py
# Apify SDK - toolkit for building Apify Actors (Read more at https://docs.apify.com/sdk/python).
from apify import Actor


async def main():
    """Smoke-test the public actor API: environment, API client and storages."""
    async with Actor:
        # Structure of input is defined in input_schema.json
        actor_input = await Actor.get_input() or {}

        # Identifiers of the current actor, build, run, task and owner,
        # read from the actor environment.
        env = Actor.get_env()
        current_actor_id = env['actor_id']
        build_id = env['actor_build_id']
        run_id = env['actor_run_id']
        task_id = env['actor_task_id']
        owner_id = env['user_id']

        # Touch each relevant endpoint of the Apify API client.
        api = Actor.new_client()
        await api.actor(current_actor_id).get()
        await api.build(build_id).get()
        await api.run(run_id).get()
        await api.log(run_id).get()
        if task_id:
            # A task id is only present when the run was started from a task.
            await api.task(task_id).get()

        await api.user(owner_id).get()

        # Exercise the storage APIs: request queue, key-value store and dataset.
        queue = await Actor.open_request_queue()
        await queue.add_request({'uniqueKey': 'https://apify.com', 'url': 'https://apify.com'})
        await queue.fetch_next_request()
        await Actor.set_value('test', {'test': 123})
        await Actor.push_data({'abc': 456})
        dataset = await Actor.open_dataset()
        async for item in dataset.iterate_items():
            pass
.dockerignore
# configurations
.idea

# crawlee and apify storage folders
apify_storage
crawlee_storage
storage

# installed files
.venv

# git folder
.git
.editorconfig
root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf
.gitignore
# This file tells Git which files shouldn't be added to source control
.idea
.DS_Store

apify_storage
storage/*
!storage/key_value_stores
storage/key_value_stores/*
!storage/key_value_stores/default
storage/key_value_stores/default/*
!storage/key_value_stores/default/INPUT.json

.venv/
.env/
__pypackages__
dist/
build/
*.egg-info/
*.egg

__pycache__

.mypy_cache
.dmypy.json
dmypy.json
.pytest_cache

.scrapy
*.log
requirements.txt
# Add your dependencies here.
# See https://pip.pypa.io/en/latest/reference/requirements-file-format/
# for how to format them
apify ~= 1.1.1
beautifulsoup4 ~= 4.12.0
requests ~= 2.31.0