API features test public actor
Under maintenance
Rating: 0.0 (0)
Pricing: Pay per usage
Last modified: 2 years ago
# First, specify the base Docker image.
# You can see the Docker images from Apify at https://hub.docker.com/r/apify/.
# You can also use any other image from Docker Hub.
FROM apify/actor-python:3.11

# Second, copy just requirements.txt into the actor image,
# since it should be the only file that affects the dependency install in the next step,
# in order to speed up the build
COPY requirements.txt ./

# Install the packages specified in requirements.txt,
# print the installed Python version, pip version
# and all installed packages with their versions for debugging
RUN echo "Python version:" \
 && python --version \
 && echo "Pip version:" \
 && pip --version \
 && echo "Installing dependencies:" \
 && pip install -r requirements.txt \
 && echo "All installed Python packages:" \
 && pip freeze

# Next, copy the remaining files and directories with the source code.
# Since we do this after installing the dependencies, quick build will be really fast
# for most source file changes.
COPY . ./

# Specify how to launch the source code of your actor.
# By default, the "python3 -m src" command is run
CMD ["python3", "-m", "src"]
{ "actorSpecification": 1, "name": "my-actor-3", "title": "Scrape single page in Python", "description": "Scrape data from single page with provided URL.", "version": "0.0", "meta": { "templateId": "python-start" }, "input": "./input_schema.json", "dockerfile": "./Dockerfile"}
{ "title": "Scrape data from a web page", "type": "object", "schemaVersion": 1, "properties": { "url": { "title": "URL of the page", "type": "string", "description": "The URL of website you want to get the data from.", "editor": "textfield", "prefill": "https://www.apify.com/" } }}
import asyncio
import logging

from apify.log import ActorLogFormatter

from .main import main

# Set up logging of messages from the Apify SDK
handler = logging.StreamHandler()
handler.setFormatter(ActorLogFormatter())

apify_client_logger = logging.getLogger('apify_client')
apify_client_logger.setLevel(logging.INFO)
apify_client_logger.addHandler(handler)

apify_logger = logging.getLogger('apify')
apify_logger.setLevel(logging.DEBUG)
apify_logger.addHandler(handler)

asyncio.run(main())
# Apify SDK - toolkit for building Apify Actors (Read more at https://docs.apify.com/sdk/python).
from apify import Actor


async def main():
    async with Actor:
        # Structure of input is defined in input_schema.json
        actor_input = await Actor.get_input() or {}

        # Read the identifiers of this Actor, build, run, task and user from the environment
        actor_env = Actor.get_env()
        actor_id = actor_env['actor_id']
        actor_build_id = actor_env['actor_build_id']
        actor_run_id = actor_env['actor_run_id']
        actor_task_id = actor_env['actor_task_id']
        user_id = actor_env['user_id']

        # Exercise the Apify API client against the resources of this run
        client = Actor.new_client()
        await client.actor(actor_id).get()
        await client.build(actor_build_id).get()
        await client.run(actor_run_id).get()
        await client.log(actor_run_id).get()
        if actor_task_id:
            await client.task(actor_task_id).get()

        await client.user(user_id).get()

        # Exercise the storage APIs: request queue, key-value store and dataset
        rq = await Actor.open_request_queue()
        await rq.add_request({'uniqueKey': 'https://apify.com', 'url': 'https://apify.com'})
        await rq.fetch_next_request()
        await Actor.set_value('test', {'test': 123})
        await Actor.push_data({'abc': 456})
        dataset = await Actor.open_dataset()
        async for item in dataset.iterate_items():
            pass
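After a run of the main() coroutine above finishes, the record stored under the 'test' key and the pushed dataset item can be read back from outside the run. A minimal sketch, again assuming the apify-client package, an APIFY_TOKEN environment variable and a placeholder Actor ID:

import os

from apify_client import ApifyClient

client = ApifyClient(os.environ['APIFY_TOKEN'])  # assumed token source

# Look up the most recent run of the Actor (ID is a placeholder).
run = client.actor('username/my-actor-3').last_run().get()

# The {'test': 123} value stored by Actor.set_value('test', ...)
record = client.key_value_store(run['defaultKeyValueStoreId']).get_record('test')
print(record['value'])

# The {'abc': 456} item pushed by Actor.push_data(...)
items = client.dataset(run['defaultDatasetId']).list_items().items
print(items)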
# configurations
.idea

# crawlee and apify storage folders
apify_storage
crawlee_storage
storage

# installed files
.venv

# git folder
.git
root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf
# This file tells Git which files shouldn't be added to source control
.idea
.DS_Store

apify_storage
storage/*
!storage/key_value_stores
storage/key_value_stores/*
!storage/key_value_stores/default
storage/key_value_stores/default/*
!storage/key_value_stores/default/INPUT.json

.venv/
.env/
__pypackages__
dist/
build/
*.egg-info/
*.egg

__pycache__

.mypy_cache
.dmypy.json
dmypy.json
.pytest_cache

.scrapy
*.log
# Add your dependencies here.
# See https://pip.pypa.io/en/latest/reference/requirements-file-format/
# for how to format them
apify ~= 1.1.1
beautifulsoup4 ~= 4.12.0
requests ~= 2.31.0
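The pinned beautifulsoup4 and requests packages match the template's stated purpose of scraping a single page, although the main() shown above does not use them. A minimal sketch of how the url input could be fetched and parsed with these two dependencies (not the Actor's actual code):

import requests
from bs4 import BeautifulSoup

from apify import Actor


async def main() -> None:
    async with Actor:
        actor_input = await Actor.get_input() or {}
        url = actor_input.get('url', 'https://www.apify.com/')

        # Fetch the page and parse it with BeautifulSoup.
        response = requests.get(url, timeout=30)
        soup = BeautifulSoup(response.content, 'html.parser')

        # Push the page title and heading texts to the default dataset.
        await Actor.push_data({
            'url': url,
            'title': soup.title.string if soup.title else None,
            'h1': [h.get_text(strip=True) for h in soup.find_all('h1')],
        })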