API features test public actor
Pricing
Pay per usage
Go to Apify Store
API features test public actor
Under maintenance
API features test public actor
0.0 (0)
Pricing
Pay per usage
3
1
1
Last modified
2 years ago
Pricing
Pay per usage
API features test public actor
0.0 (0)
Pricing
Pay per usage
3
1
1
Last modified
2 years ago
# First, specify the base Docker image.
# You can see the Docker images from Apify at https://hub.docker.com/r/apify/.
# You can also use any other image from Docker Hub.
FROM apify/actor-python:3.11
# Second, copy just requirements.txt into the actor image,
# since it should be the only file that affects the dependency install in the next step,
# in order to speed up the build
COPY requirements.txt ./
# Install the packages specified in requirements.txt,
# Print the installed Python version, pip version
# and all installed packages with their versions for debugging
RUN echo "Python version:" \
 && python --version \
 && echo "Pip version:" \
 && pip --version \
 && echo "Installing dependencies:" \
 && pip install -r requirements.txt \
 && echo "All installed Python packages:" \
 && pip freeze
# Next, copy the remaining files and directories with the source code.
# Since we do this after installing the dependencies, quick build will be really fast
# for most source file changes.
COPY . ./
# Specify how to launch the source code of your actor.
# By default, the "python3 -m src" command is run
CMD ["python3", "-m", "src"]
{
    "actorSpecification": 1,
    "name": "my-actor-3",
    "title": "Scrape single page in Python",
    "description": "Scrape data from single page with provided URL.",
    "version": "0.0",
    "meta": {
        "templateId": "python-start"
    },
    "input": "./input_schema.json",
    "dockerfile": "./Dockerfile"
}
{
    "title": "Scrape data from a web page",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "url": {
            "title": "URL of the page",
            "type": "string",
            "description": "The URL of website you want to get the data from.",
            "editor": "textfield",
            "prefill": "https://www.apify.com/"
        }
    }
}
1
# NOTE(review): this span was recovered from a scraped source-viewer page. The
# viewer's gutter line numbers were fused into the code and the line breaks were
# lost; both have been repaired here. The span appears to contain two modules of
# one package (an entry point, and the module it imports as `.main`) - confirm
# the file boundaries against the original project.

# --- package entry point ---
import asyncio
import logging

from apify.log import ActorLogFormatter

from .main import main

# Set up logging of messages from the Apify SDK
handler = logging.StreamHandler()
handler.setFormatter(ActorLogFormatter())

# The same handler is attached to both loggers; only the levels differ.
apify_client_logger = logging.getLogger('apify_client')
apify_client_logger.setLevel(logging.INFO)
apify_client_logger.addHandler(handler)

apify_logger = logging.getLogger('apify')
apify_logger.setLevel(logging.DEBUG)
apify_logger.addHandler(handler)

asyncio.run(main())

# --- module imported above as `.main` ---
# Apify SDK - toolkit for building Apify Actors (Read more at https://docs.apify.com/sdk/python).
from apify import Actor


async def main():
    """Call a series of Apify SDK and API-client features from inside an Actor run.

    Inside the ``async with Actor`` context the function, in order:

    1. reads the Actor input;
    2. reads the run environment and picks out the actor, build, run, task
       and user identifiers;
    3. uses a new API client to ``get()`` the actor, build, run, run log,
       task (only when a task id is set) and user;
    4. opens a request queue, adds one request and fetches the next request;
    5. stores one value with ``Actor.set_value`` and pushes one item with
       ``Actor.push_data``;
    6. opens a dataset and iterates over all of its items.

    None of the fetched results are used; the function returns ``None``.
    """
    async with Actor:
        # Structure of input is defined in input_schema.json
        # Only the call itself matters here; the input is not used below.
        await Actor.get_input()

        actor_env = Actor.get_env()
        actor_id = actor_env['actor_id']
        actor_build_id = actor_env['actor_build_id']
        actor_run_id = actor_env['actor_run_id']
        actor_task_id = actor_env['actor_task_id']
        user_id = actor_env['user_id']

        client = Actor.new_client()
        await client.actor(actor_id).get()
        await client.build(actor_build_id).get()
        await client.run(actor_run_id).get()
        await client.log(actor_run_id).get()
        # The task lookup is skipped when the environment carries no task id.
        if actor_task_id:
            await client.task(actor_task_id).get()

        await client.user(user_id).get()

        rq = await Actor.open_request_queue()
        await rq.add_request({'uniqueKey': 'https://apify.com', 'url': 'https://apify.com'})
        await rq.fetch_next_request()
        await Actor.set_value('test', {'test': 123})
        await Actor.push_data({'abc': 456})
        dataset = await Actor.open_dataset()
        # Drain the iterator; the items themselves are not inspected.
        async for _ in dataset.iterate_items():
            pass
# NOTE(review): the line below is not Python. It looks like the start of an
# ignore file whose first two lines ("# configurations" and ".idea") were fused
# by the extraction - confirm before splitting it.
# configurations.idea
# crawlee and apify storage folders
apify_storage
crawlee_storage
storage
# installed files
.venv
# git folder
.git
root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf
# This file tells Git which files shouldn't be added to source control
.idea
.DS_Store
apify_storage
storage/*
!storage/key_value_stores
storage/key_value_stores/*
!storage/key_value_stores/default
storage/key_value_stores/default/*
!storage/key_value_stores/default/INPUT.json
.venv/
.env/
__pypackages__
dist/
build/
*.egg-info/
*.egg
__pycache__
.mypy_cache
.dmypy.json
dmypy.json
.pytest_cache
.scrapy
*.log
# Add your dependencies here.
# See https://pip.pypa.io/en/latest/reference/requirements-file-format/
# for how to format them
apify ~= 1.1.1
beautifulsoup4 ~= 4.12.0
requests ~= 2.31.0