
Test OG image
Pricing
Pay per usage
Go to Store

Test OG image
0.0 (0)
Pricing
Pay per usage
1
Total users
1
Monthly users
1
Runs succeeded
>99%
Last modified
3 years ago
.editorconfig
root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf
.eslintrc
{ "extends": "@apify"}
.gitignore
# This file tells Git which files shouldn't be added to source control
.idea
node_modules
Dockerfile
# First, specify the base Docker image. You can read more about
# the available images at https://sdk.apify.com/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node-puppeteer-chrome:16

# Second, copy just package.json and package-lock.json since they should be
# the only files that affect "npm install" in the next step, to speed up the build.
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging.
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list --all || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./

# Optionally, specify how to launch the source code of your actor.
# By default, Apify's base Docker images define the CMD instruction
# that runs the Node.js source code using the command specified
# in the "scripts.start" section of the package.json file.
# In short, the instruction looks something like this:
#
# CMD npm start
INPUT_SCHEMA.json
{ "title": "PuppeteerCrawler Template", "description": "lorem ipsum", "type": "object", "schemaVersion": 1, "properties": { "startUrls": { "title": "Start URLs", "type": "array", "description": "URLs to start with.", "editor": "requestListSources", "prefill": [ { "url": "https://apify.com" } ] } }, "required": [ "startUrls" ]}
apify.json
{ "env": { "npm_config_loglevel": "silent" }}
main.js
/**
 * This template is a production ready boilerplate for developing with `PuppeteerCrawler`.
 * Use this to bootstrap your projects using the most up-to-date code.
 * If you're looking for examples or want to learn more, see README.
 */

const Apify = require('apify');
const { handleStart, handleList, handleDetail } = require('./src/routes');

const { utils: { log } } = Apify;

Apify.main(async () => {
    // Per the Apify SDK docs, getInput() may resolve to null when no input is set.
    // Fail with an explicit message instead of an opaque destructuring TypeError.
    const input = await Apify.getInput();
    if (!input || !Array.isArray(input.startUrls)) {
        throw new Error('Actor input must be an object with a "startUrls" array.');
    }
    const { startUrls } = input;

    const requestList = await Apify.openRequestList('start-urls', startUrls);
    const requestQueue = await Apify.openRequestQueue();
    const proxyConfiguration = await Apify.createProxyConfiguration();

    const crawler = new Apify.PuppeteerCrawler({
        requestList,
        requestQueue,
        proxyConfiguration,
        launchContext: {
            // Chrome with stealth should work for most websites.
            // If it doesn't, feel free to remove this.
            useChrome: true,
            stealth: true,
        },
        // Dispatch each opened page to a route handler based on the request's
        // `userData.label`; requests without a known label go to handleStart.
        handlePageFunction: async (context) => {
            const { url, userData: { label } } = context.request;
            log.info('Page opened.', { label, url });
            switch (label) {
                case 'LIST':
                    return handleList(context);
                case 'DETAIL':
                    return handleDetail(context);
                default:
                    return handleStart(context);
            }
        },
    });

    log.info('Starting the crawl.');
    await crawler.run();
    log.info('Crawl finished.');
});
package.json
{ "name": "project-puppeteer-crawler", "version": "0.0.1", "description": "This is a boilerplate of an Apify actor.", "dependencies": { "apify": "^2.0.7", "puppeteer": "*" }, "devDependencies": { "@apify/eslint-config": "^0.1.3", "eslint": "^7.0.0" }, "scripts": { "start": "node main.js", "lint": "./node_modules/.bin/eslint ./src --ext .js,.jsx", "lint:fix": "./node_modules/.bin/eslint ./src --ext .js,.jsx --fix", "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1" }, "author": "It's not you it's me", "license": "ISC"}
src/routes.js
const Apify = require('apify');

const { log } = Apify.utils;

/**
 * Route handler for start URLs (template placeholder; the body is intentionally empty).
 *
 * @param {object} context - Crawling context passed in by main.js.
 * @param {object} context.request - The request being handled.
 * @param {object} context.page - The opened page.
 * @returns {Promise<void>}
 */
async function handleStart({ request, page }) {
    // Handle Start URLs
}

/**
 * Route handler for requests labelled 'LIST' (template placeholder; the body is intentionally empty).
 *
 * @param {object} context - Crawling context passed in by main.js.
 * @param {object} context.request - The request being handled.
 * @param {object} context.page - The opened page.
 * @returns {Promise<void>}
 */
async function handleList({ request, page }) {
    // Handle pagination
}

/**
 * Route handler for requests labelled 'DETAIL' (template placeholder; the body is intentionally empty).
 *
 * @param {object} context - Crawling context passed in by main.js.
 * @param {object} context.request - The request being handled.
 * @param {object} context.page - The opened page.
 * @returns {Promise<void>}
 */
async function handleDetail({ request, page }) {
    // Handle details
}

exports.handleStart = handleStart;
exports.handleList = handleList;
exports.handleDetail = handleDetail;