Test OG image avatar
Test OG image

Pricing

Pay per usage

Go to Store
Test OG image

Test OG image

Developed by

brownies

brownies

Maintained by Community

0.0 (0)

Pricing

Pay per usage

1

Total users

1

Monthly users

1

Runs succeeded

>99%

Last modified

3 years ago

.editorconfig

root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf

.eslintrc

{
"extends": "@apify"
}

.gitignore

# This file tells Git which files shouldn't be added to source control
.idea
node_modules

Dockerfile

# First, specify the base Docker image. You can read more about
# the available images at https://sdk.apify.com/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node-puppeteer-chrome:16
# Second, copy just package.json and package-lock.json, since they should be
# the only files that affect "npm install" in the next step. Copying them
# separately speeds up the build.
COPY package*.json ./
# Install NPM packages, skipping optional and development dependencies to
# keep the image small. Avoid logging too much, and print the dependency
# tree plus the Node.js and NPM versions for debugging.
# "npm list" is wrapped in "|| true" so that a non-zero exit code from it
# does not break the "&&" chain.
RUN npm --quiet set progress=false \
&& npm install --only=prod --no-optional \
&& echo "Installed NPM packages:" \
&& (npm list --all || true) \
&& echo "Node.js version:" \
&& node --version \
&& echo "NPM version:" \
&& npm --version
# Next, copy the remaining files and directories with the source code.
# Since we do this after the NPM install, rebuilds will be really fast
# for most source file changes.
COPY . ./
# Optionally, specify how to launch the source code of your actor.
# By default, Apify's base Docker images define the CMD instruction
# that runs the Node.js source code using the command specified
# in the "scripts.start" section of the package.json file.
# In short, the instruction looks something like this:
#
# CMD npm start

INPUT_SCHEMA.json

{
"title": "PuppeteerCrawler Template",
"description": "Crawls the provided start URLs using Apify's PuppeteerCrawler.",
"type": "object",
"schemaVersion": 1,
"properties": {
"startUrls": {
"title": "Start URLs",
"type": "array",
"description": "URLs to start with.",
"editor": "requestListSources",
"prefill": [
{ "url": "https://apify.com" }
]
}
},
"required": [
"startUrls"
]
}

apify.json

{
"env": { "npm_config_loglevel": "silent" }
}

main.js

1/**
2 * This template is a production ready boilerplate for developing with `PuppeteerCrawler`.
3 * Use this to bootstrap your projects using the most up-to-date code.
4 * If you're looking for examples or want to learn more, see README.
5 */
6
7const Apify = require('apify');
8const { handleStart, handleList, handleDetail } = require('./src/routes');
9
10const { utils: { log } } = Apify;
11
/**
 * Actor entry point. Reads the start URLs from the actor input, sets up a
 * PuppeteerCrawler backed by a request list, a request queue and a proxy
 * configuration, and dispatches every opened page to the route handler
 * selected by the request's `userData.label`.
 */
Apify.main(async () => {
    // getInput() may resolve to null when the actor is started without input.
    // Fail with a readable message instead of an opaque destructuring TypeError.
    const input = await Apify.getInput();
    if (!input || !Array.isArray(input.startUrls)) {
        throw new Error('Actor input must be an object with a "startUrls" array.');
    }
    const { startUrls } = input;

    const requestList = await Apify.openRequestList('start-urls', startUrls);
    const requestQueue = await Apify.openRequestQueue();
    const proxyConfiguration = await Apify.createProxyConfiguration();

    const crawler = new Apify.PuppeteerCrawler({
        requestList,
        requestQueue,
        proxyConfiguration,
        launchContext: {
            // Chrome with stealth should work for most websites.
            // If it doesn't, feel free to remove this.
            useChrome: true,
            stealth: true,
        },
        handlePageFunction: async (context) => {
            const { url, userData: { label } } = context.request;
            log.info('Page opened.', { label, url });
            // Requests without a recognised label are treated as start pages.
            switch (label) {
                case 'LIST':
                    return handleList(context);
                case 'DETAIL':
                    return handleDetail(context);
                default:
                    return handleStart(context);
            }
        },
    });

    log.info('Starting the crawl.');
    await crawler.run();
    log.info('Crawl finished.');
});

package.json

{
"name": "project-puppeteer-crawler",
"version": "0.0.1",
"description": "This is a boilerplate of an Apify actor.",
"dependencies": {
"apify": "^2.0.7",
"puppeteer": "*"
},
"devDependencies": {
"@apify/eslint-config": "^0.1.3",
"eslint": "^7.0.0"
},
"scripts": {
"start": "node main.js",
"lint": "./node_modules/.bin/eslint ./src --ext .js,.jsx",
"lint:fix": "./node_modules/.bin/eslint ./src --ext .js,.jsx --fix",
"test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
},
"author": "It's not you it's me",
"license": "ISC"
}

src/routes.js

1const Apify = require('apify');
2
3const { utils: { log } } = Apify;
4
5exports.handleStart = async ({ request, page }) => {
6 // Handle Start URLs
7};
8
9exports.handleList = async ({ request, page }) => {
10 // Handle pagination
11};
12
13exports.handleDetail = async ({ request, page }) => {
14 // Handle details
15};