Live View
Pricing
Pay per usage
Go to Store
Live View
0.0 (0)
Pricing
Pay per usage
0
Total users
2
Monthly users
1
Runs succeeded
>99%
Last modified
3 years ago
Dockerfile
# This is a template for a Dockerfile used to run acts in Actor system.
# The base image name below is set during the act build, based on user settings.
# IMPORTANT: The base image must set a correct working directory, such as /usr/src/app or /home/user
FROM apify/actor-node-chrome:beta

# Second, copy just package.json and package-lock.json since it should be
# the only file that affects "npm install" in the next step, to speed up the build
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list --all || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Copy source code to container
# Do this in the last step, to have fast build if only the source code changed
COPY . ./

# NOTE: The CMD is already defined by the base image.
# Uncomment this for local node inspector debugging:
# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]
package.json
{ "name": "apify-project", "version": "0.0.1", "description": "", "author": "It's not you it's me", "license": "ISC", "dependencies": { "apify": "1.0.0-beta.22" }, "scripts": { "start": "node main.js" }}
main.js
// Hacker News crawler built on Apify SDK v1 (PuppeteerCrawler).
// Crawls news.ycombinator.com page by page, extracts the text of each
// post row ('.athing'), and stores the results in the default dataset.
const Apify = require('apify');

Apify.main(async () => {
    // Open the default request queue and seed it with the first URL.
    const requestQueue = await Apify.openRequestQueue();
    const enqueueUrl = async (url) => requestQueue.addRequest(new Apify.Request({ url }));
    await enqueueUrl('https://news.ycombinator.com/');

    // Create crawler.
    const crawler = new Apify.PuppeteerCrawler({
        requestQueue,
        // NOTE(review): `disableProxy` is not a documented PuppeteerCrawler
        // option in Apify SDK 1.x — confirm it is still honored, or drop it.
        disableProxy: true,
        launchPuppeteerOptions: {
            liveView: true,
            slowMo: 0,
        },

        // This function is executed for each request.
        // If a request fails, it is retried up to 3 times.
        // Parameter `page` is Puppeteer's Page object with the loaded page.
        handlePageFunction: async ({ page, request }) => {
            console.log(`Request ${request.url} succeeded!`);

            // Extract the text of all posts on the page.
            const data = await page.$$eval('.athing', (els) => {
                return els.map((el) => el.innerText);
            });

            // Save data to the default dataset.
            await Apify.pushData({
                url: request.url,
                data,
            });

            // Enqueue the next page; '.morelink' is absent on the last page,
            // so $eval throws and we treat that as the end of pagination.
            try {
                const nextHref = await page.$eval('.morelink', (el) => el.href);
                await enqueueUrl(nextHref);
            } catch (err) {
                console.log(`Url ${request.url} is the last page!`);
            }
        },

        // Executed once a request has failed 4 times (initial try + 3 retries).
        handleFailedRequestFunction: async ({ request }) => {
            console.log(`Request ${request.url} failed 4 times`);

            // Record the failed URL and its accumulated error messages.
            await Apify.pushData({
                url: request.url,
                errors: request.errorMessages,
            });
        },
    });

    // Run crawler and wait until it finishes.
    await crawler.run();
});