const Apify = require('apify');

Apify.main(async () => {
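    // Open the default request queue and seed it with the first page to crawl.
    // Pages discovered during the crawl are added to the same queue below.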
    const requestQueue = await Apify.openRequestQueue();
    const enqueueUrl = async url => requestQueue.addRequest(new Apify.Request({ url }));
    await enqueueUrl('https://news.ycombinator.com/');

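    // Create the crawler; it manages a pool of Puppeteer browsers for us.
    // The options under launchPuppeteerOptions are passed to the browser launcher.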
    const crawler = new Apify.PuppeteerCrawler({
        requestQueue,
        disableProxy: true,
        launchPuppeteerOptions: {
            liveView: true,
            slowMo: 0,
        },
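        // This function is called for every page the crawler successfully loads.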
        handlePageFunction: async ({ page, request }) => {
            console.log(`Request ${request.url} succeeded!`);

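            // Extract the text of all Hacker News posts ('.athing' rows) on the page.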
            const data = await page.$$eval('.athing', (els) => {
                return els.map(el => el.innerText);
            });

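            // Save the results to the default dataset (one record per page).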
            await Apify.pushData({
                url: request.url,
                data,
            });

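            // Enqueue the next page. page.$eval() throws when the '.morelink'
            // selector matches nothing, which happens on the last page.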
            try {
                const nextHref = await page.$eval('.morelink', el => el.href);
                await enqueueUrl(nextHref);
            } catch (err) {
                console.log(`Url ${request.url} is the last page!`);
            }
        },

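        // Called when a request fails more than maxRequestRetries times
        // (3 by default, i.e. after 4 attempts in total).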
        handleFailedRequestFunction: async ({ request }) => {
            console.log(`Request ${request.url} failed 4 times`);

            await Apify.pushData({
                url: request.url,
                errors: request.errorMessages,
            });
        },
    });

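    // Run the crawler; the returned promise resolves once the queue is empty.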
    await crawler.run();

    // Keep the process alive for 10 more seconds before Apify.main() exits.
    await new Promise(resolve => setTimeout(resolve, 10_000));
});