My Actor 1
Pricing
Pay per event
Go to Apify Store
My Actor 1
Under maintenance · 4.0 (1)
Pricing
Pay per event
2
1
1
Last modified
8 months ago
Pricing
Pay per event
4.0 (1)
Pricing
Pay per event
2
1
1
Last modified
8 months ago
# Specify the base Docker image. You can read more about
# the available images at https://docs.apify.com/sdk/js/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:20 AS builder

# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./

# Install all dependencies, including devDependencies (the build step below
# needs them). Don't audit to speed up the installation.
RUN npm install --include=dev --audit=false

# Next, copy the source files using the user set
# in the base image.
COPY . ./

# Build the project by running the "build" script from package.json.
RUN npm run build

# Create final image
FROM apify/actor-node:20

# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version \
    && rm -r ~/.npm

# Copy built JS files from builder image.
# --from=builder makes COPY read from the "builder" stage declared above;
# without it the path would be looked up in the build context instead.
COPY --from=builder /usr/src/app/dist ./dist

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./
# Run the image.CMD npm run start:prod --silent{    "actorSpecification": 1,    "name": "my-actor-1",    "title": "Scrape single page in TypeScript",    "description": "Scrape data from single page with provided URL.",    "version": "0.0",    "meta": {        "templateId": "ts-start"    },    "input": "./input_schema.json",    "dockerfile": "./Dockerfile"}{    "title": "Scrape data from a web page",    "type": "object",    "schemaVersion": 1,    "properties": {        "url": {        "title": "URL of the page",        "type": "string",        "description": "The URL of website you want to get the data from.",        "editor": "textfield",        "prefill": "https://www.apify.com"        }    },    "required": ["url"]}# configurations.idea
# crawlee and apify storage foldersapify_storagecrawlee_storagestorage
# installed filesnode_modules
# git folder.git
# dist folderdiststorageapify_storagecrawlee_storagenode_modulesdisttsconfig.tsbuildinfostorage/*!storage/key_value_storesstorage/key_value_stores/*!storage/key_value_stores/defaultstorage/key_value_stores/default/*!storage/key_value_stores/default/INPUT.json{    "name": "ts-start",    "version": "0.0.1",    "type": "module",    "description": "This is an example of an Apify actor.",    "engines": {        "node": ">=18.0.0"    },    "dependencies": {        "apify": "^3.1.10",        "axios": "^1.5.0",        "cheerio": "^1.0.0-rc.12"    },    "devDependencies": {        "@apify/tsconfig": "^0.1.0",        "tsx": "^4.6.2",        "typescript": "^5.3.3"    },    "scripts": {        "start": "npm run start:dev",        "start:prod": "node dist/main.js",        "start:dev": "tsx src/main.ts",        "build": "tsc",        "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"    },    "author": "It's not you it's me",    "license": "ISC"}1// Axios - Promise based HTTP client for the browser and node.js (Read more at https://axios-http.com/docs/intro).2import axios from 'axios';3// Cheerio - The fast, flexible & elegant library for parsing and manipulating HTML and XML (Read more at https://cheerio.js.org/).4import * as cheerio from 'cheerio';5// Apify SDK - toolkit for building Apify Actors (Read more at https://docs.apify.com/sdk/js/).6import { Actor } from 'apify';7
// This is an ESM project, and as such, it requires you to specify extensions in your relative imports.
// Read more about this here: https://nodejs.org/docs/latest-v18.x/api/esm.html#mandatory-file-extensions
// Note that we need to use `.js` even when inside TS files.
// import { router } from './routes.js';

// The init() call configures the Actor for its environment. It's recommended to start every Actor with an init().
await Actor.init();

/** Shape of the Actor input; the structure is defined in input_schema.json. */
interface Input {
    url: string;
}

// Read the Actor input and fail early with a clear message when it is unusable.
const input = await Actor.getInput<Input>();
if (!input) throw new Error("Input is missing!");
const { url } = input;
// The `Input` type is a compile-time annotation only, so verify at runtime that
// a non-blank URL was actually supplied before handing it to the HTTP client.
if (typeof url !== "string" || url.trim() === "") {
    throw new Error('Input field "url" is missing or empty!');
}

// Fetch the HTML content of the page.
const response = await axios.get(url);

// Parse the downloaded HTML with Cheerio to enable data extraction.
const $ = cheerio.load(response.data);

// Extract all headings from the page (tag name and text).
const headings: { level: string; text: string }[] = [];
$("h1, h2, h3, h4, h5, h6").each((_i, element) => {
    const headingObject = {
        // Lower-cased tag name of the matched element, e.g. "h2".
        level: $(element).prop("tagName").toLowerCase(),
        text: $(element).text(),
    };
    console.log("Extracted heading", headingObject);
    headings.push(headingObject);
});

// Save headings to Dataset - a table-like storage.
await Actor.pushData(headings);
44// Gracefully exit the Actor process. It's recommended to quit all Actors with an exit().45await Actor.exit();{    "extends": "@apify/tsconfig",    "compilerOptions": {        "module": "NodeNext",        "moduleResolution": "NodeNext",        "target": "ES2022",        "outDir": "dist",        "noUnusedLocals": false,        "skipLibCheck": true,        "lib": ["DOM"]    },    "include": [        "./src/**/*"    ]}