Skip to content

Commit e20b4bf

Browse files
authored
chore(CI): typecheck TS examples in docs automatically (apify#3278)
The examples in the documentation are no longer failing with TS errors on build. This PR improves the DX (users copying examples from the docs will get valid scripts) and acts as an additional guard rail when making larger changes in Crawlee.
1 parent df822c2 commit e20b4bf

24 files changed

+558
-99
lines changed

.github/workflows/test-ci.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,14 @@ jobs:
7676
env:
7777
YARN_IGNORE_NODE: 1
7878

79+
- name: Typecheck documentation examples
80+
working-directory: ./docs
81+
run: |
82+
yarn
83+
yarn typecheck
84+
env:
85+
YARN_IGNORE_NODE: 1
86+
7987
- name: Tests
8088
run: yarn test
8189
env:

docs/examples/.eslintrc.json

Lines changed: 0 additions & 13 deletions
This file was deleted.

docs/examples/cheerio_crawler.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ const crawler = new CheerioCrawler({
3535
// Extract data from the page using cheerio.
3636
const title = $('title').text();
3737
const h1texts: { text: string }[] = [];
38-
$('h1').each((index, el) => {
38+
$('h1').each((_, el) => {
3939
h1texts.push({
4040
text: $(el).text(),
4141
});

docs/examples/crawler-plugins/playwright-extra.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,14 @@ const crawler = new PlaywrightCrawler({
3535

3636
// A function to be evaluated by Puppeteer within the browser context.
3737
const data = await page.$$eval('.athing', ($posts) => {
38-
const scrapedData: { title: string; rank: string; href: string }[] = [];
38+
const scrapedData: { title?: string; rank?: string; href?: string }[] = [];
3939

4040
// We're getting the title, rank and URL of each post on Hacker News.
4141
$posts.forEach(($post) => {
4242
scrapedData.push({
43-
title: $post.querySelector('.title a').innerText,
44-
rank: $post.querySelector('.rank').innerText,
45-
href: $post.querySelector('.title a').href,
43+
title: $post.querySelector<HTMLElement>('.title a')?.innerText,
44+
rank: $post.querySelector<HTMLElement>('.rank')?.innerText,
45+
href: $post.querySelector<HTMLAnchorElement>('.title a')?.href,
4646
});
4747
});
4848

docs/examples/crawler-plugins/puppeteer-extra.ts

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import stealthPlugin from 'puppeteer-extra-plugin-stealth';
44

55
// First, we tell puppeteer-extra to use the plugin (or plugins) we want.
66
// Certain plugins might have options you can pass in - read up on their documentation!
7+
// @ts-expect-error - The default export types for puppeteer-extra don't properly expose the 'use' method in ESM contexts
78
puppeteerExtra.use(stealthPlugin());
89

910
// Create an instance of the PuppeteerCrawler class - a crawler
@@ -32,14 +33,14 @@ const crawler = new PuppeteerCrawler({
3233

3334
// A function to be evaluated by Puppeteer within the browser context.
3435
const data = await page.$$eval('.athing', ($posts) => {
35-
const scrapedData: { title: string; rank: string; href: string }[] = [];
36+
const scrapedData: { title?: string; rank?: string; href?: string }[] = [];
3637

3738
// We're getting the title, rank and URL of each post on Hacker News.
3839
$posts.forEach(($post) => {
3940
scrapedData.push({
40-
title: $post.querySelector('.title a').innerText,
41-
rank: $post.querySelector('.rank').innerText,
42-
href: $post.querySelector('.title a').href,
41+
title: $post.querySelector<HTMLElement>('.title a')?.innerText,
42+
rank: $post.querySelector<HTMLElement>('.rank')?.innerText,
43+
href: $post.querySelector<HTMLAnchorElement>('.title a')?.href,
4344
});
4445
});
4546

docs/examples/playwright_crawler.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,14 @@ const crawler = new PlaywrightCrawler({
2525

2626
// A function to be evaluated by Playwright within the browser context.
2727
const data = await page.$$eval('.athing', ($posts) => {
28-
const scrapedData: { title: string; rank: string; href: string }[] = [];
28+
const scrapedData: { title?: string; rank?: string; href?: string }[] = [];
2929

3030
// We're getting the title, rank and URL of each post on Hacker News.
3131
$posts.forEach(($post) => {
3232
scrapedData.push({
33-
title: $post.querySelector('.title a').innerText,
34-
rank: $post.querySelector('.rank').innerText,
35-
href: $post.querySelector('.title a').href,
33+
title: $post.querySelector<HTMLElement>('.title a')?.innerText,
34+
rank: $post.querySelector<HTMLElement>('.rank')?.innerText,
35+
href: $post.querySelector<HTMLAnchorElement>('.title a')?.href,
3636
});
3737
});
3838

docs/examples/puppeteer_crawler.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,14 @@ const crawler = new PuppeteerCrawler({
2525

2626
// A function to be evaluated by Puppeteer within the browser context.
2727
const data = await page.$$eval('.athing', ($posts) => {
28-
const scrapedData: { title: string; rank: string; href: string }[] = [];
28+
const scrapedData: { title?: string; rank?: string; href?: string }[] = [];
2929

3030
// We're getting the title, rank and URL of each post on Hacker News.
3131
$posts.forEach(($post) => {
3232
scrapedData.push({
33-
title: $post.querySelector('.title a').innerText,
34-
rank: $post.querySelector('.rank').innerText,
35-
href: $post.querySelector('.title a').href,
33+
title: $post.querySelector<HTMLElement>('.title a')?.innerText,
34+
rank: $post.querySelector<HTMLElement>('.rank')?.innerText,
35+
href: $post.querySelector<HTMLAnchorElement>('.title a')?.href,
3636
});
3737
});
3838

docs/examples/tsconfig.json

Lines changed: 0 additions & 21 deletions
This file was deleted.

docs/guides/custom-http-client/implementation.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import type {
88
} from '@crawlee/core';
99
import { Readable } from 'node:stream';
1010

11-
class CustomHttpClient implements BaseHttpClient {
11+
export class CustomHttpClient implements BaseHttpClient {
1212
async sendRequest<TResponseType extends keyof ResponseTypes = 'text'>(
1313
request: HttpRequest<TResponseType>,
1414
): Promise<HttpResponse<TResponseType>> {
@@ -59,7 +59,7 @@ class CustomHttpClient implements BaseHttpClient {
5959
};
6060
}
6161

62-
async stream(request: HttpRequest, onRedirect?: RedirectHandler): Promise<StreamingHttpResponse> {
62+
async stream(request: HttpRequest, _onRedirect?: RedirectHandler): Promise<StreamingHttpResponse> {
6363
const fetchResponse = await fetch(request.url, {
6464
method: request.method,
6565
headers: new Headers(),
@@ -79,7 +79,7 @@ class CustomHttpClient implements BaseHttpClient {
7979
return null;
8080
}
8181
return pump();
82-
function pump() {
82+
function pump(): Promise<void> {
8383
return reader!.read().then(({ done, value }) => {
8484
// When no more data needs to be consumed, close the stream
8585
if (done) {

docs/guides/custom-http-client/usage.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
import { HttpCrawler } from 'crawlee';
2+
import { CustomHttpClient } from './implementation.js';
3+
14
const crawler = new HttpCrawler({
25
httpClient: new CustomHttpClient(),
36
async requestHandler() {

0 commit comments

Comments (0)