-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscraper-example.js
More file actions
81 lines (69 loc) · 1.79 KB
/
scraper-example.js
File metadata and controls
81 lines (69 loc) · 1.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
// Example: Facebook Scraper using Crawlbase API
const { CrawlingAPI } = require('crawlbase');
// Initialize the API client. The token is read from the CRAWLBASE_TOKEN
// environment variable when it is set, so a real credential never has to be
// written into this file; the original placeholder remains as the fallback.
// `||` (rather than `??`) is deliberate: an empty variable also falls back.
const api = new CrawlingAPI({
  token: process.env.CRAWLBASE_TOKEN || 'YOUR_CRAWLBASE_TOKEN',
});
/**
 * Scrape a Facebook page.
 *
 * Calls `api.get` with the `facebook-scraper` scraper and JSON format, parses
 * the response body as JSON, logs the parsed value and returns it.
 *
 * @param {string} pageUrl - URL handed to `api.get`.
 * @returns {Promise<*>} The value produced by parsing `response.body`.
 * @throws Rethrows, after logging, any error raised by the request or by
 *   JSON parsing.
 */
async function scrapeFacebookPage(pageUrl) {
  try {
    const response = await api.get(pageUrl, {
      scraper: 'facebook-scraper',
      format: 'json',
    });
    // Parse once: the logged value and the returned value are then the same
    // object, and the body is not deserialized twice.
    const pageData = JSON.parse(response.body);
    console.log('Page data:', pageData);
    return pageData;
  } catch (error) {
    console.error('Error scraping page:', error);
    throw error;
  }
}
/**
 * Scrape Facebook posts from a profile.
 *
 * Calls `api.get` with the `facebook-scraper` scraper, JSON format and a
 * `post_limit` option, parses the response body as JSON, logs how many posts
 * were found and returns them.
 *
 * @param {string} profileUrl - URL handed to `api.get`.
 * @param {number} [limit=10] - Value sent as the `post_limit` request option.
 * @returns {Promise<Array>} The `posts` array from the parsed response, or an
 *   empty array when the parsed response carries no `posts` array.
 * @throws Rethrows, after logging, any error raised by the request or by
 *   JSON parsing.
 */
async function scrapeFacebookPosts(profileUrl, limit = 10) {
  try {
    const response = await api.get(profileUrl, {
      scraper: 'facebook-scraper',
      format: 'json',
      post_limit: limit,
    });
    const data = JSON.parse(response.body);
    // A payload without a `posts` array (or a literal `null` body) used to
    // surface as an opaque TypeError on `.length`; treat it as "no posts".
    const posts = data && Array.isArray(data.posts) ? data.posts : [];
    console.log(`Scraped ${posts.length} posts`);
    return posts;
  } catch (error) {
    console.error('Error scraping posts:', error);
    throw error;
  }
}
/**
 * Batch scrape multiple Facebook URLs.
 *
 * URLs are handled one after another, each through `scrapeFacebookPage`.
 * A failure for one URL is recorded in the output instead of aborting the
 * remaining URLs.
 *
 * @param {Iterable<string>} urls - URLs to scrape, in order.
 * @returns {Promise<Array<Object>>} One entry per URL, in input order:
 *   `{ url, success: true, data }` on success, or
 *   `{ url, success: false, error }` (the error's message) on failure.
 */
async function batchScrape(urls) {
  const outcomes = [];
  for (const target of urls) {
    let entry;
    try {
      const pageData = await scrapeFacebookPage(target);
      entry = { url: target, success: true, data: pageData };
    } catch (failure) {
      entry = { url: target, success: false, error: failure.message };
    }
    outcomes.push(entry);
  }
  return outcomes;
}
// Example usage: runs only when this file is executed directly
// (`require.main === module`), not when it is loaded through `require`.
if (require.main === module) {
  const examplePageUrl = 'https://facebook.com/example-page';
  scrapeFacebookPage(examplePageUrl)
    .then(() => {
      console.log('Successfully scraped page!');
    })
    .catch((error) => {
      console.error('Failed to scrape page:', error);
      // Signal the failure to the calling shell; without this the script
      // would exit with status 0 even though the scrape failed.
      process.exitCode = 1;
    });
}
// Public API of this module: the three scraping helpers defined above.
module.exports = { scrapeFacebookPage, scrapeFacebookPosts, batchScrape };