-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathscraper2.js
More file actions
57 lines (47 loc) · 1.72 KB
/
scraper2.js
File metadata and controls
57 lines (47 loc) · 1.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
const puppeteer = require('puppeteer');
const fs = require('fs');
let url = 'https://www.rekhta.org/ghazals/sunaa-hai-log-use-aankh-bhar-ke-dekhte-hain-ahmad-faraz-ghazals?lang=ur';
/**
 * Scrapes the poem text from the page at `url` and saves it to "faraz.txt".
 *
 * Steps:
 *   1. Launch a headless browser and open a page, aborting stylesheet, font
 *      and image requests (only the DOM text is needed).
 *   2. Navigate to `url` and read the `innerText` of the first `div.pMC`.
 *   3. Close the browser (always, even on failure).
 *   4. Write the collected texts, separated by blank lines, to "faraz.txt".
 *
 * Any failure is logged to stderr and the process exit code is set to 1.
 */
(async () => {
  // Resource types that are skipped while loading the page.
  const blockedTypes = new Set(['stylesheet', 'font', 'image']);
  const arr = [];
  let tst = '';

  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (blockedTypes.has(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    await page.goto(url, { waitUntil: 'domcontentloaded' });
    console.log('page loaded...');

    // Arguments passed to page.evaluate are serialized into the browser
    // context, so mutating them there never reaches Node. The text therefore
    // has to come back as the callback's return value.
    const poemText = await page.evaluate(() => {
      const node = document.querySelector('div.pMC');
      return node === null ? null : node.innerText;
    });
    if (poemText === null) {
      throw new Error(`No "div.pMC" element found on ${url}`);
    }

    tst = poemText;
    arr.push(poemText);

    // TODO: follow the "a.nextPoem" link to collect further poems; only the
    // first page is scraped for now.
  } finally {
    // Release the browser process even when navigation or extraction fails.
    await browser.close();
  }

  console.log(arr);
  console.log(tst);

  const result = arr.join('\r\n\r\n');
  await fs.promises.writeFile('faraz.txt', result);
  console.log('Saved!');
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});