-
Notifications
You must be signed in to change notification settings - Fork 9
Expand file tree
/
Copy pathgetTranscript.js
More file actions
50 lines (43 loc) · 1.36 KB
/
getTranscript.js
File metadata and controls
50 lines (43 loc) · 1.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
require('dotenv').config()
const ora = require('ora')
const cliSpinners = require('cli-spinners')
const Throttle = require('promise-parallel-throttle')
const db = require('monk')(process.env.MONGO_DB)
const posts = db.get('posts')
const parsePdf2 = require('./parsePdf2')
/**
 * Re-parses transcripts for posts whose stored transcript is missing or looks broken.
 *
 * Selects posts whose `transcriptURL` matches 'softwareengineeringdaily.com' and
 * whose `transcript` is absent, contains "[SPONSOR MESSAGE]" (case-insensitive),
 * or matches '.</p><p>com'. Each selected post's URL is passed to `parsePdf2` and
 * the result is stored in that post's `transcript` field. A failure on one post is
 * reported on its spinner and does not stop the remaining posts.
 *
 * The database connection is closed whether or not the run succeeds. After a
 * successful run the process is exited.
 *
 * @returns {Promise<void>} Rejects if the lookup or the queue itself fails.
 */
const getTranscript = async () => {
  const options = {}
  const query = {
    transcriptURL: { $regex: 'softwareengineeringdaily.com' },
    $or: [
      { transcript: { $exists: false } },
      // Case-insensitive only: a "global" flag is not a MongoDB regex option.
      { transcript: { $regex: /\[SPONSOR MESSAGE\]/i } },
      // NOTE(review): the leading '.' is unescaped, so it matches any character;
      // presumably a literal dot was intended — confirm before tightening.
      { transcript: { $regex: '.</p><p>com' } }
    ],
  }

  try {
    const reply = await posts.find(query, options)

    const queue = reply.map(post => async () => {
      // Resolve the title defensively so reporting can never throw on its own.
      const title = (post.title && post.title.rendered) || '(untitled)'
      const spinner = ora({
        text: `Parsing ${post.transcriptURL}`,
        spinner: cliSpinners.bouncingBar,
      })

      try {
        spinner.start()
        const transcript = await parsePdf2(post.transcriptURL)
        await posts.update({ id: post.id }, { $set: { transcript } })
        spinner.succeed(`[SUCCESS]: ${post.id} ${title} - ${post.transcriptURL}`)
      } catch (err) {
        // The spinner only takes the text, so the reason has to be part of it.
        const reason = err instanceof Error ? err.message : String(err)
        spinner.fail(`ERROR: ${title} ${post.transcriptURL}: ${reason}`)
      }
    })

    // Wait for the whole queue so failures surface through this function's promise.
    await Throttle.sync(queue)
    console.log('Processed all transcripts')
  } finally {
    // Release the connection on failure too, and let it finish closing.
    await db.close()
  }

  // Kept from the original flow; presumably guards against lingering handles
  // keeping the process alive — confirm whether it is still needed.
  process.exit()
}
// Run as soon as the file is executed. A rejected run is logged and turned into
// a non-zero exit instead of being left as an unhandled rejection.
getTranscript().catch(err => {
  console.error('Transcript processing failed:', err)
  process.exit(1)
})