-
Notifications
You must be signed in to change notification settings - Fork 32
Expand file tree
/
Copy pathbaseline.py
More file actions
28 lines (20 loc) · 790 Bytes
/
baseline.py
File metadata and controls
28 lines (20 loc) · 790 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import sys
import os
# Add the 'textrank' directory located next to this file's parent directory
# to the import path so the 'summa' import below can be resolved.
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir, 'textrank'))
from summa.preprocessing.textcleaner import get_sentences # Uses textrank's method for extracting sentences.
# Number of words after which baseline() stops copying words and returns.
BASELINE_WORD_COUNT = 100
def baseline(text, max_words=None):
    """Create a baseline summary to be used as a reference.

    The baseline is an extract of the leading words of ``text``: sentences
    are obtained from ``get_sentences`` and their whitespace-separated words
    are copied, in order, until the word limit is reached.

    Args:
        text: The document to summarize; passed unchanged to
            ``get_sentences``.
        max_words: Maximum number of words to keep. When omitted or ``None``
            the module-level ``BASELINE_WORD_COUNT`` is used.

    Returns:
        A string in which every kept word is followed by a single space and
        every fully consumed sentence is followed by a newline. If the limit
        is hit, the string ends right after the last kept word's trailing
        space (no newline is appended). An empty string is returned when the
        limit is zero or negative.
    """
    # Resolve the limit at call time so changes to the constant are honored.
    limit = BASELINE_WORD_COUNT if max_words is None else max_words
    if limit <= 0:
        # An equality check would never trigger here and the whole text
        # would be returned; a non-positive limit means "keep nothing".
        return ""

    # Collect fragments and join once instead of repeated string "+=".
    pieces = []
    kept = 0
    for sentence in get_sentences(text):
        for word in sentence.split():
            pieces.append(word + " ")
            kept += 1
            if kept >= limit:
                return "".join(pieces)
        # Sentence fully consumed: terminate it with a line break.
        pieces.append("\n")
    return "".join(pieces)