diff --git a/data_processing/doc_parser.py b/data_processing/doc_parser.py
index 8a8c388..6d0e16a 100644
--- a/data_processing/doc_parser.py
+++ b/data_processing/doc_parser.py
@@ -5,11 +5,28 @@
 
 
 def read_pdf(file_path):
-    reader = PdfReader(file_path)
-    text = ""
-    for page in reader.pages:
-        text += page.extract_text() + "\n"
-    return text
+    """Return the concatenated text of every page in a PDF file.
+
+    Each page's text is followed by a newline. Reading is best-effort: if
+    the file cannot be opened or parsed, the failure is logged and an empty
+    string is returned instead of raising.
+    """
+    import logging
+
+    try:
+        reader = PdfReader(file_path)
+        # A page with no extractable text may yield an empty/None result;
+        # treat it as "" so one such page does not discard the whole file.
+        return "".join(
+            (page.extract_text() or "") + "\n" for page in reader.pages
+        )
+    except Exception:
+        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
+        # propagate while keeping the best-effort contract for callers.
+        logging.getLogger(__name__).exception(
+            "Failed to read PDF %r", file_path
+        )
+        return ""
 
 def read_docx(file_path):
     doc = docx.Document(file_path)