-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathimage_to_string.py
More file actions
63 lines (50 loc) · 1.69 KB
/
Copy pathimage_to_string.py
File metadata and controls
63 lines (50 loc) · 1.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# Authors Alexey Titov and Shir Bentabou
# Version 1.0
# Date 03.2019
# USAGE
# python image_to_string.py example.png
# python image_to_string.py example2.jpg eng+rus
# import the necessary packages
import io
import os
import sys

import cv2
import pytesseract
from PIL import Image
# Tesseract configuration shared by both invocation modes:
# '--oem 1' selects the LSTM OCR engine, '--psm 3' is page segmentation mode 3.
TESSERACT_CONFIG = '--oem 1 --psm 3'

# Language used when no language argument is given on the command line.
DEFAULT_LANGS = 'eng'


def text_output_path(img_path):
    """Return the path of the text file that accompanies *img_path*.

    The image extension is replaced by ``.txt`` regardless of its length
    (``.png``, ``.jpeg``, ``.tiff`` ...); a path without an extension simply
    gets ``.txt`` appended.
    """
    root, _ext = os.path.splitext(img_path)
    return root + '.txt'


def main(argv):
    """Recognize the text in an image and save it next to the image.

    Args:
        argv: Command-line arguments in ``sys.argv`` form: the program name,
            the image path and, optionally, the Tesseract language
            specification (for example ``eng+rus``).

    Returns:
        The process exit status: 0 on success, 1 on a usage error or when
        the image cannot be read.
    """
    if not 2 <= len(argv) <= 3:
        print('Usage: python code.py example.jpg')
        print('OR python code.py example.jpg lan1+lan2')
        return 1

    print('--- Start recognize text from image ---')
    # Read image path from command line
    img_path = argv[1]

    if len(argv) == 2:
        # Default: English only
        print('You choose default configuration')
        langs = DEFAULT_LANGS
    else:
        # Multiple languages, e.g. eng+rus
        print('You choose multiply language configuration')
        langs = argv[2]

    # Read image from disk. cv2.imread reports failure by returning None
    # rather than raising, so check before handing the image to Tesseract.
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if img is None:
        sys.stderr.write('Error: cannot read image: ' + img_path + '\n')
        return 1

    # Run tesseract OCR on image
    text = pytesseract.image_to_string(img, lang=langs,
                                       config=TESSERACT_CONFIG)
    # Print recognized text
    print(text)

    # Save recognized text as UTF-8. The stream does the encoding: writing
    # pre-encoded bytes to a text-mode file raises TypeError on Python 3.
    with io.open(text_output_path(img_path), 'w', encoding='utf-8') as f:
        f.write(text)

    print('------ Done ------')
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))