ErikBoesen · NegassaB · Dec 29, 2021 · Dec 29, 2021 · Dec 29, 2021 · Dec 29, 2021
diff --git a/.gitignore b/.gitignore
@@ -88,3 +88,6 @@ ENV/
 # Rope project settings
 .ropeproject
 MANIFEST
+
+# vscode
+.vscode/
diff --git a/LICENSE b/LICENSE
@@ -1,7 +1,8 @@
-MIT License
+MIT License
 
 Copyright (c) 2017 Ali Najafi
 Copyright (c) 2019 Erik Bøsen
+Copyright (c) 2022 Negassa Berhanu
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

diff --git a/README.md b/README.md
@@ -3,34 +3,61 @@
 > A Python wrapper for using the [ocr.space API](https://ocr.space/ocrapi).
 
 ## Installation
+
 Simply install from `pip`:
+
 ```sh
 pip install ocrspace
 ```
 
 ## Use
+
+Get an API key from [Free OCR API](https://ocr.space/OCRAPI); otherwise the default API key `helloworld` will be used, which is [severely rate limited](https://ocr.space/faq#span12).
+
 First you'll need to import and instantiate the API wrapper:
-```py
+
+```python
+import ocrspace
+
+api_key = 'apikey retrieved from Free OCR API'
+
+api = ocrspace.API(api_key=api_key)
+# Or if you have a custom API host, API key or desired language, pass those:
+api = ocrspace.API(endpoint='https://example.host', api_key=api_key, language=ocrspace.Language.Croatian)
+```
+
+To change the OCR engine, pass a member of the `ocrspace.Engine` enum (for example `ocrspace.Engine.ENGINE_2`) as the `engine` argument when instantiating the API. The argument must be an `Engine` member, not a bare integer. The default is `ocrspace.Engine.ENGINE_1`:
+
+```python
 import ocrspace
-api = ocrspace.API()
+
+api_key = "apikey retrieved from Free OCR API"
+
+api = ocrspace.API(api_key=api_key, engine=ocrspace.Engine.ENGINE_2)
 # Or if you have a custom API host, API key or desired language, pass those:
-api = ocrspace.API(endpoint='https://example.host', api_key='Insert key here', language=ocrspace.Language.Croatian)
+api = ocrspace.API(endpoint='https://example.host', api_key=api_key, language=ocrspace.Language.Croatian, engine=ocrspace.Engine.ENGINE_2)
 ```
+
 To perform recognition on an image hosted at some URL:
-```py
+
+```python
 api.ocr_url('URL of image goes here')
 ```
+
 Or, if you have an image locally upon which to perform recognition:
-```py
+
+```python
 api.ocr_file('image.jpg')
 # or:
 api.ocr_file(open('image.jpg', 'rb'))  # or any other file pointer
 ```
-That's it! Look at [`example.py`](example.py) for a demonstration.
 
+That's it! Look at [`example.py`](example.py) for a demonstration.
 
 ## Authorship
-This package was created by [Ali Najafi](https://github.com/a4fr) and is maintained by [Erik Boesen](https://github.com/ErikBoesen).
+
+This package was created by [Ali Najafi](https://github.com/a4fr) and is maintained by [Erik Boesen](https://github.com/ErikBoesen) and [Negassa Berhanu](https://github.com/NegassaB).
 
 ## License
+
 [MIT](LICENSE)
diff --git a/example.py b/example.py
@@ -1,22 +1,34 @@
 import ocrspace
+from ocrspace import Engine
 import requests
 
-
+# API client using the default OCR engine (engine 1)
 api = ocrspace.API()
+
+# api with engine 2
+api_with_engine_two = ocrspace.API(engine=Engine.ENGINE_2)
 TEST_IMAGE_URL = 'https://images-na.ssl-images-amazon.com/images/I/71ovNJN1URL._SL1244_.jpg'
 
 print('Testing URL-based OCR:')
 print(api.ocr_url(TEST_IMAGE_URL))
+print('Testing URL-based OCR using engine_two:')
+print(api_with_engine_two.ocr_url(TEST_IMAGE_URL))
 
-print('Testing file-based OCR:')
 # Download image for demo purposes
 TEST_FILENAME = '/tmp/test_image.jpg'
 with open(TEST_FILENAME, 'wb') as f:
     r = requests.get(TEST_IMAGE_URL)
     r.raw.decode_content = True
     f.write(r.content)
 
+print('Testing file-based OCR:')
 # With file path
 print(api.ocr_file(TEST_FILENAME))
 # With file pointer
 print(api.ocr_file(open(TEST_FILENAME, 'rb')))
+
+print('Testing file-based OCR using engine_two:')
+# With file path
+print(api_with_engine_two.ocr_file(TEST_FILENAME))
+# With file pointer
+print(api_with_engine_two.ocr_file(open(TEST_FILENAME, 'rb')))
diff --git a/ocrspace/main.py b/ocrspace/main.py
@@ -1,4 +1,5 @@
 import requests
+from enum import IntEnum
 
 
 class Language:
@@ -28,25 +29,40 @@ class Language:
     Turkish = 'tur'
 
 
+class Engine(IntEnum):
+    """
+    Enum representing the OCR engine to use
+    """
+    ENGINE_1 = 1
+    ENGINE_2 = 2
+
+
 class API:
     def __init__(
         self,
         endpoint='https://api.ocr.space/parse/image',
         api_key='helloworld',
         language=Language.English,
+        engine=Engine.ENGINE_1,
         **kwargs,
     ):
         """
         :param endpoint: API endpoint to contact
         :param api_key: API key string
         :param language: document language
+        :param engine: ocr engine to use
         :param **kwargs: other settings to API
         """
+        if not isinstance(engine, Engine):
+            raise TypeError('engine must be an instance of Engine')
+        if engine.value != 1 and engine.value != 2:
+            raise ValueError('the value of engine must be either 1 or 2, use ocrspace.Engine')
         self.endpoint = endpoint
+        self.api_key = api_key
         self.payload = {
             'isOverlayRequired': True,
-            'apikey': api_key,
             'language': language,
+            'OCREngine': engine.value,
             **kwargs
         }
 
@@ -57,21 +73,44 @@ def _parse(self, raw):
             raise Exception(raw['ErrorMessage'][0])
         return raw['ParsedResults'][0]['ParsedText']
 
-
-    def ocr_file(self, fp):
+    def query_api(self, image_url=None, image_file=None):
         """
-        Process image from a local path.
-        :param fp: A path or pointer to your file
+        Process the provided parameter.
+        :param image_url: Request payload dict carrying either the image 'url' or the 'base64Image' field
+        :param image_file: A path or file pointer to the image file
         :return: Result in JSON format
+        :raise: requests.exceptions (e.g. HTTPError, Timeout) or a general Exception
         """
-        with (open(fp, 'rb') if type(fp) == str else fp) as f:
+
+        if image_file:
             r = requests.post(
                 self.endpoint,
-                files={'filename': f},
+                headers={'apikey': self.api_key},
+                files={'filename': image_file},
                 data=self.payload,
+                timeout=30
+            )
+        elif image_url:
+            r = requests.post(
+                self.endpoint,
+                headers={'apikey': self.api_key},
+                data=image_url,
+                timeout=30
             )
+        else:
+            raise TypeError('either image_file or image_url must be provided')
+        r.raise_for_status()
         return self._parse(r.json())
 
+    def ocr_file(self, fp):
+        """
+        Process image from a local path.
+        :param fp: A path or pointer to your file
+        :return: Result in JSON format
+        """
+        with (open(fp, 'rb') if type(fp) == str else fp) as f:
+            return self.query_api(image_file=f)
+
     def ocr_url(self, url):
         """
         Process an image at a given URL.
@@ -80,11 +119,7 @@ def ocr_url(self, url):
         """
         data = self.payload
         data['url'] = url
-        r = requests.post(
-            self.endpoint,
-            data=data,
-        )
-        return self._parse(r.json())
+        return self.query_api(image_url=data)
 
     def ocr_base64(self, base64image):
         """
@@ -94,8 +129,4 @@ def ocr_base64(self, base64image):
         """
         data = self.payload
         data['base64Image'] = base64image
-        r = requests.post(
-            self.endpoint,
-            data=data,
-        )
-        return self._parse(r.json())
+        return self.query_api(image_url=data)
diff --git a/setup.py b/setup.py
@@ -1,13 +1,14 @@
 from distutils.core import setup
 
 setup(
-    name = 'ocrspace',
-    packages = ['ocrspace'], # this must be the same as the name above
-    version = '2.3.0',
-    description = 'Perform OCR through ocr.space API',
-    author = ['Ali Najafi', 'Erik Boesen'],
-    author_email = 'me@erikboesen.com',
-    url = 'https://github.com/ErikBoesen/ocrspace',
-    keywords = ['ocr'],
-    classifiers = [],
+    name='ocrspace',
+    packages=['ocrspace'],  # this must be the same as the name above
+    requires=['requests'],
+    version='2.4.0',
+    description='Perform OCR through ocr.space API',
+    author=['Ali Najafi', 'Erik Boesen', 'Negassa Berhanu'],
+    author_email='me@erikboesen.com',
+    url='https://github.com/ErikBoesen/ocrspace',
+    keywords=['ocr'],
+    classifiers=[],
 )