diff --git a/README.md b/README.md
index 3d1dfef..76ff2f1 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,13 @@
+
Forked repository and added conversion python script
+The script added in this fork is:
+convert_annotations.py
+
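+Use the toolkit as usual to gather images from the Open Images Dataset. The script reads the class names from `classes.txt` in the root directory (one class per line; the line order defines the numeric class ids). After gathering the images, run the following from the root directory: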
+```bash
+python convert_annotations.py
+```
+This will generate .txt annotation files in the format required for custom object detection with YOLOv3. The text files are written to the same folders as the images.
+
~ OIDv4 ToolKit ~
Do you want to build your personal object detector but you don't have enough images to train your model? Do you want to train your personal image classifier, but you are tired of the deadly slowness of ImageNet? Have you already discovered [Open Images Dataset v4](https://storage.googleapis.com/openimages/web/index.html) that has [600](https://storage.googleapis.com/openimages/2018_04/bbox_labels_600_hierarchy_visualizer/circle.html) classes and more than 1,700,000 images with related bounding boxes ready to use? Do you want to exploit it for your projects but you don't want to download gigabytes and gigabytes of data!?
diff --git a/convert_annotations.py b/convert_annotations.py
new file mode 100644
index 0000000..96b8bcc
--- /dev/null
+++ b/convert_annotations.py
@@ -0,0 +1,79 @@
+import os
+import cv2
+import numpy as np
+from tqdm import tqdm
+import argparse
+import fileinput
+
+# function that turns XMin, YMin, XMax, YMax coordinates to normalized yolo format
+def convert(filename_str, coords):
+    """Convert an XMin, YMin, XMax, YMax box to the normalized YOLO format.
+
+    Args:
+        filename_str: Image name without extension. ``<filename_str>.jpg``
+            is read from the parent of the current working directory, so
+            the caller is expected to run this from inside a ``Label`` folder.
+        coords: Mutable float array ``[XMin, YMin, XMax, YMax]`` in the same
+            units as the image dimensions (pixels). It is modified in place.
+
+    Returns:
+        The same array, now holding ``[x_center, y_center, width, height]``
+        with x values divided by the image width and y values divided by the
+        image height.
+
+    Raises:
+        FileNotFoundError: If the image cannot be read.
+    """
+    # Address the image through a relative path instead of changing the
+    # process working directory back and forth.
+    image_path = os.path.join(os.pardir, filename_str + ".jpg")
+    image = cv2.imread(image_path)
+    # cv2.imread reports failure by returning None instead of raising.
+    if image is None:
+        raise FileNotFoundError("Could not read image: " + image_path)
+    img_height, img_width = image.shape[:2]
+
+    # Corner pair -> box width and height.
+    coords[2] -= coords[0]
+    coords[3] -= coords[1]
+    # Move from the top-left corner to the centre by exactly half the box
+    # size; truncating the half-size to an int would shift the centre by up
+    # to half a pixel.
+    coords[0] += coords[2] / 2
+    coords[1] += coords[3] / 2
+
+    # Normalize to the 0..1 range relative to the image size.
+    coords[0] /= img_width
+    coords[1] /= img_height
+    coords[2] /= img_width
+    coords[3] /= img_height
+    return coords
+
+ROOT_DIR = os.getcwd()
+
+# Map each class name to its zero-based line index in classes.txt; the index
+# is written to the annotation files as the numeric class id.
+classes = {}
+with open(os.path.join(ROOT_DIR, "classes.txt"), "r") as myFile:
+    for num, line in enumerate(myFile):
+        name = line.strip()
+        # A blank line still consumes an index, but it must never become a
+        # key: an empty name cannot match any label row.
+        if name:
+            classes[name] = num
+
+# step into dataset directory
+os.chdir(os.path.join("OID", "Dataset"))
+DATASET_DIR = os.getcwd()
+DIRS = os.listdir(DATASET_DIR)
+
+# for all train, validation and test folders
+for DIR in DIRS:
+    split_dir = os.path.join(DATASET_DIR, DIR)
+    if not os.path.isdir(split_dir):
+        continue
+    print("Currently in subdirectory:", DIR)
+
+    CLASS_DIRS = os.listdir(split_dir)
+    # for all class folders step into directory to change annotations
+    for CLASS_DIR in CLASS_DIRS:
+        class_dir = os.path.join(split_dir, CLASS_DIR)
+        if not os.path.isdir(class_dir):
+            continue
+        print("Converting annotations for class: ", CLASS_DIR)
+
+        label_dir = os.path.join(class_dir, "Label")
+        if not os.path.isdir(label_dir):
+            print("No Label folder found, skipping:", class_dir)
+            continue
+        # convert() looks for each image in the parent of the working
+        # directory, so the Label folder must be current while it runs.
+        os.chdir(label_dir)
+
+        for filename in tqdm(os.listdir(label_dir)):
+            if not filename.endswith(".txt"):
+                continue
+            # splitext keeps image names that themselves contain dots intact.
+            filename_str = os.path.splitext(filename)[0]
+            annotations = []
+            with open(filename) as f:
+                for line in f:
+                    # Split from the right: the last four fields are the box,
+                    # everything before them is the class name, which may
+                    # contain spaces.
+                    labels = line.rsplit(None, 4)
+                    if len(labels) < 5:
+                        continue  # blank or incomplete row
+                    class_name = labels[0].strip()
+                    # Exact lookup, so a class whose name is contained in
+                    # another one is never partially rewritten. Unknown
+                    # names are kept unchanged.
+                    class_id = classes.get(class_name, class_name)
+                    coords = np.asarray([float(value) for value in labels[1:5]])
+                    coords = convert(filename_str, coords)
+                    annotations.append(" ".join(str(value) for value in (class_id, *coords)))
+            # The converted file goes next to the images, one level above Label.
+            with open(os.path.join(class_dir, filename), "w") as outfile:
+                for line in annotations:
+                    outfile.write(line + "\n")
+
+# Finish in the dataset directory.
+os.chdir(DATASET_DIR)
diff --git a/modules/downloader.py b/modules/downloader.py
index c09f93c..746101c 100644
--- a/modules/downloader.py
+++ b/modules/downloader.py
@@ -130,7 +130,7 @@ def get_label(folder, dataset_dir, class_name, class_code, df_val, class_list, a
box[2] *= int(dataset_image.shape[0])
box[3] *= int(dataset_image.shape[0])
- # each row in a file is name of the class_name, XMin, YMix, XMax, YMax (left top right bottom)
+ # each row in a file is name of the class_name, XMin, YMin, XMax, YMax (left top right bottom)
print(class_name, box[0], box[2], box[1], box[3], file=f)
except Exception as e:
diff --git a/requirements.txt b/requirements.txt
index c222e63..d3130ac 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,9 +1,6 @@
pandas
numpy
awscli
-
urllib3
-
tqdm
-
-opencv-python
\ No newline at end of file
+opencv-python