From 490b2da48dad426b5f160d6103b5dbd700ea198f Mon Sep 17 00:00:00 2001
From: The AI Guy <56439609+theAIGuysCode@users.noreply.github.com>
Date: Fri, 3 Jan 2020 21:16:35 -0500
Subject: [PATCH 1/7] Remove random line breaks
---
requirements.txt | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/requirements.txt b/requirements.txt
index c222e63..d3130ac 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,9 +1,6 @@
pandas
numpy
awscli
-
urllib3
-
tqdm
-
-opencv-python
\ No newline at end of file
+opencv-python
From 9083acf654dfac2bf9f79cd8a441756cf33db0af Mon Sep 17 00:00:00 2001
From: The AI Guy <56439609+theAIGuysCode@users.noreply.github.com>
Date: Fri, 3 Jan 2020 21:17:31 -0500
Subject: [PATCH 2/7] Fix small typo
---
modules/downloader.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/modules/downloader.py b/modules/downloader.py
index c09f93c..746101c 100644
--- a/modules/downloader.py
+++ b/modules/downloader.py
@@ -130,7 +130,7 @@ def get_label(folder, dataset_dir, class_name, class_code, df_val, class_list, a
box[2] *= int(dataset_image.shape[0])
box[3] *= int(dataset_image.shape[0])
- # each row in a file is name of the class_name, XMin, YMix, XMax, YMax (left top right bottom)
+ # each row in a file is name of the class_name, XMin, YMin, XMax, YMax (left top right bottom)
print(class_name, box[0], box[2], box[1], box[3], file=f)
except Exception as e:
From ed4ae373a8b2c649880f261fb8d7e559e25a6736 Mon Sep 17 00:00:00 2001
From: The AI Guy <56439609+theAIGuysCode@users.noreply.github.com>
Date: Sun, 5 Jan 2020 16:44:11 -0500
Subject: [PATCH 3/7] Add python script to convert annotations.
---
convert_annotations.py | 86 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 86 insertions(+)
create mode 100644 convert_annotations.py
diff --git a/convert_annotations.py b/convert_annotations.py
new file mode 100644
index 0000000..09246f6
--- /dev/null
+++ b/convert_annotations.py
@@ -0,0 +1,86 @@
+import os
+import cv2
+import numpy as np
+from tqdm import tqdm
+import argparse
+import fileinput
+
+# Convert pixel XMin, YMin, XMax, YMax box coordinates to normalized YOLO format (x_center, y_center, width, height)
+def convert(filename_str, coords):
+ os.chdir("..")
+ image = cv2.imread(filename_str + ".jpg")
+ coords[2] -= coords[0]
+ coords[3] -= coords[1]
+ x_diff = int(coords[2]/2)
+ y_diff = int(coords[3]/2)
+ coords[0] = coords[0]+x_diff
+ coords[1] = coords[1]+y_diff
+ coords[0] /= int(image.shape[1])
+ coords[1] /= int(image.shape[0])
+ coords[2] /= int(image.shape[1])
+ coords[3] /= int(image.shape[0])
+ os.chdir("Label")
+ return coords
+
+ROOT_DIR = os.getcwd()
+
+# create dict to map class names to numbers for yolo
+classes = {}
+with open("classes.txt", "r") as myFile:
+ for num, line in enumerate(myFile, 0):
+ line = line.rstrip("\n")
+ classes[line] = num
+ myFile.close()
+
+# step into dataset directory
+os.chdir(os.path.join("OID", "Dataset"))
+DIRS = os.listdir(os.getcwd())
+
+# for all train, validation and test folders
+for DIR in DIRS:
+ if os.path.isdir(DIR):
+ os.chdir(DIR)
+ print("Currently in subdirectory:", DIR)
+
+ CLASS_DIRS = os.listdir(os.getcwd())
+ # for all class folders rename folder if space occurs in name
+ for CLASS_DIR in CLASS_DIRS:
+ if " " in CLASS_DIR:
+ os.rename(CLASS_DIR, CLASS_DIR.replace(" ", "_"))
+
+ CLASS_DIRS = os.listdir(os.getcwd())
+ # for all class folders step into directory to change annotations
+ for CLASS_DIR in CLASS_DIRS:
+ if os.path.isdir(CLASS_DIR):
+ os.chdir(CLASS_DIR)
+ print("Converting annotations for class: ", CLASS_DIR)
+
+ # Step into Label folder where annotations are generated
+ os.chdir("Label")
+
+ for filename in tqdm(os.listdir(os.getcwd())):
+ filename_str = str.split(filename, ".")[0]
+ if filename.endswith(".txt"):
+ annotations = []
+ with open(filename) as f:
+ for line in f:
+ labels = line.split()
+ if classes.get(labels[0]) != None:
+ labels[0] = classes.get(labels[0])
+ coords = np.asarray([float(labels[1]), float(labels[2]), float(labels[3]), float(labels[4])])
+ coords = convert(filename_str, coords)
+ labels[1], labels[2], labels[3], labels[4] = coords[0], coords[1], coords[2], coords[3]
+ newline = str(labels[0]) + " " + str(labels[1]) + " " + str(labels[2]) + " " + str(labels[3]) + " " + str(labels[4])
+ line = line.replace(line, newline)
+ annotations.append(line)
+ f.close()
+ os.chdir("..")
+ with open(filename, "w") as outfile:
+ for line in annotations:
+ outfile.write(line)
+ outfile.write("\n")
+ outfile.close()
+ os.chdir("Label")
+ os.chdir("..")
+ os.chdir("..")
+ os.chdir("..")
From 2ad7d2953ff33d3c005db782344c90d8cbe8516f Mon Sep 17 00:00:00 2001
From: The AI Guy <56439609+theAIGuysCode@users.noreply.github.com>
Date: Sun, 5 Jan 2020 16:50:53 -0500
Subject: [PATCH 4/7] Update README.md
---
README.md | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/README.md b/README.md
index 3d1dfef..e03e1b8 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,13 @@
+
Forked repository and added conversion python script
+My added script is:
+**convert_annotations.py**
+
+Use toolkit normally to gather images from open images dataset. After gathering images just run from root directory:
+```bash
+python convert_annotations.py
+```
+This will generate .txt annotation files in the proper format for custom object detection with YOLOv3. The text files are written to the same folder as the images.
+
~ OIDv4 ToolKit ~
Do you want to build your personal object detector but you don't have enough images to train your model? Do you want to train your personal image classifier, but you are tired of the deadly slowness of ImageNet? Have you already discovered [Open Images Dataset v4](https://storage.googleapis.com/openimages/web/index.html) that has [600](https://storage.googleapis.com/openimages/2018_04/bbox_labels_600_hierarchy_visualizer/circle.html) classes and more than 1,700,000 images with related bounding boxes ready to use? Do you want to exploit it for your projects but you don't want to download gigabytes and gigabytes of data!?
From 5062dd62f76ad9e2c07f72104e1f34df3af1c794 Mon Sep 17 00:00:00 2001
From: The AI Guy <56439609+theAIGuysCode@users.noreply.github.com>
Date: Sun, 5 Jan 2020 16:51:48 -0500
Subject: [PATCH 5/7] Update README.md
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index e03e1b8..76ff2f1 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
Forked repository and added conversion python script
My added script is:
-**convert_annotations.py**
+convert_annotations.py
Use toolkit normally to gather images from open images dataset. After gathering images just run from root directory:
```bash
From bc87c777cff4864c61329f89739dabb401fd84a9 Mon Sep 17 00:00:00 2001
From: The AI Guy <56439609+theAIGuysCode@users.noreply.github.com>
Date: Sun, 5 Jan 2020 20:21:23 -0500
Subject: [PATCH 6/7] Updated code to handle multi-word classes
---
convert_annotations.py | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/convert_annotations.py b/convert_annotations.py
index 09246f6..ac146ee 100644
--- a/convert_annotations.py
+++ b/convert_annotations.py
@@ -29,6 +29,7 @@ def convert(filename_str, coords):
with open("classes.txt", "r") as myFile:
for num, line in enumerate(myFile, 0):
line = line.rstrip("\n")
+ line = line.replace(" ", "_")
classes[line] = num
myFile.close()
@@ -64,6 +65,9 @@ def convert(filename_str, coords):
annotations = []
with open(filename) as f:
for line in f:
+ # this is for finding classes with space in name ex. 'Alarm clock' and replacing space with underscore
+ if CLASS_DIR.replace("_", " ") in line:
+ line = line.replace(CLASS_DIR.replace("_", " "), CLASS_DIR)
labels = line.split()
if classes.get(labels[0]) != None:
labels[0] = classes.get(labels[0])
From 36d500d20e22e01904e672b8e96ed50b15e39aea Mon Sep 17 00:00:00 2001
From: The AI Guy <56439609+theAIGuysCode@users.noreply.github.com>
Date: Sun, 1 Mar 2020 13:34:07 -0500
Subject: [PATCH 7/7] Updated to work on multiclass labels.
---
convert_annotations.py | 21 +++++----------------
1 file changed, 5 insertions(+), 16 deletions(-)
diff --git a/convert_annotations.py b/convert_annotations.py
index ac146ee..96b8bcc 100644
--- a/convert_annotations.py
+++ b/convert_annotations.py
@@ -29,10 +29,8 @@ def convert(filename_str, coords):
with open("classes.txt", "r") as myFile:
for num, line in enumerate(myFile, 0):
line = line.rstrip("\n")
- line = line.replace(" ", "_")
classes[line] = num
myFile.close()
-
# step into dataset directory
os.chdir(os.path.join("OID", "Dataset"))
DIRS = os.listdir(os.getcwd())
@@ -43,12 +41,6 @@ def convert(filename_str, coords):
os.chdir(DIR)
print("Currently in subdirectory:", DIR)
- CLASS_DIRS = os.listdir(os.getcwd())
- # for all class folders rename folder if space occurs in name
- for CLASS_DIR in CLASS_DIRS:
- if " " in CLASS_DIR:
- os.rename(CLASS_DIR, CLASS_DIR.replace(" ", "_"))
-
CLASS_DIRS = os.listdir(os.getcwd())
# for all class folders step into directory to change annotations
for CLASS_DIR in CLASS_DIRS:
@@ -65,15 +57,12 @@ def convert(filename_str, coords):
annotations = []
with open(filename) as f:
for line in f:
- # this is for finding classes with space in name ex. 'Alarm clock' and replacing space with underscore
- if CLASS_DIR.replace("_", " ") in line:
- line = line.replace(CLASS_DIR.replace("_", " "), CLASS_DIR)
+ for class_type in classes:
+ line = line.replace(class_type, str(classes.get(class_type)))
labels = line.split()
- if classes.get(labels[0]) != None:
- labels[0] = classes.get(labels[0])
- coords = np.asarray([float(labels[1]), float(labels[2]), float(labels[3]), float(labels[4])])
- coords = convert(filename_str, coords)
- labels[1], labels[2], labels[3], labels[4] = coords[0], coords[1], coords[2], coords[3]
+ coords = np.asarray([float(labels[1]), float(labels[2]), float(labels[3]), float(labels[4])])
+ coords = convert(filename_str, coords)
+ labels[1], labels[2], labels[3], labels[4] = coords[0], coords[1], coords[2], coords[3]
newline = str(labels[0]) + " " + str(labels[1]) + " " + str(labels[2]) + " " + str(labels[3]) + " " + str(labels[4])
line = line.replace(line, newline)
annotations.append(line)