diff --git a/.gitignore b/.gitignore index 6a0f2d4..376f22a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,1712 +1,107 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - gitignore/Python.gitignore at master · github/gitignore - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- Skip to content -
- - - - - - - - - - -
- -
- - -
- -
- - - -
-
-
- - - - - - - - - - - - -
-
- -
    - - - - -
  • - -
    - -
    - - - Watch - - -
    - Notifications -
    -
    - - - - - - - -
    -
    -
    - -
    -
  • - -
  • -
    -
    - - -
    -
    - - -
    - -
  • - -
  • -
    -
    - -
  • -
- -

- - /gitignore - - -

- -
- - - - - - -
-
-
- - - - - - - Permalink - - - - -
- - -
- - Branch: - master - - - - - - - -
- -
- - Find file - - - Copy path - -
-
- - -
- - Find file - - - Copy path - -
-
- - - - -
- - -
-
- - 73 contributors - - -
- -

- Users who have contributed to this file -

-
- -
-
- - - @arcresu - - @shiftkey - - @Lucretiel - - @Harrison-G - - @jwg4 - - @GabrielC101 - - @Metallicow - - @misaelnieto - - @pmsosa - - @svkampen - - @vltr - - @matheussl - - @hugovk - - @ghisvail - - @EvandroLG - - @DSIW - - @amigcamel - - @toanant - - @2Cubed - - @brettcannon - - @weinihou - - @Visgean - - @nvie - - @skuschel - - @sigo - - @sethmlarson - - - - -
-
- - - - - -
- -
- -
- 125 lines (101 sloc) - - 1.67 KB -
- -
- -
- Raw - Blame - History -
- - -
- -
- -
-
- -
-
-
- - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
-
# C extensions
*.so
-
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
-
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
-
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
-
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
-
# Translations
*.mo
*.pot
-
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
-
# Flask stuff:
instance/
.webassets-cache
-
# Scrapy stuff:
.scrapy
-
# Sphinx documentation
docs/_build/
-
# PyBuilder
target/
-
# Jupyter Notebook
.ipynb_checkpoints
-
# IPython
profile_default/
ipython_config.py
-
# pyenv
.python-version
-
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
-
# celery beat schedule file
celerybeat-schedule
-
# SageMath parsed files
*.sage.py
-
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
-
# Spyder project settings
.spyderproject
.spyproject
-
# Rope project settings
.ropeproject
-
# mkdocs documentation
/site
-
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
-
# Pyre type checker
.pyre/
- - - -
- -
- - - -
- - -
- - -
-
- - - -
- -
- -
-
- - -
- - - - - - -
- - - You can’t perform that action at this time. -
- - - - - - - - - - - - - - -
- - - - +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# PyCharm +.idea diff --git a/main.py b/main.py index 7d6b3df..bf70914 100644 --- a/main.py +++ b/main.py @@ -13,15 +13,9 @@ Usage: refer to README.md file """ -from sys import exit -from textwrap import dedent -from modules.parser import * -from modules.utils import * -from modules.downloader import * -from modules.show import * -from modules.csv_downloader import * -from modules.bounding_boxes import * -from modules.image_level import * +from oidv4_toolkit.parser import * +from oidv4_toolkit.bounding_boxes import * +from oidv4_toolkit.image_level import * ROOT_DIR = '' diff --git a/modules/image_level.py b/modules/image_level.py deleted file mode 100644 index 9b2997f..0000000 --- a/modules/image_level.py +++ /dev/null @@ -1,155 +0,0 @@ -import os -from modules.utils import * -from modules.downloader import * -from modules.show import * -from modules.csv_downloader import * - -from modules.utils import bcolors as bc - -def image_level(args, DEFAULT_OID_DIR): - - if not args.Dataset: - dataset_dir = os.path.join(DEFAULT_OID_DIR, 'Dataset_nl') - csv_dir = os.path.join(DEFAULT_OID_DIR, 'csv_folder_nl') - else: - dataset_dir = os.path.join(DEFAULT_OID_DIR, args.Dataset) - csv_dir = os.path.join(DEFAULT_OID_DIR, 'csv_folder_nl') - - name_file_class = 'class-descriptions.csv' - CLASSES_CSV = os.path.join(csv_dir, name_file_class) - - if args.sub is None: - print(bc.FAIL + 'Missing subset argument.' + bc.ENDC) - exit(1) - - if args.sub == 'h': - - file_list = ['train-annotations-human-imagelabels.csv', \ - 'validation-annotations-human-imagelabels.csv', \ - 'test-annotations-human-imagelabels.csv'] - - if args.sub == 'm': - - file_list = ['train-annotations-machine-imagelabels.csv', \ - 'validation-annotations-machine-imagelabels.csv', \ - 'test-annotations-machine-imagelabels.csv'] - - if args.sub == 'h' or args.sub == 'm': - - logo(args.command) - - if args.type_csv is None: - print(bc.FAIL + 'Missing type_csv argument.' + bc.ENDC) - exit(1) - if args.classes is None: - print(bc.FAIL + 'Missing classes argument.' + bc.ENDC) - exit(1) - if args.multiclasses is None: - args.multiclasses = 0 - - folder = ['train', 'validation', 'test'] - - if args.classes[0].endswith('.txt'): - with open(args.classes[0]) as f: - args.classes = f.readlines() - args.classes = [x.strip() for x in args.classes] - else: - args.classes = [arg.replace('_', ' ') for arg in args.classes] - - if args.multiclasses == '0': - - mkdirs(dataset_dir, csv_dir, args.classes, args.type_csv) - - for classes in args.classes: - - class_name = classes - - error_csv(name_file_class, csv_dir, args.yes) - df_classes = pd.read_csv(CLASSES_CSV, header=None) - - class_code = df_classes.loc[df_classes[1] == class_name].values[0][0] - - if args.type_csv == 'train': - name_file = file_list[0] - df_val = TTV(csv_dir, name_file, args.yes) - if not args.n_threads: - download(args, df_val, folder[0], dataset_dir, class_name, class_code) - else: - download(args, df_val, folder[0], dataset_dir, class_name, class_code, threads = int(args.n_threads)) - - elif args.type_csv == 'validation': - name_file = file_list[1] - df_val = TTV(csv_dir, name_file, args.yes) - if not args.n_threads: - download(args, df_val, folder[1], dataset_dir, class_name, class_code) - else: - download(args, df_val, folder[1], dataset_dir, class_name, class_code, threads = int(args.n_threads)) - - elif args.type_csv == 'test': - name_file = file_list[2] - df_val = TTV(csv_dir, name_file, args.yes) - if not args.n_threads: - download(args, df_val, folder[2], dataset_dir, class_name, class_code) - else: - download(args, df_val, folder[2], dataset_dir, class_name, class_code, threads = int(args.n_threads)) - - elif args.type_csv == 'all': - for i in range(3): - name_file = file_list[i] - df_val = TTV(csv_dir, name_file, args.yes) - if not args.n_threads: - download(args, df_val, folder[i], dataset_dir, class_name, class_code) - else: - download(args, df_val, folder[i], dataset_dir, class_name, class_code, threads = int(args.n_threads)) - else: - print(bc.FAIL + 'csv file not specified' + bc.ENDC) - exit(1) - - elif args.multiclasses == '1': - - class_list = args.classes - print(bc.INFO + "Downloading {} together.".format(class_list) + bc.ENDC) - multiclass_name = ['_'.join(class_list)] - mkdirs(dataset_dir, csv_dir, multiclass_name, args.type_csv) - - error_csv(name_file_class, csv_dir, args.yes) - df_classes = pd.read_csv(CLASSES_CSV, header=None) - - class_dict = {} - for class_name in class_list: - class_dict[class_name] = df_classes.loc[df_classes[1] == class_name].values[0][0] - - for class_name in class_list: - - if args.type_csv == 'train': - name_file = file_list[0] - df_val = TTV(csv_dir, name_file, args.yes) - if not args.n_threads: - download(args, df_val, folder[0], dataset_dir, class_name, class_dict[class_name], class_list) - else: - download(args, df_val, folder[0], dataset_dir, class_name, class_dict[class_name], class_list, int(args.n_threads)) - - elif args.type_csv == 'validation': - name_file = file_list[1] - df_val = TTV(csv_dir, name_file, args.yes) - if not args.n_threads: - download(args, df_val, folder[1], dataset_dir, class_name, class_dict[class_name], class_list) - else: - download(args, df_val, folder[1], dataset_dir, class_name, class_dict[class_name], class_list, int(args.n_threads)) - - elif args.type_csv == 'test': - name_file = file_list[2] - df_val = TTV(csv_dir, name_file, args.yes) - if not args.n_threads: - download(args, df_val, folder[2], dataset_dir, class_name, class_dict[class_name], class_list) - else: - download(args, df_val, folder[2], dataset_dir, class_name, class_dict[class_name], class_list, int(args.n_threads)) - - elif args.type_csv == 'all': - for i in range(3): - name_file = file_list[i] - df_val = TTV(csv_dir, name_file, args.yes) - if not args.n_threads: - download(args, df_val, folder[i], dataset_dir, class_name, class_dict[class_name], class_list) - else: - download(args, df_val, folder[i], dataset_dir, class_name, class_dict[class_name], class_list, int(args.n_threads)) diff --git a/oidv4_toolkit/__init__.py b/oidv4_toolkit/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/oidv4_toolkit/__main__.py b/oidv4_toolkit/__main__.py new file mode 100644 index 0000000..22c8300 --- /dev/null +++ b/oidv4_toolkit/__main__.py @@ -0,0 +1,23 @@ +import os + +from oidv4_toolkit.parser import parser_arguments +from oidv4_toolkit.bounding_boxes import bounding_boxes_images +from oidv4_toolkit.image_level import image_level + +ROOT_DIR = '' +DEFAULT_OID_DIR = os.path.join(ROOT_DIR, 'OID') + + +def main(): + + args = parser_arguments() + + if args.command == 'downloader_ill': + image_level(args, DEFAULT_OID_DIR) + else: + bounding_boxes_images(args, DEFAULT_OID_DIR) + + +if __name__ == "__main__": + + main() diff --git a/modules/bounding_boxes.py b/oidv4_toolkit/bounding_boxes.py similarity index 97% rename from modules/bounding_boxes.py rename to oidv4_toolkit/bounding_boxes.py index 2a66e56..b27e4d7 100644 --- a/modules/bounding_boxes.py +++ b/oidv4_toolkit/bounding_boxes.py @@ -1,10 +1,10 @@ import os -from modules.utils import * -from modules.downloader import * -from modules.show import * -from modules.csv_downloader import * +from oidv4_toolkit.utils import * +from oidv4_toolkit.downloader import * +from oidv4_toolkit.show import * +from oidv4_toolkit.csv_downloader import * -from modules.utils import bcolors as bc +from oidv4_toolkit.utils import bcolors as bc def bounding_boxes_images(args, DEFAULT_OID_DIR): diff --git a/modules/csv_downloader.py b/oidv4_toolkit/csv_downloader.py similarity index 98% rename from modules/csv_downloader.py rename to oidv4_toolkit/csv_downloader.py index 7b2c29d..09035b8 100644 --- a/modules/csv_downloader.py +++ b/oidv4_toolkit/csv_downloader.py @@ -4,7 +4,7 @@ import urllib.request import pandas as pd -from modules.utils import bcolors as bc +from oidv4_toolkit.utils import bcolors as bc OID_URL = 'https://storage.googleapis.com/openimages/2018_04/' diff --git a/modules/downloader.py b/oidv4_toolkit/downloader.py similarity index 88% rename from modules/downloader.py rename to oidv4_toolkit/downloader.py index c09f93c..4f340e3 100644 --- a/modules/downloader.py +++ b/oidv4_toolkit/downloader.py @@ -1,10 +1,11 @@ import os import cv2 from tqdm import tqdm -from modules.utils import images_options -from modules.utils import bcolors as bc +from oidv4_toolkit.utils import images_options +from oidv4_toolkit.utils import bcolors as bc from multiprocessing.dummy import Pool as ThreadPool + def download(args, df_val, folder, dataset_dir, class_name, class_code, class_list=None, threads = 20): ''' Manage the download of the images and the label maker. @@ -18,19 +19,19 @@ def download(args, df_val, folder, dataset_dir, class_name, class_code, class_li :param threads: number of threads :return: None ''' - if os.name == 'posix': - rows, columns = os.popen('stty size', 'r').read().split() - elif os.name == 'nt': - try: - columns, rows = os.get_terminal_size(0) - except OSError: - columns, rows = os.get_terminal_size(1) - else: - columns = 50 - l = int((int(columns) - len(class_name))/2) - - print ('\n' + bc.HEADER + '-'*l + class_name + '-'*l + bc.ENDC) - print(bc.INFO + 'Downloading {} images.'.format(args.type_csv) + bc.ENDC) + # if os.name == 'posix': + # rows, columns = os.popen('stty size', 'r').read().split() + # elif os.name == 'nt': + # try: + # columns, rows = os.get_terminal_size(0) + # except OSError: + # columns, rows = os.get_terminal_size(1) + # else: + # columns = 50 + # l = int((int(columns) - len(class_name))/2) + # + # print ('\n' + bc.HEADER + '-'*l + class_name + '-'*l + bc.ENDC) + # print(bc.INFO + 'Downloading {} images.'.format(args.type_csv) + bc.ENDC) df_val_images = images_options(df_val, args) images_list = df_val_images['ImageID'][df_val_images.LabelName == class_code].values @@ -74,7 +75,7 @@ def download_img(folder, dataset_dir, class_name, images_list, threads): commands = [] for image in images_list: path = image_dir + '/' + str(image) + '.jpg ' + '"' + download_dir + '"' - command = 'aws s3 --no-sign-request --only-show-errors cp s3://open-images-dataset/' + path + command = 'aws s3 --no-sign-request --only-show-errors cp s3://open-images-dataset/' + path commands.append(command) list(tqdm(pool.imap(os.system, commands), total = len(commands) )) diff --git a/oidv4_toolkit/image_level.py b/oidv4_toolkit/image_level.py new file mode 100644 index 0000000..af6b53c --- /dev/null +++ b/oidv4_toolkit/image_level.py @@ -0,0 +1,156 @@ +import os +from oidv4_toolkit.utils import * +from oidv4_toolkit.downloader import * +from oidv4_toolkit.show import * +from oidv4_toolkit.csv_downloader import * + +from oidv4_toolkit.utils import bcolors as bc + + +def image_level(args, DEFAULT_OID_DIR): + + if not args.Dataset: + dataset_dir = os.path.join(DEFAULT_OID_DIR, 'Dataset_nl') + csv_dir = os.path.join(DEFAULT_OID_DIR, 'csv_folder_nl') + else: + dataset_dir = os.path.join(DEFAULT_OID_DIR, args.Dataset) + csv_dir = os.path.join(DEFAULT_OID_DIR, 'csv_folder_nl') + + name_file_class = 'class-descriptions.csv' + CLASSES_CSV = os.path.join(csv_dir, name_file_class) + + if args.sub is None: + print(bc.FAIL + 'Missing subset argument.' + bc.ENDC) + exit(1) + + if args.sub == 'h': + + file_list = ['train-annotations-human-imagelabels.csv', \ + 'validation-annotations-human-imagelabels.csv', \ + 'test-annotations-human-imagelabels.csv'] + + if args.sub == 'm': + + file_list = ['train-annotations-machine-imagelabels.csv', \ + 'validation-annotations-machine-imagelabels.csv', \ + 'test-annotations-machine-imagelabels.csv'] + + if args.sub == 'h' or args.sub == 'm': + + logo(args.command) + + if args.type_csv is None: + print(bc.FAIL + 'Missing type_csv argument.' + bc.ENDC) + exit(1) + if args.classes is None: + print(bc.FAIL + 'Missing classes argument.' + bc.ENDC) + exit(1) + if args.multiclasses is None: + args.multiclasses = 0 + + folder = ['train', 'validation', 'test'] + + if args.classes[0].endswith('.txt'): + with open(args.classes[0]) as f: + args.classes = f.readlines() + args.classes = [x.strip() for x in args.classes] + else: + args.classes = [arg.replace('_', ' ') for arg in args.classes] + + if args.multiclasses == '0': + + mkdirs(dataset_dir, csv_dir, args.classes, args.type_csv) + + for classes in args.classes: + + class_name = classes + + error_csv(name_file_class, csv_dir, args.yes) + df_classes = pd.read_csv(CLASSES_CSV, header=None) + + class_code = df_classes.loc[df_classes[1] == class_name].values[0][0] + + if args.type_csv == 'train': + name_file = file_list[0] + df_val = TTV(csv_dir, name_file, args.yes) + if not args.n_threads: + download(args, df_val, folder[0], dataset_dir, class_name, class_code) + else: + download(args, df_val, folder[0], dataset_dir, class_name, class_code, threads = int(args.n_threads)) + + elif args.type_csv == 'validation': + name_file = file_list[1] + df_val = TTV(csv_dir, name_file, args.yes) + if not args.n_threads: + download(args, df_val, folder[1], dataset_dir, class_name, class_code) + else: + download(args, df_val, folder[1], dataset_dir, class_name, class_code, threads = int(args.n_threads)) + + elif args.type_csv == 'test': + name_file = file_list[2] + df_val = TTV(csv_dir, name_file, args.yes) + if not args.n_threads: + download(args, df_val, folder[2], dataset_dir, class_name, class_code) + else: + download(args, df_val, folder[2], dataset_dir, class_name, class_code, threads = int(args.n_threads)) + + elif args.type_csv == 'all': + for i in range(3): + name_file = file_list[i] + df_val = TTV(csv_dir, name_file, args.yes) + if not args.n_threads: + download(args, df_val, folder[i], dataset_dir, class_name, class_code) + else: + download(args, df_val, folder[i], dataset_dir, class_name, class_code, threads = int(args.n_threads)) + else: + print(bc.FAIL + 'csv file not specified' + bc.ENDC) + exit(1) + + elif args.multiclasses == '1': + + class_list = args.classes + print(bc.INFO + "Downloading {} together.".format(class_list) + bc.ENDC) + multiclass_name = ['_'.join(class_list)] + mkdirs(dataset_dir, csv_dir, multiclass_name, args.type_csv) + + error_csv(name_file_class, csv_dir, args.yes) + df_classes = pd.read_csv(CLASSES_CSV, header=None) + + class_dict = {} + for class_name in class_list: + class_dict[class_name] = df_classes.loc[df_classes[1] == class_name].values[0][0] + + for class_name in class_list: + + if args.type_csv == 'train': + name_file = file_list[0] + df_val = TTV(csv_dir, name_file, args.yes) + if not args.n_threads: + download(args, df_val, folder[0], dataset_dir, class_name, class_dict[class_name], class_list) + else: + download(args, df_val, folder[0], dataset_dir, class_name, class_dict[class_name], class_list, int(args.n_threads)) + + elif args.type_csv == 'validation': + name_file = file_list[1] + df_val = TTV(csv_dir, name_file, args.yes) + if not args.n_threads: + download(args, df_val, folder[1], dataset_dir, class_name, class_dict[class_name], class_list) + else: + download(args, df_val, folder[1], dataset_dir, class_name, class_dict[class_name], class_list, int(args.n_threads)) + + elif args.type_csv == 'test': + name_file = file_list[2] + df_val = TTV(csv_dir, name_file, args.yes) + if not args.n_threads: + download(args, df_val, folder[2], dataset_dir, class_name, class_dict[class_name], class_list) + else: + download(args, df_val, folder[2], dataset_dir, class_name, class_dict[class_name], class_list, int(args.n_threads)) + + elif args.type_csv == 'all': + for i in range(3): + name_file = file_list[i] + df_val = TTV(csv_dir, name_file, args.yes) + if not args.n_threads: + download(args, df_val, folder[i], dataset_dir, class_name, class_dict[class_name], class_list) + else: + download(args, df_val, folder[i], dataset_dir, class_name, class_dict[class_name], class_list, int(args.n_threads)) diff --git a/modules/parser.py b/oidv4_toolkit/parser.py similarity index 66% rename from modules/parser.py rename to oidv4_toolkit/parser.py index cef592d..e8408e5 100644 --- a/modules/parser.py +++ b/oidv4_toolkit/parser.py @@ -1,58 +1,64 @@ import argparse + def parser_arguments(): - ''' + """ Manage the input from the terminal. :return: parser - ''' + """ parser = argparse.ArgumentParser(description='Open Image Dataset Downloader') parser.add_argument("command", - metavar=" 'downloader', 'visualizer' or 'ill_downloader'.", + metavar=" 'downloader', " + "'visualizer' or 'ill_downloader'.", help="'downloader', 'visualizer' or 'ill_downloader'.") parser.add_argument('--Dataset', required=False, metavar="/path/to/OID/csv/", help='Directory of the OID dataset folder') parser.add_argument('-y', '--yes', required=False, action='store_true', - #metavar="Yes to download missing files", + # metavar="Yes to download missing files", help='ans Yes to possible download of missing files') parser.add_argument('--classes', required=False, nargs='+', metavar="list of classes", help="Sequence of 'strings' of the wanted classes") - parser.add_argument('--type_csv', required=False, choices=['train', 'test', 'validation', 'all'], + parser.add_argument('--type_csv', required=False, + choices=['train', 'test', 'validation', 'all'], metavar="'train' or 'validation' or 'test' or 'all'", help='From what csv search the images') - parser.add_argument('--sub', required=False, choices=['h', 'm'], - metavar="Subset of human verified images or machine generated (h or m)", - help='Download from the human verified dataset or from the machine generated one.') - + metavar="Subset of human verified images or " + "machine generated (h or m)", + help='Download from the human verified dataset ' + 'or from the machine generated one.') parser.add_argument('--image_IsOccluded', required=False, choices=['0', '1'], metavar="1 or 0", - help='Optional characteristic of the images. Indicates that the object is occluded by another object in the image.') + help='Optional characteristic of the images. ' + 'Indicates that the object is occluded by ' + 'another object in the image.') parser.add_argument('--image_IsTruncated', required=False, choices=['0', '1'], metavar="1 or 0", - help='Optional characteristic of the images. Indicates that the object extends beyond the boundary of the image.') + help='Optional characteristic of the images. Indicates ' + 'that the object extends beyond the boundary of the image.') parser.add_argument('--image_IsGroupOf', required=False, choices=['0', '1'], metavar="1 or 0", - help='Optional characteristic of the images. Indicates that the box spans a group of objects (min 5).') + help='Optional characteristic of the images. ' + 'Indicates that the box spans a group of objects (min 5).') parser.add_argument('--image_IsDepiction', required=False, choices=['0', '1'], metavar="1 or 0", - help='Optional characteristic of the images. Indicates that the object is a depiction.') + help='Optional characteristic of the images. ' + 'Indicates that the object is a depiction.') parser.add_argument('--image_IsInside', required=False, choices=['0', '1'], metavar="1 or 0", - help='Optional characteristic of the images. Indicates a picture taken from the inside of the object.') - - parser.add_argument('--multiclasses', required=False, default='0', choices=['0', '1'], - metavar="0 (default) or 1", - help='Download different classes separately (0) or together (1)') - + help='Optional characteristic of the images. Indicates ' + 'a picture taken from the inside of the object.') + parser.add_argument('--multiclasses', required=False, default='0', + choices=['0', '1'], + metavar="0 (default) or 1", + help='Download different classes separately (0) or together (1)') parser.add_argument('--n_threads', required=False, metavar="[default 20]", - help='Num of the threads to use') - + help='Num of the threads to use') parser.add_argument('--noLabels', required=False, action='store_true', - help='No labels creations') - + help='No labels creations') parser.add_argument('--limit', required=False, type=int, default=None, metavar="integer number", help='Optional limit on number of images to download') diff --git a/modules/show.py b/oidv4_toolkit/show.py similarity index 93% rename from modules/show.py rename to oidv4_toolkit/show.py index a840901..0e604a0 100644 --- a/modules/show.py +++ b/oidv4_toolkit/show.py @@ -1,18 +1,20 @@ -import cv2 import os import re + +import cv2 import numpy as np class_list = [] color_dic = dict() flag = 0 + def color_gen(): ''' Generate a new color. As first color generates (0, 255, 0) ''' global flag - + if flag == 0: color = (0, 255, 0) flag += 1 @@ -21,6 +23,7 @@ def color_gen(): color = tuple(255 * np.random.rand(3)) return color + def show(class_name, download_dir, label_dir,total_images, index): ''' Show the images with the labeled boxes. @@ -31,7 +34,7 @@ def show(class_name, download_dir, label_dir,total_images, index): :param index: self explanatory :return: None ''' - + global class_list, color_dic if not os.listdir(download_dir)[index].endswith('.jpg'): @@ -50,17 +53,17 @@ def show(class_name, download_dir, label_dir,total_images, index): height = int((img.shape[0] * width) / img.shape[1]) cv2.resizeWindow(window_name, width, height) - for line in f: + for line in f: # each row in a file is class_name, XMin, YMix, XMax, YMax match_class_name = re.compile('^[a-zA-Z]+(\s+[a-zA-Z]+)*').match(line) class_name = line[:match_class_name.span()[1]] ax = line[match_class_name.span()[1]:].lstrip().rstrip().split(' ') - # opencv top left bottom right + # opencv top left bottom right if class_name not in class_list: class_list.append(class_name) - color = color_gen() - color_dic[class_name] = color + color = color_gen() + color_dic[class_name] = color font = cv2.FONT_HERSHEY_SIMPLEX r ,g, b = color_dic[class_name] diff --git a/modules/utils.py b/oidv4_toolkit/utils.py similarity index 97% rename from modules/utils.py rename to oidv4_toolkit/utils.py index baad371..1c5fb7e 100644 --- a/modules/utils.py +++ b/oidv4_toolkit/utils.py @@ -1,14 +1,14 @@ import os -from textwrap import dedent + def images_options(df_val, args): - ''' + """ Manage the options for the images downloader. :param df_val: DataFrame Value. :param args: argument parser. :return: modified df_val - ''' + """ if args.image_IsOccluded is not None: rejectedID = df_val.ImageID[df_val.IsOccluded != int(args.image_IsOccluded)].values df_val = df_val[~df_val.ImageID.isin(rejectedID)] @@ -31,19 +31,20 @@ def images_options(df_val, args): return df_val + def mkdirs(Dataset_folder, csv_folder, classes, type_csv): - ''' + """ Make the folder structure for the system. :param Dataset_folder: Self explanatory :param csv_folder: folder path of csv files :param classes: list of classes to download - :param type_csv: train, validation, test or all + :param type_csv: train, validation, test or all :return: None - ''' + """ directory_list = ['train', 'validation', 'test'] - + if not type_csv == 'all': for class_name in classes: if not Dataset_folder.endswith('_nl'): @@ -72,14 +73,15 @@ def mkdirs(Dataset_folder, csv_folder, classes, type_csv): if not os.path.exists(csv_folder): os.makedirs(csv_folder) + def progression_bar(total_images, index): - ''' + """ Print the progression bar for the download of the images. :param total_images: self explanatory :param index: self explanatory :return: None - ''' + """ # for windows os if os.name == 'nt': from ctypes import windll, create_string_buffer @@ -107,18 +109,20 @@ def progression_bar(total_images, index): bar = "[{}{}] {}/{}".format('-' * index, ' ' * (toolbar_width - index), image_index, total_images) print(bar.rjust(int(columns)), end='\r') + def show_classes(classes): - '''imag + """imag Show the downloaded classes in the selected folder during visualization mode - ''' + """ for n in classes: print("- {}".format(n)) print("\n") + def logo(command): - ''' + """ Print the logo for the downloader and the visualizer when selected - ''' + """ bc = bcolors print(bc.OKGREEN + """ @@ -165,13 +169,14 @@ def logo(command): ''' + bc.ENDC) + class bcolors: HEADER = '\033[95m' - + INFO = ' [INFO] | ' OKBLUE = '\033[94m[DOWNLOAD] | ' WARNING = '\033[93m [WARN] | ' FAIL = '\033[91m [ERROR] | ' OKGREEN = '\033[92m' - ENDC = '\033[0m' \ No newline at end of file + ENDC = '\033[0m' diff --git a/requirements.txt b/requirements.txt index c222e63..7616547 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,18 @@ -pandas -numpy -awscli - -urllib3 - -tqdm - -opencv-python \ No newline at end of file +awscli==1.16.230 +botocore==1.12.220 +certifi==2019.6.16 +colorama==0.3.9 +docutils==0.15.2 +jmespath==0.9.4 +numpy==1.22.0 +opencv-python>=4.2.0.32 +pandas==0.25.1 +pyasn1==0.4.7 +python-dateutil==2.8.0 +pytz==2019.2 +PyYAML==5.4 +rsa==4.7 +s3transfer==0.2.1 +six==1.12.0 +tqdm==4.35.0 +urllib3==1.26.5 diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..f186377 --- /dev/null +++ b/setup.py @@ -0,0 +1,48 @@ +import pathlib +from setuptools import setup + +# the directory containing this file +BASE_DIR = pathlib.Path(__file__).parent + +# the text of the README file +README = (BASE_DIR / "README.md").read_text() + +setup( + name="OIDv4_ToolKit", + version="1.0.0", + url="https://github.com/EscVM/OIDv4_ToolKit", + license="GPL3", + author="Vittorio, Angelo", + author_email="EscVM@github.com", + description=( + "Toolkit to facilitate the download and usage of " + "the Open Images Dataset (v4)." + ), + long_description=README, + long_description_content_type="text/markdown", + packages=["oidv4_toolkit"], + include_package_data=True, + install_requires=[ + "awscli", + "pandas", + "opencv-python", + "tqdm", + ], + keywords=[ + "openimages", + ], + classifiers=[ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Intended Audience :: Education", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.7", + ], + entry_points={ + "console_scripts": [ + "oidv4_toolkit = oidv4_toolkit.__main__:main", + ], + } +)