diff --git a/.gitignore b/.gitignore index 69435bb8..c0f4fe14 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,8 @@ *.pyc .ipynb_checkpoints +images +test_images +*.avi +train.record +val.record +object_detection diff --git a/create_tf_record.py b/create_tf_record.py index 9774a306..e0969e19 100644 --- a/create_tf_record.py +++ b/create_tf_record.py @@ -148,6 +148,7 @@ def create_tf_record(output_filename, writer.close() def main(_): + logging.getLogger().setLevel(logging.INFO) label_map_dict = label_map_util.get_label_map_dict('annotations/label_map.pbtxt') logging.info('Reading from Pet dataset.') diff --git a/extract_towncentre.py b/extract_towncentre.py index 5d3f64a4..52397ce5 100644 --- a/extract_towncentre.py +++ b/extract_towncentre.py @@ -1,23 +1,30 @@ import os import cv2 import numpy as np +import logging as log #Dataset from http://www.robots.ox.ac.uk/ActiveVision/Research/Projects/2009bbenfold_headpose/project.html#datasets -def video2im(src, train_path='images', test_path='test_images', factor=2): +def video2im(src='TownCentreXVID.avi', train_path='images', test_path='test_images', factor=2): """ Extracts all frames from a video and saves them as jpgs """ - os.mkdir(train_path) - os.mkdir(test_path) + try: + os.mkdir(train_path) + os.mkdir(test_path) + except FileExistsError as fee: + log.error(f"Error creating output directories - {fee.strerror}: {fee.filename}") + logging.getLogger().setLevel(logging.INFO) + log.info("delete or rename offending directory") + return frame = 0 cap = cv2.VideoCapture(src) length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) - + print('Total Frame Count:', length ) - + while True: check, img = cap.read() if check: @@ -25,17 +32,46 @@ def video2im(src, train_path='images', test_path='test_images', factor=2): path = train_path else: path = test_path - + img = cv2.resize(img, (1920 // factor, 1080 // factor)) cv2.imwrite(os.path.join(path, str(frame) + ".jpg"), img) frame += 1 print('Processed: ',frame, end = '\r') - + else: 
break - + cap.release() +def validate_video_path(path): + """ + returns a tuple. first element of the tuple indicates whether the validation succeeded + second element is an optional logging message + """ + if os.path.exists(path): + return (True, f"Processing {path}...") + else: + return (False, f"{path} does not exist") + +# validator=validate_video_path, processor=video2i +def process_video_cmd_args(argv, validator=validate_video_path, processor=video2im): + """ + calls video2im() with validated path or none (default path to be used) when there are no args passed + """ + try: + path = argv[1] + status, msg = validator(path) + if status: + processor(src=path) + log.info(msg) + else: + log.error(msg) + except IndexError: + log.warning('Video file path was not passed to script arguements, trying default location, name') + processor() + + if __name__ == '__main__': - video2im('TownCentreXVID.avi') + import sys + process_video_cmd_args(sys.argv, validate_video_path, video2im) diff --git a/object_detection/BUILD b/object_detection/BUILD deleted file mode 100644 index df835b74..00000000 --- a/object_detection/BUILD +++ /dev/null @@ -1,136 +0,0 @@ -# Tensorflow Object Detection API: main runnables. 
- -package( - default_visibility = ["//visibility:public"], -) - -licenses(["notice"]) - -# Apache 2.0 -py_binary( - name = "train", - srcs = [ - "train.py", - ], - deps = [ - ":trainer", - "//tensorflow", - "//tensorflow_models/object_detection/builders:input_reader_builder", - "//tensorflow_models/object_detection/builders:model_builder", - "//tensorflow_models/object_detection/utils:config_util", - ], -) - -py_library( - name = "trainer", - srcs = ["trainer.py"], - deps = [ - "//tensorflow", - "//tensorflow_models/object_detection/builders:optimizer_builder", - "//tensorflow_models/object_detection/builders:preprocessor_builder", - "//tensorflow_models/object_detection/core:batcher", - "//tensorflow_models/object_detection/core:preprocessor", - "//tensorflow_models/object_detection/core:standard_fields", - "//tensorflow_models/object_detection/utils:ops", - "//tensorflow_models/object_detection/utils:variables_helper", - "//tensorflow_models/slim:model_deploy", - ], -) - -py_test( - name = "trainer_test", - srcs = ["trainer_test.py"], - deps = [ - ":trainer", - "//tensorflow", - "//tensorflow_models/object_detection/core:losses", - "//tensorflow_models/object_detection/core:model", - "//tensorflow_models/object_detection/core:standard_fields", - "//tensorflow_models/object_detection/protos:train_py_pb2", - ], -) - -py_library( - name = "eval_util", - srcs = [ - "eval_util.py", - ], - deps = [ - "//tensorflow", - "//tensorflow_models/object_detection/core:box_list", - "//tensorflow_models/object_detection/core:box_list_ops", - "//tensorflow_models/object_detection/core:keypoint_ops", - "//tensorflow_models/object_detection/core:standard_fields", - "//tensorflow_models/object_detection/utils:label_map_util", - "//tensorflow_models/object_detection/utils:ops", - "//tensorflow_models/object_detection/utils:visualization_utils", - ], -) - -py_library( - name = "evaluator", - srcs = ["evaluator.py"], - deps = [ - "//tensorflow", - 
"//tensorflow_models/object_detection:eval_util", - "//tensorflow_models/object_detection/core:prefetcher", - "//tensorflow_models/object_detection/core:standard_fields", - "//tensorflow_models/object_detection/protos:eval_py_pb2", - "//tensorflow_models/object_detection/utils:object_detection_evaluation", - ], -) - -py_binary( - name = "eval", - srcs = [ - "eval.py", - ], - deps = [ - ":evaluator", - "//tensorflow", - "//tensorflow_models/object_detection/builders:input_reader_builder", - "//tensorflow_models/object_detection/builders:model_builder", - "//tensorflow_models/object_detection/utils:config_util", - "//tensorflow_models/object_detection/utils:label_map_util", - ], -) - -py_library( - name = "exporter", - srcs = [ - "exporter.py", - ], - deps = [ - "//tensorflow", - "//tensorflow/python/tools:freeze_graph_lib", - "//tensorflow_models/object_detection/builders:model_builder", - "//tensorflow_models/object_detection/core:standard_fields", - "//tensorflow_models/object_detection/data_decoders:tf_example_decoder", - ], -) - -py_test( - name = "exporter_test", - srcs = [ - "exporter_test.py", - ], - deps = [ - ":exporter", - "//tensorflow", - "//tensorflow_models/object_detection/builders:model_builder", - "//tensorflow_models/object_detection/core:model", - "//tensorflow_models/object_detection/protos:pipeline_py_pb2", - ], -) - -py_binary( - name = "export_inference_graph", - srcs = [ - "export_inference_graph.py", - ], - deps = [ - ":exporter", - "//tensorflow", - "//tensorflow_models/object_detection/protos:pipeline_py_pb2", - ], -) diff --git a/object_detection/CONTRIBUTING.md b/object_detection/CONTRIBUTING.md deleted file mode 100644 index e3d87e3c..00000000 --- a/object_detection/CONTRIBUTING.md +++ /dev/null @@ -1,13 +0,0 @@ -# Contributing to the Tensorflow Object Detection API - -Patches to Tensorflow Object Detection API are welcome! - -We require contributors to fill out either the individual or corporate -Contributor License Agreement (CLA). 
- - * If you are an individual writing original source code and you're sure you own the intellectual property, then you'll need to sign an [individual CLA](http://code.google.com/legal/individual-cla-v1.0.html). - * If you work for a company that wants to allow you to contribute your work, then you'll need to sign a [corporate CLA](http://code.google.com/legal/corporate-cla-v1.0.html). - -Please follow the -[Tensorflow contributing guidelines](https://github.com/tensorflow/tensorflow/blob/master/CONTRIBUTING.md) -when submitting pull requests. diff --git a/object_detection/README.md b/object_detection/README.md deleted file mode 100644 index a0d8ddf2..00000000 --- a/object_detection/README.md +++ /dev/null @@ -1,155 +0,0 @@ - -# Tensorflow Object Detection API -Creating accurate machine learning models capable of localizing and identifying -multiple objects in a single image remains a core challenge in computer vision. -The TensorFlow Object Detection API is an open source framework built on top of -TensorFlow that makes it easy to construct, train and deploy object detection -models. At Google we’ve certainly found this codebase to be useful for our -computer vision needs, and we hope that you will as well. -

- -

-Contributions to the codebase are welcome and we would love to hear back from -you if you find this API useful. Finally if you use the Tensorflow Object -Detection API for a research publication, please consider citing: - -``` -"Speed/accuracy trade-offs for modern convolutional object detectors." -Huang J, Rathod V, Sun C, Zhu M, Korattikara A, Fathi A, Fischer I, Wojna Z, -Song Y, Guadarrama S, Murphy K, CVPR 2017 -``` -\[[link](https://arxiv.org/abs/1611.10012)\]\[[bibtex]( -https://scholar.googleusercontent.com/scholar.bib?q=info:l291WsrB-hQJ:scholar.google.com/&output=citation&scisig=AAGBfm0AAAAAWUIIlnPZ_L9jxvPwcC49kDlELtaeIyU-&scisf=4&ct=citation&cd=-1&hl=en&scfhb=1)\] - -

- -

- -## Maintainers - -* Jonathan Huang, github: [jch1](https://github.com/jch1) -* Vivek Rathod, github: [tombstone](https://github.com/tombstone) -* Derek Chow, github: [derekjchow](https://github.com/derekjchow) -* Chen Sun, github: [jesu9](https://github.com/jesu9) -* Menglong Zhu, github: [dreamdragon](https://github.com/dreamdragon) - - -## Table of contents - -Quick Start: - - * - Quick Start: Jupyter notebook for off-the-shelf inference
- * Quick Start: Training a pet detector
- -Setup: - - * Installation
- * - Configuring an object detection pipeline
- * Preparing inputs
- -Running: - - * Running locally
- * Running on the cloud
- -Extras: - - * Tensorflow detection model zoo
- * - Exporting a trained model for inference
- * - Defining your own model architecture
- * - Bringing in your own dataset
- * - Supported object detection evaluation protocols
- * - Inference and evaluation on the Open Images dataset
- -## Getting Help - -To get help with issues you may encounter using the Tensorflow Object Detection -API, create a new question on [StackOverflow](https://stackoverflow.com/) with -the tags "tensorflow" and "object-detection". - -Please report bugs (actually broken code, not usage questions) to the -tensorflow/models Github -[issue tracker](https://github.com/tensorflow/models/issues), prefixing the -issue name with "object_detection". - - - -## Release information - -### November 17, 2017 - -As a part of the Open Images V3 release we have released: - -* An implementation of the Open Images evaluation metric and the [protocol](g3doc/evaluation_protocols.md#open-images). -* Additional tools to separate inference of detection and evaluation (see [this tutorial](g3doc/oid_inference_and_evaluation.md)). -* A new detection model trained on the Open Images V2 data release (see [Open Images model](g3doc/detection_model_zoo.md#open-images-models)). - -See more information on the [Open Images website](https://github.com/openimages/dataset)! - -Thanks to contributors: Stefan Popov, Alina Kuznetsova - -### November 6, 2017 - -We have re-released faster versions of our (pre-trained) models in the -model zoo. In addition to what -was available before, we are also adding Faster R-CNN models trained on COCO -with Inception V2 and Resnet-50 feature extractors, as well as a Faster R-CNN -with Resnet-101 model trained on the KITTI dataset. - -Thanks to contributors: Jonathan Huang, Vivek Rathod, Derek Chow, -Tal Remez, Chen Sun. - -### October 31, 2017 - -We have released a new state-of-the-art model for object detection using -the Faster-RCNN with the -[NASNet-A image featurization](https://arxiv.org/abs/1707.07012). This -model achieves mAP of 43.1% on the test-dev validation dataset for COCO, -improving on the best available model in the zoo by 6% in terms -of absolute mAP. 
- -Thanks to contributors: Barret Zoph, Vijay Vasudevan, Jonathon Shlens, Quoc Le - -### August 11, 2017 - -We have released an update to the [Android Detect -demo](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android) -which will now run models trained using the Tensorflow Object -Detection API on an Android device. By default, it currently runs a -frozen SSD w/Mobilenet detector trained on COCO, but we encourage -you to try out other detection models! - -Thanks to contributors: Jonathan Huang, Andrew Harp - - -### June 15, 2017 - -In addition to our base Tensorflow detection model definitions, this -release includes: - -* A selection of trainable detection models, including: - * Single Shot Multibox Detector (SSD) with MobileNet, - * SSD with Inception V2, - * Region-Based Fully Convolutional Networks (R-FCN) with Resnet 101, - * Faster RCNN with Resnet 101, - * Faster RCNN with Inception Resnet v2 -* Frozen weights (trained on the COCO dataset) for each of the above models to - be used for out-of-the-box inference purposes. -* A [Jupyter notebook](object_detection_tutorial.ipynb) for performing - out-of-the-box inference with one of our released models -* Convenient [local training](g3doc/running_locally.md) scripts as well as - distributed training and evaluation pipelines via - [Google Cloud](g3doc/running_on_cloud.md). 
- - -Thanks to contributors: Jonathan Huang, Vivek Rathod, Derek Chow, -Chen Sun, Menglong Zhu, Matthew Tang, Anoop Korattikara, Alireza Fathi, Ian Fischer, Zbigniew Wojna, Yang Song, Sergio Guadarrama, Jasper Uijlings, -Viacheslav Kovalevskyi, Kevin Murphy - diff --git a/object_detection/__init__.py b/object_detection/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/object_detection/__pycache__/__init__.cpython-35.pyc b/object_detection/__pycache__/__init__.cpython-35.pyc deleted file mode 100644 index a9a89667..00000000 Binary files a/object_detection/__pycache__/__init__.cpython-35.pyc and /dev/null differ diff --git a/object_detection/__pycache__/exporter.cpython-35.pyc b/object_detection/__pycache__/exporter.cpython-35.pyc deleted file mode 100644 index b0148785..00000000 Binary files a/object_detection/__pycache__/exporter.cpython-35.pyc and /dev/null differ diff --git a/object_detection/anchor_generators/BUILD b/object_detection/anchor_generators/BUILD deleted file mode 100644 index cb421a0c..00000000 --- a/object_detection/anchor_generators/BUILD +++ /dev/null @@ -1,56 +0,0 @@ -# Tensorflow Object Detection API: Anchor Generator implementations. 
- -package( - default_visibility = ["//visibility:public"], -) - -licenses(["notice"]) - -# Apache 2.0 -py_library( - name = "grid_anchor_generator", - srcs = [ - "grid_anchor_generator.py", - ], - deps = [ - "//tensorflow", - "//tensorflow_models/object_detection/core:anchor_generator", - "//tensorflow_models/object_detection/core:box_list", - "//tensorflow_models/object_detection/utils:ops", - ], -) - -py_test( - name = "grid_anchor_generator_test", - srcs = [ - "grid_anchor_generator_test.py", - ], - deps = [ - ":grid_anchor_generator", - "//tensorflow", - ], -) - -py_library( - name = "multiple_grid_anchor_generator", - srcs = [ - "multiple_grid_anchor_generator.py", - ], - deps = [ - ":grid_anchor_generator", - "//tensorflow", - "//tensorflow_models/object_detection/core:anchor_generator", - "//tensorflow_models/object_detection/core:box_list_ops", - ], -) - -py_test( - name = "multiple_grid_anchor_generator_test", - srcs = [ - "multiple_grid_anchor_generator_test.py", - ], - deps = [ - ":multiple_grid_anchor_generator", - "//third_party/py/numpy", - ], -) diff --git a/object_detection/anchor_generators/__init__.py b/object_detection/anchor_generators/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/object_detection/anchor_generators/__pycache__/__init__.cpython-35.pyc b/object_detection/anchor_generators/__pycache__/__init__.cpython-35.pyc deleted file mode 100644 index 23341928..00000000 Binary files a/object_detection/anchor_generators/__pycache__/__init__.cpython-35.pyc and /dev/null differ diff --git a/object_detection/anchor_generators/__pycache__/grid_anchor_generator.cpython-35.pyc b/object_detection/anchor_generators/__pycache__/grid_anchor_generator.cpython-35.pyc deleted file mode 100644 index 1d43e0c5..00000000 Binary files a/object_detection/anchor_generators/__pycache__/grid_anchor_generator.cpython-35.pyc and /dev/null differ diff --git 
a/object_detection/anchor_generators/__pycache__/multiple_grid_anchor_generator.cpython-35.pyc b/object_detection/anchor_generators/__pycache__/multiple_grid_anchor_generator.cpython-35.pyc deleted file mode 100644 index e3e39400..00000000 Binary files a/object_detection/anchor_generators/__pycache__/multiple_grid_anchor_generator.cpython-35.pyc and /dev/null differ diff --git a/object_detection/anchor_generators/grid_anchor_generator.py b/object_detection/anchor_generators/grid_anchor_generator.py deleted file mode 100644 index d2ea2c07..00000000 --- a/object_detection/anchor_generators/grid_anchor_generator.py +++ /dev/null @@ -1,194 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Generates grid anchors on the fly as used in Faster RCNN. - -Generates grid anchors on the fly as described in: -"Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks" -Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. 
-""" - -import tensorflow as tf - -from object_detection.core import anchor_generator -from object_detection.core import box_list -from object_detection.utils import ops - - -class GridAnchorGenerator(anchor_generator.AnchorGenerator): - """Generates a grid of anchors at given scales and aspect ratios.""" - - def __init__(self, - scales=(0.5, 1.0, 2.0), - aspect_ratios=(0.5, 1.0, 2.0), - base_anchor_size=None, - anchor_stride=None, - anchor_offset=None): - """Constructs a GridAnchorGenerator. - - Args: - scales: a list of (float) scales, default=(0.5, 1.0, 2.0) - aspect_ratios: a list of (float) aspect ratios, default=(0.5, 1.0, 2.0) - base_anchor_size: base anchor size as height, width ( - (length-2 float32 list, default=[256, 256]) - anchor_stride: difference in centers between base anchors for adjacent - grid positions (length-2 float32 list, default=[16, 16]) - anchor_offset: center of the anchor with scale and aspect ratio 1 for the - upper left element of the grid, this should be zero for - feature networks with only VALID padding and even receptive - field size, but may need additional calculation if other - padding is used (length-2 float32 tensor, default=[0, 0]) - """ - # Handle argument defaults - if base_anchor_size is None: - base_anchor_size = [256, 256] - base_anchor_size = tf.constant(base_anchor_size, tf.float32) - if anchor_stride is None: - anchor_stride = [16, 16] - anchor_stride = tf.constant(anchor_stride, dtype=tf.float32) - if anchor_offset is None: - anchor_offset = [0, 0] - anchor_offset = tf.constant(anchor_offset, dtype=tf.float32) - - self._scales = scales - self._aspect_ratios = aspect_ratios - self._base_anchor_size = base_anchor_size - self._anchor_stride = anchor_stride - self._anchor_offset = anchor_offset - - def name_scope(self): - return 'GridAnchorGenerator' - - def num_anchors_per_location(self): - """Returns the number of anchors per spatial location. 
- - Returns: - a list of integers, one for each expected feature map to be passed to - the `generate` function. - """ - return [len(self._scales) * len(self._aspect_ratios)] - - def _generate(self, feature_map_shape_list): - """Generates a collection of bounding boxes to be used as anchors. - - Args: - feature_map_shape_list: list of pairs of convnet layer resolutions in the - format [(height_0, width_0)]. For example, setting - feature_map_shape_list=[(8, 8)] asks for anchors that correspond - to an 8x8 layer. For this anchor generator, only lists of length 1 are - allowed. - - Returns: - boxes: a BoxList holding a collection of N anchor boxes - Raises: - ValueError: if feature_map_shape_list, box_specs_list do not have the same - length. - ValueError: if feature_map_shape_list does not consist of pairs of - integers - """ - if not (isinstance(feature_map_shape_list, list) - and len(feature_map_shape_list) == 1): - raise ValueError('feature_map_shape_list must be a list of length 1.') - if not all([isinstance(list_item, tuple) and len(list_item) == 2 - for list_item in feature_map_shape_list]): - raise ValueError('feature_map_shape_list must be a list of pairs.') - grid_height, grid_width = feature_map_shape_list[0] - scales_grid, aspect_ratios_grid = ops.meshgrid(self._scales, - self._aspect_ratios) - scales_grid = tf.reshape(scales_grid, [-1]) - aspect_ratios_grid = tf.reshape(aspect_ratios_grid, [-1]) - return tile_anchors(grid_height, - grid_width, - scales_grid, - aspect_ratios_grid, - self._base_anchor_size, - self._anchor_stride, - self._anchor_offset) - - -def tile_anchors(grid_height, - grid_width, - scales, - aspect_ratios, - base_anchor_size, - anchor_stride, - anchor_offset): - """Create a tiled set of anchors strided along a grid in image space. - - This op creates a set of anchor boxes by placing a "basis" collection of - boxes with user-specified scales and aspect ratios centered at evenly - distributed points along a grid. 
The basis collection is specified via the - scale and aspect_ratios arguments. For example, setting scales=[.1, .2, .2] - and aspect ratios = [2,2,1/2] means that we create three boxes: one with scale - .1, aspect ratio 2, one with scale .2, aspect ratio 2, and one with scale .2 - and aspect ratio 1/2. Each box is multiplied by "base_anchor_size" before - placing it over its respective center. - - Grid points are specified via grid_height, grid_width parameters as well as - the anchor_stride and anchor_offset parameters. - - Args: - grid_height: size of the grid in the y direction (int or int scalar tensor) - grid_width: size of the grid in the x direction (int or int scalar tensor) - scales: a 1-d (float) tensor representing the scale of each box in the - basis set. - aspect_ratios: a 1-d (float) tensor representing the aspect ratio of each - box in the basis set. The length of the scales and aspect_ratios tensors - must be equal. - base_anchor_size: base anchor size as [height, width] - (float tensor of shape [2]) - anchor_stride: difference in centers between base anchors for adjacent grid - positions (float tensor of shape [2]) - anchor_offset: center of the anchor with scale and aspect ratio 1 for the - upper left element of the grid, this should be zero for - feature networks with only VALID padding and even receptive - field size, but may need some additional calculation if other - padding is used (float tensor of shape [2]) - Returns: - a BoxList holding a collection of N anchor boxes - """ - ratio_sqrts = tf.sqrt(aspect_ratios) - heights = scales / ratio_sqrts * base_anchor_size[0] - widths = scales * ratio_sqrts * base_anchor_size[1] - - # Get a grid of box centers - y_centers = tf.to_float(tf.range(grid_height)) - y_centers = y_centers * anchor_stride[0] + anchor_offset[0] - x_centers = tf.to_float(tf.range(grid_width)) - x_centers = x_centers * anchor_stride[1] + anchor_offset[1] - x_centers, y_centers = ops.meshgrid(x_centers, y_centers) - - 
widths_grid, x_centers_grid = ops.meshgrid(widths, x_centers) - heights_grid, y_centers_grid = ops.meshgrid(heights, y_centers) - bbox_centers = tf.stack([y_centers_grid, x_centers_grid], axis=3) - bbox_sizes = tf.stack([heights_grid, widths_grid], axis=3) - bbox_centers = tf.reshape(bbox_centers, [-1, 2]) - bbox_sizes = tf.reshape(bbox_sizes, [-1, 2]) - bbox_corners = _center_size_bbox_to_corners_bbox(bbox_centers, bbox_sizes) - return box_list.BoxList(bbox_corners) - - -def _center_size_bbox_to_corners_bbox(centers, sizes): - """Converts bbox center-size representation to corners representation. - - Args: - centers: a tensor with shape [N, 2] representing bounding box centers - sizes: a tensor with shape [N, 2] representing bounding boxes - - Returns: - corners: tensor with shape [N, 4] representing bounding boxes in corners - representation - """ - return tf.concat([centers - .5 * sizes, centers + .5 * sizes], 1) diff --git a/object_detection/anchor_generators/grid_anchor_generator_test.py b/object_detection/anchor_generators/grid_anchor_generator_test.py deleted file mode 100644 index 80a82a39..00000000 --- a/object_detection/anchor_generators/grid_anchor_generator_test.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for object_detection.grid_anchor_generator.""" - -import tensorflow as tf - -from object_detection.anchor_generators import grid_anchor_generator - - -class GridAnchorGeneratorTest(tf.test.TestCase): - - def test_construct_single_anchor(self): - """Builds a 1x1 anchor grid to test the size of the output boxes.""" - scales = [0.5, 1.0, 2.0] - aspect_ratios = [0.25, 1.0, 4.0] - anchor_offset = [7, -3] - exp_anchor_corners = [[-121, -35, 135, 29], [-249, -67, 263, 61], - [-505, -131, 519, 125], [-57, -67, 71, 61], - [-121, -131, 135, 125], [-249, -259, 263, 253], - [-25, -131, 39, 125], [-57, -259, 71, 253], - [-121, -515, 135, 509]] - - anchor_generator = grid_anchor_generator.GridAnchorGenerator( - scales, aspect_ratios, - anchor_offset=anchor_offset) - anchors = anchor_generator.generate(feature_map_shape_list=[(1, 1)]) - anchor_corners = anchors.get() - - with self.test_session(): - anchor_corners_out = anchor_corners.eval() - self.assertAllClose(anchor_corners_out, exp_anchor_corners) - - def test_construct_anchor_grid(self): - base_anchor_size = [10, 10] - anchor_stride = [19, 19] - anchor_offset = [0, 0] - scales = [0.5, 1.0, 2.0] - aspect_ratios = [1.0] - - exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.], - [-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5], - [-5., 14., 5, 24], [-10., 9., 10, 29], - [16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5], - [9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5], - [14., 14., 24, 24], [9., 9., 29, 29]] - - anchor_generator = grid_anchor_generator.GridAnchorGenerator( - scales, - aspect_ratios, - base_anchor_size=base_anchor_size, - anchor_stride=anchor_stride, - anchor_offset=anchor_offset) - - anchors = anchor_generator.generate(feature_map_shape_list=[(2, 2)]) - anchor_corners = anchors.get() - - with self.test_session(): - anchor_corners_out = anchor_corners.eval() - self.assertAllClose(anchor_corners_out, 
exp_anchor_corners) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/anchor_generators/multiple_grid_anchor_generator.py b/object_detection/anchor_generators/multiple_grid_anchor_generator.py deleted file mode 100644 index b49f12dc..00000000 --- a/object_detection/anchor_generators/multiple_grid_anchor_generator.py +++ /dev/null @@ -1,338 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Generates grid anchors on the fly corresponding to multiple CNN layers. - -Generates grid anchors on the fly corresponding to multiple CNN layers as -described in: -"SSD: Single Shot MultiBox Detector" -Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, -Cheng-Yang Fu, Alexander C. 
Berg -(see Section 2.2: Choosing scales and aspect ratios for default boxes) -""" - -import numpy as np - -import tensorflow as tf - -from object_detection.anchor_generators import grid_anchor_generator -from object_detection.core import anchor_generator -from object_detection.core import box_list_ops - - -class MultipleGridAnchorGenerator(anchor_generator.AnchorGenerator): - """Generate a grid of anchors for multiple CNN layers.""" - - def __init__(self, - box_specs_list, - base_anchor_size=None, - anchor_strides=None, - anchor_offsets=None, - clip_window=None): - """Constructs a MultipleGridAnchorGenerator. - - To construct anchors, at multiple grid resolutions, one must provide a - list of feature_map_shape_list (e.g., [(8, 8), (4, 4)]), and for each grid - size, a corresponding list of (scale, aspect ratio) box specifications. - - For example: - box_specs_list = [[(.1, 1.0), (.1, 2.0)], # for 8x8 grid - [(.2, 1.0), (.3, 1.0), (.2, 2.0)]] # for 4x4 grid - - To support the fully convolutional setting, we pass grid sizes in at - generation time, while scale and aspect ratios are fixed at construction - time. - - Args: - box_specs_list: list of list of (scale, aspect ratio) pairs with the - outside list having the same number of entries as feature_map_shape_list - (which is passed in at generation time). - base_anchor_size: base anchor size as [height, width] - (length-2 float tensor, default=[1.0, 1.0]). - The height and width values are normalized to the - minimum dimension of the input height and width, so that - when the base anchor height equals the base anchor - width, the resulting anchor is square even if the input - image is not square. - anchor_strides: list of pairs of strides in pixels (in y and x directions - respectively). 
For example, setting anchor_strides=[(25, 25), (50, 50)] - means that we want the anchors corresponding to the first layer to be - strided by 25 pixels and those in the second layer to be strided by 50 - pixels in both y and x directions. If anchor_strides=None, they are set - to be the reciprocal of the corresponding feature map shapes. - anchor_offsets: list of pairs of offsets in pixels (in y and x directions - respectively). The offset specifies where we want the center of the - (0, 0)-th anchor to lie for each layer. For example, setting - anchor_offsets=[(10, 10), (20, 20)]) means that we want the - (0, 0)-th anchor of the first layer to lie at (10, 10) in pixel space - and likewise that we want the (0, 0)-th anchor of the second layer to - lie at (25, 25) in pixel space. If anchor_offsets=None, then they are - set to be half of the corresponding anchor stride. - clip_window: a tensor of shape [4] specifying a window to which all - anchors should be clipped. If clip_window is None, then no clipping - is performed. 
- - Raises: - ValueError: if box_specs_list is not a list of list of pairs - ValueError: if clip_window is not either None or a tensor of shape [4] - """ - if isinstance(box_specs_list, list) and all( - [isinstance(list_item, list) for list_item in box_specs_list]): - self._box_specs = box_specs_list - else: - raise ValueError('box_specs_list is expected to be a ' - 'list of lists of pairs') - if base_anchor_size is None: - base_anchor_size = tf.constant([256, 256], dtype=tf.float32) - self._base_anchor_size = base_anchor_size - self._anchor_strides = anchor_strides - self._anchor_offsets = anchor_offsets - if clip_window is not None and clip_window.get_shape().as_list() != [4]: - raise ValueError('clip_window must either be None or a shape [4] tensor') - self._clip_window = clip_window - self._scales = [] - self._aspect_ratios = [] - for box_spec in self._box_specs: - if not all([isinstance(entry, tuple) and len(entry) == 2 - for entry in box_spec]): - raise ValueError('box_specs_list is expected to be a ' - 'list of lists of pairs') - scales, aspect_ratios = zip(*box_spec) - self._scales.append(scales) - self._aspect_ratios.append(aspect_ratios) - - for arg, arg_name in zip([self._anchor_strides, self._anchor_offsets], - ['anchor_strides', 'anchor_offsets']): - if arg and not (isinstance(arg, list) and - len(arg) == len(self._box_specs)): - raise ValueError('%s must be a list with the same length ' - 'as self._box_specs' % arg_name) - if arg and not all([ - isinstance(list_item, tuple) and len(list_item) == 2 - for list_item in arg - ]): - raise ValueError('%s must be a list of pairs.' % arg_name) - - def name_scope(self): - return 'MultipleGridAnchorGenerator' - - def num_anchors_per_location(self): - """Returns the number of anchors per spatial location. - - Returns: - a list of integers, one for each expected feature map to be passed to - the Generate function. 
- """ - return [len(box_specs) for box_specs in self._box_specs] - - def _generate(self, feature_map_shape_list, im_height=1, im_width=1): - """Generates a collection of bounding boxes to be used as anchors. - - The number of anchors generated for a single grid with shape MxM where we - place k boxes over each grid center is k*M^2 and thus the total number of - anchors is the sum over all grids. In our box_specs_list example - (see the constructor docstring), we would place two boxes over each grid - point on an 8x8 grid and three boxes over each grid point on a 4x4 grid and - thus end up with 2*8^2 + 3*4^2 = 176 anchors in total. The layout of the - output anchors follows the order of how the grid sizes and box_specs are - specified (with box_spec index varying the fastest, followed by width - index, then height index, then grid index). - - Args: - feature_map_shape_list: list of pairs of convnet layer resolutions in the - format [(height_0, width_0), (height_1, width_1), ...]. For example, - setting feature_map_shape_list=[(8, 8), (7, 7)] asks for anchors that - correspond to an 8x8 layer followed by a 7x7 layer. - im_height: the height of the image to generate the grid for. If both - im_height and im_width are 1, the generated anchors default to - normalized coordinates, otherwise absolute coordinates are used for the - grid. - im_width: the width of the image to generate the grid for. If both - im_height and im_width are 1, the generated anchors default to - normalized coordinates, otherwise absolute coordinates are used for the - grid. - - Returns: - boxes: a BoxList holding a collection of N anchor boxes - Raises: - ValueError: if feature_map_shape_list, box_specs_list do not have the same - length. 
- ValueError: if feature_map_shape_list does not consist of pairs of - integers - """ - if not (isinstance(feature_map_shape_list, list) - and len(feature_map_shape_list) == len(self._box_specs)): - raise ValueError('feature_map_shape_list must be a list with the same ' - 'length as self._box_specs') - if not all([isinstance(list_item, tuple) and len(list_item) == 2 - for list_item in feature_map_shape_list]): - raise ValueError('feature_map_shape_list must be a list of pairs.') - - im_height = tf.to_float(im_height) - im_width = tf.to_float(im_width) - - if not self._anchor_strides: - anchor_strides = [(1.0 / tf.to_float(pair[0]), 1.0 / tf.to_float(pair[1])) - for pair in feature_map_shape_list] - else: - anchor_strides = [(tf.to_float(stride[0]) / im_height, - tf.to_float(stride[1]) / im_width) - for stride in self._anchor_strides] - if not self._anchor_offsets: - anchor_offsets = [(0.5 * stride[0], 0.5 * stride[1]) - for stride in anchor_strides] - else: - anchor_offsets = [(tf.to_float(offset[0]) / im_height, - tf.to_float(offset[1]) / im_width) - for offset in self._anchor_offsets] - - for arg, arg_name in zip([anchor_strides, anchor_offsets], - ['anchor_strides', 'anchor_offsets']): - if not (isinstance(arg, list) and len(arg) == len(self._box_specs)): - raise ValueError('%s must be a list with the same length ' - 'as self._box_specs' % arg_name) - if not all([isinstance(list_item, tuple) and len(list_item) == 2 - for list_item in arg]): - raise ValueError('%s must be a list of pairs.' 
% arg_name) - - anchor_grid_list = [] - min_im_shape = tf.minimum(im_height, im_width) - scale_height = min_im_shape / im_height - scale_width = min_im_shape / im_width - base_anchor_size = [ - scale_height * self._base_anchor_size[0], - scale_width * self._base_anchor_size[1] - ] - for grid_size, scales, aspect_ratios, stride, offset in zip( - feature_map_shape_list, self._scales, self._aspect_ratios, - anchor_strides, anchor_offsets): - anchor_grid_list.append( - grid_anchor_generator.tile_anchors( - grid_height=grid_size[0], - grid_width=grid_size[1], - scales=scales, - aspect_ratios=aspect_ratios, - base_anchor_size=base_anchor_size, - anchor_stride=stride, - anchor_offset=offset)) - concatenated_anchors = box_list_ops.concatenate(anchor_grid_list) - num_anchors = concatenated_anchors.num_boxes_static() - if num_anchors is None: - num_anchors = concatenated_anchors.num_boxes() - if self._clip_window is not None: - concatenated_anchors = box_list_ops.clip_to_window( - concatenated_anchors, self._clip_window, filter_nonoverlapping=False) - # TODO(jonathanhuang): make reshape an option for the clip_to_window op - concatenated_anchors.set( - tf.reshape(concatenated_anchors.get(), [num_anchors, 4])) - - stddevs_tensor = 0.01 * tf.ones( - [num_anchors, 4], dtype=tf.float32, name='stddevs') - concatenated_anchors.add_field('stddev', stddevs_tensor) - - return concatenated_anchors - - -def create_ssd_anchors(num_layers=6, - min_scale=0.2, - max_scale=0.95, - scales=None, - aspect_ratios=(1.0, 2.0, 3.0, 1.0 / 2, 1.0 / 3), - interpolated_scale_aspect_ratio=1.0, - base_anchor_size=None, - anchor_strides=None, - anchor_offsets=None, - reduce_boxes_in_lowest_layer=True): - """Creates MultipleGridAnchorGenerator for SSD anchors. - - This function instantiates a MultipleGridAnchorGenerator that reproduces - ``default box`` construction proposed by Liu et al in the SSD paper. - See Section 2.2 for details. 
Grid sizes are assumed to be passed in - at generation time from finest resolution to coarsest resolution --- this is - used to (linearly) interpolate scales of anchor boxes corresponding to the - intermediate grid sizes. - - Anchors that are returned by calling the `generate` method on the returned - MultipleGridAnchorGenerator object are always in normalized coordinates - and clipped to the unit square: (i.e. all coordinates lie in [0, 1]x[0, 1]). - - Args: - num_layers: integer number of grid layers to create anchors for (actual - grid sizes passed in at generation time) - min_scale: scale of anchors corresponding to finest resolution (float) - max_scale: scale of anchors corresponding to coarsest resolution (float) - scales: As list of anchor scales to use. When not None and not emtpy, - min_scale and max_scale are not used. - aspect_ratios: list or tuple of (float) aspect ratios to place on each - grid point. - interpolated_scale_aspect_ratio: An additional anchor is added with this - aspect ratio and a scale interpolated between the scale for a layer - and the scale for the next layer (1.0 for the last layer). - This anchor is not included if this value is 0. - base_anchor_size: base anchor size as [height, width]. - The height and width values are normalized to the minimum dimension of the - input height and width, so that when the base anchor height equals the - base anchor width, the resulting anchor is square even if the input image - is not square. - anchor_strides: list of pairs of strides in pixels (in y and x directions - respectively). For example, setting anchor_strides=[(25, 25), (50, 50)] - means that we want the anchors corresponding to the first layer to be - strided by 25 pixels and those in the second layer to be strided by 50 - pixels in both y and x directions. If anchor_strides=None, they are set to - be the reciprocal of the corresponding feature map shapes. 
- anchor_offsets: list of pairs of offsets in pixels (in y and x directions - respectively). The offset specifies where we want the center of the - (0, 0)-th anchor to lie for each layer. For example, setting - anchor_offsets=[(10, 10), (20, 20)]) means that we want the - (0, 0)-th anchor of the first layer to lie at (10, 10) in pixel space - and likewise that we want the (0, 0)-th anchor of the second layer to lie - at (25, 25) in pixel space. If anchor_offsets=None, then they are set to - be half of the corresponding anchor stride. - reduce_boxes_in_lowest_layer: a boolean to indicate whether the fixed 3 - boxes per location is used in the lowest layer. - - Returns: - a MultipleGridAnchorGenerator - """ - if base_anchor_size is None: - base_anchor_size = [1.0, 1.0] - base_anchor_size = tf.constant(base_anchor_size, dtype=tf.float32) - box_specs_list = [] - if scales is None or not scales: - scales = [min_scale + (max_scale - min_scale) * i / (num_layers - 1) - for i in range(num_layers)] + [1.0] - else: - # Add 1.0 to the end, which will only be used in scale_next below and used - # for computing an interpolated scale for the largest scale in the list. - scales += [1.0] - - for layer, scale, scale_next in zip( - range(num_layers), scales[:-1], scales[1:]): - layer_box_specs = [] - if layer == 0 and reduce_boxes_in_lowest_layer: - layer_box_specs = [(0.1, 1.0), (scale, 2.0), (scale, 0.5)] - else: - for aspect_ratio in aspect_ratios: - layer_box_specs.append((scale, aspect_ratio)) - # Add one more anchor, with a scale between the current scale, and the - # scale for the next layer, with a specified aspect ratio (1.0 by - # default). 
- if interpolated_scale_aspect_ratio > 0.0: - layer_box_specs.append((np.sqrt(scale*scale_next), - interpolated_scale_aspect_ratio)) - box_specs_list.append(layer_box_specs) - - return MultipleGridAnchorGenerator(box_specs_list, base_anchor_size, - anchor_strides, anchor_offsets) diff --git a/object_detection/anchor_generators/multiple_grid_anchor_generator_test.py b/object_detection/anchor_generators/multiple_grid_anchor_generator_test.py deleted file mode 100644 index 03ec970b..00000000 --- a/object_detection/anchor_generators/multiple_grid_anchor_generator_test.py +++ /dev/null @@ -1,267 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for anchor_generators.multiple_grid_anchor_generator_test.py.""" - -import numpy as np - -import tensorflow as tf - -from object_detection.anchor_generators import multiple_grid_anchor_generator as ag - - -class MultipleGridAnchorGeneratorTest(tf.test.TestCase): - - def test_construct_single_anchor_grid(self): - """Builds a 1x1 anchor grid to test the size of the output boxes.""" - exp_anchor_corners = [[-121, -35, 135, 29], [-249, -67, 263, 61], - [-505, -131, 519, 125], [-57, -67, 71, 61], - [-121, -131, 135, 125], [-249, -259, 263, 253], - [-25, -131, 39, 125], [-57, -259, 71, 253], - [-121, -515, 135, 509]] - - box_specs_list = [[(.5, .25), (1.0, .25), (2.0, .25), - (.5, 1.0), (1.0, 1.0), (2.0, 1.0), - (.5, 4.0), (1.0, 4.0), (2.0, 4.0)]] - anchor_generator = ag.MultipleGridAnchorGenerator( - box_specs_list, - base_anchor_size=tf.constant([256, 256], dtype=tf.float32), - anchor_strides=[(16, 16)], - anchor_offsets=[(7, -3)]) - anchors = anchor_generator.generate(feature_map_shape_list=[(1, 1)]) - anchor_corners = anchors.get() - with self.test_session(): - anchor_corners_out = anchor_corners.eval() - self.assertAllClose(anchor_corners_out, exp_anchor_corners) - - def test_construct_anchor_grid(self): - box_specs_list = [[(0.5, 1.0), (1.0, 1.0), (2.0, 1.0)]] - - exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.], - [-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5], - [-5., 14., 5, 24], [-10., 9., 10, 29], - [16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5], - [9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5], - [14., 14., 24, 24], [9., 9., 29, 29]] - - anchor_generator = ag.MultipleGridAnchorGenerator( - box_specs_list, - base_anchor_size=tf.constant([10, 10], dtype=tf.float32), - anchor_strides=[(19, 19)], - anchor_offsets=[(0, 0)]) - anchors = anchor_generator.generate(feature_map_shape_list=[(2, 2)]) - anchor_corners = anchors.get() - - with self.test_session(): - 
anchor_corners_out = anchor_corners.eval() - self.assertAllClose(anchor_corners_out, exp_anchor_corners) - - def test_construct_anchor_grid_non_square(self): - box_specs_list = [[(1.0, 1.0)]] - - exp_anchor_corners = [[0., -0.25, 1., 0.75], [0., 0.25, 1., 1.25]] - - anchor_generator = ag.MultipleGridAnchorGenerator( - box_specs_list, base_anchor_size=tf.constant([1, 1], dtype=tf.float32)) - anchors = anchor_generator.generate(feature_map_shape_list=[(tf.constant( - 1, dtype=tf.int32), tf.constant(2, dtype=tf.int32))]) - anchor_corners = anchors.get() - - with self.test_session(): - anchor_corners_out = anchor_corners.eval() - self.assertAllClose(anchor_corners_out, exp_anchor_corners) - - def test_construct_anchor_grid_normalized(self): - box_specs_list = [[(1.0, 1.0)]] - - exp_anchor_corners = [[0., 0., 1., 0.5], [0., 0.5, 1., 1.]] - - anchor_generator = ag.MultipleGridAnchorGenerator( - box_specs_list, base_anchor_size=tf.constant([1, 1], dtype=tf.float32)) - anchors = anchor_generator.generate( - feature_map_shape_list=[(tf.constant(1, dtype=tf.int32), tf.constant( - 2, dtype=tf.int32))], - im_height=320, - im_width=640) - anchor_corners = anchors.get() - - with self.test_session(): - anchor_corners_out = anchor_corners.eval() - self.assertAllClose(anchor_corners_out, exp_anchor_corners) - - def test_construct_multiple_grids(self): - box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)], - [(1.0, 1.0), (1.0, 0.5)]] - - # height and width of box with .5 aspect ratio - h = np.sqrt(2) - w = 1.0/np.sqrt(2) - exp_small_grid_corners = [[-.25, -.25, .75, .75], - [.25-.5*h, .25-.5*w, .25+.5*h, .25+.5*w], - [-.25, .25, .75, 1.25], - [.25-.5*h, .75-.5*w, .25+.5*h, .75+.5*w], - [.25, -.25, 1.25, .75], - [.75-.5*h, .25-.5*w, .75+.5*h, .25+.5*w], - [.25, .25, 1.25, 1.25], - [.75-.5*h, .75-.5*w, .75+.5*h, .75+.5*w]] - # only test first entry of larger set of anchors - exp_big_grid_corners = [[.125-.5, .125-.5, .125+.5, .125+.5], - [.125-1.0, .125-1.0, .125+1.0, .125+1.0], - 
[.125-.5*h, .125-.5*w, .125+.5*h, .125+.5*w],] - - anchor_generator = ag.MultipleGridAnchorGenerator( - box_specs_list, - base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32), - anchor_strides=[(.25, .25), (.5, .5)], - anchor_offsets=[(.125, .125), (.25, .25)]) - anchors = anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)]) - anchor_corners = anchors.get() - - with self.test_session(): - anchor_corners_out = anchor_corners.eval() - self.assertEquals(anchor_corners_out.shape, (56, 4)) - big_grid_corners = anchor_corners_out[0:3, :] - small_grid_corners = anchor_corners_out[48:, :] - self.assertAllClose(small_grid_corners, exp_small_grid_corners) - self.assertAllClose(big_grid_corners, exp_big_grid_corners) - - def test_construct_multiple_grids_with_clipping(self): - box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)], - [(1.0, 1.0), (1.0, 0.5)]] - - # height and width of box with .5 aspect ratio - h = np.sqrt(2) - w = 1.0/np.sqrt(2) - exp_small_grid_corners = [[0, 0, .75, .75], - [0, 0, .25+.5*h, .25+.5*w], - [0, .25, .75, 1], - [0, .75-.5*w, .25+.5*h, 1], - [.25, 0, 1, .75], - [.75-.5*h, 0, 1, .25+.5*w], - [.25, .25, 1, 1], - [.75-.5*h, .75-.5*w, 1, 1]] - - clip_window = tf.constant([0, 0, 1, 1], dtype=tf.float32) - anchor_generator = ag.MultipleGridAnchorGenerator( - box_specs_list, - base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32), - clip_window=clip_window) - anchors = anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)]) - anchor_corners = anchors.get() - - with self.test_session(): - anchor_corners_out = anchor_corners.eval() - small_grid_corners = anchor_corners_out[48:, :] - self.assertAllClose(small_grid_corners, exp_small_grid_corners) - - def test_invalid_box_specs(self): - # not all box specs are pairs - box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)], - [(1.0, 1.0), (1.0, 0.5, .3)]] - with self.assertRaises(ValueError): - ag.MultipleGridAnchorGenerator(box_specs_list) - - # box_specs_list is not a 
list of lists - box_specs_list = [(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)] - with self.assertRaises(ValueError): - ag.MultipleGridAnchorGenerator(box_specs_list) - - def test_invalid_generate_arguments(self): - box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)], - [(1.0, 1.0), (1.0, 0.5)]] - - # incompatible lengths with box_specs_list - with self.assertRaises(ValueError): - anchor_generator = ag.MultipleGridAnchorGenerator( - box_specs_list, - base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32), - anchor_strides=[(.25, .25)], - anchor_offsets=[(.125, .125), (.25, .25)]) - anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)]) - with self.assertRaises(ValueError): - anchor_generator = ag.MultipleGridAnchorGenerator( - box_specs_list, - base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32), - anchor_strides=[(.25, .25), (.5, .5)], - anchor_offsets=[(.125, .125), (.25, .25)]) - anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2), (1, 1)]) - with self.assertRaises(ValueError): - anchor_generator = ag.MultipleGridAnchorGenerator( - box_specs_list, - base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32), - anchor_strides=[(.5, .5)], - anchor_offsets=[(.25, .25)]) - anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)]) - - # not pairs - with self.assertRaises(ValueError): - anchor_generator = ag.MultipleGridAnchorGenerator( - box_specs_list, - base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32), - anchor_strides=[(.25, .25), (.5, .5)], - anchor_offsets=[(.125, .125), (.25, .25)]) - anchor_generator.generate(feature_map_shape_list=[(4, 4, 4), (2, 2)]) - with self.assertRaises(ValueError): - anchor_generator = ag.MultipleGridAnchorGenerator( - box_specs_list, - base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32), - anchor_strides=[(.25, .25, .1), (.5, .5)], - anchor_offsets=[(.125, .125), (.25, .25)]) - anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)]) - with self.assertRaises(ValueError): 
- anchor_generator = ag.MultipleGridAnchorGenerator( - box_specs_list, - base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32), - anchor_strides=[(.25, .25), (.5, .5)], - anchor_offsets=[(.125, .125), (.25, .25)]) - anchor_generator.generate(feature_map_shape_list=[(4), (2, 2)]) - - -class CreateSSDAnchorsTest(tf.test.TestCase): - - def test_create_ssd_anchors_returns_correct_shape(self): - anchor_generator = ag.create_ssd_anchors( - num_layers=6, - min_scale=0.2, - max_scale=0.95, - aspect_ratios=(1.0, 2.0, 3.0, 1.0 / 2, 1.0 / 3), - reduce_boxes_in_lowest_layer=True) - - feature_map_shape_list = [(38, 38), (19, 19), (10, 10), - (5, 5), (3, 3), (1, 1)] - anchors = anchor_generator.generate( - feature_map_shape_list=feature_map_shape_list) - anchor_corners = anchors.get() - with self.test_session(): - anchor_corners_out = anchor_corners.eval() - self.assertEquals(anchor_corners_out.shape, (7308, 4)) - - anchor_generator = ag.create_ssd_anchors( - num_layers=6, min_scale=0.2, max_scale=0.95, - aspect_ratios=(1.0, 2.0, 3.0, 1.0/2, 1.0/3), - reduce_boxes_in_lowest_layer=False) - - feature_map_shape_list = [(38, 38), (19, 19), (10, 10), - (5, 5), (3, 3), (1, 1)] - anchors = anchor_generator.generate( - feature_map_shape_list=feature_map_shape_list) - anchor_corners = anchors.get() - with self.test_session(): - anchor_corners_out = anchor_corners.eval() - self.assertEquals(anchor_corners_out.shape, (11640, 4)) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/box_coders/BUILD b/object_detection/box_coders/BUILD deleted file mode 100644 index ecb3cc7a..00000000 --- a/object_detection/box_coders/BUILD +++ /dev/null @@ -1,102 +0,0 @@ -# Tensorflow Object Detection API: Box Coder implementations. 
- -package( - default_visibility = ["//visibility:public"], -) - -licenses(["notice"]) - -# Apache 2.0 -py_library( - name = "faster_rcnn_box_coder", - srcs = [ - "faster_rcnn_box_coder.py", - ], - deps = [ - "//tensorflow_models/object_detection/core:box_coder", - "//tensorflow_models/object_detection/core:box_list", - ], -) - -py_test( - name = "faster_rcnn_box_coder_test", - srcs = [ - "faster_rcnn_box_coder_test.py", - ], - deps = [ - ":faster_rcnn_box_coder", - "//tensorflow", - "//tensorflow_models/object_detection/core:box_list", - ], -) - -py_library( - name = "keypoint_box_coder", - srcs = [ - "keypoint_box_coder.py", - ], - deps = [ - "//tensorflow_models/object_detection/core:box_coder", - "//tensorflow_models/object_detection/core:box_list", - "//tensorflow_models/object_detection/core:standard_fields", - ], -) - -py_test( - name = "keypoint_box_coder_test", - srcs = [ - "keypoint_box_coder_test.py", - ], - deps = [ - ":keypoint_box_coder", - "//tensorflow", - "//tensorflow_models/object_detection/core:box_list", - "//tensorflow_models/object_detection/core:standard_fields", - ], -) - -py_library( - name = "mean_stddev_box_coder", - srcs = [ - "mean_stddev_box_coder.py", - ], - deps = [ - "//tensorflow_models/object_detection/core:box_coder", - "//tensorflow_models/object_detection/core:box_list", - ], -) - -py_test( - name = "mean_stddev_box_coder_test", - srcs = [ - "mean_stddev_box_coder_test.py", - ], - deps = [ - ":mean_stddev_box_coder", - "//tensorflow", - "//tensorflow_models/object_detection/core:box_list", - ], -) - -py_library( - name = "square_box_coder", - srcs = [ - "square_box_coder.py", - ], - deps = [ - "//tensorflow_models/object_detection/core:box_coder", - "//tensorflow_models/object_detection/core:box_list", - ], -) - -py_test( - name = "square_box_coder_test", - srcs = [ - "square_box_coder_test.py", - ], - deps = [ - ":square_box_coder", - "//tensorflow", - "//tensorflow_models/object_detection/core:box_list", - ], -) diff --git 
a/object_detection/box_coders/__init__.py b/object_detection/box_coders/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/object_detection/box_coders/__pycache__/__init__.cpython-35.pyc b/object_detection/box_coders/__pycache__/__init__.cpython-35.pyc deleted file mode 100644 index 975284e4..00000000 Binary files a/object_detection/box_coders/__pycache__/__init__.cpython-35.pyc and /dev/null differ diff --git a/object_detection/box_coders/__pycache__/faster_rcnn_box_coder.cpython-35.pyc b/object_detection/box_coders/__pycache__/faster_rcnn_box_coder.cpython-35.pyc deleted file mode 100644 index 8dcd397f..00000000 Binary files a/object_detection/box_coders/__pycache__/faster_rcnn_box_coder.cpython-35.pyc and /dev/null differ diff --git a/object_detection/box_coders/__pycache__/keypoint_box_coder.cpython-35.pyc b/object_detection/box_coders/__pycache__/keypoint_box_coder.cpython-35.pyc deleted file mode 100644 index f0c7151d..00000000 Binary files a/object_detection/box_coders/__pycache__/keypoint_box_coder.cpython-35.pyc and /dev/null differ diff --git a/object_detection/box_coders/__pycache__/mean_stddev_box_coder.cpython-35.pyc b/object_detection/box_coders/__pycache__/mean_stddev_box_coder.cpython-35.pyc deleted file mode 100644 index fbee9205..00000000 Binary files a/object_detection/box_coders/__pycache__/mean_stddev_box_coder.cpython-35.pyc and /dev/null differ diff --git a/object_detection/box_coders/__pycache__/square_box_coder.cpython-35.pyc b/object_detection/box_coders/__pycache__/square_box_coder.cpython-35.pyc deleted file mode 100644 index 6f3a5280..00000000 Binary files a/object_detection/box_coders/__pycache__/square_box_coder.cpython-35.pyc and /dev/null differ diff --git a/object_detection/box_coders/faster_rcnn_box_coder.py b/object_detection/box_coders/faster_rcnn_box_coder.py deleted file mode 100644 index af25e21a..00000000 --- a/object_detection/box_coders/faster_rcnn_box_coder.py +++ /dev/null @@ -1,118 +0,0 @@ -# 
Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Faster RCNN box coder. - -Faster RCNN box coder follows the coding schema described below: - ty = (y - ya) / ha - tx = (x - xa) / wa - th = log(h / ha) - tw = log(w / wa) - where x, y, w, h denote the box's center coordinates, width and height - respectively. Similarly, xa, ya, wa, ha denote the anchor's center - coordinates, width and height. tx, ty, tw and th denote the anchor-encoded - center, width and height respectively. - - See http://arxiv.org/abs/1506.01497 for details. -""" - -import tensorflow as tf - -from object_detection.core import box_coder -from object_detection.core import box_list - -EPSILON = 1e-8 - - -class FasterRcnnBoxCoder(box_coder.BoxCoder): - """Faster RCNN box coder.""" - - def __init__(self, scale_factors=None): - """Constructor for FasterRcnnBoxCoder. - - Args: - scale_factors: List of 4 positive scalars to scale ty, tx, th and tw. - If set to None, does not perform scaling. For Faster RCNN, - the open-source implementation recommends using [10.0, 10.0, 5.0, 5.0]. 
- """ - if scale_factors: - assert len(scale_factors) == 4 - for scalar in scale_factors: - assert scalar > 0 - self._scale_factors = scale_factors - - @property - def code_size(self): - return 4 - - def _encode(self, boxes, anchors): - """Encode a box collection with respect to anchor collection. - - Args: - boxes: BoxList holding N boxes to be encoded. - anchors: BoxList of anchors. - - Returns: - a tensor representing N anchor-encoded boxes of the format - [ty, tx, th, tw]. - """ - # Convert anchors to the center coordinate representation. - ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes() - ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes() - # Avoid NaN in division and log below. - ha += EPSILON - wa += EPSILON - h += EPSILON - w += EPSILON - - tx = (xcenter - xcenter_a) / wa - ty = (ycenter - ycenter_a) / ha - tw = tf.log(w / wa) - th = tf.log(h / ha) - # Scales location targets as used in paper for joint training. - if self._scale_factors: - ty *= self._scale_factors[0] - tx *= self._scale_factors[1] - th *= self._scale_factors[2] - tw *= self._scale_factors[3] - return tf.transpose(tf.stack([ty, tx, th, tw])) - - def _decode(self, rel_codes, anchors): - """Decode relative codes to boxes. - - Args: - rel_codes: a tensor representing N anchor-encoded boxes. - anchors: BoxList of anchors. - - Returns: - boxes: BoxList holding N bounding boxes. - """ - ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes() - - ty, tx, th, tw = tf.unstack(tf.transpose(rel_codes)) - if self._scale_factors: - ty /= self._scale_factors[0] - tx /= self._scale_factors[1] - th /= self._scale_factors[2] - tw /= self._scale_factors[3] - w = tf.exp(tw) * wa - h = tf.exp(th) * ha - ycenter = ty * ha + ycenter_a - xcenter = tx * wa + xcenter_a - ymin = ycenter - h / 2. - xmin = xcenter - w / 2. - ymax = ycenter + h / 2. - xmax = xcenter + w / 2. 
- return box_list.BoxList(tf.transpose(tf.stack([ymin, xmin, ymax, xmax]))) diff --git a/object_detection/box_coders/faster_rcnn_box_coder_test.py b/object_detection/box_coders/faster_rcnn_box_coder_test.py deleted file mode 100644 index b2135f06..00000000 --- a/object_detection/box_coders/faster_rcnn_box_coder_test.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.box_coder.faster_rcnn_box_coder.""" - -import tensorflow as tf - -from object_detection.box_coders import faster_rcnn_box_coder -from object_detection.core import box_list - - -class FasterRcnnBoxCoderTest(tf.test.TestCase): - - def test_get_correct_relative_codes_after_encoding(self): - boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]] - anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]] - expected_rel_codes = [[-0.5, -0.416666, -0.405465, -0.182321], - [-0.083333, -0.222222, -0.693147, -1.098612]] - boxes = box_list.BoxList(tf.constant(boxes)) - anchors = box_list.BoxList(tf.constant(anchors)) - coder = faster_rcnn_box_coder.FasterRcnnBoxCoder() - rel_codes = coder.encode(boxes, anchors) - with self.test_session() as sess: - rel_codes_out, = sess.run([rel_codes]) - self.assertAllClose(rel_codes_out, expected_rel_codes) - - def 
test_get_correct_relative_codes_after_encoding_with_scaling(self): - boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]] - anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]] - scale_factors = [2, 3, 4, 5] - expected_rel_codes = [[-1., -1.25, -1.62186, -0.911608], - [-0.166667, -0.666667, -2.772588, -5.493062]] - boxes = box_list.BoxList(tf.constant(boxes)) - anchors = box_list.BoxList(tf.constant(anchors)) - coder = faster_rcnn_box_coder.FasterRcnnBoxCoder( - scale_factors=scale_factors) - rel_codes = coder.encode(boxes, anchors) - with self.test_session() as sess: - rel_codes_out, = sess.run([rel_codes]) - self.assertAllClose(rel_codes_out, expected_rel_codes) - - def test_get_correct_boxes_after_decoding(self): - anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]] - rel_codes = [[-0.5, -0.416666, -0.405465, -0.182321], - [-0.083333, -0.222222, -0.693147, -1.098612]] - expected_boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]] - anchors = box_list.BoxList(tf.constant(anchors)) - coder = faster_rcnn_box_coder.FasterRcnnBoxCoder() - boxes = coder.decode(rel_codes, anchors) - with self.test_session() as sess: - boxes_out, = sess.run([boxes.get()]) - self.assertAllClose(boxes_out, expected_boxes) - - def test_get_correct_boxes_after_decoding_with_scaling(self): - anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]] - rel_codes = [[-1., -1.25, -1.62186, -0.911608], - [-0.166667, -0.666667, -2.772588, -5.493062]] - scale_factors = [2, 3, 4, 5] - expected_boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]] - anchors = box_list.BoxList(tf.constant(anchors)) - coder = faster_rcnn_box_coder.FasterRcnnBoxCoder( - scale_factors=scale_factors) - boxes = coder.decode(rel_codes, anchors) - with self.test_session() as sess: - boxes_out, = sess.run([boxes.get()]) - self.assertAllClose(boxes_out, expected_boxes) - - def test_very_small_Width_nan_after_encoding(self): - boxes = [[10.0, 10.0, 10.0000001, 20.0]] - anchors = [[15.0, 12.0, 30.0, 
18.0]] - expected_rel_codes = [[-0.833333, 0., -21.128731, 0.510826]] - boxes = box_list.BoxList(tf.constant(boxes)) - anchors = box_list.BoxList(tf.constant(anchors)) - coder = faster_rcnn_box_coder.FasterRcnnBoxCoder() - rel_codes = coder.encode(boxes, anchors) - with self.test_session() as sess: - rel_codes_out, = sess.run([rel_codes]) - self.assertAllClose(rel_codes_out, expected_rel_codes) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/box_coders/keypoint_box_coder.py b/object_detection/box_coders/keypoint_box_coder.py deleted file mode 100644 index 67df3b82..00000000 --- a/object_detection/box_coders/keypoint_box_coder.py +++ /dev/null @@ -1,171 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Keypoint box coder. - -The keypoint box coder follows the coding schema described below (this is -similar to the FasterRcnnBoxCoder, except that it encodes keypoints in addition -to box coordinates): - ty = (y - ya) / ha - tx = (x - xa) / wa - th = log(h / ha) - tw = log(w / wa) - tky0 = (ky0 - ya) / ha - tkx0 = (kx0 - xa) / wa - tky1 = (ky1 - ya) / ha - tkx1 = (kx1 - xa) / wa - ... - where x, y, w, h denote the box's center coordinates, width and height - respectively. Similarly, xa, ya, wa, ha denote the anchor's center - coordinates, width and height. 
tx, ty, tw and th denote the anchor-encoded - center, width and height respectively. ky0, kx0, ky1, kx1, ... denote the - keypoints' coordinates, and tky0, tkx0, tky1, tkx1, ... denote the - anchor-encoded keypoint coordinates. -""" - -import tensorflow as tf - -from object_detection.core import box_coder -from object_detection.core import box_list -from object_detection.core import standard_fields as fields - -EPSILON = 1e-8 - - -class KeypointBoxCoder(box_coder.BoxCoder): - """Keypoint box coder.""" - - def __init__(self, num_keypoints, scale_factors=None): - """Constructor for KeypointBoxCoder. - - Args: - num_keypoints: Number of keypoints to encode/decode. - scale_factors: List of 4 positive scalars to scale ty, tx, th and tw. - In addition to scaling ty and tx, the first 2 scalars are used to scale - the y and x coordinates of the keypoints as well. If set to None, does - not perform scaling. - """ - self._num_keypoints = num_keypoints - - if scale_factors: - assert len(scale_factors) == 4 - for scalar in scale_factors: - assert scalar > 0 - self._scale_factors = scale_factors - self._keypoint_scale_factors = None - if scale_factors is not None: - self._keypoint_scale_factors = tf.expand_dims(tf.tile( - [tf.to_float(scale_factors[0]), tf.to_float(scale_factors[1])], - [num_keypoints]), 1) - - @property - def code_size(self): - return 4 + self._num_keypoints * 2 - - def _encode(self, boxes, anchors): - """Encode a box and keypoint collection with respect to anchor collection. - - Args: - boxes: BoxList holding N boxes and keypoints to be encoded. Boxes are - tensors with the shape [N, 4], and keypoints are tensors with the shape - [N, num_keypoints, 2]. - anchors: BoxList of anchors. - - Returns: - a tensor representing N anchor-encoded boxes of the format - [ty, tx, th, tw, tky0, tkx0, tky1, tkx1, ...] 
where tky0 and tkx0 - represent the y and x coordinates of the first keypoint, tky1 and tkx1 - represent the y and x coordinates of the second keypoint, and so on. - """ - # Convert anchors to the center coordinate representation. - ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes() - ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes() - keypoints = boxes.get_field(fields.BoxListFields.keypoints) - keypoints = tf.transpose(tf.reshape(keypoints, - [-1, self._num_keypoints * 2])) - num_boxes = boxes.num_boxes() - - # Avoid NaN in division and log below. - ha += EPSILON - wa += EPSILON - h += EPSILON - w += EPSILON - - tx = (xcenter - xcenter_a) / wa - ty = (ycenter - ycenter_a) / ha - tw = tf.log(w / wa) - th = tf.log(h / ha) - - tiled_anchor_centers = tf.tile( - tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1]) - tiled_anchor_sizes = tf.tile( - tf.stack([ha, wa]), [self._num_keypoints, 1]) - tkeypoints = (keypoints - tiled_anchor_centers) / tiled_anchor_sizes - - # Scales location targets as used in paper for joint training. - if self._scale_factors: - ty *= self._scale_factors[0] - tx *= self._scale_factors[1] - th *= self._scale_factors[2] - tw *= self._scale_factors[3] - tkeypoints *= tf.tile(self._keypoint_scale_factors, [1, num_boxes]) - - tboxes = tf.stack([ty, tx, th, tw]) - return tf.transpose(tf.concat([tboxes, tkeypoints], 0)) - - def _decode(self, rel_codes, anchors): - """Decode relative codes to boxes and keypoints. - - Args: - rel_codes: a tensor with shape [N, 4 + 2 * num_keypoints] representing N - anchor-encoded boxes and keypoints - anchors: BoxList of anchors. - - Returns: - boxes: BoxList holding N bounding boxes and keypoints. 
- """ - ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes() - - num_codes = tf.shape(rel_codes)[0] - result = tf.unstack(tf.transpose(rel_codes)) - ty, tx, th, tw = result[:4] - tkeypoints = result[4:] - if self._scale_factors: - ty /= self._scale_factors[0] - tx /= self._scale_factors[1] - th /= self._scale_factors[2] - tw /= self._scale_factors[3] - tkeypoints /= tf.tile(self._keypoint_scale_factors, [1, num_codes]) - - w = tf.exp(tw) * wa - h = tf.exp(th) * ha - ycenter = ty * ha + ycenter_a - xcenter = tx * wa + xcenter_a - ymin = ycenter - h / 2. - xmin = xcenter - w / 2. - ymax = ycenter + h / 2. - xmax = xcenter + w / 2. - decoded_boxes_keypoints = box_list.BoxList( - tf.transpose(tf.stack([ymin, xmin, ymax, xmax]))) - - tiled_anchor_centers = tf.tile( - tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1]) - tiled_anchor_sizes = tf.tile( - tf.stack([ha, wa]), [self._num_keypoints, 1]) - keypoints = tkeypoints * tiled_anchor_sizes + tiled_anchor_centers - keypoints = tf.reshape(tf.transpose(keypoints), - [-1, self._num_keypoints, 2]) - decoded_boxes_keypoints.add_field(fields.BoxListFields.keypoints, keypoints) - return decoded_boxes_keypoints diff --git a/object_detection/box_coders/keypoint_box_coder_test.py b/object_detection/box_coders/keypoint_box_coder_test.py deleted file mode 100644 index 330641e5..00000000 --- a/object_detection/box_coders/keypoint_box_coder_test.py +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.box_coder.keypoint_box_coder.""" - -import tensorflow as tf - -from object_detection.box_coders import keypoint_box_coder -from object_detection.core import box_list -from object_detection.core import standard_fields as fields - - -class KeypointBoxCoderTest(tf.test.TestCase): - - def test_get_correct_relative_codes_after_encoding(self): - boxes = [[10., 10., 20., 15.], - [0.2, 0.1, 0.5, 0.4]] - keypoints = [[[15., 12.], [10., 15.]], - [[0.5, 0.3], [0.2, 0.4]]] - num_keypoints = len(keypoints[0]) - anchors = [[15., 12., 30., 18.], - [0.1, 0.0, 0.7, 0.9]] - expected_rel_codes = [ - [-0.5, -0.416666, -0.405465, -0.182321, - -0.5, -0.5, -0.833333, 0.], - [-0.083333, -0.222222, -0.693147, -1.098612, - 0.166667, -0.166667, -0.333333, -0.055556] - ] - boxes = box_list.BoxList(tf.constant(boxes)) - boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints)) - anchors = box_list.BoxList(tf.constant(anchors)) - coder = keypoint_box_coder.KeypointBoxCoder(num_keypoints) - rel_codes = coder.encode(boxes, anchors) - with self.test_session() as sess: - rel_codes_out, = sess.run([rel_codes]) - self.assertAllClose(rel_codes_out, expected_rel_codes) - - def test_get_correct_relative_codes_after_encoding_with_scaling(self): - boxes = [[10., 10., 20., 15.], - [0.2, 0.1, 0.5, 0.4]] - keypoints = [[[15., 12.], [10., 15.]], - [[0.5, 0.3], [0.2, 0.4]]] - num_keypoints = len(keypoints[0]) - anchors = [[15., 12., 30., 18.], - [0.1, 0.0, 0.7, 0.9]] - scale_factors = [2, 3, 4, 5] - expected_rel_codes = [ - [-1., -1.25, -1.62186, -0.911608, - -1.0, -1.5, -1.666667, 0.], - [-0.166667, -0.666667, -2.772588, -5.493062, - 0.333333, -0.5, -0.666667, -0.166667] - ] - boxes = box_list.BoxList(tf.constant(boxes)) - boxes.add_field(fields.BoxListFields.keypoints, 
tf.constant(keypoints)) - anchors = box_list.BoxList(tf.constant(anchors)) - coder = keypoint_box_coder.KeypointBoxCoder( - num_keypoints, scale_factors=scale_factors) - rel_codes = coder.encode(boxes, anchors) - with self.test_session() as sess: - rel_codes_out, = sess.run([rel_codes]) - self.assertAllClose(rel_codes_out, expected_rel_codes) - - def test_get_correct_boxes_after_decoding(self): - anchors = [[15., 12., 30., 18.], - [0.1, 0.0, 0.7, 0.9]] - rel_codes = [ - [-0.5, -0.416666, -0.405465, -0.182321, - -0.5, -0.5, -0.833333, 0.], - [-0.083333, -0.222222, -0.693147, -1.098612, - 0.166667, -0.166667, -0.333333, -0.055556] - ] - expected_boxes = [[10., 10., 20., 15.], - [0.2, 0.1, 0.5, 0.4]] - expected_keypoints = [[[15., 12.], [10., 15.]], - [[0.5, 0.3], [0.2, 0.4]]] - num_keypoints = len(expected_keypoints[0]) - anchors = box_list.BoxList(tf.constant(anchors)) - coder = keypoint_box_coder.KeypointBoxCoder(num_keypoints) - boxes = coder.decode(rel_codes, anchors) - with self.test_session() as sess: - boxes_out, keypoints_out = sess.run( - [boxes.get(), boxes.get_field(fields.BoxListFields.keypoints)]) - self.assertAllClose(boxes_out, expected_boxes) - self.assertAllClose(keypoints_out, expected_keypoints) - - def test_get_correct_boxes_after_decoding_with_scaling(self): - anchors = [[15., 12., 30., 18.], - [0.1, 0.0, 0.7, 0.9]] - rel_codes = [ - [-1., -1.25, -1.62186, -0.911608, - -1.0, -1.5, -1.666667, 0.], - [-0.166667, -0.666667, -2.772588, -5.493062, - 0.333333, -0.5, -0.666667, -0.166667] - ] - scale_factors = [2, 3, 4, 5] - expected_boxes = [[10., 10., 20., 15.], - [0.2, 0.1, 0.5, 0.4]] - expected_keypoints = [[[15., 12.], [10., 15.]], - [[0.5, 0.3], [0.2, 0.4]]] - num_keypoints = len(expected_keypoints[0]) - anchors = box_list.BoxList(tf.constant(anchors)) - coder = keypoint_box_coder.KeypointBoxCoder( - num_keypoints, scale_factors=scale_factors) - boxes = coder.decode(rel_codes, anchors) - with self.test_session() as sess: - boxes_out, keypoints_out 
= sess.run( - [boxes.get(), boxes.get_field(fields.BoxListFields.keypoints)]) - self.assertAllClose(boxes_out, expected_boxes) - self.assertAllClose(keypoints_out, expected_keypoints) - - def test_very_small_width_nan_after_encoding(self): - boxes = [[10., 10., 10.0000001, 20.]] - keypoints = [[[10., 10.], [10.0000001, 20.]]] - anchors = [[15., 12., 30., 18.]] - expected_rel_codes = [[-0.833333, 0., -21.128731, 0.510826, - -0.833333, -0.833333, -0.833333, 0.833333]] - boxes = box_list.BoxList(tf.constant(boxes)) - boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints)) - anchors = box_list.BoxList(tf.constant(anchors)) - coder = keypoint_box_coder.KeypointBoxCoder(2) - rel_codes = coder.encode(boxes, anchors) - with self.test_session() as sess: - rel_codes_out, = sess.run([rel_codes]) - self.assertAllClose(rel_codes_out, expected_rel_codes) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/box_coders/mean_stddev_box_coder.py b/object_detection/box_coders/mean_stddev_box_coder.py deleted file mode 100644 index 726b4a61..00000000 --- a/object_detection/box_coders/mean_stddev_box_coder.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Mean stddev box coder. 
- -This box coder use the following coding schema to encode boxes: -rel_code = (box_corner - anchor_corner_mean) / anchor_corner_stddev. -""" -from object_detection.core import box_coder -from object_detection.core import box_list - - -class MeanStddevBoxCoder(box_coder.BoxCoder): - """Mean stddev box coder.""" - - @property - def code_size(self): - return 4 - - def _encode(self, boxes, anchors): - """Encode a box collection with respect to anchor collection. - - Args: - boxes: BoxList holding N boxes to be encoded. - anchors: BoxList of N anchors. We assume that anchors has an associated - stddev field. - - Returns: - a tensor representing N anchor-encoded boxes - Raises: - ValueError: if the anchors BoxList does not have a stddev field - """ - if not anchors.has_field('stddev'): - raise ValueError('anchors must have a stddev field') - box_corners = boxes.get() - means = anchors.get() - stddev = anchors.get_field('stddev') - return (box_corners - means) / stddev - - def _decode(self, rel_codes, anchors): - """Decode. - - Args: - rel_codes: a tensor representing N anchor-encoded boxes. - anchors: BoxList of anchors. We assume that anchors has an associated - stddev field. - - Returns: - boxes: BoxList holding N bounding boxes - Raises: - ValueError: if the anchors BoxList does not have a stddev field - """ - if not anchors.has_field('stddev'): - raise ValueError('anchors must have a stddev field') - means = anchors.get() - stddevs = anchors.get_field('stddev') - box_corners = rel_codes * stddevs + means - return box_list.BoxList(box_corners) diff --git a/object_detection/box_coders/mean_stddev_box_coder_test.py b/object_detection/box_coders/mean_stddev_box_coder_test.py deleted file mode 100644 index 0d3a8952..00000000 --- a/object_detection/box_coders/mean_stddev_box_coder_test.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.box_coder.mean_stddev_boxcoder.""" - -import tensorflow as tf - -from object_detection.box_coders import mean_stddev_box_coder -from object_detection.core import box_list - - -class MeanStddevBoxCoderTest(tf.test.TestCase): - - def testGetCorrectRelativeCodesAfterEncoding(self): - box_corners = [[0.0, 0.0, 0.5, 0.5], [0.0, 0.0, 0.5, 0.5]] - boxes = box_list.BoxList(tf.constant(box_corners)) - expected_rel_codes = [[0.0, 0.0, 0.0, 0.0], [-5.0, -5.0, -5.0, -3.0]] - prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 1.0, 0.8]]) - prior_stddevs = tf.constant(2 * [4 * [.1]]) - priors = box_list.BoxList(prior_means) - priors.add_field('stddev', prior_stddevs) - - coder = mean_stddev_box_coder.MeanStddevBoxCoder() - rel_codes = coder.encode(boxes, priors) - with self.test_session() as sess: - rel_codes_out = sess.run(rel_codes) - self.assertAllClose(rel_codes_out, expected_rel_codes) - - def testGetCorrectBoxesAfterDecoding(self): - rel_codes = tf.constant([[0.0, 0.0, 0.0, 0.0], [-5.0, -5.0, -5.0, -3.0]]) - expected_box_corners = [[0.0, 0.0, 0.5, 0.5], [0.0, 0.0, 0.5, 0.5]] - prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 1.0, 0.8]]) - prior_stddevs = tf.constant(2 * [4 * [.1]]) - priors = box_list.BoxList(prior_means) - priors.add_field('stddev', prior_stddevs) - - coder = 
mean_stddev_box_coder.MeanStddevBoxCoder() - decoded_boxes = coder.decode(rel_codes, priors) - decoded_box_corners = decoded_boxes.get() - with self.test_session() as sess: - decoded_out = sess.run(decoded_box_corners) - self.assertAllClose(decoded_out, expected_box_corners) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/box_coders/square_box_coder.py b/object_detection/box_coders/square_box_coder.py deleted file mode 100644 index ee46b689..00000000 --- a/object_detection/box_coders/square_box_coder.py +++ /dev/null @@ -1,126 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Square box coder. - -Square box coder follows the coding schema described below: -l = sqrt(h * w) -la = sqrt(ha * wa) -ty = (y - ya) / la -tx = (x - xa) / la -tl = log(l / la) -where x, y, w, h denote the box's center coordinates, width, and height, -respectively. Similarly, xa, ya, wa, ha denote the anchor's center -coordinates, width and height. tx, ty, tl denote the anchor-encoded -center, and length, respectively. Because the encoded box is a square, only -one length is encoded. - -This has shown to provide performance improvements over the Faster RCNN box -coder when the objects being detected tend to be square (e.g. faces) and when -the input images are not distorted via resizing. 
-""" - -import tensorflow as tf - -from object_detection.core import box_coder -from object_detection.core import box_list - -EPSILON = 1e-8 - - -class SquareBoxCoder(box_coder.BoxCoder): - """Encodes a 3-scalar representation of a square box.""" - - def __init__(self, scale_factors=None): - """Constructor for SquareBoxCoder. - - Args: - scale_factors: List of 3 positive scalars to scale ty, tx, and tl. - If set to None, does not perform scaling. For faster RCNN, - the open-source implementation recommends using [10.0, 10.0, 5.0]. - - Raises: - ValueError: If scale_factors is not length 3 or contains values less than - or equal to 0. - """ - if scale_factors: - if len(scale_factors) != 3: - raise ValueError('The argument scale_factors must be a list of length ' - '3.') - if any(scalar <= 0 for scalar in scale_factors): - raise ValueError('The values in scale_factors must all be greater ' - 'than 0.') - self._scale_factors = scale_factors - - @property - def code_size(self): - return 3 - - def _encode(self, boxes, anchors): - """Encodes a box collection with respect to an anchor collection. - - Args: - boxes: BoxList holding N boxes to be encoded. - anchors: BoxList of anchors. - - Returns: - a tensor representing N anchor-encoded boxes of the format - [ty, tx, tl]. - """ - # Convert anchors to the center coordinate representation. - ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes() - la = tf.sqrt(ha * wa) - ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes() - l = tf.sqrt(h * w) - # Avoid NaN in division and log below. - la += EPSILON - l += EPSILON - - tx = (xcenter - xcenter_a) / la - ty = (ycenter - ycenter_a) / la - tl = tf.log(l / la) - # Scales location targets for joint training. - if self._scale_factors: - ty *= self._scale_factors[0] - tx *= self._scale_factors[1] - tl *= self._scale_factors[2] - return tf.transpose(tf.stack([ty, tx, tl])) - - def _decode(self, rel_codes, anchors): - """Decodes relative codes to boxes. 
- - Args: - rel_codes: a tensor representing N anchor-encoded boxes. - anchors: BoxList of anchors. - - Returns: - boxes: BoxList holding N bounding boxes. - """ - ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes() - la = tf.sqrt(ha * wa) - - ty, tx, tl = tf.unstack(tf.transpose(rel_codes)) - if self._scale_factors: - ty /= self._scale_factors[0] - tx /= self._scale_factors[1] - tl /= self._scale_factors[2] - l = tf.exp(tl) * la - ycenter = ty * la + ycenter_a - xcenter = tx * la + xcenter_a - ymin = ycenter - l / 2. - xmin = xcenter - l / 2. - ymax = ycenter + l / 2. - xmax = xcenter + l / 2. - return box_list.BoxList(tf.transpose(tf.stack([ymin, xmin, ymax, xmax]))) diff --git a/object_detection/box_coders/square_box_coder_test.py b/object_detection/box_coders/square_box_coder_test.py deleted file mode 100644 index 7f739c6b..00000000 --- a/object_detection/box_coders/square_box_coder_test.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for object_detection.box_coder.square_box_coder.""" - -import tensorflow as tf - -from object_detection.box_coders import square_box_coder -from object_detection.core import box_list - - -class SquareBoxCoderTest(tf.test.TestCase): - - def test_correct_relative_codes_with_default_scale(self): - boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]] - anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]] - scale_factors = None - expected_rel_codes = [[-0.790569, -0.263523, -0.293893], - [-0.068041, -0.272166, -0.89588]] - - boxes = box_list.BoxList(tf.constant(boxes)) - anchors = box_list.BoxList(tf.constant(anchors)) - coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors) - rel_codes = coder.encode(boxes, anchors) - with self.test_session() as sess: - (rel_codes_out,) = sess.run([rel_codes]) - self.assertAllClose(rel_codes_out, expected_rel_codes) - - def test_correct_relative_codes_with_non_default_scale(self): - boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]] - anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]] - scale_factors = [2, 3, 4] - expected_rel_codes = [[-1.581139, -0.790569, -1.175573], - [-0.136083, -0.816497, -3.583519]] - boxes = box_list.BoxList(tf.constant(boxes)) - anchors = box_list.BoxList(tf.constant(anchors)) - coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors) - rel_codes = coder.encode(boxes, anchors) - with self.test_session() as sess: - (rel_codes_out,) = sess.run([rel_codes]) - self.assertAllClose(rel_codes_out, expected_rel_codes) - - def test_correct_relative_codes_with_small_width(self): - boxes = [[10.0, 10.0, 10.0000001, 20.0]] - anchors = [[15.0, 12.0, 30.0, 18.0]] - scale_factors = None - expected_rel_codes = [[-1.317616, 0., -20.670586]] - boxes = box_list.BoxList(tf.constant(boxes)) - anchors = box_list.BoxList(tf.constant(anchors)) - coder = 
square_box_coder.SquareBoxCoder(scale_factors=scale_factors) - rel_codes = coder.encode(boxes, anchors) - with self.test_session() as sess: - (rel_codes_out,) = sess.run([rel_codes]) - self.assertAllClose(rel_codes_out, expected_rel_codes) - - def test_correct_boxes_with_default_scale(self): - anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]] - rel_codes = [[-0.5, -0.416666, -0.405465], - [-0.083333, -0.222222, -0.693147]] - scale_factors = None - expected_boxes = [[14.594306, 7.884875, 20.918861, 14.209432], - [0.155051, 0.102989, 0.522474, 0.470412]] - anchors = box_list.BoxList(tf.constant(anchors)) - coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors) - boxes = coder.decode(rel_codes, anchors) - with self.test_session() as sess: - (boxes_out,) = sess.run([boxes.get()]) - self.assertAllClose(boxes_out, expected_boxes) - - def test_correct_boxes_with_non_default_scale(self): - anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]] - rel_codes = [[-1., -1.25, -1.62186], [-0.166667, -0.666667, -2.772588]] - scale_factors = [2, 3, 4] - expected_boxes = [[14.594306, 7.884875, 20.918861, 14.209432], - [0.155051, 0.102989, 0.522474, 0.470412]] - anchors = box_list.BoxList(tf.constant(anchors)) - coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors) - boxes = coder.decode(rel_codes, anchors) - with self.test_session() as sess: - (boxes_out,) = sess.run([boxes.get()]) - self.assertAllClose(boxes_out, expected_boxes) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/builders/BUILD b/object_detection/builders/BUILD deleted file mode 100644 index d1bb3f03..00000000 --- a/object_detection/builders/BUILD +++ /dev/null @@ -1,305 +0,0 @@ -# Tensorflow Object Detection API: component builders. 
- -package( - default_visibility = ["//visibility:public"], -) - -licenses(["notice"]) - -# Apache 2.0 -py_library( - name = "model_builder", - srcs = ["model_builder.py"], - deps = [ - ":anchor_generator_builder", - ":box_coder_builder", - ":box_predictor_builder", - ":hyperparams_builder", - ":image_resizer_builder", - ":losses_builder", - ":matcher_builder", - ":post_processing_builder", - ":region_similarity_calculator_builder", - "//tensorflow_models/object_detection/core:box_predictor", - "//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch", - "//tensorflow_models/object_detection/meta_architectures:rfcn_meta_arch", - "//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch", - "//tensorflow_models/object_detection/models:embedded_ssd_mobilenet_v1_feature_extractor", - "//tensorflow_models/object_detection/models:faster_rcnn_inception_resnet_v2_feature_extractor", - "//tensorflow_models/object_detection/models:faster_rcnn_inception_v2_feature_extractor", - "//tensorflow_models/object_detection/models:faster_rcnn_resnet_v1_feature_extractor", - "//tensorflow_models/object_detection/models:ssd_inception_v2_feature_extractor", - "//tensorflow_models/object_detection/models:ssd_inception_v3_feature_extractor", - "//tensorflow_models/object_detection/models:ssd_mobilenet_v1_feature_extractor", - "//tensorflow_models/object_detection/protos:model_py_pb2", - ], -) - -py_test( - name = "model_builder_test", - srcs = ["model_builder_test.py"], - deps = [ - ":model_builder", - "//tensorflow", - "//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch", - "//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch", - "//tensorflow_models/object_detection/models:faster_rcnn_inception_resnet_v2_feature_extractor", - "//tensorflow_models/object_detection/models:faster_rcnn_inception_v2_feature_extractor", - "//tensorflow_models/object_detection/models:faster_rcnn_resnet_v1_feature_extractor", - 
"//tensorflow_models/object_detection/models:ssd_inception_v2_feature_extractor", - "//tensorflow_models/object_detection/models:ssd_inception_v3_feature_extractor", - "//tensorflow_models/object_detection/models:ssd_mobilenet_v1_feature_extractor", - "//tensorflow_models/object_detection/protos:model_py_pb2", - ], -) - -py_library( - name = "matcher_builder", - srcs = ["matcher_builder.py"], - deps = [ - "//tensorflow_models/object_detection/matchers:argmax_matcher", - "//tensorflow_models/object_detection/matchers:bipartite_matcher", - "//tensorflow_models/object_detection/protos:matcher_py_pb2", - ], -) - -py_test( - name = "matcher_builder_test", - srcs = ["matcher_builder_test.py"], - deps = [ - ":matcher_builder", - "//tensorflow_models/object_detection/matchers:argmax_matcher", - "//tensorflow_models/object_detection/matchers:bipartite_matcher", - "//tensorflow_models/object_detection/protos:matcher_py_pb2", - ], -) - -py_library( - name = "box_coder_builder", - srcs = ["box_coder_builder.py"], - deps = [ - "//tensorflow_models/object_detection/box_coders:faster_rcnn_box_coder", - "//tensorflow_models/object_detection/box_coders:keypoint_box_coder", - "//tensorflow_models/object_detection/box_coders:mean_stddev_box_coder", - "//tensorflow_models/object_detection/box_coders:square_box_coder", - "//tensorflow_models/object_detection/protos:box_coder_py_pb2", - ], -) - -py_test( - name = "box_coder_builder_test", - srcs = ["box_coder_builder_test.py"], - deps = [ - ":box_coder_builder", - "//tensorflow", - "//tensorflow_models/object_detection/box_coders:faster_rcnn_box_coder", - "//tensorflow_models/object_detection/box_coders:keypoint_box_coder", - "//tensorflow_models/object_detection/box_coders:mean_stddev_box_coder", - "//tensorflow_models/object_detection/box_coders:square_box_coder", - "//tensorflow_models/object_detection/protos:box_coder_py_pb2", - ], -) - -py_library( - name = "anchor_generator_builder", - srcs = ["anchor_generator_builder.py"], - 
deps = [ - "//tensorflow_models/object_detection/anchor_generators:grid_anchor_generator", - "//tensorflow_models/object_detection/anchor_generators:multiple_grid_anchor_generator", - "//tensorflow_models/object_detection/protos:anchor_generator_py_pb2", - ], -) - -py_test( - name = "anchor_generator_builder_test", - srcs = ["anchor_generator_builder_test.py"], - deps = [ - ":anchor_generator_builder", - "//tensorflow", - "//tensorflow_models/object_detection/anchor_generators:grid_anchor_generator", - "//tensorflow_models/object_detection/anchor_generators:multiple_grid_anchor_generator", - "//tensorflow_models/object_detection/protos:anchor_generator_py_pb2", - ], -) - -py_library( - name = "input_reader_builder", - srcs = ["input_reader_builder.py"], - deps = [ - "//tensorflow", - "//tensorflow_models/object_detection/data_decoders:tf_example_decoder", - "//tensorflow_models/object_detection/protos:input_reader_py_pb2", - ], -) - -py_test( - name = "input_reader_builder_test", - srcs = [ - "input_reader_builder_test.py", - ], - deps = [ - ":input_reader_builder", - "//tensorflow", - "//tensorflow_models/object_detection/core:standard_fields", - "//tensorflow_models/object_detection/protos:input_reader_py_pb2", - ], -) - -py_library( - name = "losses_builder", - srcs = ["losses_builder.py"], - deps = [ - "//tensorflow_models/object_detection/core:losses", - "//tensorflow_models/object_detection/protos:losses_py_pb2", - ], -) - -py_test( - name = "losses_builder_test", - srcs = ["losses_builder_test.py"], - deps = [ - ":losses_builder", - "//tensorflow_models/object_detection/core:losses", - "//tensorflow_models/object_detection/protos:losses_py_pb2", - ], -) - -py_library( - name = "optimizer_builder", - srcs = ["optimizer_builder.py"], - deps = [ - "//tensorflow", - "//tensorflow_models/object_detection/utils:learning_schedules", - ], -) - -py_test( - name = "optimizer_builder_test", - srcs = ["optimizer_builder_test.py"], - deps = [ - ":optimizer_builder", - 
"//tensorflow", - "//tensorflow_models/object_detection/protos:optimizer_py_pb2", - ], -) - -py_library( - name = "post_processing_builder", - srcs = ["post_processing_builder.py"], - deps = [ - "//tensorflow", - "//tensorflow_models/object_detection/core:post_processing", - "//tensorflow_models/object_detection/protos:post_processing_py_pb2", - ], -) - -py_test( - name = "post_processing_builder_test", - srcs = ["post_processing_builder_test.py"], - deps = [ - ":post_processing_builder", - "//tensorflow", - "//tensorflow_models/object_detection/protos:post_processing_py_pb2", - ], -) - -py_library( - name = "hyperparams_builder", - srcs = ["hyperparams_builder.py"], - deps = [ - "//tensorflow_models/object_detection/protos:hyperparams_py_pb2", - ], -) - -py_test( - name = "hyperparams_builder_test", - srcs = ["hyperparams_builder_test.py"], - deps = [ - ":hyperparams_builder", - "//tensorflow", - "//tensorflow_models/object_detection/protos:hyperparams_py_pb2", - ], -) - -py_library( - name = "box_predictor_builder", - srcs = ["box_predictor_builder.py"], - deps = [ - ":hyperparams_builder", - "//tensorflow_models/object_detection/core:box_predictor", - "//tensorflow_models/object_detection/protos:box_predictor_py_pb2", - ], -) - -py_test( - name = "box_predictor_builder_test", - srcs = ["box_predictor_builder_test.py"], - deps = [ - ":box_predictor_builder", - ":hyperparams_builder", - "//tensorflow", - "//tensorflow_models/object_detection/protos:box_predictor_py_pb2", - "//tensorflow_models/object_detection/protos:hyperparams_py_pb2", - ], -) - -py_library( - name = "region_similarity_calculator_builder", - srcs = ["region_similarity_calculator_builder.py"], - deps = [ - "//tensorflow_models/object_detection/core:region_similarity_calculator", - "//tensorflow_models/object_detection/protos:region_similarity_calculator_py_pb2", - ], -) - -py_test( - name = "region_similarity_calculator_builder_test", - srcs = ["region_similarity_calculator_builder_test.py"], - 
deps = [ - ":region_similarity_calculator_builder", - "//tensorflow", - ], -) - -py_library( - name = "preprocessor_builder", - srcs = ["preprocessor_builder.py"], - deps = [ - "//tensorflow", - "//tensorflow_models/object_detection/core:preprocessor", - "//tensorflow_models/object_detection/protos:preprocessor_py_pb2", - ], -) - -py_test( - name = "preprocessor_builder_test", - srcs = [ - "preprocessor_builder_test.py", - ], - deps = [ - ":preprocessor_builder", - "//tensorflow", - "//tensorflow_models/object_detection/core:preprocessor", - "//tensorflow_models/object_detection/protos:preprocessor_py_pb2", - ], -) - -py_library( - name = "image_resizer_builder", - srcs = ["image_resizer_builder.py"], - deps = [ - "//tensorflow", - "//tensorflow_models/object_detection/core:preprocessor", - "//tensorflow_models/object_detection/protos:image_resizer_py_pb2", - ], -) - -py_test( - name = "image_resizer_builder_test", - srcs = ["image_resizer_builder_test.py"], - deps = [ - ":image_resizer_builder", - "//tensorflow", - "//tensorflow_models/object_detection/protos:image_resizer_py_pb2", - ], -) diff --git a/object_detection/builders/__init__.py b/object_detection/builders/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/object_detection/builders/__pycache__/__init__.cpython-35.pyc b/object_detection/builders/__pycache__/__init__.cpython-35.pyc deleted file mode 100644 index 39958001..00000000 Binary files a/object_detection/builders/__pycache__/__init__.cpython-35.pyc and /dev/null differ diff --git a/object_detection/builders/__pycache__/anchor_generator_builder.cpython-35.pyc b/object_detection/builders/__pycache__/anchor_generator_builder.cpython-35.pyc deleted file mode 100644 index deee9d21..00000000 Binary files a/object_detection/builders/__pycache__/anchor_generator_builder.cpython-35.pyc and /dev/null differ diff --git a/object_detection/builders/__pycache__/box_coder_builder.cpython-35.pyc 
b/object_detection/builders/__pycache__/box_coder_builder.cpython-35.pyc deleted file mode 100644 index 7fdb466c..00000000 Binary files a/object_detection/builders/__pycache__/box_coder_builder.cpython-35.pyc and /dev/null differ diff --git a/object_detection/builders/__pycache__/box_predictor_builder.cpython-35.pyc b/object_detection/builders/__pycache__/box_predictor_builder.cpython-35.pyc deleted file mode 100644 index c84f9f90..00000000 Binary files a/object_detection/builders/__pycache__/box_predictor_builder.cpython-35.pyc and /dev/null differ diff --git a/object_detection/builders/__pycache__/hyperparams_builder.cpython-35.pyc b/object_detection/builders/__pycache__/hyperparams_builder.cpython-35.pyc deleted file mode 100644 index cbc66011..00000000 Binary files a/object_detection/builders/__pycache__/hyperparams_builder.cpython-35.pyc and /dev/null differ diff --git a/object_detection/builders/__pycache__/image_resizer_builder.cpython-35.pyc b/object_detection/builders/__pycache__/image_resizer_builder.cpython-35.pyc deleted file mode 100644 index 87f79381..00000000 Binary files a/object_detection/builders/__pycache__/image_resizer_builder.cpython-35.pyc and /dev/null differ diff --git a/object_detection/builders/__pycache__/losses_builder.cpython-35.pyc b/object_detection/builders/__pycache__/losses_builder.cpython-35.pyc deleted file mode 100644 index e8dd90e8..00000000 Binary files a/object_detection/builders/__pycache__/losses_builder.cpython-35.pyc and /dev/null differ diff --git a/object_detection/builders/__pycache__/matcher_builder.cpython-35.pyc b/object_detection/builders/__pycache__/matcher_builder.cpython-35.pyc deleted file mode 100644 index c20c37d6..00000000 Binary files a/object_detection/builders/__pycache__/matcher_builder.cpython-35.pyc and /dev/null differ diff --git a/object_detection/builders/__pycache__/model_builder.cpython-35.pyc b/object_detection/builders/__pycache__/model_builder.cpython-35.pyc deleted file mode 100644 index 
ca02ac33..00000000 Binary files a/object_detection/builders/__pycache__/model_builder.cpython-35.pyc and /dev/null differ diff --git a/object_detection/builders/__pycache__/post_processing_builder.cpython-35.pyc b/object_detection/builders/__pycache__/post_processing_builder.cpython-35.pyc deleted file mode 100644 index f4c63a23..00000000 Binary files a/object_detection/builders/__pycache__/post_processing_builder.cpython-35.pyc and /dev/null differ diff --git a/object_detection/builders/__pycache__/region_similarity_calculator_builder.cpython-35.pyc b/object_detection/builders/__pycache__/region_similarity_calculator_builder.cpython-35.pyc deleted file mode 100644 index 80a61106..00000000 Binary files a/object_detection/builders/__pycache__/region_similarity_calculator_builder.cpython-35.pyc and /dev/null differ diff --git a/object_detection/builders/anchor_generator_builder.py b/object_detection/builders/anchor_generator_builder.py deleted file mode 100644 index 40a65c5c..00000000 --- a/object_detection/builders/anchor_generator_builder.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""A function to build an object detection anchor generator from config.""" - -from object_detection.anchor_generators import grid_anchor_generator -from object_detection.anchor_generators import multiple_grid_anchor_generator -from object_detection.protos import anchor_generator_pb2 - - -def build(anchor_generator_config): - """Builds an anchor generator based on the config. - - Args: - anchor_generator_config: An anchor_generator.proto object containing the - config for the desired anchor generator. - - Returns: - Anchor generator based on the config. - - Raises: - ValueError: On empty anchor generator proto. - """ - if not isinstance(anchor_generator_config, - anchor_generator_pb2.AnchorGenerator): - raise ValueError('anchor_generator_config not of type ' - 'anchor_generator_pb2.AnchorGenerator') - if anchor_generator_config.WhichOneof( - 'anchor_generator_oneof') == 'grid_anchor_generator': - grid_anchor_generator_config = anchor_generator_config.grid_anchor_generator - return grid_anchor_generator.GridAnchorGenerator( - scales=[float(scale) for scale in grid_anchor_generator_config.scales], - aspect_ratios=[float(aspect_ratio) - for aspect_ratio - in grid_anchor_generator_config.aspect_ratios], - base_anchor_size=[grid_anchor_generator_config.height, - grid_anchor_generator_config.width], - anchor_stride=[grid_anchor_generator_config.height_stride, - grid_anchor_generator_config.width_stride], - anchor_offset=[grid_anchor_generator_config.height_offset, - grid_anchor_generator_config.width_offset]) - elif anchor_generator_config.WhichOneof( - 'anchor_generator_oneof') == 'ssd_anchor_generator': - ssd_anchor_generator_config = anchor_generator_config.ssd_anchor_generator - anchor_strides = None - if ssd_anchor_generator_config.height_stride: - anchor_strides = zip(ssd_anchor_generator_config.height_stride, - ssd_anchor_generator_config.width_stride) - anchor_offsets = None - if 
ssd_anchor_generator_config.height_offset: - anchor_offsets = zip(ssd_anchor_generator_config.height_offset, - ssd_anchor_generator_config.width_offset) - return multiple_grid_anchor_generator.create_ssd_anchors( - num_layers=ssd_anchor_generator_config.num_layers, - min_scale=ssd_anchor_generator_config.min_scale, - max_scale=ssd_anchor_generator_config.max_scale, - scales=[float(scale) for scale in ssd_anchor_generator_config.scales], - aspect_ratios=ssd_anchor_generator_config.aspect_ratios, - interpolated_scale_aspect_ratio=( - ssd_anchor_generator_config.interpolated_scale_aspect_ratio), - base_anchor_size=[ - ssd_anchor_generator_config.base_anchor_height, - ssd_anchor_generator_config.base_anchor_width - ], - anchor_strides=anchor_strides, - anchor_offsets=anchor_offsets, - reduce_boxes_in_lowest_layer=( - ssd_anchor_generator_config.reduce_boxes_in_lowest_layer)) - else: - raise ValueError('Empty anchor generator.') diff --git a/object_detection/builders/anchor_generator_builder_test.py b/object_detection/builders/anchor_generator_builder_test.py deleted file mode 100644 index ecc1eca1..00000000 --- a/object_detection/builders/anchor_generator_builder_test.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for anchor_generator_builder.""" - -import math - -import tensorflow as tf - -from google.protobuf import text_format -from object_detection.anchor_generators import grid_anchor_generator -from object_detection.anchor_generators import multiple_grid_anchor_generator -from object_detection.builders import anchor_generator_builder -from object_detection.protos import anchor_generator_pb2 - - -class AnchorGeneratorBuilderTest(tf.test.TestCase): - - def assert_almost_list_equal(self, expected_list, actual_list, delta=None): - self.assertEqual(len(expected_list), len(actual_list)) - for expected_item, actual_item in zip(expected_list, actual_list): - self.assertAlmostEqual(expected_item, actual_item, delta=delta) - - def test_build_grid_anchor_generator_with_defaults(self): - anchor_generator_text_proto = """ - grid_anchor_generator { - } - """ - anchor_generator_proto = anchor_generator_pb2.AnchorGenerator() - text_format.Merge(anchor_generator_text_proto, anchor_generator_proto) - anchor_generator_object = anchor_generator_builder.build( - anchor_generator_proto) - self.assertTrue(isinstance(anchor_generator_object, - grid_anchor_generator.GridAnchorGenerator)) - self.assertListEqual(anchor_generator_object._scales, []) - self.assertListEqual(anchor_generator_object._aspect_ratios, []) - with self.test_session() as sess: - base_anchor_size, anchor_offset, anchor_stride = sess.run( - [anchor_generator_object._base_anchor_size, - anchor_generator_object._anchor_offset, - anchor_generator_object._anchor_stride]) - self.assertAllEqual(anchor_offset, [0, 0]) - self.assertAllEqual(anchor_stride, [16, 16]) - self.assertAllEqual(base_anchor_size, [256, 256]) - - def test_build_grid_anchor_generator_with_non_default_parameters(self): - anchor_generator_text_proto = """ - grid_anchor_generator { - height: 128 - width: 512 - height_stride: 10 - width_stride: 20 - height_offset: 30 - 
width_offset: 40 - scales: [0.4, 2.2] - aspect_ratios: [0.3, 4.5] - } - """ - anchor_generator_proto = anchor_generator_pb2.AnchorGenerator() - text_format.Merge(anchor_generator_text_proto, anchor_generator_proto) - anchor_generator_object = anchor_generator_builder.build( - anchor_generator_proto) - self.assertTrue(isinstance(anchor_generator_object, - grid_anchor_generator.GridAnchorGenerator)) - self.assert_almost_list_equal(anchor_generator_object._scales, - [0.4, 2.2]) - self.assert_almost_list_equal(anchor_generator_object._aspect_ratios, - [0.3, 4.5]) - with self.test_session() as sess: - base_anchor_size, anchor_offset, anchor_stride = sess.run( - [anchor_generator_object._base_anchor_size, - anchor_generator_object._anchor_offset, - anchor_generator_object._anchor_stride]) - self.assertAllEqual(anchor_offset, [30, 40]) - self.assertAllEqual(anchor_stride, [10, 20]) - self.assertAllEqual(base_anchor_size, [128, 512]) - - def test_build_ssd_anchor_generator_with_defaults(self): - anchor_generator_text_proto = """ - ssd_anchor_generator { - aspect_ratios: [1.0] - } - """ - anchor_generator_proto = anchor_generator_pb2.AnchorGenerator() - text_format.Merge(anchor_generator_text_proto, anchor_generator_proto) - anchor_generator_object = anchor_generator_builder.build( - anchor_generator_proto) - self.assertTrue(isinstance(anchor_generator_object, - multiple_grid_anchor_generator. 
- MultipleGridAnchorGenerator)) - for actual_scales, expected_scales in zip( - list(anchor_generator_object._scales), - [(0.1, 0.2, 0.2), - (0.35, 0.418), - (0.499, 0.570), - (0.649, 0.721), - (0.799, 0.871), - (0.949, 0.974)]): - self.assert_almost_list_equal(expected_scales, actual_scales, delta=1e-2) - for actual_aspect_ratio, expected_aspect_ratio in zip( - list(anchor_generator_object._aspect_ratios), - [(1.0, 2.0, 0.5)] + 5 * [(1.0, 1.0)]): - self.assert_almost_list_equal(expected_aspect_ratio, actual_aspect_ratio) - - with self.test_session() as sess: - base_anchor_size = sess.run(anchor_generator_object._base_anchor_size) - self.assertAllClose(base_anchor_size, [1.0, 1.0]) - - def test_build_ssd_anchor_generator_with_custom_scales(self): - anchor_generator_text_proto = """ - ssd_anchor_generator { - aspect_ratios: [1.0] - scales: [0.1, 0.15, 0.2, 0.4, 0.6, 0.8] - reduce_boxes_in_lowest_layer: false - } - """ - anchor_generator_proto = anchor_generator_pb2.AnchorGenerator() - text_format.Merge(anchor_generator_text_proto, anchor_generator_proto) - anchor_generator_object = anchor_generator_builder.build( - anchor_generator_proto) - self.assertTrue(isinstance(anchor_generator_object, - multiple_grid_anchor_generator. 
- MultipleGridAnchorGenerator)) - for actual_scales, expected_scales in zip( - list(anchor_generator_object._scales), - [(0.1, math.sqrt(0.1 * 0.15)), - (0.15, math.sqrt(0.15 * 0.2)), - (0.2, math.sqrt(0.2 * 0.4)), - (0.4, math.sqrt(0.4 * 0.6)), - (0.6, math.sqrt(0.6 * 0.8)), - (0.8, math.sqrt(0.8 * 1.0))]): - self.assert_almost_list_equal(expected_scales, actual_scales, delta=1e-2) - - def test_build_ssd_anchor_generator_with_custom_interpolated_scale(self): - anchor_generator_text_proto = """ - ssd_anchor_generator { - aspect_ratios: [0.5] - interpolated_scale_aspect_ratio: 0.5 - reduce_boxes_in_lowest_layer: false - } - """ - anchor_generator_proto = anchor_generator_pb2.AnchorGenerator() - text_format.Merge(anchor_generator_text_proto, anchor_generator_proto) - anchor_generator_object = anchor_generator_builder.build( - anchor_generator_proto) - self.assertTrue(isinstance(anchor_generator_object, - multiple_grid_anchor_generator. - MultipleGridAnchorGenerator)) - for actual_aspect_ratio, expected_aspect_ratio in zip( - list(anchor_generator_object._aspect_ratios), - 6 * [(0.5, 0.5)]): - self.assert_almost_list_equal(expected_aspect_ratio, actual_aspect_ratio) - - def test_build_ssd_anchor_generator_without_reduced_boxes(self): - anchor_generator_text_proto = """ - ssd_anchor_generator { - aspect_ratios: [1.0] - reduce_boxes_in_lowest_layer: false - } - """ - anchor_generator_proto = anchor_generator_pb2.AnchorGenerator() - text_format.Merge(anchor_generator_text_proto, anchor_generator_proto) - anchor_generator_object = anchor_generator_builder.build( - anchor_generator_proto) - self.assertTrue(isinstance(anchor_generator_object, - multiple_grid_anchor_generator. 
- MultipleGridAnchorGenerator)) - - for actual_scales, expected_scales in zip( - list(anchor_generator_object._scales), - [(0.2, 0.264), - (0.35, 0.418), - (0.499, 0.570), - (0.649, 0.721), - (0.799, 0.871), - (0.949, 0.974)]): - self.assert_almost_list_equal(expected_scales, actual_scales, delta=1e-2) - - for actual_aspect_ratio, expected_aspect_ratio in zip( - list(anchor_generator_object._aspect_ratios), - 6 * [(1.0, 1.0)]): - self.assert_almost_list_equal(expected_aspect_ratio, actual_aspect_ratio) - - with self.test_session() as sess: - base_anchor_size = sess.run(anchor_generator_object._base_anchor_size) - self.assertAllClose(base_anchor_size, [1.0, 1.0]) - - def test_build_ssd_anchor_generator_with_non_default_parameters(self): - anchor_generator_text_proto = """ - ssd_anchor_generator { - num_layers: 2 - min_scale: 0.3 - max_scale: 0.8 - aspect_ratios: [2.0] - height_stride: 16 - height_stride: 32 - width_stride: 20 - width_stride: 30 - height_offset: 8 - height_offset: 16 - width_offset: 0 - width_offset: 10 - } - """ - anchor_generator_proto = anchor_generator_pb2.AnchorGenerator() - text_format.Merge(anchor_generator_text_proto, anchor_generator_proto) - anchor_generator_object = anchor_generator_builder.build( - anchor_generator_proto) - self.assertTrue(isinstance(anchor_generator_object, - multiple_grid_anchor_generator. 
- MultipleGridAnchorGenerator)) - - for actual_scales, expected_scales in zip( - list(anchor_generator_object._scales), - [(0.1, 0.3, 0.3), (0.8, 0.894)]): - self.assert_almost_list_equal(expected_scales, actual_scales, delta=1e-2) - - for actual_aspect_ratio, expected_aspect_ratio in zip( - list(anchor_generator_object._aspect_ratios), - [(1.0, 2.0, 0.5), (2.0, 1.0)]): - self.assert_almost_list_equal(expected_aspect_ratio, actual_aspect_ratio) - - for actual_strides, expected_strides in zip( - list(anchor_generator_object._anchor_strides), [(16, 20), (32, 30)]): - self.assert_almost_list_equal(expected_strides, actual_strides) - - for actual_offsets, expected_offsets in zip( - list(anchor_generator_object._anchor_offsets), [(8, 0), (16, 10)]): - self.assert_almost_list_equal(expected_offsets, actual_offsets) - - with self.test_session() as sess: - base_anchor_size = sess.run(anchor_generator_object._base_anchor_size) - self.assertAllClose(base_anchor_size, [1.0, 1.0]) - - def test_raise_value_error_on_empty_anchor_genertor(self): - anchor_generator_text_proto = """ - """ - anchor_generator_proto = anchor_generator_pb2.AnchorGenerator() - text_format.Merge(anchor_generator_text_proto, anchor_generator_proto) - with self.assertRaises(ValueError): - anchor_generator_builder.build(anchor_generator_proto) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/builders/box_coder_builder.py b/object_detection/builders/box_coder_builder.py deleted file mode 100644 index edfc2fca..00000000 --- a/object_detection/builders/box_coder_builder.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""A function to build an object detection box coder from configuration.""" -from object_detection.box_coders import faster_rcnn_box_coder -from object_detection.box_coders import keypoint_box_coder -from object_detection.box_coders import mean_stddev_box_coder -from object_detection.box_coders import square_box_coder -from object_detection.protos import box_coder_pb2 - - -def build(box_coder_config): - """Builds a box coder object based on the box coder config. - - Args: - box_coder_config: A box_coder.proto object containing the config for the - desired box coder. - - Returns: - BoxCoder based on the config. - - Raises: - ValueError: On empty box coder proto. 
- """ - if not isinstance(box_coder_config, box_coder_pb2.BoxCoder): - raise ValueError('box_coder_config not of type box_coder_pb2.BoxCoder.') - - if box_coder_config.WhichOneof('box_coder_oneof') == 'faster_rcnn_box_coder': - return faster_rcnn_box_coder.FasterRcnnBoxCoder(scale_factors=[ - box_coder_config.faster_rcnn_box_coder.y_scale, - box_coder_config.faster_rcnn_box_coder.x_scale, - box_coder_config.faster_rcnn_box_coder.height_scale, - box_coder_config.faster_rcnn_box_coder.width_scale - ]) - if box_coder_config.WhichOneof('box_coder_oneof') == 'keypoint_box_coder': - return keypoint_box_coder.KeypointBoxCoder( - box_coder_config.keypoint_box_coder.num_keypoints, - scale_factors=[ - box_coder_config.keypoint_box_coder.y_scale, - box_coder_config.keypoint_box_coder.x_scale, - box_coder_config.keypoint_box_coder.height_scale, - box_coder_config.keypoint_box_coder.width_scale - ]) - if (box_coder_config.WhichOneof('box_coder_oneof') == - 'mean_stddev_box_coder'): - return mean_stddev_box_coder.MeanStddevBoxCoder() - if box_coder_config.WhichOneof('box_coder_oneof') == 'square_box_coder': - return square_box_coder.SquareBoxCoder(scale_factors=[ - box_coder_config.square_box_coder.y_scale, - box_coder_config.square_box_coder.x_scale, - box_coder_config.square_box_coder.length_scale - ]) - raise ValueError('Empty box coder.') diff --git a/object_detection/builders/box_coder_builder_test.py b/object_detection/builders/box_coder_builder_test.py deleted file mode 100644 index 286012e9..00000000 --- a/object_detection/builders/box_coder_builder_test.py +++ /dev/null @@ -1,136 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for box_coder_builder.""" - -import tensorflow as tf - -from google.protobuf import text_format -from object_detection.box_coders import faster_rcnn_box_coder -from object_detection.box_coders import keypoint_box_coder -from object_detection.box_coders import mean_stddev_box_coder -from object_detection.box_coders import square_box_coder -from object_detection.builders import box_coder_builder -from object_detection.protos import box_coder_pb2 - - -class BoxCoderBuilderTest(tf.test.TestCase): - - def test_build_faster_rcnn_box_coder_with_defaults(self): - box_coder_text_proto = """ - faster_rcnn_box_coder { - } - """ - box_coder_proto = box_coder_pb2.BoxCoder() - text_format.Merge(box_coder_text_proto, box_coder_proto) - box_coder_object = box_coder_builder.build(box_coder_proto) - self.assertIsInstance(box_coder_object, - faster_rcnn_box_coder.FasterRcnnBoxCoder) - self.assertEqual(box_coder_object._scale_factors, [10.0, 10.0, 5.0, 5.0]) - - def test_build_faster_rcnn_box_coder_with_non_default_parameters(self): - box_coder_text_proto = """ - faster_rcnn_box_coder { - y_scale: 6.0 - x_scale: 3.0 - height_scale: 7.0 - width_scale: 8.0 - } - """ - box_coder_proto = box_coder_pb2.BoxCoder() - text_format.Merge(box_coder_text_proto, box_coder_proto) - box_coder_object = box_coder_builder.build(box_coder_proto) - self.assertIsInstance(box_coder_object, - faster_rcnn_box_coder.FasterRcnnBoxCoder) - self.assertEqual(box_coder_object._scale_factors, [6.0, 3.0, 7.0, 8.0]) 
- - def test_build_keypoint_box_coder_with_defaults(self): - box_coder_text_proto = """ - keypoint_box_coder { - } - """ - box_coder_proto = box_coder_pb2.BoxCoder() - text_format.Merge(box_coder_text_proto, box_coder_proto) - box_coder_object = box_coder_builder.build(box_coder_proto) - self.assertIsInstance(box_coder_object, keypoint_box_coder.KeypointBoxCoder) - self.assertEqual(box_coder_object._scale_factors, [10.0, 10.0, 5.0, 5.0]) - - def test_build_keypoint_box_coder_with_non_default_parameters(self): - box_coder_text_proto = """ - keypoint_box_coder { - num_keypoints: 6 - y_scale: 6.0 - x_scale: 3.0 - height_scale: 7.0 - width_scale: 8.0 - } - """ - box_coder_proto = box_coder_pb2.BoxCoder() - text_format.Merge(box_coder_text_proto, box_coder_proto) - box_coder_object = box_coder_builder.build(box_coder_proto) - self.assertIsInstance(box_coder_object, keypoint_box_coder.KeypointBoxCoder) - self.assertEqual(box_coder_object._num_keypoints, 6) - self.assertEqual(box_coder_object._scale_factors, [6.0, 3.0, 7.0, 8.0]) - - def test_build_mean_stddev_box_coder(self): - box_coder_text_proto = """ - mean_stddev_box_coder { - } - """ - box_coder_proto = box_coder_pb2.BoxCoder() - text_format.Merge(box_coder_text_proto, box_coder_proto) - box_coder_object = box_coder_builder.build(box_coder_proto) - self.assertTrue( - isinstance(box_coder_object, - mean_stddev_box_coder.MeanStddevBoxCoder)) - - def test_build_square_box_coder_with_defaults(self): - box_coder_text_proto = """ - square_box_coder { - } - """ - box_coder_proto = box_coder_pb2.BoxCoder() - text_format.Merge(box_coder_text_proto, box_coder_proto) - box_coder_object = box_coder_builder.build(box_coder_proto) - self.assertTrue( - isinstance(box_coder_object, square_box_coder.SquareBoxCoder)) - self.assertEqual(box_coder_object._scale_factors, [10.0, 10.0, 5.0]) - - def test_build_square_box_coder_with_non_default_parameters(self): - box_coder_text_proto = """ - square_box_coder { - y_scale: 6.0 - x_scale: 
3.0 - length_scale: 7.0 - } - """ - box_coder_proto = box_coder_pb2.BoxCoder() - text_format.Merge(box_coder_text_proto, box_coder_proto) - box_coder_object = box_coder_builder.build(box_coder_proto) - self.assertTrue( - isinstance(box_coder_object, square_box_coder.SquareBoxCoder)) - self.assertEqual(box_coder_object._scale_factors, [6.0, 3.0, 7.0]) - - def test_raise_error_on_empty_box_coder(self): - box_coder_text_proto = """ - """ - box_coder_proto = box_coder_pb2.BoxCoder() - text_format.Merge(box_coder_text_proto, box_coder_proto) - with self.assertRaises(ValueError): - box_coder_builder.build(box_coder_proto) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/builders/box_predictor_builder.py b/object_detection/builders/box_predictor_builder.py deleted file mode 100644 index 3e10b394..00000000 --- a/object_detection/builders/box_predictor_builder.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Function to build box predictor from configuration.""" - -from object_detection.core import box_predictor -from object_detection.protos import box_predictor_pb2 - - -def build(argscope_fn, box_predictor_config, is_training, num_classes): - """Builds box predictor based on the configuration. - - Builds box predictor based on the configuration. 
See box_predictor.proto for - configurable options. Also, see box_predictor.py for more details. - - Args: - argscope_fn: A function that takes the following inputs: - * hyperparams_pb2.Hyperparams proto - * a boolean indicating if the model is in training mode. - and returns a tf slim argscope for Conv and FC hyperparameters. - box_predictor_config: box_predictor_pb2.BoxPredictor proto containing - configuration. - is_training: Whether the models is in training mode. - num_classes: Number of classes to predict. - - Returns: - box_predictor: box_predictor.BoxPredictor object. - - Raises: - ValueError: On unknown box predictor. - """ - if not isinstance(box_predictor_config, box_predictor_pb2.BoxPredictor): - raise ValueError('box_predictor_config not of type ' - 'box_predictor_pb2.BoxPredictor.') - - box_predictor_oneof = box_predictor_config.WhichOneof('box_predictor_oneof') - - if box_predictor_oneof == 'convolutional_box_predictor': - conv_box_predictor = box_predictor_config.convolutional_box_predictor - conv_hyperparams = argscope_fn(conv_box_predictor.conv_hyperparams, - is_training) - box_predictor_object = box_predictor.ConvolutionalBoxPredictor( - is_training=is_training, - num_classes=num_classes, - conv_hyperparams=conv_hyperparams, - min_depth=conv_box_predictor.min_depth, - max_depth=conv_box_predictor.max_depth, - num_layers_before_predictor=(conv_box_predictor. 
- num_layers_before_predictor), - use_dropout=conv_box_predictor.use_dropout, - dropout_keep_prob=conv_box_predictor.dropout_keep_probability, - kernel_size=conv_box_predictor.kernel_size, - box_code_size=conv_box_predictor.box_code_size, - apply_sigmoid_to_scores=conv_box_predictor.apply_sigmoid_to_scores, - class_prediction_bias_init=conv_box_predictor.class_prediction_bias_init - ) - return box_predictor_object - - if box_predictor_oneof == 'mask_rcnn_box_predictor': - mask_rcnn_box_predictor = box_predictor_config.mask_rcnn_box_predictor - fc_hyperparams = argscope_fn(mask_rcnn_box_predictor.fc_hyperparams, - is_training) - conv_hyperparams = None - if mask_rcnn_box_predictor.HasField('conv_hyperparams'): - conv_hyperparams = argscope_fn(mask_rcnn_box_predictor.conv_hyperparams, - is_training) - box_predictor_object = box_predictor.MaskRCNNBoxPredictor( - is_training=is_training, - num_classes=num_classes, - fc_hyperparams=fc_hyperparams, - use_dropout=mask_rcnn_box_predictor.use_dropout, - dropout_keep_prob=mask_rcnn_box_predictor.dropout_keep_probability, - box_code_size=mask_rcnn_box_predictor.box_code_size, - conv_hyperparams=conv_hyperparams, - predict_instance_masks=mask_rcnn_box_predictor.predict_instance_masks, - mask_prediction_conv_depth=(mask_rcnn_box_predictor. 
- mask_prediction_conv_depth), - predict_keypoints=mask_rcnn_box_predictor.predict_keypoints) - return box_predictor_object - - if box_predictor_oneof == 'rfcn_box_predictor': - rfcn_box_predictor = box_predictor_config.rfcn_box_predictor - conv_hyperparams = argscope_fn(rfcn_box_predictor.conv_hyperparams, - is_training) - box_predictor_object = box_predictor.RfcnBoxPredictor( - is_training=is_training, - num_classes=num_classes, - conv_hyperparams=conv_hyperparams, - crop_size=[rfcn_box_predictor.crop_height, - rfcn_box_predictor.crop_width], - num_spatial_bins=[rfcn_box_predictor.num_spatial_bins_height, - rfcn_box_predictor.num_spatial_bins_width], - depth=rfcn_box_predictor.depth, - box_code_size=rfcn_box_predictor.box_code_size) - return box_predictor_object - raise ValueError('Unknown box predictor: {}'.format(box_predictor_oneof)) diff --git a/object_detection/builders/box_predictor_builder_test.py b/object_detection/builders/box_predictor_builder_test.py deleted file mode 100644 index 6bafd482..00000000 --- a/object_detection/builders/box_predictor_builder_test.py +++ /dev/null @@ -1,393 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for box_predictor_builder.""" -import mock -import tensorflow as tf - -from google.protobuf import text_format -from object_detection.builders import box_predictor_builder -from object_detection.builders import hyperparams_builder -from object_detection.protos import box_predictor_pb2 -from object_detection.protos import hyperparams_pb2 - - -class ConvolutionalBoxPredictorBuilderTest(tf.test.TestCase): - - def test_box_predictor_calls_conv_argscope_fn(self): - conv_hyperparams_text_proto = """ - regularizer { - l1_regularizer { - weight: 0.0003 - } - } - initializer { - truncated_normal_initializer { - mean: 0.0 - stddev: 0.3 - } - } - activation: RELU_6 - """ - hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto) - def mock_conv_argscope_builder(conv_hyperparams_arg, is_training): - return (conv_hyperparams_arg, is_training) - - box_predictor_proto = box_predictor_pb2.BoxPredictor() - box_predictor_proto.convolutional_box_predictor.conv_hyperparams.CopyFrom( - hyperparams_proto) - box_predictor = box_predictor_builder.build( - argscope_fn=mock_conv_argscope_builder, - box_predictor_config=box_predictor_proto, - is_training=False, - num_classes=10) - (conv_hyperparams_actual, is_training) = box_predictor._conv_hyperparams - self.assertAlmostEqual((hyperparams_proto.regularizer. - l1_regularizer.weight), - (conv_hyperparams_actual.regularizer.l1_regularizer. - weight)) - self.assertAlmostEqual((hyperparams_proto.initializer. - truncated_normal_initializer.stddev), - (conv_hyperparams_actual.initializer. - truncated_normal_initializer.stddev)) - self.assertAlmostEqual((hyperparams_proto.initializer. - truncated_normal_initializer.mean), - (conv_hyperparams_actual.initializer. 
- truncated_normal_initializer.mean)) - self.assertEqual(hyperparams_proto.activation, - conv_hyperparams_actual.activation) - self.assertFalse(is_training) - - def test_construct_non_default_conv_box_predictor(self): - box_predictor_text_proto = """ - convolutional_box_predictor { - min_depth: 2 - max_depth: 16 - num_layers_before_predictor: 2 - use_dropout: false - dropout_keep_probability: 0.4 - kernel_size: 3 - box_code_size: 3 - apply_sigmoid_to_scores: true - class_prediction_bias_init: 4.0 - } - """ - conv_hyperparams_text_proto = """ - regularizer { - l1_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - """ - hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto) - def mock_conv_argscope_builder(conv_hyperparams_arg, is_training): - return (conv_hyperparams_arg, is_training) - - box_predictor_proto = box_predictor_pb2.BoxPredictor() - text_format.Merge(box_predictor_text_proto, box_predictor_proto) - box_predictor_proto.convolutional_box_predictor.conv_hyperparams.CopyFrom( - hyperparams_proto) - box_predictor = box_predictor_builder.build( - argscope_fn=mock_conv_argscope_builder, - box_predictor_config=box_predictor_proto, - is_training=False, - num_classes=10) - self.assertEqual(box_predictor._min_depth, 2) - self.assertEqual(box_predictor._max_depth, 16) - self.assertEqual(box_predictor._num_layers_before_predictor, 2) - self.assertFalse(box_predictor._use_dropout) - self.assertAlmostEqual(box_predictor._dropout_keep_prob, 0.4) - self.assertTrue(box_predictor._apply_sigmoid_to_scores) - self.assertAlmostEqual(box_predictor._class_prediction_bias_init, 4.0) - self.assertEqual(box_predictor.num_classes, 10) - self.assertFalse(box_predictor._is_training) - - def test_construct_default_conv_box_predictor(self): - box_predictor_text_proto = """ - convolutional_box_predictor { - conv_hyperparams { - regularizer { - l1_regularizer { - } - } - initializer { - 
truncated_normal_initializer { - } - } - } - }""" - box_predictor_proto = box_predictor_pb2.BoxPredictor() - text_format.Merge(box_predictor_text_proto, box_predictor_proto) - box_predictor = box_predictor_builder.build( - argscope_fn=hyperparams_builder.build, - box_predictor_config=box_predictor_proto, - is_training=True, - num_classes=90) - self.assertEqual(box_predictor._min_depth, 0) - self.assertEqual(box_predictor._max_depth, 0) - self.assertEqual(box_predictor._num_layers_before_predictor, 0) - self.assertTrue(box_predictor._use_dropout) - self.assertAlmostEqual(box_predictor._dropout_keep_prob, 0.8) - self.assertFalse(box_predictor._apply_sigmoid_to_scores) - self.assertEqual(box_predictor.num_classes, 90) - self.assertTrue(box_predictor._is_training) - - -class MaskRCNNBoxPredictorBuilderTest(tf.test.TestCase): - - def test_box_predictor_builder_calls_fc_argscope_fn(self): - fc_hyperparams_text_proto = """ - regularizer { - l1_regularizer { - weight: 0.0003 - } - } - initializer { - truncated_normal_initializer { - mean: 0.0 - stddev: 0.3 - } - } - activation: RELU_6 - op: FC - """ - hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(fc_hyperparams_text_proto, hyperparams_proto) - box_predictor_proto = box_predictor_pb2.BoxPredictor() - box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams.CopyFrom( - hyperparams_proto) - mock_argscope_fn = mock.Mock(return_value='arg_scope') - box_predictor = box_predictor_builder.build( - argscope_fn=mock_argscope_fn, - box_predictor_config=box_predictor_proto, - is_training=False, - num_classes=10) - mock_argscope_fn.assert_called_with(hyperparams_proto, False) - self.assertEqual(box_predictor._fc_hyperparams, 'arg_scope') - - def test_non_default_mask_rcnn_box_predictor(self): - fc_hyperparams_text_proto = """ - regularizer { - l1_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - activation: RELU_6 - op: FC - """ - box_predictor_text_proto = """ - 
mask_rcnn_box_predictor { - use_dropout: true - dropout_keep_probability: 0.8 - box_code_size: 3 - } - """ - hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(fc_hyperparams_text_proto, hyperparams_proto) - def mock_fc_argscope_builder(fc_hyperparams_arg, is_training): - return (fc_hyperparams_arg, is_training) - - box_predictor_proto = box_predictor_pb2.BoxPredictor() - text_format.Merge(box_predictor_text_proto, box_predictor_proto) - box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams.CopyFrom( - hyperparams_proto) - box_predictor = box_predictor_builder.build( - argscope_fn=mock_fc_argscope_builder, - box_predictor_config=box_predictor_proto, - is_training=True, - num_classes=90) - self.assertTrue(box_predictor._use_dropout) - self.assertAlmostEqual(box_predictor._dropout_keep_prob, 0.8) - self.assertEqual(box_predictor.num_classes, 90) - self.assertTrue(box_predictor._is_training) - self.assertEqual(box_predictor._box_code_size, 3) - - def test_build_default_mask_rcnn_box_predictor(self): - box_predictor_proto = box_predictor_pb2.BoxPredictor() - box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams.op = ( - hyperparams_pb2.Hyperparams.FC) - box_predictor = box_predictor_builder.build( - argscope_fn=mock.Mock(return_value='arg_scope'), - box_predictor_config=box_predictor_proto, - is_training=True, - num_classes=90) - self.assertFalse(box_predictor._use_dropout) - self.assertAlmostEqual(box_predictor._dropout_keep_prob, 0.5) - self.assertEqual(box_predictor.num_classes, 90) - self.assertTrue(box_predictor._is_training) - self.assertEqual(box_predictor._box_code_size, 4) - self.assertFalse(box_predictor._predict_instance_masks) - self.assertFalse(box_predictor._predict_keypoints) - - def test_build_box_predictor_with_mask_branch(self): - box_predictor_proto = box_predictor_pb2.BoxPredictor() - box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams.op = ( - hyperparams_pb2.Hyperparams.FC) - 
box_predictor_proto.mask_rcnn_box_predictor.conv_hyperparams.op = ( - hyperparams_pb2.Hyperparams.CONV) - box_predictor_proto.mask_rcnn_box_predictor.predict_instance_masks = True - box_predictor_proto.mask_rcnn_box_predictor.mask_prediction_conv_depth = 512 - mock_argscope_fn = mock.Mock(return_value='arg_scope') - box_predictor = box_predictor_builder.build( - argscope_fn=mock_argscope_fn, - box_predictor_config=box_predictor_proto, - is_training=True, - num_classes=90) - mock_argscope_fn.assert_has_calls( - [mock.call(box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams, - True), - mock.call(box_predictor_proto.mask_rcnn_box_predictor.conv_hyperparams, - True)], any_order=True) - self.assertFalse(box_predictor._use_dropout) - self.assertAlmostEqual(box_predictor._dropout_keep_prob, 0.5) - self.assertEqual(box_predictor.num_classes, 90) - self.assertTrue(box_predictor._is_training) - self.assertEqual(box_predictor._box_code_size, 4) - self.assertTrue(box_predictor._predict_instance_masks) - self.assertEqual(box_predictor._mask_prediction_conv_depth, 512) - self.assertFalse(box_predictor._predict_keypoints) - - -class RfcnBoxPredictorBuilderTest(tf.test.TestCase): - - def test_box_predictor_calls_fc_argscope_fn(self): - conv_hyperparams_text_proto = """ - regularizer { - l1_regularizer { - weight: 0.0003 - } - } - initializer { - truncated_normal_initializer { - mean: 0.0 - stddev: 0.3 - } - } - activation: RELU_6 - """ - hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto) - def mock_conv_argscope_builder(conv_hyperparams_arg, is_training): - return (conv_hyperparams_arg, is_training) - - box_predictor_proto = box_predictor_pb2.BoxPredictor() - box_predictor_proto.rfcn_box_predictor.conv_hyperparams.CopyFrom( - hyperparams_proto) - box_predictor = box_predictor_builder.build( - argscope_fn=mock_conv_argscope_builder, - box_predictor_config=box_predictor_proto, - is_training=False, - 
num_classes=10) - (conv_hyperparams_actual, is_training) = box_predictor._conv_hyperparams - self.assertAlmostEqual((hyperparams_proto.regularizer. - l1_regularizer.weight), - (conv_hyperparams_actual.regularizer.l1_regularizer. - weight)) - self.assertAlmostEqual((hyperparams_proto.initializer. - truncated_normal_initializer.stddev), - (conv_hyperparams_actual.initializer. - truncated_normal_initializer.stddev)) - self.assertAlmostEqual((hyperparams_proto.initializer. - truncated_normal_initializer.mean), - (conv_hyperparams_actual.initializer. - truncated_normal_initializer.mean)) - self.assertEqual(hyperparams_proto.activation, - conv_hyperparams_actual.activation) - self.assertFalse(is_training) - - def test_non_default_rfcn_box_predictor(self): - conv_hyperparams_text_proto = """ - regularizer { - l1_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - activation: RELU_6 - """ - box_predictor_text_proto = """ - rfcn_box_predictor { - num_spatial_bins_height: 4 - num_spatial_bins_width: 4 - depth: 4 - box_code_size: 3 - crop_height: 16 - crop_width: 16 - } - """ - hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto) - def mock_conv_argscope_builder(conv_hyperparams_arg, is_training): - return (conv_hyperparams_arg, is_training) - - box_predictor_proto = box_predictor_pb2.BoxPredictor() - text_format.Merge(box_predictor_text_proto, box_predictor_proto) - box_predictor_proto.rfcn_box_predictor.conv_hyperparams.CopyFrom( - hyperparams_proto) - box_predictor = box_predictor_builder.build( - argscope_fn=mock_conv_argscope_builder, - box_predictor_config=box_predictor_proto, - is_training=True, - num_classes=90) - self.assertEqual(box_predictor.num_classes, 90) - self.assertTrue(box_predictor._is_training) - self.assertEqual(box_predictor._box_code_size, 3) - self.assertEqual(box_predictor._num_spatial_bins, [4, 4]) - self.assertEqual(box_predictor._crop_size, [16, 16]) - - 
def test_default_rfcn_box_predictor(self): - conv_hyperparams_text_proto = """ - regularizer { - l1_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - activation: RELU_6 - """ - hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto) - def mock_conv_argscope_builder(conv_hyperparams_arg, is_training): - return (conv_hyperparams_arg, is_training) - - box_predictor_proto = box_predictor_pb2.BoxPredictor() - box_predictor_proto.rfcn_box_predictor.conv_hyperparams.CopyFrom( - hyperparams_proto) - box_predictor = box_predictor_builder.build( - argscope_fn=mock_conv_argscope_builder, - box_predictor_config=box_predictor_proto, - is_training=True, - num_classes=90) - self.assertEqual(box_predictor.num_classes, 90) - self.assertTrue(box_predictor._is_training) - self.assertEqual(box_predictor._box_code_size, 4) - self.assertEqual(box_predictor._num_spatial_bins, [3, 3]) - self.assertEqual(box_predictor._crop_size, [12, 12]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/builders/hyperparams_builder.py b/object_detection/builders/hyperparams_builder.py deleted file mode 100644 index 094ff023..00000000 --- a/object_detection/builders/hyperparams_builder.py +++ /dev/null @@ -1,168 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Builder function to construct tf-slim arg_scope for convolution, fc ops.""" -import tensorflow as tf - -from object_detection.protos import hyperparams_pb2 - -slim = tf.contrib.slim - - -def build(hyperparams_config, is_training): - """Builds tf-slim arg_scope for convolution ops based on the config. - - Returns an arg_scope to use for convolution ops containing weights - initializer, weights regularizer, activation function, batch norm function - and batch norm parameters based on the configuration. - - Note that if the batch_norm parameteres are not specified in the config - (i.e. left to default) then batch norm is excluded from the arg_scope. - - The batch norm parameters are set for updates based on `is_training` argument - and conv_hyperparams_config.batch_norm.train parameter. During training, they - are updated only if batch_norm.train parameter is true. However, during eval, - no updates are made to the batch norm variables. In both cases, their current - values are used during forward pass. - - Args: - hyperparams_config: hyperparams.proto object containing - hyperparameters. - is_training: Whether the network is in training mode. - - Returns: - arg_scope: tf-slim arg_scope containing hyperparameters for ops. - - Raises: - ValueError: if hyperparams_config is not of type hyperparams.Hyperparams. 
- """ - if not isinstance(hyperparams_config, - hyperparams_pb2.Hyperparams): - raise ValueError('hyperparams_config not of type ' - 'hyperparams_pb.Hyperparams.') - - batch_norm = None - batch_norm_params = None - if hyperparams_config.HasField('batch_norm'): - batch_norm = slim.batch_norm - batch_norm_params = _build_batch_norm_params( - hyperparams_config.batch_norm, is_training) - - affected_ops = [slim.conv2d, slim.separable_conv2d, slim.conv2d_transpose] - if hyperparams_config.HasField('op') and ( - hyperparams_config.op == hyperparams_pb2.Hyperparams.FC): - affected_ops = [slim.fully_connected] - with slim.arg_scope( - affected_ops, - weights_regularizer=_build_regularizer( - hyperparams_config.regularizer), - weights_initializer=_build_initializer( - hyperparams_config.initializer), - activation_fn=_build_activation_fn(hyperparams_config.activation), - normalizer_fn=batch_norm, - normalizer_params=batch_norm_params) as sc: - return sc - - -def _build_activation_fn(activation_fn): - """Builds a callable activation from config. - - Args: - activation_fn: hyperparams_pb2.Hyperparams.activation - - Returns: - Callable activation function. - - Raises: - ValueError: On unknown activation function. - """ - if activation_fn == hyperparams_pb2.Hyperparams.NONE: - return None - if activation_fn == hyperparams_pb2.Hyperparams.RELU: - return tf.nn.relu - if activation_fn == hyperparams_pb2.Hyperparams.RELU_6: - return tf.nn.relu6 - raise ValueError('Unknown activation function: {}'.format(activation_fn)) - - -def _build_regularizer(regularizer): - """Builds a tf-slim regularizer from config. - - Args: - regularizer: hyperparams_pb2.Hyperparams.regularizer proto. - - Returns: - tf-slim regularizer. - - Raises: - ValueError: On unknown regularizer. 
- """ - regularizer_oneof = regularizer.WhichOneof('regularizer_oneof') - if regularizer_oneof == 'l1_regularizer': - return slim.l1_regularizer(scale=float(regularizer.l1_regularizer.weight)) - if regularizer_oneof == 'l2_regularizer': - return slim.l2_regularizer(scale=float(regularizer.l2_regularizer.weight)) - raise ValueError('Unknown regularizer function: {}'.format(regularizer_oneof)) - - -def _build_initializer(initializer): - """Build a tf initializer from config. - - Args: - initializer: hyperparams_pb2.Hyperparams.regularizer proto. - - Returns: - tf initializer. - - Raises: - ValueError: On unknown initializer. - """ - initializer_oneof = initializer.WhichOneof('initializer_oneof') - if initializer_oneof == 'truncated_normal_initializer': - return tf.truncated_normal_initializer( - mean=initializer.truncated_normal_initializer.mean, - stddev=initializer.truncated_normal_initializer.stddev) - if initializer_oneof == 'variance_scaling_initializer': - enum_descriptor = (hyperparams_pb2.VarianceScalingInitializer. - DESCRIPTOR.enum_types_by_name['Mode']) - mode = enum_descriptor.values_by_number[initializer. - variance_scaling_initializer. - mode].name - return slim.variance_scaling_initializer( - factor=initializer.variance_scaling_initializer.factor, - mode=mode, - uniform=initializer.variance_scaling_initializer.uniform) - raise ValueError('Unknown initializer function: {}'.format( - initializer_oneof)) - - -def _build_batch_norm_params(batch_norm, is_training): - """Build a dictionary of batch_norm params from config. - - Args: - batch_norm: hyperparams_pb2.ConvHyperparams.batch_norm proto. - is_training: Whether the models is in training mode. - - Returns: - A dictionary containing batch_norm parameters. 
- """ - batch_norm_params = { - 'decay': batch_norm.decay, - 'center': batch_norm.center, - 'scale': batch_norm.scale, - 'epsilon': batch_norm.epsilon, - 'is_training': is_training and batch_norm.train, - } - return batch_norm_params diff --git a/object_detection/builders/hyperparams_builder_test.py b/object_detection/builders/hyperparams_builder_test.py deleted file mode 100644 index a9808076..00000000 --- a/object_detection/builders/hyperparams_builder_test.py +++ /dev/null @@ -1,449 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests object_detection.core.hyperparams_builder.""" - -import numpy as np -import tensorflow as tf - -from google.protobuf import text_format - -from object_detection.builders import hyperparams_builder -from object_detection.protos import hyperparams_pb2 - -slim = tf.contrib.slim - - -class HyperparamsBuilderTest(tf.test.TestCase): - - # TODO: Make this a public api in slim arg_scope.py. 
- def _get_scope_key(self, op): - return getattr(op, '_key_op', str(op)) - - def test_default_arg_scope_has_conv2d_op(self): - conv_hyperparams_text_proto = """ - regularizer { - l1_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True) - self.assertTrue(self._get_scope_key(slim.conv2d) in scope) - - def test_default_arg_scope_has_separable_conv2d_op(self): - conv_hyperparams_text_proto = """ - regularizer { - l1_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True) - self.assertTrue(self._get_scope_key(slim.separable_conv2d) in scope) - - def test_default_arg_scope_has_conv2d_transpose_op(self): - conv_hyperparams_text_proto = """ - regularizer { - l1_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True) - self.assertTrue(self._get_scope_key(slim.conv2d_transpose) in scope) - - def test_explicit_fc_op_arg_scope_has_fully_connected_op(self): - conv_hyperparams_text_proto = """ - op: FC - regularizer { - l1_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True) - self.assertTrue(self._get_scope_key(slim.fully_connected) in scope) 
- - def test_separable_conv2d_and_conv2d_and_transpose_have_same_parameters(self): - conv_hyperparams_text_proto = """ - regularizer { - l1_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True) - kwargs_1, kwargs_2, kwargs_3 = scope.values() - self.assertDictEqual(kwargs_1, kwargs_2) - self.assertDictEqual(kwargs_1, kwargs_3) - - def test_return_l1_regularized_weights(self): - conv_hyperparams_text_proto = """ - regularizer { - l1_regularizer { - weight: 0.5 - } - } - initializer { - truncated_normal_initializer { - } - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True) - conv_scope_arguments = scope.values()[0] - regularizer = conv_scope_arguments['weights_regularizer'] - weights = np.array([1., -1, 4., 2.]) - with self.test_session() as sess: - result = sess.run(regularizer(tf.constant(weights))) - self.assertAllClose(np.abs(weights).sum() * 0.5, result) - - def test_return_l2_regularizer_weights(self): - conv_hyperparams_text_proto = """ - regularizer { - l2_regularizer { - weight: 0.42 - } - } - initializer { - truncated_normal_initializer { - } - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True) - conv_scope_arguments = scope.values()[0] - - regularizer = conv_scope_arguments['weights_regularizer'] - weights = np.array([1., -1, 4., 2.]) - with self.test_session() as sess: - result = sess.run(regularizer(tf.constant(weights))) - self.assertAllClose(np.power(weights, 2).sum() / 2.0 * 0.42, result) - - 
def test_return_non_default_batch_norm_params_with_train_during_train(self): - conv_hyperparams_text_proto = """ - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - batch_norm { - decay: 0.7 - center: false - scale: true - epsilon: 0.03 - train: true - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True) - conv_scope_arguments = scope.values()[0] - self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm) - batch_norm_params = conv_scope_arguments['normalizer_params'] - self.assertAlmostEqual(batch_norm_params['decay'], 0.7) - self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03) - self.assertFalse(batch_norm_params['center']) - self.assertTrue(batch_norm_params['scale']) - self.assertTrue(batch_norm_params['is_training']) - - def test_return_batch_norm_params_with_notrain_during_eval(self): - conv_hyperparams_text_proto = """ - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - batch_norm { - decay: 0.7 - center: false - scale: true - epsilon: 0.03 - train: true - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=False) - conv_scope_arguments = scope.values()[0] - self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm) - batch_norm_params = conv_scope_arguments['normalizer_params'] - self.assertAlmostEqual(batch_norm_params['decay'], 0.7) - self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03) - self.assertFalse(batch_norm_params['center']) - self.assertTrue(batch_norm_params['scale']) - self.assertFalse(batch_norm_params['is_training']) - - def 
test_return_batch_norm_params_with_notrain_when_train_is_false(self): - conv_hyperparams_text_proto = """ - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - batch_norm { - decay: 0.7 - center: false - scale: true - epsilon: 0.03 - train: false - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True) - conv_scope_arguments = scope.values()[0] - self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm) - batch_norm_params = conv_scope_arguments['normalizer_params'] - self.assertAlmostEqual(batch_norm_params['decay'], 0.7) - self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03) - self.assertFalse(batch_norm_params['center']) - self.assertTrue(batch_norm_params['scale']) - self.assertFalse(batch_norm_params['is_training']) - - def test_do_not_use_batch_norm_if_default(self): - conv_hyperparams_text_proto = """ - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True) - conv_scope_arguments = scope.values()[0] - self.assertEqual(conv_scope_arguments['normalizer_fn'], None) - self.assertEqual(conv_scope_arguments['normalizer_params'], None) - - def test_use_none_activation(self): - conv_hyperparams_text_proto = """ - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - activation: NONE - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True) - conv_scope_arguments = scope.values()[0] - 
self.assertEqual(conv_scope_arguments['activation_fn'], None) - - def test_use_relu_activation(self): - conv_hyperparams_text_proto = """ - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - activation: RELU - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True) - conv_scope_arguments = scope.values()[0] - self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu) - - def test_use_relu_6_activation(self): - conv_hyperparams_text_proto = """ - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - activation: RELU_6 - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True) - conv_scope_arguments = scope.values()[0] - self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu6) - - def _assert_variance_in_range(self, initializer, shape, variance, - tol=1e-2): - with tf.Graph().as_default() as g: - with self.test_session(graph=g) as sess: - var = tf.get_variable( - name='test', - shape=shape, - dtype=tf.float32, - initializer=initializer) - sess.run(tf.global_variables_initializer()) - values = sess.run(var) - self.assertAllClose(np.var(values), variance, tol, tol) - - def test_variance_in_range_with_variance_scaling_initializer_fan_in(self): - conv_hyperparams_text_proto = """ - regularizer { - l2_regularizer { - } - } - initializer { - variance_scaling_initializer { - factor: 2.0 - mode: FAN_IN - uniform: false - } - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True) - 
conv_scope_arguments = scope.values()[0] - initializer = conv_scope_arguments['weights_initializer'] - self._assert_variance_in_range(initializer, shape=[100, 40], - variance=2. / 100.) - - def test_variance_in_range_with_variance_scaling_initializer_fan_out(self): - conv_hyperparams_text_proto = """ - regularizer { - l2_regularizer { - } - } - initializer { - variance_scaling_initializer { - factor: 2.0 - mode: FAN_OUT - uniform: false - } - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True) - conv_scope_arguments = scope.values()[0] - initializer = conv_scope_arguments['weights_initializer'] - self._assert_variance_in_range(initializer, shape=[100, 40], - variance=2. / 40.) - - def test_variance_in_range_with_variance_scaling_initializer_fan_avg(self): - conv_hyperparams_text_proto = """ - regularizer { - l2_regularizer { - } - } - initializer { - variance_scaling_initializer { - factor: 2.0 - mode: FAN_AVG - uniform: false - } - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True) - conv_scope_arguments = scope.values()[0] - initializer = conv_scope_arguments['weights_initializer'] - self._assert_variance_in_range(initializer, shape=[100, 40], - variance=4. / (100. 
+ 40.)) - - def test_variance_in_range_with_variance_scaling_initializer_uniform(self): - conv_hyperparams_text_proto = """ - regularizer { - l2_regularizer { - } - } - initializer { - variance_scaling_initializer { - factor: 2.0 - mode: FAN_IN - uniform: true - } - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True) - conv_scope_arguments = scope.values()[0] - initializer = conv_scope_arguments['weights_initializer'] - self._assert_variance_in_range(initializer, shape=[100, 40], - variance=2. / 100.) - - def test_variance_in_range_with_truncated_normal_initializer(self): - conv_hyperparams_text_proto = """ - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - mean: 0.0 - stddev: 0.8 - } - } - """ - conv_hyperparams_proto = hyperparams_pb2.Hyperparams() - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto) - scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True) - conv_scope_arguments = scope.values()[0] - initializer = conv_scope_arguments['weights_initializer'] - self._assert_variance_in_range(initializer, shape=[100, 40], - variance=0.49, tol=1e-1) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/builders/image_resizer_builder.py b/object_detection/builders/image_resizer_builder.py deleted file mode 100644 index 9d81c7d3..00000000 --- a/object_detection/builders/image_resizer_builder.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Builder function for image resizing operations.""" -import functools -import tensorflow as tf - -from object_detection.core import preprocessor -from object_detection.protos import image_resizer_pb2 - - -def _tf_resize_method(resize_method): - """Maps image resize method from enumeration type to TensorFlow. - - Args: - resize_method: The resize_method attribute of keep_aspect_ratio_resizer or - fixed_shape_resizer. - - Returns: - method: The corresponding TensorFlow ResizeMethod. - - Raises: - ValueError: if `resize_method` is of unknown type. - """ - dict_method = { - image_resizer_pb2.BILINEAR: - tf.image.ResizeMethod.BILINEAR, - image_resizer_pb2.NEAREST_NEIGHBOR: - tf.image.ResizeMethod.NEAREST_NEIGHBOR, - image_resizer_pb2.BICUBIC: - tf.image.ResizeMethod.BICUBIC, - image_resizer_pb2.AREA: - tf.image.ResizeMethod.AREA - } - if resize_method in dict_method: - return dict_method[resize_method] - else: - raise ValueError('Unknown resize_method') - - -def build(image_resizer_config): - """Builds callable for image resizing operations. - - Args: - image_resizer_config: image_resizer.proto object containing parameters for - an image resizing operation. - - Returns: - image_resizer_fn: Callable for image resizing. This callable always takes - a rank-3 image tensor (corresponding to a single image) and returns a - rank-3 image tensor, possibly with new spatial dimensions. - - Raises: - ValueError: if `image_resizer_config` is of incorrect type. 
- ValueError: if `image_resizer_config.image_resizer_oneof` is of expected - type. - ValueError: if min_dimension > max_dimension when keep_aspect_ratio_resizer - is used. - """ - if not isinstance(image_resizer_config, image_resizer_pb2.ImageResizer): - raise ValueError('image_resizer_config not of type ' - 'image_resizer_pb2.ImageResizer.') - - if image_resizer_config.WhichOneof( - 'image_resizer_oneof') == 'keep_aspect_ratio_resizer': - keep_aspect_ratio_config = image_resizer_config.keep_aspect_ratio_resizer - if not (keep_aspect_ratio_config.min_dimension <= - keep_aspect_ratio_config.max_dimension): - raise ValueError('min_dimension > max_dimension') - method = _tf_resize_method(keep_aspect_ratio_config.resize_method) - return functools.partial( - preprocessor.resize_to_range, - min_dimension=keep_aspect_ratio_config.min_dimension, - max_dimension=keep_aspect_ratio_config.max_dimension, - method=method) - if image_resizer_config.WhichOneof( - 'image_resizer_oneof') == 'fixed_shape_resizer': - fixed_shape_resizer_config = image_resizer_config.fixed_shape_resizer - method = _tf_resize_method(fixed_shape_resizer_config.resize_method) - return functools.partial( - preprocessor.resize_image, - new_height=fixed_shape_resizer_config.height, - new_width=fixed_shape_resizer_config.width, - method=method) - raise ValueError('Invalid image resizer option.') diff --git a/object_detection/builders/image_resizer_builder_test.py b/object_detection/builders/image_resizer_builder_test.py deleted file mode 100644 index 4ef557a5..00000000 --- a/object_detection/builders/image_resizer_builder_test.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for object_detection.builders.image_resizer_builder.""" -import numpy as np -import tensorflow as tf -from google.protobuf import text_format -from object_detection.builders import image_resizer_builder -from object_detection.protos import image_resizer_pb2 - - -class ImageResizerBuilderTest(tf.test.TestCase): - - def _shape_of_resized_random_image_given_text_proto(self, input_shape, - text_proto): - image_resizer_config = image_resizer_pb2.ImageResizer() - text_format.Merge(text_proto, image_resizer_config) - image_resizer_fn = image_resizer_builder.build(image_resizer_config) - images = tf.to_float( - tf.random_uniform(input_shape, minval=0, maxval=255, dtype=tf.int32)) - resized_images = image_resizer_fn(images) - with self.test_session() as sess: - return sess.run(resized_images).shape - - def test_built_keep_aspect_ratio_resizer_returns_expected_shape(self): - image_resizer_text_proto = """ - keep_aspect_ratio_resizer { - min_dimension: 10 - max_dimension: 20 - } - """ - input_shape = (50, 25, 3) - expected_output_shape = (20, 10, 3) - output_shape = self._shape_of_resized_random_image_given_text_proto( - input_shape, image_resizer_text_proto) - self.assertEqual(output_shape, expected_output_shape) - - def test_built_fixed_shape_resizer_returns_expected_shape(self): - image_resizer_text_proto = """ - fixed_shape_resizer { - height: 10 - width: 20 - } - """ - input_shape = (50, 25, 3) - expected_output_shape = (10, 20, 3) - output_shape = 
self._shape_of_resized_random_image_given_text_proto( - input_shape, image_resizer_text_proto) - self.assertEqual(output_shape, expected_output_shape) - - def test_raises_error_on_invalid_input(self): - invalid_input = 'invalid_input' - with self.assertRaises(ValueError): - image_resizer_builder.build(invalid_input) - - def _resized_image_given_text_proto(self, image, text_proto): - image_resizer_config = image_resizer_pb2.ImageResizer() - text_format.Merge(text_proto, image_resizer_config) - image_resizer_fn = image_resizer_builder.build(image_resizer_config) - image_placeholder = tf.placeholder(tf.uint8, [1, None, None, 3]) - resized_image = image_resizer_fn(image_placeholder) - with self.test_session() as sess: - return sess.run(resized_image, feed_dict={image_placeholder: image}) - - def test_fixed_shape_resizer_nearest_neighbor_method(self): - image_resizer_text_proto = """ - fixed_shape_resizer { - height: 1 - width: 1 - resize_method: NEAREST_NEIGHBOR - } - """ - image = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) - image = np.expand_dims(image, axis=2) - image = np.tile(image, (1, 1, 3)) - image = np.expand_dims(image, axis=0) - resized_image = self._resized_image_given_text_proto( - image, image_resizer_text_proto) - vals = np.unique(resized_image).tolist() - self.assertEqual(len(vals), 1) - self.assertEqual(vals[0], 1) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/builders/input_reader_builder.py b/object_detection/builders/input_reader_builder.py deleted file mode 100644 index 530e879c..00000000 --- a/object_detection/builders/input_reader_builder.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Input reader builder. - -Creates data sources for DetectionModels from an InputReader config. See -input_reader.proto for options. - -Note: If users wishes to also use their own InputReaders with the Object -Detection configuration framework, they should define their own builder function -that wraps the build function. -""" - -import tensorflow as tf - -from object_detection.data_decoders import tf_example_decoder -from object_detection.protos import input_reader_pb2 - -parallel_reader = tf.contrib.slim.parallel_reader - - -def build(input_reader_config): - """Builds a tensor dictionary based on the InputReader config. - - Args: - input_reader_config: A input_reader_pb2.InputReader object. - - Returns: - A tensor dict based on the input_reader_config. - - Raises: - ValueError: On invalid input reader proto. - ValueError: If no input paths are specified. - """ - if not isinstance(input_reader_config, input_reader_pb2.InputReader): - raise ValueError('input_reader_config not of type ' - 'input_reader_pb2.InputReader.') - - if input_reader_config.WhichOneof('input_reader') == 'tf_record_input_reader': - config = input_reader_config.tf_record_input_reader - if not config.input_path: - raise ValueError('At least one input path must be specified in ' - '`input_reader_config`.') - _, string_tensor = parallel_reader.parallel_read( - config.input_path[:], # Convert `RepeatedScalarContainer` to list. 
- reader_class=tf.TFRecordReader, - num_epochs=(input_reader_config.num_epochs - if input_reader_config.num_epochs else None), - num_readers=input_reader_config.num_readers, - shuffle=input_reader_config.shuffle, - dtypes=[tf.string, tf.string], - capacity=input_reader_config.queue_capacity, - min_after_dequeue=input_reader_config.min_after_dequeue) - - label_map_proto_file = None - if input_reader_config.HasField('label_map_path'): - label_map_proto_file = input_reader_config.label_map_path - decoder = tf_example_decoder.TfExampleDecoder( - load_instance_masks=input_reader_config.load_instance_masks, - label_map_proto_file=label_map_proto_file) - return decoder.decode(string_tensor) - - raise ValueError('Unsupported input_reader_config.') diff --git a/object_detection/builders/input_reader_builder_test.py b/object_detection/builders/input_reader_builder_test.py deleted file mode 100644 index f09f60e5..00000000 --- a/object_detection/builders/input_reader_builder_test.py +++ /dev/null @@ -1,144 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for input_reader_builder.""" - -import os -import numpy as np -import tensorflow as tf - -from google.protobuf import text_format - -from tensorflow.core.example import example_pb2 -from tensorflow.core.example import feature_pb2 -from object_detection.builders import input_reader_builder -from object_detection.core import standard_fields as fields -from object_detection.protos import input_reader_pb2 - - -class InputReaderBuilderTest(tf.test.TestCase): - - def create_tf_record(self): - path = os.path.join(self.get_temp_dir(), 'tfrecord') - writer = tf.python_io.TFRecordWriter(path) - - image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) - flat_mask = (4 * 5) * [1.0] - with self.test_session(): - encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).eval() - example = example_pb2.Example(features=feature_pb2.Features(feature={ - 'image/encoded': feature_pb2.Feature( - bytes_list=feature_pb2.BytesList(value=[encoded_jpeg])), - 'image/format': feature_pb2.Feature( - bytes_list=feature_pb2.BytesList(value=['jpeg'.encode('utf-8')])), - 'image/height': feature_pb2.Feature( - int64_list=feature_pb2.Int64List(value=[4])), - 'image/width': feature_pb2.Feature( - int64_list=feature_pb2.Int64List(value=[5])), - 'image/object/bbox/xmin': feature_pb2.Feature( - float_list=feature_pb2.FloatList(value=[0.0])), - 'image/object/bbox/xmax': feature_pb2.Feature( - float_list=feature_pb2.FloatList(value=[1.0])), - 'image/object/bbox/ymin': feature_pb2.Feature( - float_list=feature_pb2.FloatList(value=[0.0])), - 'image/object/bbox/ymax': feature_pb2.Feature( - float_list=feature_pb2.FloatList(value=[1.0])), - 'image/object/class/label': feature_pb2.Feature( - int64_list=feature_pb2.Int64List(value=[2])), - 'image/object/mask': feature_pb2.Feature( - float_list=feature_pb2.FloatList(value=flat_mask)), - })) - writer.write(example.SerializeToString()) - 
writer.close() - - return path - - def test_build_tf_record_input_reader(self): - tf_record_path = self.create_tf_record() - - input_reader_text_proto = """ - shuffle: false - num_readers: 1 - tf_record_input_reader {{ - input_path: '{0}' - }} - """.format(tf_record_path) - input_reader_proto = input_reader_pb2.InputReader() - text_format.Merge(input_reader_text_proto, input_reader_proto) - tensor_dict = input_reader_builder.build(input_reader_proto) - - sv = tf.train.Supervisor(logdir=self.get_temp_dir()) - with sv.prepare_or_wait_for_session() as sess: - sv.start_queue_runners(sess) - output_dict = sess.run(tensor_dict) - - self.assertTrue(fields.InputDataFields.groundtruth_instance_masks - not in output_dict) - self.assertEquals( - (4, 5, 3), output_dict[fields.InputDataFields.image].shape) - self.assertEquals( - [2], output_dict[fields.InputDataFields.groundtruth_classes]) - self.assertEquals( - (1, 4), output_dict[fields.InputDataFields.groundtruth_boxes].shape) - self.assertAllEqual( - [0.0, 0.0, 1.0, 1.0], - output_dict[fields.InputDataFields.groundtruth_boxes][0]) - - def test_build_tf_record_input_reader_and_load_instance_masks(self): - tf_record_path = self.create_tf_record() - - input_reader_text_proto = """ - shuffle: false - num_readers: 1 - load_instance_masks: true - tf_record_input_reader {{ - input_path: '{0}' - }} - """.format(tf_record_path) - input_reader_proto = input_reader_pb2.InputReader() - text_format.Merge(input_reader_text_proto, input_reader_proto) - tensor_dict = input_reader_builder.build(input_reader_proto) - - sv = tf.train.Supervisor(logdir=self.get_temp_dir()) - with sv.prepare_or_wait_for_session() as sess: - sv.start_queue_runners(sess) - output_dict = sess.run(tensor_dict) - - self.assertEquals( - (4, 5, 3), output_dict[fields.InputDataFields.image].shape) - self.assertEquals( - [2], output_dict[fields.InputDataFields.groundtruth_classes]) - self.assertEquals( - (1, 4), 
output_dict[fields.InputDataFields.groundtruth_boxes].shape) - self.assertAllEqual( - [0.0, 0.0, 1.0, 1.0], - output_dict[fields.InputDataFields.groundtruth_boxes][0]) - self.assertAllEqual( - (1, 4, 5), - output_dict[fields.InputDataFields.groundtruth_instance_masks].shape) - - def test_raises_error_with_no_input_paths(self): - input_reader_text_proto = """ - shuffle: false - num_readers: 1 - load_instance_masks: true - """ - input_reader_proto = input_reader_pb2.InputReader() - text_format.Merge(input_reader_text_proto, input_reader_proto) - with self.assertRaises(ValueError): - input_reader_builder.build(input_reader_proto) - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/builders/losses_builder.py b/object_detection/builders/losses_builder.py deleted file mode 100644 index c2b0a1f1..00000000 --- a/object_detection/builders/losses_builder.py +++ /dev/null @@ -1,211 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""A function to build localization and classification losses from config.""" - -from object_detection.core import losses -from object_detection.protos import losses_pb2 - - -def build(loss_config): - """Build losses based on the config. - - Builds classification, localization losses and optionally a hard example miner - based on the config. 
- - Args: - loss_config: A losses_pb2.Loss object. - - Returns: - classification_loss: Classification loss object. - localization_loss: Localization loss object. - classification_weight: Classification loss weight. - localization_weight: Localization loss weight. - hard_example_miner: Hard example miner object. - - Raises: - ValueError: If hard_example_miner is used with sigmoid_focal_loss. - """ - classification_loss = _build_classification_loss( - loss_config.classification_loss) - localization_loss = _build_localization_loss( - loss_config.localization_loss) - classification_weight = loss_config.classification_weight - localization_weight = loss_config.localization_weight - hard_example_miner = None - if loss_config.HasField('hard_example_miner'): - if (loss_config.classification_loss.WhichOneof('classification_loss') == - 'weighted_sigmoid_focal'): - raise ValueError('HardExampleMiner should not be used with sigmoid focal ' - 'loss') - hard_example_miner = build_hard_example_miner( - loss_config.hard_example_miner, - classification_weight, - localization_weight) - return (classification_loss, localization_loss, - classification_weight, - localization_weight, hard_example_miner) - - -def build_hard_example_miner(config, - classification_weight, - localization_weight): - """Builds hard example miner based on the config. - - Args: - config: A losses_pb2.HardExampleMiner object. - classification_weight: Classification loss weight. - localization_weight: Localization loss weight. - - Returns: - Hard example miner. 
- - """ - loss_type = None - if config.loss_type == losses_pb2.HardExampleMiner.BOTH: - loss_type = 'both' - if config.loss_type == losses_pb2.HardExampleMiner.CLASSIFICATION: - loss_type = 'cls' - if config.loss_type == losses_pb2.HardExampleMiner.LOCALIZATION: - loss_type = 'loc' - - max_negatives_per_positive = None - num_hard_examples = None - if config.max_negatives_per_positive > 0: - max_negatives_per_positive = config.max_negatives_per_positive - if config.num_hard_examples > 0: - num_hard_examples = config.num_hard_examples - hard_example_miner = losses.HardExampleMiner( - num_hard_examples=num_hard_examples, - iou_threshold=config.iou_threshold, - loss_type=loss_type, - cls_loss_weight=classification_weight, - loc_loss_weight=localization_weight, - max_negatives_per_positive=max_negatives_per_positive, - min_negatives_per_image=config.min_negatives_per_image) - return hard_example_miner - - -def build_faster_rcnn_classification_loss(loss_config): - """Builds a classification loss for Faster RCNN based on the loss config. - - Args: - loss_config: A losses_pb2.ClassificationLoss object. - - Returns: - Loss based on the config. - - Raises: - ValueError: On invalid loss_config. - """ - if not isinstance(loss_config, losses_pb2.ClassificationLoss): - raise ValueError('loss_config not of type losses_pb2.ClassificationLoss.') - - loss_type = loss_config.WhichOneof('classification_loss') - - if loss_type == 'weighted_sigmoid': - config = loss_config.weighted_sigmoid - return losses.WeightedSigmoidClassificationLoss( - anchorwise_output=config.anchorwise_output) - if loss_type == 'weighted_softmax': - config = loss_config.weighted_softmax - return losses.WeightedSoftmaxClassificationLoss( - anchorwise_output=config.anchorwise_output) - - # By default, Faster RCNN second stage classifier uses Softmax loss - # with anchor-wise outputs. 
- return losses.WeightedSoftmaxClassificationLoss( - anchorwise_output=True) - - -def _build_localization_loss(loss_config): - """Builds a localization loss based on the loss config. - - Args: - loss_config: A losses_pb2.LocalizationLoss object. - - Returns: - Loss based on the config. - - Raises: - ValueError: On invalid loss_config. - """ - if not isinstance(loss_config, losses_pb2.LocalizationLoss): - raise ValueError('loss_config not of type losses_pb2.LocalizationLoss.') - - loss_type = loss_config.WhichOneof('localization_loss') - - if loss_type == 'weighted_l2': - config = loss_config.weighted_l2 - return losses.WeightedL2LocalizationLoss( - anchorwise_output=config.anchorwise_output) - - if loss_type == 'weighted_smooth_l1': - config = loss_config.weighted_smooth_l1 - return losses.WeightedSmoothL1LocalizationLoss( - anchorwise_output=config.anchorwise_output) - - if loss_type == 'weighted_iou': - return losses.WeightedIOULocalizationLoss() - - raise ValueError('Empty loss config.') - - -def _build_classification_loss(loss_config): - """Builds a classification loss based on the loss config. - - Args: - loss_config: A losses_pb2.ClassificationLoss object. - - Returns: - Loss based on the config. - - Raises: - ValueError: On invalid loss_config. 
- """ - if not isinstance(loss_config, losses_pb2.ClassificationLoss): - raise ValueError('loss_config not of type losses_pb2.ClassificationLoss.') - - loss_type = loss_config.WhichOneof('classification_loss') - - if loss_type == 'weighted_sigmoid': - config = loss_config.weighted_sigmoid - return losses.WeightedSigmoidClassificationLoss( - anchorwise_output=config.anchorwise_output) - - if loss_type == 'weighted_sigmoid_focal': - config = loss_config.weighted_sigmoid_focal - alpha = None - if config.HasField('alpha'): - alpha = config.alpha - return losses.SigmoidFocalClassificationLoss( - anchorwise_output=config.anchorwise_output, - gamma=config.gamma, - alpha=alpha) - - if loss_type == 'weighted_softmax': - config = loss_config.weighted_softmax - return losses.WeightedSoftmaxClassificationLoss( - anchorwise_output=config.anchorwise_output, - logit_scale=config.logit_scale) - - if loss_type == 'bootstrapped_sigmoid': - config = loss_config.bootstrapped_sigmoid - return losses.BootstrappedSigmoidClassificationLoss( - alpha=config.alpha, - bootstrap_type=('hard' if config.hard_bootstrap else 'soft'), - anchorwise_output=config.anchorwise_output) - - raise ValueError('Empty loss config.') diff --git a/object_detection/builders/losses_builder_test.py b/object_detection/builders/losses_builder_test.py deleted file mode 100644 index d4105203..00000000 --- a/object_detection/builders/losses_builder_test.py +++ /dev/null @@ -1,438 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for losses_builder.""" - -import tensorflow as tf - -from google.protobuf import text_format -from object_detection.builders import losses_builder -from object_detection.core import losses -from object_detection.protos import losses_pb2 - - -class LocalizationLossBuilderTest(tf.test.TestCase): - - def test_build_weighted_l2_localization_loss(self): - losses_text_proto = """ - localization_loss { - weighted_l2 { - } - } - classification_loss { - weighted_softmax { - } - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - _, localization_loss, _, _, _ = losses_builder.build(losses_proto) - self.assertTrue(isinstance(localization_loss, - losses.WeightedL2LocalizationLoss)) - - def test_build_weighted_smooth_l1_localization_loss(self): - losses_text_proto = """ - localization_loss { - weighted_smooth_l1 { - } - } - classification_loss { - weighted_softmax { - } - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - _, localization_loss, _, _, _ = losses_builder.build(losses_proto) - self.assertTrue(isinstance(localization_loss, - losses.WeightedSmoothL1LocalizationLoss)) - - def test_build_weighted_iou_localization_loss(self): - losses_text_proto = """ - localization_loss { - weighted_iou { - } - } - classification_loss { - weighted_softmax { - } - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - _, localization_loss, _, _, _ = losses_builder.build(losses_proto) - self.assertTrue(isinstance(localization_loss, - losses.WeightedIOULocalizationLoss)) - - def test_anchorwise_output(self): - losses_text_proto = """ - localization_loss { - weighted_smooth_l1 { - anchorwise_output: true - } - } - classification_loss { - weighted_softmax { - } 
- } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - _, localization_loss, _, _, _ = losses_builder.build(losses_proto) - self.assertTrue(isinstance(localization_loss, - losses.WeightedSmoothL1LocalizationLoss)) - predictions = tf.constant([[[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]]]) - targets = tf.constant([[[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]]]) - weights = tf.constant([[1.0, 1.0]]) - loss = localization_loss(predictions, targets, weights=weights) - self.assertEqual(loss.shape, [1, 2]) - - def test_raise_error_on_empty_localization_config(self): - losses_text_proto = """ - classification_loss { - weighted_softmax { - } - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - with self.assertRaises(ValueError): - losses_builder._build_localization_loss(losses_proto) - - -class ClassificationLossBuilderTest(tf.test.TestCase): - - def test_build_weighted_sigmoid_classification_loss(self): - losses_text_proto = """ - classification_loss { - weighted_sigmoid { - } - } - localization_loss { - weighted_l2 { - } - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - classification_loss, _, _, _, _ = losses_builder.build(losses_proto) - self.assertTrue(isinstance(classification_loss, - losses.WeightedSigmoidClassificationLoss)) - - def test_build_weighted_sigmoid_focal_classification_loss(self): - losses_text_proto = """ - classification_loss { - weighted_sigmoid_focal { - } - } - localization_loss { - weighted_l2 { - } - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - classification_loss, _, _, _, _ = losses_builder.build(losses_proto) - self.assertTrue(isinstance(classification_loss, - losses.SigmoidFocalClassificationLoss)) - self.assertAlmostEqual(classification_loss._alpha, None) - self.assertAlmostEqual(classification_loss._gamma, 2.0) - - def 
test_build_weighted_sigmoid_focal_loss_non_default(self): - losses_text_proto = """ - classification_loss { - weighted_sigmoid_focal { - alpha: 0.25 - gamma: 3.0 - } - } - localization_loss { - weighted_l2 { - } - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - classification_loss, _, _, _, _ = losses_builder.build(losses_proto) - self.assertTrue(isinstance(classification_loss, - losses.SigmoidFocalClassificationLoss)) - self.assertAlmostEqual(classification_loss._alpha, 0.25) - self.assertAlmostEqual(classification_loss._gamma, 3.0) - - def test_build_weighted_softmax_classification_loss(self): - losses_text_proto = """ - classification_loss { - weighted_softmax { - } - } - localization_loss { - weighted_l2 { - } - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - classification_loss, _, _, _, _ = losses_builder.build(losses_proto) - self.assertTrue(isinstance(classification_loss, - losses.WeightedSoftmaxClassificationLoss)) - - def test_build_weighted_softmax_classification_loss_with_logit_scale(self): - losses_text_proto = """ - classification_loss { - weighted_softmax { - logit_scale: 2.0 - } - } - localization_loss { - weighted_l2 { - } - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - classification_loss, _, _, _, _ = losses_builder.build(losses_proto) - self.assertTrue(isinstance(classification_loss, - losses.WeightedSoftmaxClassificationLoss)) - - def test_build_bootstrapped_sigmoid_classification_loss(self): - losses_text_proto = """ - classification_loss { - bootstrapped_sigmoid { - alpha: 0.5 - } - } - localization_loss { - weighted_l2 { - } - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - classification_loss, _, _, _, _ = losses_builder.build(losses_proto) - self.assertTrue(isinstance(classification_loss, - losses.BootstrappedSigmoidClassificationLoss)) - - 
def test_anchorwise_output(self): - losses_text_proto = """ - classification_loss { - weighted_sigmoid { - anchorwise_output: true - } - } - localization_loss { - weighted_l2 { - } - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - classification_loss, _, _, _, _ = losses_builder.build(losses_proto) - self.assertTrue(isinstance(classification_loss, - losses.WeightedSigmoidClassificationLoss)) - predictions = tf.constant([[[0.0, 1.0, 0.0], [0.0, 0.5, 0.5]]]) - targets = tf.constant([[[0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]]) - weights = tf.constant([[1.0, 1.0]]) - loss = classification_loss(predictions, targets, weights=weights) - self.assertEqual(loss.shape, [1, 2]) - - def test_raise_error_on_empty_config(self): - losses_text_proto = """ - localization_loss { - weighted_l2 { - } - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - with self.assertRaises(ValueError): - losses_builder.build(losses_proto) - - -class HardExampleMinerBuilderTest(tf.test.TestCase): - - def test_do_not_build_hard_example_miner_by_default(self): - losses_text_proto = """ - localization_loss { - weighted_l2 { - } - } - classification_loss { - weighted_softmax { - } - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - _, _, _, _, hard_example_miner = losses_builder.build(losses_proto) - self.assertEqual(hard_example_miner, None) - - def test_build_hard_example_miner_for_classification_loss(self): - losses_text_proto = """ - localization_loss { - weighted_l2 { - } - } - classification_loss { - weighted_softmax { - } - } - hard_example_miner { - loss_type: CLASSIFICATION - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - _, _, _, _, hard_example_miner = losses_builder.build(losses_proto) - self.assertTrue(isinstance(hard_example_miner, losses.HardExampleMiner)) - self.assertEqual(hard_example_miner._loss_type, 
'cls') - - def test_build_hard_example_miner_for_localization_loss(self): - losses_text_proto = """ - localization_loss { - weighted_l2 { - } - } - classification_loss { - weighted_softmax { - } - } - hard_example_miner { - loss_type: LOCALIZATION - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - _, _, _, _, hard_example_miner = losses_builder.build(losses_proto) - self.assertTrue(isinstance(hard_example_miner, losses.HardExampleMiner)) - self.assertEqual(hard_example_miner._loss_type, 'loc') - - def test_build_hard_example_miner_with_non_default_values(self): - losses_text_proto = """ - localization_loss { - weighted_l2 { - } - } - classification_loss { - weighted_softmax { - } - } - hard_example_miner { - num_hard_examples: 32 - iou_threshold: 0.5 - loss_type: LOCALIZATION - max_negatives_per_positive: 10 - min_negatives_per_image: 3 - } - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - _, _, _, _, hard_example_miner = losses_builder.build(losses_proto) - self.assertTrue(isinstance(hard_example_miner, losses.HardExampleMiner)) - self.assertEqual(hard_example_miner._num_hard_examples, 32) - self.assertAlmostEqual(hard_example_miner._iou_threshold, 0.5) - self.assertEqual(hard_example_miner._max_negatives_per_positive, 10) - self.assertEqual(hard_example_miner._min_negatives_per_image, 3) - - -class LossBuilderTest(tf.test.TestCase): - - def test_build_all_loss_parameters(self): - losses_text_proto = """ - localization_loss { - weighted_l2 { - } - } - classification_loss { - weighted_softmax { - } - } - hard_example_miner { - } - classification_weight: 0.8 - localization_weight: 0.2 - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - (classification_loss, localization_loss, - classification_weight, localization_weight, - hard_example_miner) = losses_builder.build(losses_proto) - self.assertTrue(isinstance(hard_example_miner, 
losses.HardExampleMiner)) - self.assertTrue(isinstance(classification_loss, - losses.WeightedSoftmaxClassificationLoss)) - self.assertTrue(isinstance(localization_loss, - losses.WeightedL2LocalizationLoss)) - self.assertAlmostEqual(classification_weight, 0.8) - self.assertAlmostEqual(localization_weight, 0.2) - - def test_raise_error_when_both_focal_loss_and_hard_example_miner(self): - losses_text_proto = """ - localization_loss { - weighted_l2 { - } - } - classification_loss { - weighted_sigmoid_focal { - } - } - hard_example_miner { - } - classification_weight: 0.8 - localization_weight: 0.2 - """ - losses_proto = losses_pb2.Loss() - text_format.Merge(losses_text_proto, losses_proto) - with self.assertRaises(ValueError): - losses_builder.build(losses_proto) - - -class FasterRcnnClassificationLossBuilderTest(tf.test.TestCase): - - def test_build_sigmoid_loss(self): - losses_text_proto = """ - weighted_sigmoid { - } - """ - losses_proto = losses_pb2.ClassificationLoss() - text_format.Merge(losses_text_proto, losses_proto) - classification_loss = losses_builder.build_faster_rcnn_classification_loss( - losses_proto) - self.assertTrue(isinstance(classification_loss, - losses.WeightedSigmoidClassificationLoss)) - - def test_build_softmax_loss(self): - losses_text_proto = """ - weighted_softmax { - } - """ - losses_proto = losses_pb2.ClassificationLoss() - text_format.Merge(losses_text_proto, losses_proto) - classification_loss = losses_builder.build_faster_rcnn_classification_loss( - losses_proto) - self.assertTrue(isinstance(classification_loss, - losses.WeightedSoftmaxClassificationLoss)) - - def test_build_softmax_loss_by_default(self): - losses_text_proto = """ - """ - losses_proto = losses_pb2.ClassificationLoss() - text_format.Merge(losses_text_proto, losses_proto) - classification_loss = losses_builder.build_faster_rcnn_classification_loss( - losses_proto) - self.assertTrue(isinstance(classification_loss, - losses.WeightedSoftmaxClassificationLoss)) - - -if 
__name__ == '__main__': - tf.test.main() diff --git a/object_detection/builders/matcher_builder.py b/object_detection/builders/matcher_builder.py deleted file mode 100644 index 6ec49da9..00000000 --- a/object_detection/builders/matcher_builder.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""A function to build an object detection matcher from configuration.""" - -from object_detection.matchers import argmax_matcher -from object_detection.matchers import bipartite_matcher -from object_detection.protos import matcher_pb2 - - -def build(matcher_config): - """Builds a matcher object based on the matcher config. - - Args: - matcher_config: A matcher.proto object containing the config for the desired - Matcher. - - Returns: - Matcher based on the config. - - Raises: - ValueError: On empty matcher proto. 
- """ - if not isinstance(matcher_config, matcher_pb2.Matcher): - raise ValueError('matcher_config not of type matcher_pb2.Matcher.') - if matcher_config.WhichOneof('matcher_oneof') == 'argmax_matcher': - matcher = matcher_config.argmax_matcher - matched_threshold = unmatched_threshold = None - if not matcher.ignore_thresholds: - matched_threshold = matcher.matched_threshold - unmatched_threshold = matcher.unmatched_threshold - return argmax_matcher.ArgMaxMatcher( - matched_threshold=matched_threshold, - unmatched_threshold=unmatched_threshold, - negatives_lower_than_unmatched=matcher.negatives_lower_than_unmatched, - force_match_for_each_row=matcher.force_match_for_each_row) - if matcher_config.WhichOneof('matcher_oneof') == 'bipartite_matcher': - return bipartite_matcher.GreedyBipartiteMatcher() - raise ValueError('Empty matcher.') diff --git a/object_detection/builders/matcher_builder_test.py b/object_detection/builders/matcher_builder_test.py deleted file mode 100644 index c4275aae..00000000 --- a/object_detection/builders/matcher_builder_test.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for matcher_builder.""" - -import tensorflow as tf - -from google.protobuf import text_format -from object_detection.builders import matcher_builder -from object_detection.matchers import argmax_matcher -from object_detection.matchers import bipartite_matcher -from object_detection.protos import matcher_pb2 - - -class MatcherBuilderTest(tf.test.TestCase): - - def test_build_arg_max_matcher_with_defaults(self): - matcher_text_proto = """ - argmax_matcher { - } - """ - matcher_proto = matcher_pb2.Matcher() - text_format.Merge(matcher_text_proto, matcher_proto) - matcher_object = matcher_builder.build(matcher_proto) - self.assertTrue(isinstance(matcher_object, argmax_matcher.ArgMaxMatcher)) - self.assertAlmostEqual(matcher_object._matched_threshold, 0.5) - self.assertAlmostEqual(matcher_object._unmatched_threshold, 0.5) - self.assertTrue(matcher_object._negatives_lower_than_unmatched) - self.assertFalse(matcher_object._force_match_for_each_row) - - def test_build_arg_max_matcher_without_thresholds(self): - matcher_text_proto = """ - argmax_matcher { - ignore_thresholds: true - } - """ - matcher_proto = matcher_pb2.Matcher() - text_format.Merge(matcher_text_proto, matcher_proto) - matcher_object = matcher_builder.build(matcher_proto) - self.assertTrue(isinstance(matcher_object, argmax_matcher.ArgMaxMatcher)) - self.assertEqual(matcher_object._matched_threshold, None) - self.assertEqual(matcher_object._unmatched_threshold, None) - self.assertTrue(matcher_object._negatives_lower_than_unmatched) - self.assertFalse(matcher_object._force_match_for_each_row) - - def test_build_arg_max_matcher_with_non_default_parameters(self): - matcher_text_proto = """ - argmax_matcher { - matched_threshold: 0.7 - unmatched_threshold: 0.3 - negatives_lower_than_unmatched: false - force_match_for_each_row: true - } - """ - matcher_proto = matcher_pb2.Matcher() - text_format.Merge(matcher_text_proto, 
matcher_proto) - matcher_object = matcher_builder.build(matcher_proto) - self.assertTrue(isinstance(matcher_object, argmax_matcher.ArgMaxMatcher)) - self.assertAlmostEqual(matcher_object._matched_threshold, 0.7) - self.assertAlmostEqual(matcher_object._unmatched_threshold, 0.3) - self.assertFalse(matcher_object._negatives_lower_than_unmatched) - self.assertTrue(matcher_object._force_match_for_each_row) - - def test_build_bipartite_matcher(self): - matcher_text_proto = """ - bipartite_matcher { - } - """ - matcher_proto = matcher_pb2.Matcher() - text_format.Merge(matcher_text_proto, matcher_proto) - matcher_object = matcher_builder.build(matcher_proto) - self.assertTrue( - isinstance(matcher_object, bipartite_matcher.GreedyBipartiteMatcher)) - - def test_raise_error_on_empty_matcher(self): - matcher_text_proto = """ - """ - matcher_proto = matcher_pb2.Matcher() - text_format.Merge(matcher_text_proto, matcher_proto) - with self.assertRaises(ValueError): - matcher_builder.build(matcher_proto) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/builders/model_builder.py b/object_detection/builders/model_builder.py deleted file mode 100644 index 5467a91b..00000000 --- a/object_detection/builders/model_builder.py +++ /dev/null @@ -1,327 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""A function to build a DetectionModel from configuration.""" -from object_detection.builders import anchor_generator_builder -from object_detection.builders import box_coder_builder -from object_detection.builders import box_predictor_builder -from object_detection.builders import hyperparams_builder -from object_detection.builders import image_resizer_builder -from object_detection.builders import losses_builder -from object_detection.builders import matcher_builder -from object_detection.builders import post_processing_builder -from object_detection.builders import region_similarity_calculator_builder as sim_calc -from object_detection.core import box_predictor -from object_detection.meta_architectures import faster_rcnn_meta_arch -from object_detection.meta_architectures import rfcn_meta_arch -from object_detection.meta_architectures import ssd_meta_arch -from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res -from object_detection.models import faster_rcnn_inception_v2_feature_extractor as frcnn_inc_v2 -from object_detection.models import faster_rcnn_nas_feature_extractor as frcnn_nas -from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as frcnn_resnet_v1 -from object_detection.models.embedded_ssd_mobilenet_v1_feature_extractor import EmbeddedSSDMobileNetV1FeatureExtractor -from object_detection.models.ssd_inception_v2_feature_extractor import SSDInceptionV2FeatureExtractor -from object_detection.models.ssd_inception_v3_feature_extractor import SSDInceptionV3FeatureExtractor -from object_detection.models.ssd_mobilenet_v1_feature_extractor import SSDMobileNetV1FeatureExtractor -from object_detection.protos import model_pb2 - -# A map of names to SSD feature extractors. 
-SSD_FEATURE_EXTRACTOR_CLASS_MAP = { - 'ssd_inception_v2': SSDInceptionV2FeatureExtractor, - 'ssd_inception_v3': SSDInceptionV3FeatureExtractor, - 'ssd_mobilenet_v1': SSDMobileNetV1FeatureExtractor, - 'embedded_ssd_mobilenet_v1': EmbeddedSSDMobileNetV1FeatureExtractor, -} - -# A map of names to Faster R-CNN feature extractors. -FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP = { - 'faster_rcnn_nas': - frcnn_nas.FasterRCNNNASFeatureExtractor, - 'faster_rcnn_inception_resnet_v2': - frcnn_inc_res.FasterRCNNInceptionResnetV2FeatureExtractor, - 'faster_rcnn_inception_v2': - frcnn_inc_v2.FasterRCNNInceptionV2FeatureExtractor, - 'faster_rcnn_resnet50': - frcnn_resnet_v1.FasterRCNNResnet50FeatureExtractor, - 'faster_rcnn_resnet101': - frcnn_resnet_v1.FasterRCNNResnet101FeatureExtractor, - 'faster_rcnn_resnet152': - frcnn_resnet_v1.FasterRCNNResnet152FeatureExtractor, -} - - -def build(model_config, is_training): - """Builds a DetectionModel based on the model config. - - Args: - model_config: A model.proto object containing the config for the desired - DetectionModel. - is_training: True if this model is being built for training purposes. - - Returns: - DetectionModel based on the config. - - Raises: - ValueError: On invalid meta architecture or model. - """ - if not isinstance(model_config, model_pb2.DetectionModel): - raise ValueError('model_config not of type model_pb2.DetectionModel.') - meta_architecture = model_config.WhichOneof('model') - if meta_architecture == 'ssd': - return _build_ssd_model(model_config.ssd, is_training) - if meta_architecture == 'faster_rcnn': - return _build_faster_rcnn_model(model_config.faster_rcnn, is_training) - raise ValueError('Unknown meta architecture: {}'.format(meta_architecture)) - - -def _build_ssd_feature_extractor(feature_extractor_config, is_training, - reuse_weights=None): - """Builds a ssd_meta_arch.SSDFeatureExtractor based on config. - - Args: - feature_extractor_config: A SSDFeatureExtractor proto config from ssd.proto. 
- is_training: True if this feature extractor is being built for training. - reuse_weights: if the feature extractor should reuse weights. - - Returns: - ssd_meta_arch.SSDFeatureExtractor based on config. - - Raises: - ValueError: On invalid feature extractor type. - """ - feature_type = feature_extractor_config.type - depth_multiplier = feature_extractor_config.depth_multiplier - min_depth = feature_extractor_config.min_depth - pad_to_multiple = feature_extractor_config.pad_to_multiple - batch_norm_trainable = feature_extractor_config.batch_norm_trainable - conv_hyperparams = hyperparams_builder.build( - feature_extractor_config.conv_hyperparams, is_training) - - if feature_type not in SSD_FEATURE_EXTRACTOR_CLASS_MAP: - raise ValueError('Unknown ssd feature_extractor: {}'.format(feature_type)) - - feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type] - return feature_extractor_class(is_training, depth_multiplier, min_depth, - pad_to_multiple, conv_hyperparams, - batch_norm_trainable, reuse_weights) - - -def _build_ssd_model(ssd_config, is_training): - """Builds an SSD detection model based on the model config. - - Args: - ssd_config: A ssd.proto object containing the config for the desired - SSDMetaArch. - is_training: True if this model is being built for training purposes. - - Returns: - SSDMetaArch based on the config. - Raises: - ValueError: If ssd_config.type is not recognized (i.e. not registered in - model_class_map). 
- """ - num_classes = ssd_config.num_classes - - # Feature extractor - feature_extractor = _build_ssd_feature_extractor(ssd_config.feature_extractor, - is_training) - - box_coder = box_coder_builder.build(ssd_config.box_coder) - matcher = matcher_builder.build(ssd_config.matcher) - region_similarity_calculator = sim_calc.build( - ssd_config.similarity_calculator) - ssd_box_predictor = box_predictor_builder.build(hyperparams_builder.build, - ssd_config.box_predictor, - is_training, num_classes) - anchor_generator = anchor_generator_builder.build( - ssd_config.anchor_generator) - image_resizer_fn = image_resizer_builder.build(ssd_config.image_resizer) - non_max_suppression_fn, score_conversion_fn = post_processing_builder.build( - ssd_config.post_processing) - (classification_loss, localization_loss, classification_weight, - localization_weight, - hard_example_miner) = losses_builder.build(ssd_config.loss) - normalize_loss_by_num_matches = ssd_config.normalize_loss_by_num_matches - - return ssd_meta_arch.SSDMetaArch( - is_training, - anchor_generator, - ssd_box_predictor, - box_coder, - feature_extractor, - matcher, - region_similarity_calculator, - image_resizer_fn, - non_max_suppression_fn, - score_conversion_fn, - classification_loss, - localization_loss, - classification_weight, - localization_weight, - normalize_loss_by_num_matches, - hard_example_miner) - - -def _build_faster_rcnn_feature_extractor( - feature_extractor_config, is_training, reuse_weights=None): - """Builds a faster_rcnn_meta_arch.FasterRCNNFeatureExtractor based on config. - - Args: - feature_extractor_config: A FasterRcnnFeatureExtractor proto config from - faster_rcnn.proto. - is_training: True if this feature extractor is being built for training. - reuse_weights: if the feature extractor should reuse weights. - - Returns: - faster_rcnn_meta_arch.FasterRCNNFeatureExtractor based on config. - - Raises: - ValueError: On invalid feature extractor type. 
- """ - feature_type = feature_extractor_config.type - first_stage_features_stride = ( - feature_extractor_config.first_stage_features_stride) - batch_norm_trainable = feature_extractor_config.batch_norm_trainable - - if feature_type not in FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP: - raise ValueError('Unknown Faster R-CNN feature_extractor: {}'.format( - feature_type)) - feature_extractor_class = FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP[ - feature_type] - return feature_extractor_class( - is_training, first_stage_features_stride, - batch_norm_trainable, reuse_weights) - - -def _build_faster_rcnn_model(frcnn_config, is_training): - """Builds a Faster R-CNN or R-FCN detection model based on the model config. - - Builds R-FCN model if the second_stage_box_predictor in the config is of type - `rfcn_box_predictor` else builds a Faster R-CNN model. - - Args: - frcnn_config: A faster_rcnn.proto object containing the config for the - desired FasterRCNNMetaArch or RFCNMetaArch. - is_training: True if this model is being built for training purposes. - - Returns: - FasterRCNNMetaArch based on the config. - Raises: - ValueError: If frcnn_config.type is not recognized (i.e. not registered in - model_class_map). 
- """ - num_classes = frcnn_config.num_classes - image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer) - - feature_extractor = _build_faster_rcnn_feature_extractor( - frcnn_config.feature_extractor, is_training) - - first_stage_only = frcnn_config.first_stage_only - first_stage_anchor_generator = anchor_generator_builder.build( - frcnn_config.first_stage_anchor_generator) - - first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate - first_stage_box_predictor_arg_scope = hyperparams_builder.build( - frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training) - first_stage_box_predictor_kernel_size = ( - frcnn_config.first_stage_box_predictor_kernel_size) - first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth - first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size - first_stage_positive_balance_fraction = ( - frcnn_config.first_stage_positive_balance_fraction) - first_stage_nms_score_threshold = frcnn_config.first_stage_nms_score_threshold - first_stage_nms_iou_threshold = frcnn_config.first_stage_nms_iou_threshold - first_stage_max_proposals = frcnn_config.first_stage_max_proposals - first_stage_loc_loss_weight = ( - frcnn_config.first_stage_localization_loss_weight) - first_stage_obj_loss_weight = frcnn_config.first_stage_objectness_loss_weight - - initial_crop_size = frcnn_config.initial_crop_size - maxpool_kernel_size = frcnn_config.maxpool_kernel_size - maxpool_stride = frcnn_config.maxpool_stride - - second_stage_box_predictor = box_predictor_builder.build( - hyperparams_builder.build, - frcnn_config.second_stage_box_predictor, - is_training=is_training, - num_classes=num_classes) - second_stage_batch_size = frcnn_config.second_stage_batch_size - second_stage_balance_fraction = frcnn_config.second_stage_balance_fraction - (second_stage_non_max_suppression_fn, second_stage_score_conversion_fn - ) = post_processing_builder.build(frcnn_config.second_stage_post_processing) - 
second_stage_localization_loss_weight = ( - frcnn_config.second_stage_localization_loss_weight) - second_stage_classification_loss = ( - losses_builder.build_faster_rcnn_classification_loss( - frcnn_config.second_stage_classification_loss)) - second_stage_classification_loss_weight = ( - frcnn_config.second_stage_classification_loss_weight) - second_stage_mask_prediction_loss_weight = ( - frcnn_config.second_stage_mask_prediction_loss_weight) - - hard_example_miner = None - if frcnn_config.HasField('hard_example_miner'): - hard_example_miner = losses_builder.build_hard_example_miner( - frcnn_config.hard_example_miner, - second_stage_classification_loss_weight, - second_stage_localization_loss_weight) - - common_kwargs = { - 'is_training': is_training, - 'num_classes': num_classes, - 'image_resizer_fn': image_resizer_fn, - 'feature_extractor': feature_extractor, - 'first_stage_only': first_stage_only, - 'first_stage_anchor_generator': first_stage_anchor_generator, - 'first_stage_atrous_rate': first_stage_atrous_rate, - 'first_stage_box_predictor_arg_scope': - first_stage_box_predictor_arg_scope, - 'first_stage_box_predictor_kernel_size': - first_stage_box_predictor_kernel_size, - 'first_stage_box_predictor_depth': first_stage_box_predictor_depth, - 'first_stage_minibatch_size': first_stage_minibatch_size, - 'first_stage_positive_balance_fraction': - first_stage_positive_balance_fraction, - 'first_stage_nms_score_threshold': first_stage_nms_score_threshold, - 'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold, - 'first_stage_max_proposals': first_stage_max_proposals, - 'first_stage_localization_loss_weight': first_stage_loc_loss_weight, - 'first_stage_objectness_loss_weight': first_stage_obj_loss_weight, - 'second_stage_batch_size': second_stage_batch_size, - 'second_stage_balance_fraction': second_stage_balance_fraction, - 'second_stage_non_max_suppression_fn': - second_stage_non_max_suppression_fn, - 'second_stage_score_conversion_fn': 
second_stage_score_conversion_fn, - 'second_stage_localization_loss_weight': - second_stage_localization_loss_weight, - 'second_stage_classification_loss': - second_stage_classification_loss, - 'second_stage_classification_loss_weight': - second_stage_classification_loss_weight, - 'hard_example_miner': hard_example_miner} - - if isinstance(second_stage_box_predictor, box_predictor.RfcnBoxPredictor): - return rfcn_meta_arch.RFCNMetaArch( - second_stage_rfcn_box_predictor=second_stage_box_predictor, - **common_kwargs) - else: - return faster_rcnn_meta_arch.FasterRCNNMetaArch( - initial_crop_size=initial_crop_size, - maxpool_kernel_size=maxpool_kernel_size, - maxpool_stride=maxpool_stride, - second_stage_mask_rcnn_box_predictor=second_stage_box_predictor, - second_stage_mask_prediction_loss_weight=( - second_stage_mask_prediction_loss_weight), - **common_kwargs) diff --git a/object_detection/builders/model_builder_test.py b/object_detection/builders/model_builder_test.py deleted file mode 100644 index 5e217094..00000000 --- a/object_detection/builders/model_builder_test.py +++ /dev/null @@ -1,741 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for object_detection.models.model_builder.""" - -import tensorflow as tf - -from google.protobuf import text_format -from object_detection.builders import model_builder -from object_detection.meta_architectures import faster_rcnn_meta_arch -from object_detection.meta_architectures import rfcn_meta_arch -from object_detection.meta_architectures import ssd_meta_arch -from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res -from object_detection.models import faster_rcnn_inception_v2_feature_extractor as frcnn_inc_v2 -from object_detection.models import faster_rcnn_nas_feature_extractor as frcnn_nas -from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as frcnn_resnet_v1 -from object_detection.models.ssd_inception_v2_feature_extractor import SSDInceptionV2FeatureExtractor -from object_detection.models.ssd_inception_v3_feature_extractor import SSDInceptionV3FeatureExtractor -from object_detection.models.ssd_mobilenet_v1_feature_extractor import SSDMobileNetV1FeatureExtractor -from object_detection.protos import model_pb2 - -FEATURE_EXTRACTOR_MAPS = { - 'faster_rcnn_resnet50': - frcnn_resnet_v1.FasterRCNNResnet50FeatureExtractor, - 'faster_rcnn_resnet101': - frcnn_resnet_v1.FasterRCNNResnet101FeatureExtractor, - 'faster_rcnn_resnet152': - frcnn_resnet_v1.FasterRCNNResnet152FeatureExtractor -} - - -class ModelBuilderTest(tf.test.TestCase): - - def create_model(self, model_config): - """Builds a DetectionModel based on the model config. - - Args: - model_config: A model.proto object containing the config for the desired - DetectionModel. - - Returns: - DetectionModel based on the config. 
- """ - return model_builder.build(model_config, is_training=True) - - def test_create_ssd_inception_v2_model_from_config(self): - model_text_proto = """ - ssd { - feature_extractor { - type: 'ssd_inception_v2' - conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - } - box_coder { - faster_rcnn_box_coder { - } - } - matcher { - argmax_matcher { - } - } - similarity_calculator { - iou_similarity { - } - } - anchor_generator { - ssd_anchor_generator { - aspect_ratios: 1.0 - } - } - image_resizer { - fixed_shape_resizer { - height: 320 - width: 320 - } - } - box_predictor { - convolutional_box_predictor { - conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - } - } - loss { - classification_loss { - weighted_softmax { - } - } - localization_loss { - weighted_smooth_l1 { - } - } - } - }""" - model_proto = model_pb2.DetectionModel() - text_format.Merge(model_text_proto, model_proto) - model = self.create_model(model_proto) - self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch) - self.assertIsInstance(model._feature_extractor, - SSDInceptionV2FeatureExtractor) - - def test_create_ssd_inception_v3_model_from_config(self): - model_text_proto = """ - ssd { - feature_extractor { - type: 'ssd_inception_v3' - conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - } - box_coder { - faster_rcnn_box_coder { - } - } - matcher { - argmax_matcher { - } - } - similarity_calculator { - iou_similarity { - } - } - anchor_generator { - ssd_anchor_generator { - aspect_ratios: 1.0 - } - } - image_resizer { - fixed_shape_resizer { - height: 320 - width: 320 - } - } - box_predictor { - convolutional_box_predictor { - conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - } - } - loss { - classification_loss { 
- weighted_softmax { - } - } - localization_loss { - weighted_smooth_l1 { - } - } - } - }""" - model_proto = model_pb2.DetectionModel() - text_format.Merge(model_text_proto, model_proto) - model = self.create_model(model_proto) - self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch) - self.assertIsInstance(model._feature_extractor, - SSDInceptionV3FeatureExtractor) - - def test_create_ssd_mobilenet_v1_model_from_config(self): - model_text_proto = """ - ssd { - feature_extractor { - type: 'ssd_mobilenet_v1' - conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - batch_norm_trainable: true - } - box_coder { - faster_rcnn_box_coder { - } - } - matcher { - argmax_matcher { - } - } - similarity_calculator { - iou_similarity { - } - } - anchor_generator { - ssd_anchor_generator { - aspect_ratios: 1.0 - } - } - image_resizer { - fixed_shape_resizer { - height: 320 - width: 320 - } - } - box_predictor { - convolutional_box_predictor { - conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - } - } - loss { - classification_loss { - weighted_softmax { - } - } - localization_loss { - weighted_smooth_l1 { - } - } - } - }""" - model_proto = model_pb2.DetectionModel() - text_format.Merge(model_text_proto, model_proto) - model = self.create_model(model_proto) - self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch) - self.assertIsInstance(model._feature_extractor, - SSDMobileNetV1FeatureExtractor) - self.assertTrue(model._feature_extractor._batch_norm_trainable) - - def test_create_faster_rcnn_resnet_v1_models_from_config(self): - model_text_proto = """ - faster_rcnn { - num_classes: 3 - image_resizer { - keep_aspect_ratio_resizer { - min_dimension: 600 - max_dimension: 1024 - } - } - feature_extractor { - type: 'faster_rcnn_resnet101' - } - first_stage_anchor_generator { - grid_anchor_generator { - scales: [0.25, 0.5, 1.0, 2.0] - 
aspect_ratios: [0.5, 1.0, 2.0] - height_stride: 16 - width_stride: 16 - } - } - first_stage_box_predictor_conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - initial_crop_size: 14 - maxpool_kernel_size: 2 - maxpool_stride: 2 - second_stage_box_predictor { - mask_rcnn_box_predictor { - fc_hyperparams { - op: FC - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - } - } - second_stage_post_processing { - batch_non_max_suppression { - score_threshold: 0.01 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 300 - } - score_converter: SOFTMAX - } - }""" - model_proto = model_pb2.DetectionModel() - text_format.Merge(model_text_proto, model_proto) - for extractor_type, extractor_class in FEATURE_EXTRACTOR_MAPS.items(): - model_proto.faster_rcnn.feature_extractor.type = extractor_type - model = model_builder.build(model_proto, is_training=True) - self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch) - self.assertIsInstance(model._feature_extractor, extractor_class) - - def test_create_faster_rcnn_resnet101_with_mask_prediction_enabled(self): - model_text_proto = """ - faster_rcnn { - num_classes: 3 - image_resizer { - keep_aspect_ratio_resizer { - min_dimension: 600 - max_dimension: 1024 - } - } - feature_extractor { - type: 'faster_rcnn_resnet101' - } - first_stage_anchor_generator { - grid_anchor_generator { - scales: [0.25, 0.5, 1.0, 2.0] - aspect_ratios: [0.5, 1.0, 2.0] - height_stride: 16 - width_stride: 16 - } - } - first_stage_box_predictor_conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - initial_crop_size: 14 - maxpool_kernel_size: 2 - maxpool_stride: 2 - second_stage_box_predictor { - mask_rcnn_box_predictor { - fc_hyperparams { - op: FC - regularizer { - l2_regularizer { - } - } - initializer { - 
truncated_normal_initializer { - } - } - } - conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - predict_instance_masks: true - } - } - second_stage_mask_prediction_loss_weight: 3.0 - second_stage_post_processing { - batch_non_max_suppression { - score_threshold: 0.01 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 300 - } - score_converter: SOFTMAX - } - }""" - model_proto = model_pb2.DetectionModel() - text_format.Merge(model_text_proto, model_proto) - model = model_builder.build(model_proto, is_training=True) - self.assertAlmostEqual(model._second_stage_mask_loss_weight, 3.0) - - def test_create_faster_rcnn_nas_model_from_config(self): - model_text_proto = """ - faster_rcnn { - num_classes: 3 - image_resizer { - keep_aspect_ratio_resizer { - min_dimension: 600 - max_dimension: 1024 - } - } - feature_extractor { - type: 'faster_rcnn_nas' - } - first_stage_anchor_generator { - grid_anchor_generator { - scales: [0.25, 0.5, 1.0, 2.0] - aspect_ratios: [0.5, 1.0, 2.0] - height_stride: 16 - width_stride: 16 - } - } - first_stage_box_predictor_conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - initial_crop_size: 17 - maxpool_kernel_size: 1 - maxpool_stride: 1 - second_stage_box_predictor { - mask_rcnn_box_predictor { - fc_hyperparams { - op: FC - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - } - } - second_stage_post_processing { - batch_non_max_suppression { - score_threshold: 0.01 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 300 - } - score_converter: SOFTMAX - } - }""" - model_proto = model_pb2.DetectionModel() - text_format.Merge(model_text_proto, model_proto) - model = model_builder.build(model_proto, is_training=True) - self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch) - 
self.assertIsInstance( - model._feature_extractor, - frcnn_nas.FasterRCNNNASFeatureExtractor) - - def test_create_faster_rcnn_inception_resnet_v2_model_from_config(self): - model_text_proto = """ - faster_rcnn { - num_classes: 3 - image_resizer { - keep_aspect_ratio_resizer { - min_dimension: 600 - max_dimension: 1024 - } - } - feature_extractor { - type: 'faster_rcnn_inception_resnet_v2' - } - first_stage_anchor_generator { - grid_anchor_generator { - scales: [0.25, 0.5, 1.0, 2.0] - aspect_ratios: [0.5, 1.0, 2.0] - height_stride: 16 - width_stride: 16 - } - } - first_stage_box_predictor_conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - initial_crop_size: 17 - maxpool_kernel_size: 1 - maxpool_stride: 1 - second_stage_box_predictor { - mask_rcnn_box_predictor { - fc_hyperparams { - op: FC - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - } - } - second_stage_post_processing { - batch_non_max_suppression { - score_threshold: 0.01 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 300 - } - score_converter: SOFTMAX - } - }""" - model_proto = model_pb2.DetectionModel() - text_format.Merge(model_text_proto, model_proto) - model = model_builder.build(model_proto, is_training=True) - self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch) - self.assertIsInstance( - model._feature_extractor, - frcnn_inc_res.FasterRCNNInceptionResnetV2FeatureExtractor) - - def test_create_faster_rcnn_inception_v2_model_from_config(self): - model_text_proto = """ - faster_rcnn { - num_classes: 3 - image_resizer { - keep_aspect_ratio_resizer { - min_dimension: 600 - max_dimension: 1024 - } - } - feature_extractor { - type: 'faster_rcnn_inception_v2' - } - first_stage_anchor_generator { - grid_anchor_generator { - scales: [0.25, 0.5, 1.0, 2.0] - aspect_ratios: [0.5, 1.0, 2.0] - height_stride: 16 - width_stride: 16 - } - } - 
first_stage_box_predictor_conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - initial_crop_size: 14 - maxpool_kernel_size: 2 - maxpool_stride: 2 - second_stage_box_predictor { - mask_rcnn_box_predictor { - fc_hyperparams { - op: FC - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - } - } - second_stage_post_processing { - batch_non_max_suppression { - score_threshold: 0.01 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 300 - } - score_converter: SOFTMAX - } - }""" - model_proto = model_pb2.DetectionModel() - text_format.Merge(model_text_proto, model_proto) - model = model_builder.build(model_proto, is_training=True) - self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch) - self.assertIsInstance(model._feature_extractor, - frcnn_inc_v2.FasterRCNNInceptionV2FeatureExtractor) - - def test_create_faster_rcnn_model_from_config_with_example_miner(self): - model_text_proto = """ - faster_rcnn { - num_classes: 3 - feature_extractor { - type: 'faster_rcnn_inception_resnet_v2' - } - image_resizer { - keep_aspect_ratio_resizer { - min_dimension: 600 - max_dimension: 1024 - } - } - first_stage_anchor_generator { - grid_anchor_generator { - scales: [0.25, 0.5, 1.0, 2.0] - aspect_ratios: [0.5, 1.0, 2.0] - height_stride: 16 - width_stride: 16 - } - } - first_stage_box_predictor_conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - second_stage_box_predictor { - mask_rcnn_box_predictor { - fc_hyperparams { - op: FC - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - } - } - hard_example_miner { - num_hard_examples: 10 - iou_threshold: 0.99 - } - }""" - model_proto = model_pb2.DetectionModel() - text_format.Merge(model_text_proto, model_proto) - model = model_builder.build(model_proto, 
is_training=True) - self.assertIsNotNone(model._hard_example_miner) - - def test_create_rfcn_resnet_v1_model_from_config(self): - model_text_proto = """ - faster_rcnn { - num_classes: 3 - image_resizer { - keep_aspect_ratio_resizer { - min_dimension: 600 - max_dimension: 1024 - } - } - feature_extractor { - type: 'faster_rcnn_resnet101' - } - first_stage_anchor_generator { - grid_anchor_generator { - scales: [0.25, 0.5, 1.0, 2.0] - aspect_ratios: [0.5, 1.0, 2.0] - height_stride: 16 - width_stride: 16 - } - } - first_stage_box_predictor_conv_hyperparams { - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - initial_crop_size: 14 - maxpool_kernel_size: 2 - maxpool_stride: 2 - second_stage_box_predictor { - rfcn_box_predictor { - conv_hyperparams { - op: CONV - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - } - } - } - second_stage_post_processing { - batch_non_max_suppression { - score_threshold: 0.01 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 300 - } - score_converter: SOFTMAX - } - }""" - model_proto = model_pb2.DetectionModel() - text_format.Merge(model_text_proto, model_proto) - for extractor_type, extractor_class in FEATURE_EXTRACTOR_MAPS.items(): - model_proto.faster_rcnn.feature_extractor.type = extractor_type - model = model_builder.build(model_proto, is_training=True) - self.assertIsInstance(model, rfcn_meta_arch.RFCNMetaArch) - self.assertIsInstance(model._feature_extractor, extractor_class) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/builders/optimizer_builder.py b/object_detection/builders/optimizer_builder.py deleted file mode 100644 index cccaba99..00000000 --- a/object_detection/builders/optimizer_builder.py +++ /dev/null @@ -1,119 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Functions to build DetectionModel training optimizers.""" - -import tensorflow as tf -from object_detection.utils import learning_schedules - - -def build(optimizer_config, global_summaries): - """Create optimizer based on config. - - Args: - optimizer_config: A Optimizer proto message. - global_summaries: A set to attach learning rate summary to. - - Returns: - An optimizer. - - Raises: - ValueError: when using an unsupported input data type. - """ - optimizer_type = optimizer_config.WhichOneof('optimizer') - optimizer = None - - if optimizer_type == 'rms_prop_optimizer': - config = optimizer_config.rms_prop_optimizer - optimizer = tf.train.RMSPropOptimizer( - _create_learning_rate(config.learning_rate, global_summaries), - decay=config.decay, - momentum=config.momentum_optimizer_value, - epsilon=config.epsilon) - - if optimizer_type == 'momentum_optimizer': - config = optimizer_config.momentum_optimizer - optimizer = tf.train.MomentumOptimizer( - _create_learning_rate(config.learning_rate, global_summaries), - momentum=config.momentum_optimizer_value) - - if optimizer_type == 'adam_optimizer': - config = optimizer_config.adam_optimizer - optimizer = tf.train.AdamOptimizer( - _create_learning_rate(config.learning_rate, global_summaries)) - - if optimizer is None: - raise ValueError('Optimizer %s not supported.' 
% optimizer_type) - - if optimizer_config.use_moving_average: - optimizer = tf.contrib.opt.MovingAverageOptimizer( - optimizer, average_decay=optimizer_config.moving_average_decay) - - return optimizer - - -def _create_learning_rate(learning_rate_config, global_summaries): - """Create optimizer learning rate based on config. - - Args: - learning_rate_config: A LearningRate proto message. - global_summaries: A set to attach learning rate summary to. - - Returns: - A learning rate. - - Raises: - ValueError: when using an unsupported input data type. - """ - learning_rate = None - learning_rate_type = learning_rate_config.WhichOneof('learning_rate') - if learning_rate_type == 'constant_learning_rate': - config = learning_rate_config.constant_learning_rate - learning_rate = config.learning_rate - - if learning_rate_type == 'exponential_decay_learning_rate': - config = learning_rate_config.exponential_decay_learning_rate - learning_rate = tf.train.exponential_decay( - config.initial_learning_rate, - tf.train.get_or_create_global_step(), - config.decay_steps, - config.decay_factor, - staircase=config.staircase) - - if learning_rate_type == 'manual_step_learning_rate': - config = learning_rate_config.manual_step_learning_rate - if not config.schedule: - raise ValueError('Empty learning rate schedule.') - learning_rate_step_boundaries = [x.step for x in config.schedule] - learning_rate_sequence = [config.initial_learning_rate] - learning_rate_sequence += [x.learning_rate for x in config.schedule] - learning_rate = learning_schedules.manual_stepping( - tf.train.get_or_create_global_step(), learning_rate_step_boundaries, - learning_rate_sequence) - - if learning_rate_type == 'cosine_decay_learning_rate': - config = learning_rate_config.cosine_decay_learning_rate - learning_rate = learning_schedules.cosine_decay_with_warmup( - tf.train.get_or_create_global_step(), - config.learning_rate_base, - config.total_steps, - config.warmup_learning_rate, - config.warmup_steps) - - if 
learning_rate is None: - raise ValueError('Learning_rate %s not supported.' % learning_rate_type) - - global_summaries.add(tf.summary.scalar('Learning_Rate', learning_rate)) - return learning_rate diff --git a/object_detection/builders/optimizer_builder_test.py b/object_detection/builders/optimizer_builder_test.py deleted file mode 100644 index e5bcbba1..00000000 --- a/object_detection/builders/optimizer_builder_test.py +++ /dev/null @@ -1,213 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for optimizer_builder.""" - -import tensorflow as tf - -from google.protobuf import text_format - -from object_detection.builders import optimizer_builder -from object_detection.protos import optimizer_pb2 - - -class LearningRateBuilderTest(tf.test.TestCase): - - def testBuildConstantLearningRate(self): - learning_rate_text_proto = """ - constant_learning_rate { - learning_rate: 0.004 - } - """ - global_summaries = set([]) - learning_rate_proto = optimizer_pb2.LearningRate() - text_format.Merge(learning_rate_text_proto, learning_rate_proto) - learning_rate = optimizer_builder._create_learning_rate( - learning_rate_proto, global_summaries) - self.assertAlmostEqual(learning_rate, 0.004) - - def testBuildExponentialDecayLearningRate(self): - learning_rate_text_proto = """ - exponential_decay_learning_rate { - initial_learning_rate: 0.004 - decay_steps: 99999 - decay_factor: 0.85 - staircase: false - } - """ - global_summaries = set([]) - learning_rate_proto = optimizer_pb2.LearningRate() - text_format.Merge(learning_rate_text_proto, learning_rate_proto) - learning_rate = optimizer_builder._create_learning_rate( - learning_rate_proto, global_summaries) - self.assertTrue(isinstance(learning_rate, tf.Tensor)) - - def testBuildManualStepLearningRate(self): - learning_rate_text_proto = """ - manual_step_learning_rate { - schedule { - step: 0 - learning_rate: 0.006 - } - schedule { - step: 90000 - learning_rate: 0.00006 - } - } - """ - global_summaries = set([]) - learning_rate_proto = optimizer_pb2.LearningRate() - text_format.Merge(learning_rate_text_proto, learning_rate_proto) - learning_rate = optimizer_builder._create_learning_rate( - learning_rate_proto, global_summaries) - self.assertTrue(isinstance(learning_rate, tf.Tensor)) - - def testBuildCosineDecayLearningRate(self): - learning_rate_text_proto = """ - cosine_decay_learning_rate { - learning_rate_base: 0.002 - 
total_steps: 20000 - warmup_learning_rate: 0.0001 - warmup_steps: 1000 - } - """ - global_summaries = set([]) - learning_rate_proto = optimizer_pb2.LearningRate() - text_format.Merge(learning_rate_text_proto, learning_rate_proto) - learning_rate = optimizer_builder._create_learning_rate( - learning_rate_proto, global_summaries) - self.assertTrue(isinstance(learning_rate, tf.Tensor)) - - def testRaiseErrorOnEmptyLearningRate(self): - learning_rate_text_proto = """ - """ - global_summaries = set([]) - learning_rate_proto = optimizer_pb2.LearningRate() - text_format.Merge(learning_rate_text_proto, learning_rate_proto) - with self.assertRaises(ValueError): - optimizer_builder._create_learning_rate( - learning_rate_proto, global_summaries) - - -class OptimizerBuilderTest(tf.test.TestCase): - - def testBuildRMSPropOptimizer(self): - optimizer_text_proto = """ - rms_prop_optimizer: { - learning_rate: { - exponential_decay_learning_rate { - initial_learning_rate: 0.004 - decay_steps: 800720 - decay_factor: 0.95 - } - } - momentum_optimizer_value: 0.9 - decay: 0.9 - epsilon: 1.0 - } - use_moving_average: false - """ - global_summaries = set([]) - optimizer_proto = optimizer_pb2.Optimizer() - text_format.Merge(optimizer_text_proto, optimizer_proto) - optimizer = optimizer_builder.build(optimizer_proto, global_summaries) - self.assertTrue(isinstance(optimizer, tf.train.RMSPropOptimizer)) - - def testBuildMomentumOptimizer(self): - optimizer_text_proto = """ - momentum_optimizer: { - learning_rate: { - constant_learning_rate { - learning_rate: 0.001 - } - } - momentum_optimizer_value: 0.99 - } - use_moving_average: false - """ - global_summaries = set([]) - optimizer_proto = optimizer_pb2.Optimizer() - text_format.Merge(optimizer_text_proto, optimizer_proto) - optimizer = optimizer_builder.build(optimizer_proto, global_summaries) - self.assertTrue(isinstance(optimizer, tf.train.MomentumOptimizer)) - - def testBuildAdamOptimizer(self): - optimizer_text_proto = """ - 
adam_optimizer: { - learning_rate: { - constant_learning_rate { - learning_rate: 0.002 - } - } - } - use_moving_average: false - """ - global_summaries = set([]) - optimizer_proto = optimizer_pb2.Optimizer() - text_format.Merge(optimizer_text_proto, optimizer_proto) - optimizer = optimizer_builder.build(optimizer_proto, global_summaries) - self.assertTrue(isinstance(optimizer, tf.train.AdamOptimizer)) - - def testBuildMovingAverageOptimizer(self): - optimizer_text_proto = """ - adam_optimizer: { - learning_rate: { - constant_learning_rate { - learning_rate: 0.002 - } - } - } - use_moving_average: True - """ - global_summaries = set([]) - optimizer_proto = optimizer_pb2.Optimizer() - text_format.Merge(optimizer_text_proto, optimizer_proto) - optimizer = optimizer_builder.build(optimizer_proto, global_summaries) - self.assertTrue( - isinstance(optimizer, tf.contrib.opt.MovingAverageOptimizer)) - - def testBuildMovingAverageOptimizerWithNonDefaultDecay(self): - optimizer_text_proto = """ - adam_optimizer: { - learning_rate: { - constant_learning_rate { - learning_rate: 0.002 - } - } - } - use_moving_average: True - moving_average_decay: 0.2 - """ - global_summaries = set([]) - optimizer_proto = optimizer_pb2.Optimizer() - text_format.Merge(optimizer_text_proto, optimizer_proto) - optimizer = optimizer_builder.build(optimizer_proto, global_summaries) - self.assertTrue( - isinstance(optimizer, tf.contrib.opt.MovingAverageOptimizer)) - # TODO(rathodv): Find a way to not depend on the private members. 
- self.assertAlmostEqual(optimizer._ema._decay, 0.2) - - def testBuildEmptyOptimizer(self): - optimizer_text_proto = """ - """ - global_summaries = set([]) - optimizer_proto = optimizer_pb2.Optimizer() - text_format.Merge(optimizer_text_proto, optimizer_proto) - with self.assertRaises(ValueError): - optimizer_builder.build(optimizer_proto, global_summaries) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/builders/post_processing_builder.py b/object_detection/builders/post_processing_builder.py deleted file mode 100644 index fa3a7728..00000000 --- a/object_detection/builders/post_processing_builder.py +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Builder function for post processing operations.""" -import functools - -import tensorflow as tf -from object_detection.core import post_processing -from object_detection.protos import post_processing_pb2 - - -def build(post_processing_config): - """Builds callables for post-processing operations. - - Builds callables for non-max suppression and score conversion based on the - configuration. - - Non-max suppression callable takes `boxes`, `scores`, and optionally - `clip_window`, `parallel_iterations` `masks, and `scope` as inputs. 
It returns - `nms_boxes`, `nms_scores`, `nms_classes` `nms_masks` and `num_detections`. See - post_processing.batch_multiclass_non_max_suppression for the type and shape - of these tensors. - - Score converter callable should be called with `input` tensor. The callable - returns the output from one of 3 tf operations based on the configuration - - tf.identity, tf.sigmoid or tf.nn.softmax. See tensorflow documentation for - argument and return value descriptions. - - Args: - post_processing_config: post_processing.proto object containing the - parameters for the post-processing operations. - - Returns: - non_max_suppressor_fn: Callable for non-max suppression. - score_converter_fn: Callable for score conversion. - - Raises: - ValueError: if the post_processing_config is of incorrect type. - """ - if not isinstance(post_processing_config, post_processing_pb2.PostProcessing): - raise ValueError('post_processing_config not of type ' - 'post_processing_pb2.Postprocessing.') - non_max_suppressor_fn = _build_non_max_suppressor( - post_processing_config.batch_non_max_suppression) - score_converter_fn = _build_score_converter( - post_processing_config.score_converter, - post_processing_config.logit_scale) - return non_max_suppressor_fn, score_converter_fn - - -def _build_non_max_suppressor(nms_config): - """Builds non-max suppresson based on the nms config. - - Args: - nms_config: post_processing_pb2.PostProcessing.BatchNonMaxSuppression proto. - - Returns: - non_max_suppressor_fn: Callable non-max suppressor. - - Raises: - ValueError: On incorrect iou_threshold or on incompatible values of - max_total_detections and max_detections_per_class. 
- """ - if nms_config.iou_threshold < 0 or nms_config.iou_threshold > 1.0: - raise ValueError('iou_threshold not in [0, 1.0].') - if nms_config.max_detections_per_class > nms_config.max_total_detections: - raise ValueError('max_detections_per_class should be no greater than ' - 'max_total_detections.') - - non_max_suppressor_fn = functools.partial( - post_processing.batch_multiclass_non_max_suppression, - score_thresh=nms_config.score_threshold, - iou_thresh=nms_config.iou_threshold, - max_size_per_class=nms_config.max_detections_per_class, - max_total_size=nms_config.max_total_detections) - return non_max_suppressor_fn - - -def _score_converter_fn_with_logit_scale(tf_score_converter_fn, logit_scale): - """Create a function to scale logits then apply a Tensorflow function.""" - def score_converter_fn(logits): - scaled_logits = tf.divide(logits, logit_scale, name='scale_logits') - return tf_score_converter_fn(scaled_logits, name='convert_scores') - score_converter_fn.__name__ = '%s_with_logit_scale' % ( - tf_score_converter_fn.__name__) - return score_converter_fn - - -def _build_score_converter(score_converter_config, logit_scale): - """Builds score converter based on the config. - - Builds one of [tf.identity, tf.sigmoid, tf.softmax] score converters based on - the config. - - Args: - score_converter_config: post_processing_pb2.PostProcessing.score_converter. - logit_scale: temperature to use for SOFTMAX score_converter. - - Returns: - Callable score converter op. - - Raises: - ValueError: On unknown score converter. 
- """ - if score_converter_config == post_processing_pb2.PostProcessing.IDENTITY: - return _score_converter_fn_with_logit_scale(tf.identity, logit_scale) - if score_converter_config == post_processing_pb2.PostProcessing.SIGMOID: - return _score_converter_fn_with_logit_scale(tf.sigmoid, logit_scale) - if score_converter_config == post_processing_pb2.PostProcessing.SOFTMAX: - return _score_converter_fn_with_logit_scale(tf.nn.softmax, logit_scale) - raise ValueError('Unknown score converter.') diff --git a/object_detection/builders/post_processing_builder_test.py b/object_detection/builders/post_processing_builder_test.py deleted file mode 100644 index c39fbfb4..00000000 --- a/object_detection/builders/post_processing_builder_test.py +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for post_processing_builder.""" - -import tensorflow as tf -from google.protobuf import text_format -from object_detection.builders import post_processing_builder -from object_detection.protos import post_processing_pb2 - - -class PostProcessingBuilderTest(tf.test.TestCase): - - def test_build_non_max_suppressor_with_correct_parameters(self): - post_processing_text_proto = """ - batch_non_max_suppression { - score_threshold: 0.7 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 300 - } - """ - post_processing_config = post_processing_pb2.PostProcessing() - text_format.Merge(post_processing_text_proto, post_processing_config) - non_max_suppressor, _ = post_processing_builder.build( - post_processing_config) - self.assertEqual(non_max_suppressor.keywords['max_size_per_class'], 100) - self.assertEqual(non_max_suppressor.keywords['max_total_size'], 300) - self.assertAlmostEqual(non_max_suppressor.keywords['score_thresh'], 0.7) - self.assertAlmostEqual(non_max_suppressor.keywords['iou_thresh'], 0.6) - - def test_build_identity_score_converter(self): - post_processing_text_proto = """ - score_converter: IDENTITY - """ - post_processing_config = post_processing_pb2.PostProcessing() - text_format.Merge(post_processing_text_proto, post_processing_config) - _, score_converter = post_processing_builder.build(post_processing_config) - self.assertEqual(score_converter.__name__, 'identity_with_logit_scale') - - inputs = tf.constant([1, 1], tf.float32) - outputs = score_converter(inputs) - with self.test_session() as sess: - converted_scores = sess.run(outputs) - expected_converted_scores = sess.run(inputs) - self.assertAllClose(converted_scores, expected_converted_scores) - - def test_build_identity_score_converter_with_logit_scale(self): - post_processing_text_proto = """ - score_converter: IDENTITY - logit_scale: 2.0 - """ - post_processing_config = 
post_processing_pb2.PostProcessing() - text_format.Merge(post_processing_text_proto, post_processing_config) - _, score_converter = post_processing_builder.build(post_processing_config) - self.assertEqual(score_converter.__name__, 'identity_with_logit_scale') - - inputs = tf.constant([1, 1], tf.float32) - outputs = score_converter(inputs) - with self.test_session() as sess: - converted_scores = sess.run(outputs) - expected_converted_scores = sess.run(tf.constant([.5, .5], tf.float32)) - self.assertAllClose(converted_scores, expected_converted_scores) - - def test_build_sigmoid_score_converter(self): - post_processing_text_proto = """ - score_converter: SIGMOID - """ - post_processing_config = post_processing_pb2.PostProcessing() - text_format.Merge(post_processing_text_proto, post_processing_config) - _, score_converter = post_processing_builder.build(post_processing_config) - self.assertEqual(score_converter.__name__, 'sigmoid_with_logit_scale') - - def test_build_softmax_score_converter(self): - post_processing_text_proto = """ - score_converter: SOFTMAX - """ - post_processing_config = post_processing_pb2.PostProcessing() - text_format.Merge(post_processing_text_proto, post_processing_config) - _, score_converter = post_processing_builder.build(post_processing_config) - self.assertEqual(score_converter.__name__, 'softmax_with_logit_scale') - - def test_build_softmax_score_converter_with_temperature(self): - post_processing_text_proto = """ - score_converter: SOFTMAX - logit_scale: 2.0 - """ - post_processing_config = post_processing_pb2.PostProcessing() - text_format.Merge(post_processing_text_proto, post_processing_config) - _, score_converter = post_processing_builder.build(post_processing_config) - self.assertEqual(score_converter.__name__, 'softmax_with_logit_scale') - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/builders/preprocessor_builder.py b/object_detection/builders/preprocessor_builder.py deleted file mode 100644 index 
9263925e..00000000 --- a/object_detection/builders/preprocessor_builder.py +++ /dev/null @@ -1,324 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Builder for preprocessing steps.""" - -import tensorflow as tf - -from object_detection.core import preprocessor -from object_detection.protos import preprocessor_pb2 - - -def _get_step_config_from_proto(preprocessor_step_config, step_name): - """Returns the value of a field named step_name from proto. - - Args: - preprocessor_step_config: A preprocessor_pb2.PreprocessingStep object. - step_name: Name of the field to get value from. - - Returns: - result_dict: a sub proto message from preprocessor_step_config which will be - later converted to a dictionary. - - Raises: - ValueError: If field does not exist in proto. - """ - for field, value in preprocessor_step_config.ListFields(): - if field.name == step_name: - return value - - raise ValueError('Could not get field %s from proto!', step_name) - - -def _get_dict_from_proto(config): - """Helper function to put all proto fields into a dictionary. - - For many preprocessing steps, there's an trivial 1-1 mapping from proto fields - to function arguments. This function automatically populates a dictionary with - the arguments from the proto. - - Protos that CANNOT be trivially populated include: - * nested messages. 
- * steps that check if an optional field is set (ie. where None != 0). - * protos that don't map 1-1 to arguments (ie. list should be reshaped). - * fields requiring additional validation (ie. repeated field has n elements). - - Args: - config: A protobuf object that does not violate the conditions above. - - Returns: - result_dict: |config| converted into a python dictionary. - """ - result_dict = {} - for field, value in config.ListFields(): - result_dict[field.name] = value - return result_dict - - -# A map from a PreprocessingStep proto config field name to the preprocessing -# function that should be used. The PreprocessingStep proto should be parsable -# with _get_dict_from_proto. -PREPROCESSING_FUNCTION_MAP = { - 'normalize_image': preprocessor.normalize_image, - 'random_pixel_value_scale': preprocessor.random_pixel_value_scale, - 'random_image_scale': preprocessor.random_image_scale, - 'random_rgb_to_gray': preprocessor.random_rgb_to_gray, - 'random_adjust_brightness': preprocessor.random_adjust_brightness, - 'random_adjust_contrast': preprocessor.random_adjust_contrast, - 'random_adjust_hue': preprocessor.random_adjust_hue, - 'random_adjust_saturation': preprocessor.random_adjust_saturation, - 'random_distort_color': preprocessor.random_distort_color, - 'random_jitter_boxes': preprocessor.random_jitter_boxes, - 'random_crop_to_aspect_ratio': preprocessor.random_crop_to_aspect_ratio, - 'random_black_patches': preprocessor.random_black_patches, - 'scale_boxes_to_pixel_coordinates': ( - preprocessor.scale_boxes_to_pixel_coordinates), - 'subtract_channel_mean': preprocessor.subtract_channel_mean, -} - - -# A map to convert from preprocessor_pb2.ResizeImage.Method enum to -# tf.image.ResizeMethod. 
-RESIZE_METHOD_MAP = { - preprocessor_pb2.ResizeImage.AREA: tf.image.ResizeMethod.AREA, - preprocessor_pb2.ResizeImage.BICUBIC: tf.image.ResizeMethod.BICUBIC, - preprocessor_pb2.ResizeImage.BILINEAR: tf.image.ResizeMethod.BILINEAR, - preprocessor_pb2.ResizeImage.NEAREST_NEIGHBOR: ( - tf.image.ResizeMethod.NEAREST_NEIGHBOR), -} - - -def build(preprocessor_step_config): - """Builds preprocessing step based on the configuration. - - Args: - preprocessor_step_config: PreprocessingStep configuration proto. - - Returns: - function, argmap: A callable function and an argument map to call function - with. - - Raises: - ValueError: On invalid configuration. - """ - step_type = preprocessor_step_config.WhichOneof('preprocessing_step') - - if step_type in PREPROCESSING_FUNCTION_MAP: - preprocessing_function = PREPROCESSING_FUNCTION_MAP[step_type] - step_config = _get_step_config_from_proto(preprocessor_step_config, - step_type) - function_args = _get_dict_from_proto(step_config) - return (preprocessing_function, function_args) - - if step_type == 'random_horizontal_flip': - config = preprocessor_step_config.random_horizontal_flip - return (preprocessor.random_horizontal_flip, - { - 'keypoint_flip_permutation': tuple( - config.keypoint_flip_permutation), - }) - - if step_type == 'random_vertical_flip': - config = preprocessor_step_config.random_vertical_flip - return (preprocessor.random_vertical_flip, - { - 'keypoint_flip_permutation': tuple( - config.keypoint_flip_permutation), - }) - - if step_type == 'random_rotation90': - return (preprocessor.random_rotation90, {}) - - if step_type == 'random_crop_image': - config = preprocessor_step_config.random_crop_image - return (preprocessor.random_crop_image, - { - 'min_object_covered': config.min_object_covered, - 'aspect_ratio_range': (config.min_aspect_ratio, - config.max_aspect_ratio), - 'area_range': (config.min_area, config.max_area), - 'overlap_thresh': config.overlap_thresh, - 'random_coef': config.random_coef, - }) - - if 
step_type == 'random_pad_image': - config = preprocessor_step_config.random_pad_image - min_image_size = None - if (config.HasField('min_image_height') != - config.HasField('min_image_width')): - raise ValueError('min_image_height and min_image_width should be either ' - 'both set or both unset.') - if config.HasField('min_image_height'): - min_image_size = (config.min_image_height, config.min_image_width) - - max_image_size = None - if (config.HasField('max_image_height') != - config.HasField('max_image_width')): - raise ValueError('max_image_height and max_image_width should be either ' - 'both set or both unset.') - if config.HasField('max_image_height'): - max_image_size = (config.max_image_height, config.max_image_width) - - pad_color = config.pad_color - if pad_color and len(pad_color) != 3: - raise ValueError('pad_color should have 3 elements (RGB) if set!') - if not pad_color: - pad_color = None - return (preprocessor.random_pad_image, - { - 'min_image_size': min_image_size, - 'max_image_size': max_image_size, - 'pad_color': pad_color, - }) - - if step_type == 'random_crop_pad_image': - config = preprocessor_step_config.random_crop_pad_image - min_padded_size_ratio = config.min_padded_size_ratio - if min_padded_size_ratio and len(min_padded_size_ratio) != 2: - raise ValueError('min_padded_size_ratio should have 2 elements if set!') - max_padded_size_ratio = config.max_padded_size_ratio - if max_padded_size_ratio and len(max_padded_size_ratio) != 2: - raise ValueError('max_padded_size_ratio should have 2 elements if set!') - pad_color = config.pad_color - if pad_color and len(pad_color) != 3: - raise ValueError('pad_color should have 3 elements if set!') - kwargs = { - 'min_object_covered': config.min_object_covered, - 'aspect_ratio_range': (config.min_aspect_ratio, - config.max_aspect_ratio), - 'area_range': (config.min_area, config.max_area), - 'overlap_thresh': config.overlap_thresh, - 'random_coef': config.random_coef, - } - if min_padded_size_ratio: - 
kwargs['min_padded_size_ratio'] = tuple(min_padded_size_ratio) - if max_padded_size_ratio: - kwargs['max_padded_size_ratio'] = tuple(max_padded_size_ratio) - if pad_color: - kwargs['pad_color'] = tuple(pad_color) - return (preprocessor.random_crop_pad_image, kwargs) - - if step_type == 'random_resize_method': - config = preprocessor_step_config.random_resize_method - return (preprocessor.random_resize_method, - { - 'target_size': [config.target_height, config.target_width], - }) - - if step_type == 'resize_image': - config = preprocessor_step_config.resize_image - method = RESIZE_METHOD_MAP[config.method] - return (preprocessor.resize_image, - { - 'new_height': config.new_height, - 'new_width': config.new_width, - 'method': method - }) - - if step_type == 'ssd_random_crop': - config = preprocessor_step_config.ssd_random_crop - if config.operations: - min_object_covered = [op.min_object_covered for op in config.operations] - aspect_ratio_range = [(op.min_aspect_ratio, op.max_aspect_ratio) - for op in config.operations] - area_range = [(op.min_area, op.max_area) for op in config.operations] - overlap_thresh = [op.overlap_thresh for op in config.operations] - random_coef = [op.random_coef for op in config.operations] - return (preprocessor.ssd_random_crop, - { - 'min_object_covered': min_object_covered, - 'aspect_ratio_range': aspect_ratio_range, - 'area_range': area_range, - 'overlap_thresh': overlap_thresh, - 'random_coef': random_coef, - }) - return (preprocessor.ssd_random_crop, {}) - - if step_type == 'ssd_random_crop_pad': - config = preprocessor_step_config.ssd_random_crop_pad - if config.operations: - min_object_covered = [op.min_object_covered for op in config.operations] - aspect_ratio_range = [(op.min_aspect_ratio, op.max_aspect_ratio) - for op in config.operations] - area_range = [(op.min_area, op.max_area) for op in config.operations] - overlap_thresh = [op.overlap_thresh for op in config.operations] - random_coef = [op.random_coef for op in 
config.operations] - min_padded_size_ratio = [ - (op.min_padded_size_ratio[0], op.min_padded_size_ratio[1]) - for op in config.operations] - max_padded_size_ratio = [ - (op.max_padded_size_ratio[0], op.max_padded_size_ratio[1]) - for op in config.operations] - pad_color = [(op.pad_color_r, op.pad_color_g, op.pad_color_b) - for op in config.operations] - return (preprocessor.ssd_random_crop_pad, - { - 'min_object_covered': min_object_covered, - 'aspect_ratio_range': aspect_ratio_range, - 'area_range': area_range, - 'overlap_thresh': overlap_thresh, - 'random_coef': random_coef, - 'min_padded_size_ratio': min_padded_size_ratio, - 'max_padded_size_ratio': max_padded_size_ratio, - 'pad_color': pad_color, - }) - return (preprocessor.ssd_random_crop_pad, {}) - - if step_type == 'ssd_random_crop_fixed_aspect_ratio': - config = preprocessor_step_config.ssd_random_crop_fixed_aspect_ratio - if config.operations: - min_object_covered = [op.min_object_covered for op in config.operations] - area_range = [(op.min_area, op.max_area) for op in config.operations] - overlap_thresh = [op.overlap_thresh for op in config.operations] - random_coef = [op.random_coef for op in config.operations] - return (preprocessor.ssd_random_crop_fixed_aspect_ratio, - { - 'min_object_covered': min_object_covered, - 'aspect_ratio': config.aspect_ratio, - 'area_range': area_range, - 'overlap_thresh': overlap_thresh, - 'random_coef': random_coef, - }) - return (preprocessor.ssd_random_crop_fixed_aspect_ratio, {}) - - if step_type == 'ssd_random_crop_pad_fixed_aspect_ratio': - config = preprocessor_step_config.ssd_random_crop_pad_fixed_aspect_ratio - if config.operations: - min_object_covered = [op.min_object_covered for op in config.operations] - aspect_ratio_range = [(op.min_aspect_ratio, op.max_aspect_ratio) - for op in config.operations] - area_range = [(op.min_area, op.max_area) for op in config.operations] - overlap_thresh = [op.overlap_thresh for op in config.operations] - random_coef = 
[op.random_coef for op in config.operations] - min_padded_size_ratio = [ - (op.min_padded_size_ratio[0], op.min_padded_size_ratio[1]) - for op in config.operations] - max_padded_size_ratio = [ - (op.max_padded_size_ratio[0], op.max_padded_size_ratio[1]) - for op in config.operations] - return (preprocessor.ssd_random_crop_pad_fixed_aspect_ratio, - { - 'min_object_covered': min_object_covered, - 'aspect_ratio': config.aspect_ratio, - 'aspect_ratio_range': aspect_ratio_range, - 'area_range': area_range, - 'overlap_thresh': overlap_thresh, - 'random_coef': random_coef, - 'min_padded_size_ratio': min_padded_size_ratio, - 'max_padded_size_ratio': max_padded_size_ratio, - }) - return (preprocessor.ssd_random_crop_pad_fixed_aspect_ratio, {}) - - raise ValueError('Unknown preprocessing step.') diff --git a/object_detection/builders/preprocessor_builder_test.py b/object_detection/builders/preprocessor_builder_test.py deleted file mode 100644 index cc2789aa..00000000 --- a/object_detection/builders/preprocessor_builder_test.py +++ /dev/null @@ -1,558 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for preprocessor_builder.""" - -import tensorflow as tf - -from google.protobuf import text_format - -from object_detection.builders import preprocessor_builder -from object_detection.core import preprocessor -from object_detection.protos import preprocessor_pb2 - - -class PreprocessorBuilderTest(tf.test.TestCase): - - def assert_dictionary_close(self, dict1, dict2): - """Helper to check if two dicts with floatst or integers are close.""" - self.assertEqual(sorted(dict1.keys()), sorted(dict2.keys())) - for key in dict1: - value = dict1[key] - if isinstance(value, float): - self.assertAlmostEqual(value, dict2[key]) - else: - self.assertEqual(value, dict2[key]) - - def test_build_normalize_image(self): - preprocessor_text_proto = """ - normalize_image { - original_minval: 0.0 - original_maxval: 255.0 - target_minval: -1.0 - target_maxval: 1.0 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.normalize_image) - self.assertEqual(args, { - 'original_minval': 0.0, - 'original_maxval': 255.0, - 'target_minval': -1.0, - 'target_maxval': 1.0, - }) - - def test_build_random_horizontal_flip(self): - preprocessor_text_proto = """ - random_horizontal_flip { - keypoint_flip_permutation: 1 - keypoint_flip_permutation: 0 - keypoint_flip_permutation: 2 - keypoint_flip_permutation: 3 - keypoint_flip_permutation: 5 - keypoint_flip_permutation: 4 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_horizontal_flip) - self.assertEqual(args, {'keypoint_flip_permutation': (1, 0, 2, 3, 5, 4)}) - - def 
test_build_random_vertical_flip(self): - preprocessor_text_proto = """ - random_vertical_flip { - keypoint_flip_permutation: 1 - keypoint_flip_permutation: 0 - keypoint_flip_permutation: 2 - keypoint_flip_permutation: 3 - keypoint_flip_permutation: 5 - keypoint_flip_permutation: 4 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_vertical_flip) - self.assertEqual(args, {'keypoint_flip_permutation': (1, 0, 2, 3, 5, 4)}) - - def test_build_random_rotation90(self): - preprocessor_text_proto = """ - random_rotation90 {} - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_rotation90) - self.assertEqual(args, {}) - - def test_build_random_pixel_value_scale(self): - preprocessor_text_proto = """ - random_pixel_value_scale { - minval: 0.8 - maxval: 1.2 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_pixel_value_scale) - self.assert_dictionary_close(args, {'minval': 0.8, 'maxval': 1.2}) - - def test_build_random_image_scale(self): - preprocessor_text_proto = """ - random_image_scale { - min_scale_ratio: 0.8 - max_scale_ratio: 2.2 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_image_scale) - self.assert_dictionary_close(args, {'min_scale_ratio': 0.8, - 'max_scale_ratio': 2.2}) - - def 
test_build_random_rgb_to_gray(self): - preprocessor_text_proto = """ - random_rgb_to_gray { - probability: 0.8 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_rgb_to_gray) - self.assert_dictionary_close(args, {'probability': 0.8}) - - def test_build_random_adjust_brightness(self): - preprocessor_text_proto = """ - random_adjust_brightness { - max_delta: 0.2 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_adjust_brightness) - self.assert_dictionary_close(args, {'max_delta': 0.2}) - - def test_build_random_adjust_contrast(self): - preprocessor_text_proto = """ - random_adjust_contrast { - min_delta: 0.7 - max_delta: 1.1 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_adjust_contrast) - self.assert_dictionary_close(args, {'min_delta': 0.7, 'max_delta': 1.1}) - - def test_build_random_adjust_hue(self): - preprocessor_text_proto = """ - random_adjust_hue { - max_delta: 0.01 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_adjust_hue) - self.assert_dictionary_close(args, {'max_delta': 0.01}) - - def test_build_random_adjust_saturation(self): - preprocessor_text_proto = """ - random_adjust_saturation { - min_delta: 0.75 - max_delta: 1.15 - } - """ - preprocessor_proto = 
preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_adjust_saturation) - self.assert_dictionary_close(args, {'min_delta': 0.75, 'max_delta': 1.15}) - - def test_build_random_distort_color(self): - preprocessor_text_proto = """ - random_distort_color { - color_ordering: 1 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_distort_color) - self.assertEqual(args, {'color_ordering': 1}) - - def test_build_random_jitter_boxes(self): - preprocessor_text_proto = """ - random_jitter_boxes { - ratio: 0.1 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_jitter_boxes) - self.assert_dictionary_close(args, {'ratio': 0.1}) - - def test_build_random_crop_image(self): - preprocessor_text_proto = """ - random_crop_image { - min_object_covered: 0.75 - min_aspect_ratio: 0.75 - max_aspect_ratio: 1.5 - min_area: 0.25 - max_area: 0.875 - overlap_thresh: 0.5 - random_coef: 0.125 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_crop_image) - self.assertEqual(args, { - 'min_object_covered': 0.75, - 'aspect_ratio_range': (0.75, 1.5), - 'area_range': (0.25, 0.875), - 'overlap_thresh': 0.5, - 'random_coef': 0.125, - }) - - def test_build_random_pad_image(self): - preprocessor_text_proto = """ - random_pad_image { - } - """ - 
preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_pad_image) - self.assertEqual(args, { - 'min_image_size': None, - 'max_image_size': None, - 'pad_color': None, - }) - - def test_build_random_crop_pad_image(self): - preprocessor_text_proto = """ - random_crop_pad_image { - min_object_covered: 0.75 - min_aspect_ratio: 0.75 - max_aspect_ratio: 1.5 - min_area: 0.25 - max_area: 0.875 - overlap_thresh: 0.5 - random_coef: 0.125 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_crop_pad_image) - self.assertEqual(args, { - 'min_object_covered': 0.75, - 'aspect_ratio_range': (0.75, 1.5), - 'area_range': (0.25, 0.875), - 'overlap_thresh': 0.5, - 'random_coef': 0.125, - }) - - def test_build_random_crop_pad_image_with_optional_parameters(self): - preprocessor_text_proto = """ - random_crop_pad_image { - min_object_covered: 0.75 - min_aspect_ratio: 0.75 - max_aspect_ratio: 1.5 - min_area: 0.25 - max_area: 0.875 - overlap_thresh: 0.5 - random_coef: 0.125 - min_padded_size_ratio: 0.5 - min_padded_size_ratio: 0.75 - max_padded_size_ratio: 0.5 - max_padded_size_ratio: 0.75 - pad_color: 0.5 - pad_color: 0.5 - pad_color: 1.0 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_crop_pad_image) - self.assertEqual(args, { - 'min_object_covered': 0.75, - 'aspect_ratio_range': (0.75, 1.5), - 'area_range': (0.25, 0.875), - 'overlap_thresh': 0.5, - 'random_coef': 0.125, - 'min_padded_size_ratio': (0.5, 0.75), - 
'max_padded_size_ratio': (0.5, 0.75), - 'pad_color': (0.5, 0.5, 1.0) - }) - - def test_build_random_crop_to_aspect_ratio(self): - preprocessor_text_proto = """ - random_crop_to_aspect_ratio { - aspect_ratio: 0.85 - overlap_thresh: 0.35 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_crop_to_aspect_ratio) - self.assert_dictionary_close(args, {'aspect_ratio': 0.85, - 'overlap_thresh': 0.35}) - - def test_build_random_black_patches(self): - preprocessor_text_proto = """ - random_black_patches { - max_black_patches: 20 - probability: 0.95 - size_to_image_ratio: 0.12 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_black_patches) - self.assert_dictionary_close(args, {'max_black_patches': 20, - 'probability': 0.95, - 'size_to_image_ratio': 0.12}) - - def test_build_random_resize_method(self): - preprocessor_text_proto = """ - random_resize_method { - target_height: 75 - target_width: 100 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.random_resize_method) - self.assert_dictionary_close(args, {'target_size': [75, 100]}) - - def test_build_scale_boxes_to_pixel_coordinates(self): - preprocessor_text_proto = """ - scale_boxes_to_pixel_coordinates {} - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, 
preprocessor.scale_boxes_to_pixel_coordinates) - self.assertEqual(args, {}) - - def test_build_resize_image(self): - preprocessor_text_proto = """ - resize_image { - new_height: 75 - new_width: 100 - method: BICUBIC - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.resize_image) - self.assertEqual(args, {'new_height': 75, - 'new_width': 100, - 'method': tf.image.ResizeMethod.BICUBIC}) - - def test_build_subtract_channel_mean(self): - preprocessor_text_proto = """ - subtract_channel_mean { - means: [1.0, 2.0, 3.0] - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.subtract_channel_mean) - self.assertEqual(args, {'means': [1.0, 2.0, 3.0]}) - - def test_build_ssd_random_crop(self): - preprocessor_text_proto = """ - ssd_random_crop { - operations { - min_object_covered: 0.0 - min_aspect_ratio: 0.875 - max_aspect_ratio: 1.125 - min_area: 0.5 - max_area: 1.0 - overlap_thresh: 0.0 - random_coef: 0.375 - } - operations { - min_object_covered: 0.25 - min_aspect_ratio: 0.75 - max_aspect_ratio: 1.5 - min_area: 0.5 - max_area: 1.0 - overlap_thresh: 0.25 - random_coef: 0.375 - } - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.ssd_random_crop) - self.assertEqual(args, {'min_object_covered': [0.0, 0.25], - 'aspect_ratio_range': [(0.875, 1.125), (0.75, 1.5)], - 'area_range': [(0.5, 1.0), (0.5, 1.0)], - 'overlap_thresh': [0.0, 0.25], - 'random_coef': [0.375, 0.375]}) - - def 
test_build_ssd_random_crop_empty_operations(self): - preprocessor_text_proto = """ - ssd_random_crop { - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.ssd_random_crop) - self.assertEqual(args, {}) - - def test_build_ssd_random_crop_pad(self): - preprocessor_text_proto = """ - ssd_random_crop_pad { - operations { - min_object_covered: 0.0 - min_aspect_ratio: 0.875 - max_aspect_ratio: 1.125 - min_area: 0.5 - max_area: 1.0 - overlap_thresh: 0.0 - random_coef: 0.375 - min_padded_size_ratio: [1.0, 1.0] - max_padded_size_ratio: [2.0, 2.0] - pad_color_r: 0.5 - pad_color_g: 0.5 - pad_color_b: 0.5 - } - operations { - min_object_covered: 0.25 - min_aspect_ratio: 0.75 - max_aspect_ratio: 1.5 - min_area: 0.5 - max_area: 1.0 - overlap_thresh: 0.25 - random_coef: 0.375 - min_padded_size_ratio: [1.0, 1.0] - max_padded_size_ratio: [2.0, 2.0] - pad_color_r: 0.5 - pad_color_g: 0.5 - pad_color_b: 0.5 - } - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.ssd_random_crop_pad) - self.assertEqual(args, {'min_object_covered': [0.0, 0.25], - 'aspect_ratio_range': [(0.875, 1.125), (0.75, 1.5)], - 'area_range': [(0.5, 1.0), (0.5, 1.0)], - 'overlap_thresh': [0.0, 0.25], - 'random_coef': [0.375, 0.375], - 'min_padded_size_ratio': [(1.0, 1.0), (1.0, 1.0)], - 'max_padded_size_ratio': [(2.0, 2.0), (2.0, 2.0)], - 'pad_color': [(0.5, 0.5, 0.5), (0.5, 0.5, 0.5)]}) - - def test_build_ssd_random_crop_fixed_aspect_ratio(self): - preprocessor_text_proto = """ - ssd_random_crop_fixed_aspect_ratio { - operations { - min_object_covered: 0.0 - min_area: 0.5 - max_area: 1.0 - overlap_thresh: 0.0 - random_coef: 0.375 - } - 
operations { - min_object_covered: 0.25 - min_area: 0.5 - max_area: 1.0 - overlap_thresh: 0.25 - random_coef: 0.375 - } - aspect_ratio: 0.875 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, preprocessor.ssd_random_crop_fixed_aspect_ratio) - self.assertEqual(args, {'min_object_covered': [0.0, 0.25], - 'aspect_ratio': 0.875, - 'area_range': [(0.5, 1.0), (0.5, 1.0)], - 'overlap_thresh': [0.0, 0.25], - 'random_coef': [0.375, 0.375]}) - - def test_build_ssd_random_crop_pad_fixed_aspect_ratio(self): - preprocessor_text_proto = """ - ssd_random_crop_pad_fixed_aspect_ratio { - operations { - min_object_covered: 0.0 - min_aspect_ratio: 0.875 - max_aspect_ratio: 1.125 - min_area: 0.5 - max_area: 1.0 - overlap_thresh: 0.0 - random_coef: 0.375 - min_padded_size_ratio: [1.0, 1.0] - max_padded_size_ratio: [2.0, 2.0] - } - operations { - min_object_covered: 0.25 - min_aspect_ratio: 0.75 - max_aspect_ratio: 1.5 - min_area: 0.5 - max_area: 1.0 - overlap_thresh: 0.25 - random_coef: 0.375 - min_padded_size_ratio: [1.0, 1.0] - max_padded_size_ratio: [2.0, 2.0] - } - aspect_ratio: 0.875 - } - """ - preprocessor_proto = preprocessor_pb2.PreprocessingStep() - text_format.Merge(preprocessor_text_proto, preprocessor_proto) - function, args = preprocessor_builder.build(preprocessor_proto) - self.assertEqual(function, - preprocessor.ssd_random_crop_pad_fixed_aspect_ratio) - self.assertEqual(args, {'min_object_covered': [0.0, 0.25], - 'aspect_ratio': 0.875, - 'aspect_ratio_range': [(0.875, 1.125), (0.75, 1.5)], - 'area_range': [(0.5, 1.0), (0.5, 1.0)], - 'overlap_thresh': [0.0, 0.25], - 'random_coef': [0.375, 0.375], - 'min_padded_size_ratio': [(1.0, 1.0), (1.0, 1.0)], - 'max_padded_size_ratio': [(2.0, 2.0), (2.0, 2.0)]}) - - -if __name__ == '__main__': - tf.test.main() diff --git 
a/object_detection/builders/region_similarity_calculator_builder.py b/object_detection/builders/region_similarity_calculator_builder.py deleted file mode 100644 index fa1d6717..00000000 --- a/object_detection/builders/region_similarity_calculator_builder.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Builder for region similarity calculators.""" - -from object_detection.core import region_similarity_calculator -from object_detection.protos import region_similarity_calculator_pb2 - - -def build(region_similarity_calculator_config): - """Builds region similarity calculator based on the configuration. - - Builds one of [IouSimilarity, IoaSimilarity, NegSqDistSimilarity] objects. See - core/region_similarity_calculator.proto for details. - - Args: - region_similarity_calculator_config: RegionSimilarityCalculator - configuration proto. - - Returns: - region_similarity_calculator: RegionSimilarityCalculator object. - - Raises: - ValueError: On unknown region similarity calculator. 
- """ - - if not isinstance( - region_similarity_calculator_config, - region_similarity_calculator_pb2.RegionSimilarityCalculator): - raise ValueError( - 'region_similarity_calculator_config not of type ' - 'region_similarity_calculator_pb2.RegionsSimilarityCalculator') - - similarity_calculator = region_similarity_calculator_config.WhichOneof( - 'region_similarity') - if similarity_calculator == 'iou_similarity': - return region_similarity_calculator.IouSimilarity() - if similarity_calculator == 'ioa_similarity': - return region_similarity_calculator.IoaSimilarity() - if similarity_calculator == 'neg_sq_dist_similarity': - return region_similarity_calculator.NegSqDistSimilarity() - - raise ValueError('Unknown region similarity calculator.') - diff --git a/object_detection/builders/region_similarity_calculator_builder_test.py b/object_detection/builders/region_similarity_calculator_builder_test.py deleted file mode 100644 index ca3a5512..00000000 --- a/object_detection/builders/region_similarity_calculator_builder_test.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for region_similarity_calculator_builder.""" - -import tensorflow as tf - -from google.protobuf import text_format -from object_detection.builders import region_similarity_calculator_builder -from object_detection.core import region_similarity_calculator -from object_detection.protos import region_similarity_calculator_pb2 as sim_calc_pb2 - - -class RegionSimilarityCalculatorBuilderTest(tf.test.TestCase): - - def testBuildIoaSimilarityCalculator(self): - similarity_calc_text_proto = """ - ioa_similarity { - } - """ - similarity_calc_proto = sim_calc_pb2.RegionSimilarityCalculator() - text_format.Merge(similarity_calc_text_proto, similarity_calc_proto) - similarity_calc = region_similarity_calculator_builder.build( - similarity_calc_proto) - self.assertTrue(isinstance(similarity_calc, - region_similarity_calculator.IoaSimilarity)) - - def testBuildIouSimilarityCalculator(self): - similarity_calc_text_proto = """ - iou_similarity { - } - """ - similarity_calc_proto = sim_calc_pb2.RegionSimilarityCalculator() - text_format.Merge(similarity_calc_text_proto, similarity_calc_proto) - similarity_calc = region_similarity_calculator_builder.build( - similarity_calc_proto) - self.assertTrue(isinstance(similarity_calc, - region_similarity_calculator.IouSimilarity)) - - def testBuildNegSqDistSimilarityCalculator(self): - similarity_calc_text_proto = """ - neg_sq_dist_similarity { - } - """ - similarity_calc_proto = sim_calc_pb2.RegionSimilarityCalculator() - text_format.Merge(similarity_calc_text_proto, similarity_calc_proto) - similarity_calc = region_similarity_calculator_builder.build( - similarity_calc_proto) - self.assertTrue(isinstance(similarity_calc, - region_similarity_calculator. 
- NegSqDistSimilarity)) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/core/BUILD b/object_detection/core/BUILD deleted file mode 100644 index 5d8aaad7..00000000 --- a/object_detection/core/BUILD +++ /dev/null @@ -1,368 +0,0 @@ -# Tensorflow Object Detection API: Core. - -package( - default_visibility = ["//visibility:public"], -) - -licenses(["notice"]) -# Apache 2.0 - -py_library( - name = "batcher", - srcs = ["batcher.py"], - deps = [ - ":prefetcher", - ":preprocessor", - ":standard_fields", - "//tensorflow", - ], -) - -py_test( - name = "batcher_test", - srcs = ["batcher_test.py"], - deps = [ - ":batcher", - "//tensorflow", - ], -) - -py_library( - name = "box_list", - srcs = [ - "box_list.py", - ], - deps = [ - "//tensorflow", - ], -) - -py_test( - name = "box_list_test", - srcs = ["box_list_test.py"], - deps = [ - ":box_list", - ], -) - -py_library( - name = "box_list_ops", - srcs = [ - "box_list_ops.py", - ], - deps = [ - ":box_list", - "//tensorflow", - "//tensorflow_models/object_detection/utils:shape_utils", - ], -) - -py_test( - name = "box_list_ops_test", - srcs = ["box_list_ops_test.py"], - deps = [ - ":box_list", - ":box_list_ops", - ], -) - -py_library( - name = "box_coder", - srcs = [ - "box_coder.py", - ], - deps = [ - "//tensorflow", - ], -) - -py_test( - name = "box_coder_test", - srcs = [ - "box_coder_test.py", - ], - deps = [ - ":box_coder", - ":box_list", - "//tensorflow", - ], -) - -py_library( - name = "keypoint_ops", - srcs = [ - "keypoint_ops.py", - ], - deps = [ - "//tensorflow", - ], -) - -py_test( - name = "keypoint_ops_test", - srcs = ["keypoint_ops_test.py"], - deps = [ - ":keypoint_ops", - ], -) - -py_library( - name = "losses", - srcs = ["losses.py"], - deps = [ - ":box_list", - ":box_list_ops", - "//tensorflow", - "//tensorflow_models/object_detection/utils:ops", - ], -) - -py_library( - name = "matcher", - srcs = [ - "matcher.py", - ], - deps = [ - ], -) - -py_library( - name = "model", - srcs = 
["model.py"], - deps = [ - ":standard_fields", - ], -) - -py_test( - name = "matcher_test", - srcs = [ - "matcher_test.py", - ], - deps = [ - ":matcher", - "//tensorflow", - ], -) - -py_library( - name = "prefetcher", - srcs = ["prefetcher.py"], - deps = ["//tensorflow"], -) - -py_library( - name = "preprocessor", - srcs = [ - "preprocessor.py", - ], - deps = [ - ":box_list", - ":box_list_ops", - ":keypoint_ops", - ":standard_fields", - "//tensorflow", - ], -) - -py_test( - name = "preprocessor_test", - srcs = [ - "preprocessor_test.py", - ], - deps = [ - ":preprocessor", - "//tensorflow", - ], -) - -py_test( - name = "losses_test", - srcs = ["losses_test.py"], - deps = [ - ":box_list", - ":losses", - ":matcher", - "//tensorflow", - ], -) - -py_test( - name = "prefetcher_test", - srcs = ["prefetcher_test.py"], - deps = [ - ":prefetcher", - "//tensorflow", - ], -) - -py_library( - name = "standard_fields", - srcs = [ - "standard_fields.py", - ], -) - -py_library( - name = "post_processing", - srcs = ["post_processing.py"], - deps = [ - ":box_list", - ":box_list_ops", - ":standard_fields", - "//tensorflow", - ], -) - -py_test( - name = "post_processing_test", - srcs = ["post_processing_test.py"], - deps = [ - ":box_list", - ":box_list_ops", - ":post_processing", - "//tensorflow", - ], -) - -py_library( - name = "target_assigner", - srcs = [ - "target_assigner.py", - ], - deps = [ - ":box_list", - ":box_list_ops", - ":matcher", - ":region_similarity_calculator", - "//tensorflow", - "//tensorflow_models/object_detection/box_coders:faster_rcnn_box_coder", - "//tensorflow_models/object_detection/box_coders:mean_stddev_box_coder", - "//tensorflow_models/object_detection/core:box_coder", - "//tensorflow_models/object_detection/matchers:argmax_matcher", - "//tensorflow_models/object_detection/matchers:bipartite_matcher", - ], -) - -py_test( - name = "target_assigner_test", - size = "large", - timeout = "long", - srcs = ["target_assigner_test.py"], - deps = [ - ":box_list", 
- ":region_similarity_calculator", - ":target_assigner", - "//tensorflow", - "//tensorflow_models/object_detection/box_coders:mean_stddev_box_coder", - "//tensorflow_models/object_detection/matchers:bipartite_matcher", - ], -) - -py_library( - name = "data_decoder", - srcs = ["data_decoder.py"], -) - -py_library( - name = "data_parser", - srcs = ["data_parser.py"], -) - -py_library( - name = "box_predictor", - srcs = ["box_predictor.py"], - deps = [ - "//tensorflow", - "//tensorflow_models/object_detection/utils:ops", - "//tensorflow_models/object_detection/utils:shape_utils", - "//tensorflow_models/object_detection/utils:static_shape", - ], -) - -py_test( - name = "box_predictor_test", - srcs = ["box_predictor_test.py"], - deps = [ - ":box_predictor", - "//tensorflow", - "//tensorflow_models/object_detection/builders:hyperparams_builder", - "//tensorflow_models/object_detection/protos:hyperparams_py_pb2", - ], -) - -py_library( - name = "region_similarity_calculator", - srcs = [ - "region_similarity_calculator.py", - ], - deps = [ - "//tensorflow", - "//tensorflow_models/object_detection/core:box_list_ops", - ], -) - -py_test( - name = "region_similarity_calculator_test", - srcs = [ - "region_similarity_calculator_test.py", - ], - deps = [ - ":region_similarity_calculator", - "//tensorflow_models/object_detection/core:box_list", - ], -) - -py_library( - name = "anchor_generator", - srcs = [ - "anchor_generator.py", - ], - deps = [ - "//tensorflow", - ], -) - -py_library( - name = "minibatch_sampler", - srcs = [ - "minibatch_sampler.py", - ], - deps = [ - "//tensorflow", - "//tensorflow_models/object_detection/utils:ops", - ], -) - -py_test( - name = "minibatch_sampler_test", - srcs = [ - "minibatch_sampler_test.py", - ], - deps = [ - ":minibatch_sampler", - "//tensorflow", - ], -) - -py_library( - name = "balanced_positive_negative_sampler", - srcs = [ - "balanced_positive_negative_sampler.py", - ], - deps = [ - ":minibatch_sampler", - "//tensorflow", - ], -) - 
-py_test( - name = "balanced_positive_negative_sampler_test", - srcs = [ - "balanced_positive_negative_sampler_test.py", - ], - deps = [ - ":balanced_positive_negative_sampler", - "//tensorflow", - ], -) diff --git a/object_detection/core/__init__.py b/object_detection/core/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/object_detection/core/__pycache__/__init__.cpython-35.pyc b/object_detection/core/__pycache__/__init__.cpython-35.pyc deleted file mode 100644 index a42c6d32..00000000 Binary files a/object_detection/core/__pycache__/__init__.cpython-35.pyc and /dev/null differ diff --git a/object_detection/core/__pycache__/anchor_generator.cpython-35.pyc b/object_detection/core/__pycache__/anchor_generator.cpython-35.pyc deleted file mode 100644 index c4e9bddb..00000000 Binary files a/object_detection/core/__pycache__/anchor_generator.cpython-35.pyc and /dev/null differ diff --git a/object_detection/core/__pycache__/balanced_positive_negative_sampler.cpython-35.pyc b/object_detection/core/__pycache__/balanced_positive_negative_sampler.cpython-35.pyc deleted file mode 100644 index a259a66b..00000000 Binary files a/object_detection/core/__pycache__/balanced_positive_negative_sampler.cpython-35.pyc and /dev/null differ diff --git a/object_detection/core/__pycache__/box_coder.cpython-35.pyc b/object_detection/core/__pycache__/box_coder.cpython-35.pyc deleted file mode 100644 index f18d49fb..00000000 Binary files a/object_detection/core/__pycache__/box_coder.cpython-35.pyc and /dev/null differ diff --git a/object_detection/core/__pycache__/box_list.cpython-35.pyc b/object_detection/core/__pycache__/box_list.cpython-35.pyc deleted file mode 100644 index f1cb18db..00000000 Binary files a/object_detection/core/__pycache__/box_list.cpython-35.pyc and /dev/null differ diff --git a/object_detection/core/__pycache__/box_list_ops.cpython-35.pyc b/object_detection/core/__pycache__/box_list_ops.cpython-35.pyc deleted file mode 100644 index 
5a106696..00000000 Binary files a/object_detection/core/__pycache__/box_list_ops.cpython-35.pyc and /dev/null differ diff --git a/object_detection/core/__pycache__/box_predictor.cpython-35.pyc b/object_detection/core/__pycache__/box_predictor.cpython-35.pyc deleted file mode 100644 index e419bbbf..00000000 Binary files a/object_detection/core/__pycache__/box_predictor.cpython-35.pyc and /dev/null differ diff --git a/object_detection/core/__pycache__/data_decoder.cpython-35.pyc b/object_detection/core/__pycache__/data_decoder.cpython-35.pyc deleted file mode 100644 index 65b7de17..00000000 Binary files a/object_detection/core/__pycache__/data_decoder.cpython-35.pyc and /dev/null differ diff --git a/object_detection/core/__pycache__/keypoint_ops.cpython-35.pyc b/object_detection/core/__pycache__/keypoint_ops.cpython-35.pyc deleted file mode 100644 index 7ca49b1a..00000000 Binary files a/object_detection/core/__pycache__/keypoint_ops.cpython-35.pyc and /dev/null differ diff --git a/object_detection/core/__pycache__/losses.cpython-35.pyc b/object_detection/core/__pycache__/losses.cpython-35.pyc deleted file mode 100644 index 4ffe5697..00000000 Binary files a/object_detection/core/__pycache__/losses.cpython-35.pyc and /dev/null differ diff --git a/object_detection/core/__pycache__/matcher.cpython-35.pyc b/object_detection/core/__pycache__/matcher.cpython-35.pyc deleted file mode 100644 index 0f331fe1..00000000 Binary files a/object_detection/core/__pycache__/matcher.cpython-35.pyc and /dev/null differ diff --git a/object_detection/core/__pycache__/minibatch_sampler.cpython-35.pyc b/object_detection/core/__pycache__/minibatch_sampler.cpython-35.pyc deleted file mode 100644 index 7fcdb236..00000000 Binary files a/object_detection/core/__pycache__/minibatch_sampler.cpython-35.pyc and /dev/null differ diff --git a/object_detection/core/__pycache__/model.cpython-35.pyc b/object_detection/core/__pycache__/model.cpython-35.pyc deleted file mode 100644 index f162341c..00000000 
Binary files a/object_detection/core/__pycache__/model.cpython-35.pyc and /dev/null differ diff --git a/object_detection/core/__pycache__/post_processing.cpython-35.pyc b/object_detection/core/__pycache__/post_processing.cpython-35.pyc deleted file mode 100644 index 6192014d..00000000 Binary files a/object_detection/core/__pycache__/post_processing.cpython-35.pyc and /dev/null differ diff --git a/object_detection/core/__pycache__/preprocessor.cpython-35.pyc b/object_detection/core/__pycache__/preprocessor.cpython-35.pyc deleted file mode 100644 index e0b16b7f..00000000 Binary files a/object_detection/core/__pycache__/preprocessor.cpython-35.pyc and /dev/null differ diff --git a/object_detection/core/__pycache__/region_similarity_calculator.cpython-35.pyc b/object_detection/core/__pycache__/region_similarity_calculator.cpython-35.pyc deleted file mode 100644 index 953ac19d..00000000 Binary files a/object_detection/core/__pycache__/region_similarity_calculator.cpython-35.pyc and /dev/null differ diff --git a/object_detection/core/__pycache__/standard_fields.cpython-35.pyc b/object_detection/core/__pycache__/standard_fields.cpython-35.pyc deleted file mode 100644 index 31f74c6d..00000000 Binary files a/object_detection/core/__pycache__/standard_fields.cpython-35.pyc and /dev/null differ diff --git a/object_detection/core/__pycache__/target_assigner.cpython-35.pyc b/object_detection/core/__pycache__/target_assigner.cpython-35.pyc deleted file mode 100644 index 039d78c4..00000000 Binary files a/object_detection/core/__pycache__/target_assigner.cpython-35.pyc and /dev/null differ diff --git a/object_detection/core/anchor_generator.py b/object_detection/core/anchor_generator.py deleted file mode 100644 index ed6a2bc5..00000000 --- a/object_detection/core/anchor_generator.py +++ /dev/null @@ -1,142 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Base anchor generator. - -The job of the anchor generator is to create (or load) a collection -of bounding boxes to be used as anchors. - -Generated anchors are assumed to match some convolutional grid or list of grid -shapes. For example, we might want to generate anchors matching an 8x8 -feature map and a 4x4 feature map. If we place 3 anchors per grid location -on the first feature map and 6 anchors per grid location on the second feature -map, then 3*8*8 + 6*4*4 = 288 anchors are generated in total. - -To support fully convolutional settings, feature map shapes are passed -dynamically at generation time. The number of anchors to place at each location -is static --- implementations of AnchorGenerator must always be able return -the number of anchors that it uses per location for each feature map. -""" -from abc import ABCMeta -from abc import abstractmethod - -import tensorflow as tf - - -class AnchorGenerator(object): - """Abstract base class for anchor generators.""" - __metaclass__ = ABCMeta - - @abstractmethod - def name_scope(self): - """Name scope. - - Must be defined by implementations. - - Returns: - a string representing the name scope of the anchor generation operation. - """ - pass - - @property - def check_num_anchors(self): - """Whether to dynamically check the number of anchors generated. 
- - Can be overridden by implementations that would like to disable this - behavior. - - Returns: - a boolean controlling whether the Generate function should dynamically - check the number of anchors generated against the mathematically - expected number of anchors. - """ - return True - - @abstractmethod - def num_anchors_per_location(self): - """Returns the number of anchors per spatial location. - - Returns: - a list of integers, one for each expected feature map to be passed to - the `generate` function. - """ - pass - - def generate(self, feature_map_shape_list, **params): - """Generates a collection of bounding boxes to be used as anchors. - - TODO: remove **params from argument list and make stride and offsets (for - multiple_grid_anchor_generator) constructor arguments. - - Args: - feature_map_shape_list: list of (height, width) pairs in the format - [(height_0, width_0), (height_1, width_1), ...] that the generated - anchors must align with. Pairs can be provided as 1-dimensional - integer tensors of length 2 or simply as tuples of integers. - **params: parameters for anchor generation op - - Returns: - boxes: a BoxList holding a collection of N anchor boxes - Raises: - ValueError: if the number of feature map shapes does not match the length - of NumAnchorsPerLocation. - """ - if self.check_num_anchors and ( - len(feature_map_shape_list) != len(self.num_anchors_per_location())): - raise ValueError('Number of feature maps is expected to equal the length ' - 'of `num_anchors_per_location`.') - with tf.name_scope(self.name_scope()): - anchors = self._generate(feature_map_shape_list, **params) - if self.check_num_anchors: - with tf.control_dependencies([ - self._assert_correct_number_of_anchors( - anchors, feature_map_shape_list)]): - anchors.set(tf.identity(anchors.get())) - return anchors - - @abstractmethod - def _generate(self, feature_map_shape_list, **params): - """To be overridden by implementations. 
- - Args: - feature_map_shape_list: list of (height, width) pairs in the format - [(height_0, width_0), (height_1, width_1), ...] that the generated - anchors must align with. - **params: parameters for anchor generation op - - Returns: - boxes: a BoxList holding a collection of N anchor boxes - """ - pass - - def _assert_correct_number_of_anchors(self, anchors, feature_map_shape_list): - """Assert that correct number of anchors was generated. - - Args: - anchors: box_list.BoxList object holding anchors generated - feature_map_shape_list: list of (height, width) pairs in the format - [(height_0, width_0), (height_1, width_1), ...] that the generated - anchors must align with. - Returns: - Op that raises InvalidArgumentError if the number of anchors does not - match the number of expected anchors. - """ - expected_num_anchors = 0 - for num_anchors_per_location, feature_map_shape in zip( - self.num_anchors_per_location(), feature_map_shape_list): - expected_num_anchors += (num_anchors_per_location - * feature_map_shape[0] - * feature_map_shape[1]) - return tf.assert_equal(expected_num_anchors, anchors.num_boxes()) diff --git a/object_detection/core/balanced_positive_negative_sampler.py b/object_detection/core/balanced_positive_negative_sampler.py deleted file mode 100644 index 68844c4f..00000000 --- a/object_detection/core/balanced_positive_negative_sampler.py +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Class to subsample minibatches by balancing positives and negatives. - -Subsamples minibatches based on a pre-specified positive fraction in range -[0,1]. The class presumes there are many more negatives than positive examples: -if the desired batch_size cannot be achieved with the pre-specified positive -fraction, it fills the rest with negative examples. If this is not sufficient -for obtaining the desired batch_size, it returns fewer examples. - -The main function to call is Subsample(self, indicator, labels). For convenience -one can also call SubsampleWeights(self, weights, labels) which is defined in -the minibatch_sampler base class. -""" - -import tensorflow as tf - -from object_detection.core import minibatch_sampler - - -class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler): - """Subsamples minibatches to a desired balance of positives and negatives.""" - - def __init__(self, positive_fraction=0.5): - """Constructs a minibatch sampler. - - Args: - positive_fraction: desired fraction of positive examples (scalar in [0,1]) - - Raises: - ValueError: if positive_fraction < 0, or positive_fraction > 1 - """ - if positive_fraction < 0 or positive_fraction > 1: - raise ValueError('positive_fraction should be in range [0,1]. ' - 'Received: %s.' % positive_fraction) - self._positive_fraction = positive_fraction - - def subsample(self, indicator, batch_size, labels): - """Returns subsampled minibatch. - - Args: - indicator: boolean tensor of shape [N] whose True entries can be sampled. - batch_size: desired batch size. - labels: boolean tensor of shape [N] denoting positive(=True) and negative - (=False) examples. - - Returns: - is_sampled: boolean tensor of shape [N], True for entries which are - sampled. 
- - Raises: - ValueError: if labels and indicator are not 1D boolean tensors. - """ - if len(indicator.get_shape().as_list()) != 1: - raise ValueError('indicator must be 1 dimensional, got a tensor of ' - 'shape %s' % indicator.get_shape()) - if len(labels.get_shape().as_list()) != 1: - raise ValueError('labels must be 1 dimensional, got a tensor of ' - 'shape %s' % labels.get_shape()) - if labels.dtype != tf.bool: - raise ValueError('labels should be of type bool. Received: %s' % - labels.dtype) - if indicator.dtype != tf.bool: - raise ValueError('indicator should be of type bool. Received: %s' % - indicator.dtype) - - # Only sample from indicated samples - negative_idx = tf.logical_not(labels) - positive_idx = tf.logical_and(labels, indicator) - negative_idx = tf.logical_and(negative_idx, indicator) - - # Sample positive and negative samples separately - max_num_pos = int(self._positive_fraction * batch_size) - sampled_pos_idx = self.subsample_indicator(positive_idx, max_num_pos) - max_num_neg = batch_size - tf.reduce_sum(tf.cast(sampled_pos_idx, tf.int32)) - sampled_neg_idx = self.subsample_indicator(negative_idx, max_num_neg) - - sampled_idx = tf.logical_or(sampled_pos_idx, sampled_neg_idx) - return sampled_idx diff --git a/object_detection/core/balanced_positive_negative_sampler_test.py b/object_detection/core/balanced_positive_negative_sampler_test.py deleted file mode 100644 index 23991cf5..00000000 --- a/object_detection/core/balanced_positive_negative_sampler_test.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.core.balanced_positive_negative_sampler.""" - -import numpy as np -import tensorflow as tf - -from object_detection.core import balanced_positive_negative_sampler - - -class BalancedPositiveNegativeSamplerTest(tf.test.TestCase): - - def test_subsample_all_examples(self): - numpy_labels = np.random.permutation(300) - indicator = tf.constant(np.ones(300) == 1) - numpy_labels = (numpy_labels - 200) > 0 - - labels = tf.constant(numpy_labels) - - sampler = (balanced_positive_negative_sampler. - BalancedPositiveNegativeSampler()) - is_sampled = sampler.subsample(indicator, 64, labels) - with self.test_session() as sess: - is_sampled = sess.run(is_sampled) - self.assertTrue(sum(is_sampled) == 64) - self.assertTrue(sum(np.logical_and(numpy_labels, is_sampled)) == 32) - self.assertTrue(sum(np.logical_and( - np.logical_not(numpy_labels), is_sampled)) == 32) - - def test_subsample_selection(self): - # Test random sampling when only some examples can be sampled: - # 100 samples, 20 positives, 10 positives cannot be sampled - numpy_labels = np.arange(100) - numpy_indicator = numpy_labels < 90 - indicator = tf.constant(numpy_indicator) - numpy_labels = (numpy_labels - 80) >= 0 - - labels = tf.constant(numpy_labels) - - sampler = (balanced_positive_negative_sampler. 
- BalancedPositiveNegativeSampler()) - is_sampled = sampler.subsample(indicator, 64, labels) - with self.test_session() as sess: - is_sampled = sess.run(is_sampled) - self.assertTrue(sum(is_sampled) == 64) - self.assertTrue(sum(np.logical_and(numpy_labels, is_sampled)) == 10) - self.assertTrue(sum(np.logical_and( - np.logical_not(numpy_labels), is_sampled)) == 54) - self.assertAllEqual(is_sampled, np.logical_and(is_sampled, - numpy_indicator)) - - def test_raises_error_with_incorrect_label_shape(self): - labels = tf.constant([[True, False, False]]) - indicator = tf.constant([True, False, True]) - sampler = (balanced_positive_negative_sampler. - BalancedPositiveNegativeSampler()) - with self.assertRaises(ValueError): - sampler.subsample(indicator, 64, labels) - - def test_raises_error_with_incorrect_indicator_shape(self): - labels = tf.constant([True, False, False]) - indicator = tf.constant([[True, False, True]]) - sampler = (balanced_positive_negative_sampler. - BalancedPositiveNegativeSampler()) - with self.assertRaises(ValueError): - sampler.subsample(indicator, 64, labels) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/core/batcher.py b/object_detection/core/batcher.py deleted file mode 100644 index c5dfb712..00000000 --- a/object_detection/core/batcher.py +++ /dev/null @@ -1,136 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Provides functions to batch a dictionary of input tensors.""" -import collections - -import tensorflow as tf - -from object_detection.core import prefetcher - -rt_shape_str = '_runtime_shapes' - - -class BatchQueue(object): - """BatchQueue class. - - This class creates a batch queue to asynchronously enqueue tensors_dict. - It also adds a FIFO prefetcher so that the batches are readily available - for the consumers. Dequeue ops for a BatchQueue object can be created via - the Dequeue method which evaluates to a batch of tensor_dict. - - Example input pipeline with batching: - ------------------------------------ - key, string_tensor = slim.parallel_reader.parallel_read(...) - tensor_dict = decoder.decode(string_tensor) - tensor_dict = preprocessor.preprocess(tensor_dict, ...) - batch_queue = batcher.BatchQueue(tensor_dict, - batch_size=32, - batch_queue_capacity=2000, - num_batch_queue_threads=8, - prefetch_queue_capacity=20) - tensor_dict = batch_queue.dequeue() - outputs = Model(tensor_dict) - ... - ----------------------------------- - - Notes: - ----- - This class batches tensors of unequal sizes by zero padding and unpadding - them after generating a batch. This can be computationally expensive when - batching tensors (such as images) that are of vastly different sizes. So it is - recommended that the shapes of such tensors be fully defined in tensor_dict - while other lightweight tensors such as bounding box corners and class labels - can be of varying sizes. Use either crop or resize operations to fully define - the shape of an image in tensor_dict. - - It is also recommended to perform any preprocessing operations on tensors - before passing to BatchQueue and subsequently calling the Dequeue method. - - Another caveat is that this class does not read the last batch if it is not - full. The current implementation makes it hard to support that use case. 
So, - for evaluation, when it is critical to run all the examples through your - network use the input pipeline example mentioned in core/prefetcher.py. - """ - - def __init__(self, tensor_dict, batch_size, batch_queue_capacity, - num_batch_queue_threads, prefetch_queue_capacity): - """Constructs a batch queue holding tensor_dict. - - Args: - tensor_dict: dictionary of tensors to batch. - batch_size: batch size. - batch_queue_capacity: max capacity of the queue from which the tensors are - batched. - num_batch_queue_threads: number of threads to use for batching. - prefetch_queue_capacity: max capacity of the queue used to prefetch - assembled batches. - """ - # Remember static shapes to set shapes of batched tensors. - static_shapes = collections.OrderedDict( - {key: tensor.get_shape() for key, tensor in tensor_dict.items()}) - # Remember runtime shapes to unpad tensors after batching. - runtime_shapes = collections.OrderedDict( - {(key + rt_shape_str): tf.shape(tensor) - for key, tensor in tensor_dict.items()}) - - all_tensors = tensor_dict - all_tensors.update(runtime_shapes) - batched_tensors = tf.train.batch( - all_tensors, - capacity=batch_queue_capacity, - batch_size=batch_size, - dynamic_pad=True, - num_threads=num_batch_queue_threads) - - self._queue = prefetcher.prefetch(batched_tensors, - prefetch_queue_capacity) - self._static_shapes = static_shapes - self._batch_size = batch_size - - def dequeue(self): - """Dequeues a batch of tensor_dict from the BatchQueue. - - TODO: use allow_smaller_final_batch to allow running over the whole eval set - - Returns: - A list of tensor_dicts of the requested batch_size. - """ - batched_tensors = self._queue.dequeue() - # Separate input tensors from tensors containing their runtime shapes. 
- tensors = {} - shapes = {} - for key, batched_tensor in batched_tensors.items(): - unbatched_tensor_list = tf.unstack(batched_tensor) - for i, unbatched_tensor in enumerate(unbatched_tensor_list): - if rt_shape_str in key: - shapes[(key[:-len(rt_shape_str)], i)] = unbatched_tensor - else: - tensors[(key, i)] = unbatched_tensor - - # Undo that padding using shapes and create a list of size `batch_size` that - # contains tensor dictionaries. - tensor_dict_list = [] - batch_size = self._batch_size - for batch_id in range(batch_size): - tensor_dict = {} - for key in self._static_shapes: - tensor_dict[key] = tf.slice(tensors[(key, batch_id)], - tf.zeros_like(shapes[(key, batch_id)]), - shapes[(key, batch_id)]) - tensor_dict[key].set_shape(self._static_shapes[key]) - tensor_dict_list.append(tensor_dict) - - return tensor_dict_list diff --git a/object_detection/core/batcher_test.py b/object_detection/core/batcher_test.py deleted file mode 100644 index 61b4390b..00000000 --- a/object_detection/core/batcher_test.py +++ /dev/null @@ -1,158 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for object_detection.core.batcher.""" - -import numpy as np -import tensorflow as tf - -from object_detection.core import batcher - -slim = tf.contrib.slim - - -class BatcherTest(tf.test.TestCase): - - def test_batch_and_unpad_2d_tensors_of_different_sizes_in_1st_dimension(self): - with self.test_session() as sess: - batch_size = 3 - num_batches = 2 - examples = tf.Variable(tf.constant(2, dtype=tf.int32)) - counter = examples.count_up_to(num_batches * batch_size + 2) - boxes = tf.tile( - tf.reshape(tf.range(4), [1, 4]), tf.stack([counter, tf.constant(1)])) - batch_queue = batcher.BatchQueue( - tensor_dict={'boxes': boxes}, - batch_size=batch_size, - batch_queue_capacity=100, - num_batch_queue_threads=1, - prefetch_queue_capacity=100) - batch = batch_queue.dequeue() - - for tensor_dict in batch: - for tensor in tensor_dict.values(): - self.assertAllEqual([None, 4], tensor.get_shape().as_list()) - - tf.initialize_all_variables().run() - with slim.queues.QueueRunners(sess): - i = 2 - for _ in range(num_batches): - batch_np = sess.run(batch) - for tensor_dict in batch_np: - for tensor in tensor_dict.values(): - self.assertAllEqual(tensor, np.tile(np.arange(4), (i, 1))) - i += 1 - with self.assertRaises(tf.errors.OutOfRangeError): - sess.run(batch) - - def test_batch_and_unpad_2d_tensors_of_different_sizes_in_all_dimensions( - self): - with self.test_session() as sess: - batch_size = 3 - num_batches = 2 - examples = tf.Variable(tf.constant(2, dtype=tf.int32)) - counter = examples.count_up_to(num_batches * batch_size + 2) - image = tf.reshape( - tf.range(counter * counter), tf.stack([counter, counter])) - batch_queue = batcher.BatchQueue( - tensor_dict={'image': image}, - batch_size=batch_size, - batch_queue_capacity=100, - num_batch_queue_threads=1, - prefetch_queue_capacity=100) - batch = batch_queue.dequeue() - - for tensor_dict in batch: - for tensor in tensor_dict.values(): 
- self.assertAllEqual([None, None], tensor.get_shape().as_list()) - - tf.initialize_all_variables().run() - with slim.queues.QueueRunners(sess): - i = 2 - for _ in range(num_batches): - batch_np = sess.run(batch) - for tensor_dict in batch_np: - for tensor in tensor_dict.values(): - self.assertAllEqual(tensor, np.arange(i * i).reshape((i, i))) - i += 1 - with self.assertRaises(tf.errors.OutOfRangeError): - sess.run(batch) - - def test_batch_and_unpad_2d_tensors_of_same_size_in_all_dimensions(self): - with self.test_session() as sess: - batch_size = 3 - num_batches = 2 - examples = tf.Variable(tf.constant(1, dtype=tf.int32)) - counter = examples.count_up_to(num_batches * batch_size + 1) - image = tf.reshape(tf.range(1, 13), [4, 3]) * counter - batch_queue = batcher.BatchQueue( - tensor_dict={'image': image}, - batch_size=batch_size, - batch_queue_capacity=100, - num_batch_queue_threads=1, - prefetch_queue_capacity=100) - batch = batch_queue.dequeue() - - for tensor_dict in batch: - for tensor in tensor_dict.values(): - self.assertAllEqual([4, 3], tensor.get_shape().as_list()) - - tf.initialize_all_variables().run() - with slim.queues.QueueRunners(sess): - i = 1 - for _ in range(num_batches): - batch_np = sess.run(batch) - for tensor_dict in batch_np: - for tensor in tensor_dict.values(): - self.assertAllEqual(tensor, np.arange(1, 13).reshape((4, 3)) * i) - i += 1 - with self.assertRaises(tf.errors.OutOfRangeError): - sess.run(batch) - - def test_batcher_when_batch_size_is_one(self): - with self.test_session() as sess: - batch_size = 1 - num_batches = 2 - examples = tf.Variable(tf.constant(2, dtype=tf.int32)) - counter = examples.count_up_to(num_batches * batch_size + 2) - image = tf.reshape( - tf.range(counter * counter), tf.stack([counter, counter])) - batch_queue = batcher.BatchQueue( - tensor_dict={'image': image}, - batch_size=batch_size, - batch_queue_capacity=100, - num_batch_queue_threads=1, - prefetch_queue_capacity=100) - batch = batch_queue.dequeue() - - 
for tensor_dict in batch: - for tensor in tensor_dict.values(): - self.assertAllEqual([None, None], tensor.get_shape().as_list()) - - tf.initialize_all_variables().run() - with slim.queues.QueueRunners(sess): - i = 2 - for _ in range(num_batches): - batch_np = sess.run(batch) - for tensor_dict in batch_np: - for tensor in tensor_dict.values(): - self.assertAllEqual(tensor, np.arange(i * i).reshape((i, i))) - i += 1 - with self.assertRaises(tf.errors.OutOfRangeError): - sess.run(batch) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/core/box_coder.py b/object_detection/core/box_coder.py deleted file mode 100644 index f20ac956..00000000 --- a/object_detection/core/box_coder.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Base box coder. - -Box coders convert between coordinate frames, namely image-centric -(with (0,0) on the top left of image) and anchor-centric (with (0,0) being -defined by a specific anchor). - -Users of a BoxCoder can call two methods: - encode: which encodes a box with respect to a given anchor - (or rather, a tensor of boxes wrt a corresponding tensor of anchors) and - decode: which inverts this encoding with a decode operation. 
-In both cases, the arguments are assumed to be in 1-1 correspondence already; -it is not the job of a BoxCoder to perform matching. -""" -from abc import ABCMeta -from abc import abstractmethod -from abc import abstractproperty - -import tensorflow as tf - - -# Box coder types. -FASTER_RCNN = 'faster_rcnn' -KEYPOINT = 'keypoint' -MEAN_STDDEV = 'mean_stddev' -SQUARE = 'square' - - -class BoxCoder(object): - """Abstract base class for box coder.""" - __metaclass__ = ABCMeta - - @abstractproperty - def code_size(self): - """Return the size of each code. - - This number is a constant and should agree with the output of the `encode` - op (e.g. if rel_codes is the output of self.encode(...), then it should have - shape [N, code_size()]). This abstractproperty should be overridden by - implementations. - - Returns: - an integer constant - """ - pass - - def encode(self, boxes, anchors): - """Encode a box list relative to an anchor collection. - - Args: - boxes: BoxList holding N boxes to be encoded - anchors: BoxList of N anchors - - Returns: - a tensor representing N relative-encoded boxes - """ - with tf.name_scope('Encode'): - return self._encode(boxes, anchors) - - def decode(self, rel_codes, anchors): - """Decode boxes that are encoded relative to an anchor collection. - - Args: - rel_codes: a tensor representing N relative-encoded boxes - anchors: BoxList of anchors - - Returns: - boxlist: BoxList holding N boxes encoded in the ordinary way (i.e., - with corners y_min, x_min, y_max, x_max) - """ - with tf.name_scope('Decode'): - return self._decode(rel_codes, anchors) - - @abstractmethod - def _encode(self, boxes, anchors): - """Method to be overriden by implementations. - - Args: - boxes: BoxList holding N boxes to be encoded - anchors: BoxList of N anchors - - Returns: - a tensor representing N relative-encoded boxes - """ - pass - - @abstractmethod - def _decode(self, rel_codes, anchors): - """Method to be overriden by implementations. 
- - Args: - rel_codes: a tensor representing N relative-encoded boxes - anchors: BoxList of anchors - - Returns: - boxlist: BoxList holding N boxes encoded in the ordinary way (i.e., - with corners y_min, x_min, y_max, x_max) - """ - pass - - -def batch_decode(encoded_boxes, box_coder, anchors): - """Decode a batch of encoded boxes. - - This op takes a batch of encoded bounding boxes and transforms - them to a batch of bounding boxes specified by their corners in - the order of [y_min, x_min, y_max, x_max]. - - Args: - encoded_boxes: a float32 tensor of shape [batch_size, num_anchors, - code_size] representing the location of the objects. - box_coder: a BoxCoder object. - anchors: a BoxList of anchors used to encode `encoded_boxes`. - - Returns: - decoded_boxes: a float32 tensor of shape [batch_size, num_anchors, - coder_size] representing the corners of the objects in the order - of [y_min, x_min, y_max, x_max]. - - Raises: - ValueError: if batch sizes of the inputs are inconsistent, or if - the number of anchors inferred from encoded_boxes and anchors are - inconsistent. - """ - encoded_boxes.get_shape().assert_has_rank(3) - if encoded_boxes.get_shape()[1].value != anchors.num_boxes_static(): - raise ValueError('The number of anchors inferred from encoded_boxes' - ' and anchors are inconsistent: shape[1] of encoded_boxes' - ' %s should be equal to the number of anchors: %s.' % - (encoded_boxes.get_shape()[1].value, - anchors.num_boxes_static())) - - decoded_boxes = tf.stack([ - box_coder.decode(boxes, anchors).get() - for boxes in tf.unstack(encoded_boxes) - ]) - return decoded_boxes diff --git a/object_detection/core/box_coder_test.py b/object_detection/core/box_coder_test.py deleted file mode 100644 index c087a325..00000000 --- a/object_detection/core/box_coder_test.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.core.box_coder.""" - -import tensorflow as tf - -from object_detection.core import box_coder -from object_detection.core import box_list - - -class MockBoxCoder(box_coder.BoxCoder): - """Test BoxCoder that encodes/decodes using the multiply-by-two function.""" - - def code_size(self): - return 4 - - def _encode(self, boxes, anchors): - return 2.0 * boxes.get() - - def _decode(self, rel_codes, anchors): - return box_list.BoxList(rel_codes / 2.0) - - -class BoxCoderTest(tf.test.TestCase): - - def test_batch_decode(self): - mock_anchor_corners = tf.constant( - [[0, 0.1, 0.2, 0.3], [0.2, 0.4, 0.4, 0.6]], tf.float32) - mock_anchors = box_list.BoxList(mock_anchor_corners) - mock_box_coder = MockBoxCoder() - - expected_boxes = [[[0.0, 0.1, 0.5, 0.6], [0.5, 0.6, 0.7, 0.8]], - [[0.1, 0.2, 0.3, 0.4], [0.7, 0.8, 0.9, 1.0]]] - - encoded_boxes_list = [mock_box_coder.encode( - box_list.BoxList(tf.constant(boxes)), mock_anchors) - for boxes in expected_boxes] - encoded_boxes = tf.stack(encoded_boxes_list) - decoded_boxes = box_coder.batch_decode( - encoded_boxes, mock_box_coder, mock_anchors) - - with self.test_session() as sess: - decoded_boxes_result = sess.run(decoded_boxes) - self.assertAllClose(expected_boxes, decoded_boxes_result) - - -if __name__ == '__main__': - tf.test.main() diff --git 
a/object_detection/core/box_list.py b/object_detection/core/box_list.py deleted file mode 100644 index c0196f05..00000000 --- a/object_detection/core/box_list.py +++ /dev/null @@ -1,207 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Bounding Box List definition. - -BoxList represents a list of bounding boxes as tensorflow -tensors, where each bounding box is represented as a row of 4 numbers, -[y_min, x_min, y_max, x_max]. It is assumed that all bounding boxes -within a given list correspond to a single image. See also -box_list_ops.py for common box related operations (such as area, iou, etc). - -Optionally, users can add additional related fields (such as weights). -We assume the following things to be true about fields: -* they correspond to boxes in the box_list along the 0th dimension -* they have inferrable rank at graph construction time -* all dimensions except for possibly the 0th can be inferred - (i.e., not None) at graph construction time. - -Some other notes: - * Following tensorflow conventions, we use height, width ordering, - and correspondingly, y,x (or ymin, xmin, ymax, xmax) ordering - * Tensors are always provided as (flat) [N, 4] tensors. -""" - -import tensorflow as tf - - -class BoxList(object): - """Box collection.""" - - def __init__(self, boxes): - """Constructs box collection. 
- - Args: - boxes: a tensor of shape [N, 4] representing box corners - - Raises: - ValueError: if invalid dimensions for bbox data or if bbox data is not in - float32 format. - """ - if len(boxes.get_shape()) != 2 or boxes.get_shape()[-1] != 4: - raise ValueError('Invalid dimensions for box data.') - if boxes.dtype != tf.float32: - raise ValueError('Invalid tensor type: should be tf.float32') - self.data = {'boxes': boxes} - - def num_boxes(self): - """Returns number of boxes held in collection. - - Returns: - a tensor representing the number of boxes held in the collection. - """ - return tf.shape(self.data['boxes'])[0] - - def num_boxes_static(self): - """Returns number of boxes held in collection. - - This number is inferred at graph construction time rather than run-time. - - Returns: - Number of boxes held in collection (integer) or None if this is not - inferrable at graph construction time. - """ - return self.data['boxes'].get_shape()[0].value - - def get_all_fields(self): - """Returns all fields.""" - return self.data.keys() - - def get_extra_fields(self): - """Returns all non-box fields (i.e., everything not named 'boxes').""" - return [k for k in self.data.keys() if k != 'boxes'] - - def add_field(self, field, field_data): - """Add field to box list. - - This method can be used to add related box data such as - weights/labels, etc. - - Args: - field: a string key to access the data via `get` - field_data: a tensor containing the data to store in the BoxList - """ - self.data[field] = field_data - - def has_field(self, field): - return field in self.data - - def get(self): - """Convenience function for accessing box coordinates. - - Returns: - a tensor with shape [N, 4] representing box coordinates. - """ - return self.get_field('boxes') - - def set(self, boxes): - """Convenience function for setting box coordinates. 
- - Args: - boxes: a tensor of shape [N, 4] representing box corners - - Raises: - ValueError: if invalid dimensions for bbox data - """ - if len(boxes.get_shape()) != 2 or boxes.get_shape()[-1] != 4: - raise ValueError('Invalid dimensions for box data.') - self.data['boxes'] = boxes - - def get_field(self, field): - """Accesses a box collection and associated fields. - - This function returns specified field with object; if no field is specified, - it returns the box coordinates. - - Args: - field: this optional string parameter can be used to specify - a related field to be accessed. - - Returns: - a tensor representing the box collection or an associated field. - - Raises: - ValueError: if invalid field - """ - if not self.has_field(field): - raise ValueError('field ' + str(field) + ' does not exist') - return self.data[field] - - def set_field(self, field, value): - """Sets the value of a field. - - Updates the field of a box_list with a given value. - - Args: - field: (string) name of the field to set value. - value: the value to assign to the field. - - Raises: - ValueError: if the box_list does not have specified field. - """ - if not self.has_field(field): - raise ValueError('field %s does not exist' % field) - self.data[field] = value - - def get_center_coordinates_and_sizes(self, scope=None): - """Computes the center coordinates, height and width of the boxes. - - Args: - scope: name scope of the function. - - Returns: - a list of 4 1-D tensors [ycenter, xcenter, height, width]. - """ - with tf.name_scope(scope, 'get_center_coordinates_and_sizes'): - box_corners = self.get() - ymin, xmin, ymax, xmax = tf.unstack(tf.transpose(box_corners)) - width = xmax - xmin - height = ymax - ymin - ycenter = ymin + height / 2. - xcenter = xmin + width / 2. - return [ycenter, xcenter, height, width] - - def transpose_coordinates(self, scope=None): - """Transpose the coordinate representation in a boxlist. - - Args: - scope: name scope of the function. 
- """ - with tf.name_scope(scope, 'transpose_coordinates'): - y_min, x_min, y_max, x_max = tf.split( - value=self.get(), num_or_size_splits=4, axis=1) - self.set(tf.concat([x_min, y_min, x_max, y_max], 1)) - - def as_tensor_dict(self, fields=None): - """Retrieves specified fields as a dictionary of tensors. - - Args: - fields: (optional) list of fields to return in the dictionary. - If None (default), all fields are returned. - - Returns: - tensor_dict: A dictionary of tensors specified by fields. - - Raises: - ValueError: if specified field is not contained in boxlist. - """ - tensor_dict = {} - if fields is None: - fields = self.get_all_fields() - for field in fields: - if not self.has_field(field): - raise ValueError('boxlist must contain all specified fields') - tensor_dict[field] = self.get_field(field) - return tensor_dict diff --git a/object_detection/core/box_list_ops.py b/object_detection/core/box_list_ops.py deleted file mode 100644 index c98048d5..00000000 --- a/object_detection/core/box_list_ops.py +++ /dev/null @@ -1,984 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Bounding Box List operations. 
- -Example box operations that are supported: - * areas: compute bounding box areas - * iou: pairwise intersection-over-union scores - * sq_dist: pairwise distances between bounding boxes - -Whenever box_list_ops functions output a BoxList, the fields of the incoming -BoxList are retained unless documented otherwise. -""" -import tensorflow as tf - -from object_detection.core import box_list -from object_detection.utils import shape_utils - - -class SortOrder(object): - """Enum class for sort order. - - Attributes: - ascend: ascend order. - descend: descend order. - """ - ascend = 1 - descend = 2 - - -def area(boxlist, scope=None): - """Computes area of boxes. - - Args: - boxlist: BoxList holding N boxes - scope: name scope. - - Returns: - a tensor with shape [N] representing box areas. - """ - with tf.name_scope(scope, 'Area'): - y_min, x_min, y_max, x_max = tf.split( - value=boxlist.get(), num_or_size_splits=4, axis=1) - return tf.squeeze((y_max - y_min) * (x_max - x_min), [1]) - - -def height_width(boxlist, scope=None): - """Computes height and width of boxes in boxlist. - - Args: - boxlist: BoxList holding N boxes - scope: name scope. - - Returns: - Height: A tensor with shape [N] representing box heights. - Width: A tensor with shape [N] representing box widths. - """ - with tf.name_scope(scope, 'HeightWidth'): - y_min, x_min, y_max, x_max = tf.split( - value=boxlist.get(), num_or_size_splits=4, axis=1) - return tf.squeeze(y_max - y_min, [1]), tf.squeeze(x_max - x_min, [1]) - - -def scale(boxlist, y_scale, x_scale, scope=None): - """scale box coordinates in x and y dimensions. - - Args: - boxlist: BoxList holding N boxes - y_scale: (float) scalar tensor - x_scale: (float) scalar tensor - scope: name scope. 
- - Returns: - boxlist: BoxList holding N boxes - """ - with tf.name_scope(scope, 'Scale'): - y_scale = tf.cast(y_scale, tf.float32) - x_scale = tf.cast(x_scale, tf.float32) - y_min, x_min, y_max, x_max = tf.split( - value=boxlist.get(), num_or_size_splits=4, axis=1) - y_min = y_scale * y_min - y_max = y_scale * y_max - x_min = x_scale * x_min - x_max = x_scale * x_max - scaled_boxlist = box_list.BoxList( - tf.concat([y_min, x_min, y_max, x_max], 1)) - return _copy_extra_fields(scaled_boxlist, boxlist) - - -def clip_to_window(boxlist, window, filter_nonoverlapping=True, scope=None): - """Clip bounding boxes to a window. - - This op clips any input bounding boxes (represented by bounding box - corners) to a window, optionally filtering out boxes that do not - overlap at all with the window. - - Args: - boxlist: BoxList holding M_in boxes - window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max] - window to which the op should clip boxes. - filter_nonoverlapping: whether to filter out boxes that do not overlap at - all with the window. - scope: name scope. 
- - Returns: - a BoxList holding M_out boxes where M_out <= M_in - """ - with tf.name_scope(scope, 'ClipToWindow'): - y_min, x_min, y_max, x_max = tf.split( - value=boxlist.get(), num_or_size_splits=4, axis=1) - win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window) - y_min_clipped = tf.maximum(tf.minimum(y_min, win_y_max), win_y_min) - y_max_clipped = tf.maximum(tf.minimum(y_max, win_y_max), win_y_min) - x_min_clipped = tf.maximum(tf.minimum(x_min, win_x_max), win_x_min) - x_max_clipped = tf.maximum(tf.minimum(x_max, win_x_max), win_x_min) - clipped = box_list.BoxList( - tf.concat([y_min_clipped, x_min_clipped, y_max_clipped, x_max_clipped], - 1)) - clipped = _copy_extra_fields(clipped, boxlist) - if filter_nonoverlapping: - areas = area(clipped) - nonzero_area_indices = tf.cast( - tf.reshape(tf.where(tf.greater(areas, 0.0)), [-1]), tf.int32) - clipped = gather(clipped, nonzero_area_indices) - return clipped - - -def prune_outside_window(boxlist, window, scope=None): - """Prunes bounding boxes that fall outside a given window. - - This function prunes bounding boxes that even partially fall outside the given - window. See also clip_to_window which only prunes bounding boxes that fall - completely outside the window, and clips any bounding boxes that partially - overflow. - - Args: - boxlist: a BoxList holding M_in boxes. - window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax] - of the window - scope: name scope. - - Returns: - pruned_corners: a tensor with shape [M_out, 4] where M_out <= M_in - valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes - in the input tensor. 
- """ - with tf.name_scope(scope, 'PruneOutsideWindow'): - y_min, x_min, y_max, x_max = tf.split( - value=boxlist.get(), num_or_size_splits=4, axis=1) - win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window) - coordinate_violations = tf.concat([ - tf.less(y_min, win_y_min), tf.less(x_min, win_x_min), - tf.greater(y_max, win_y_max), tf.greater(x_max, win_x_max) - ], 1) - valid_indices = tf.reshape( - tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))), [-1]) - return gather(boxlist, valid_indices), valid_indices - - -def prune_completely_outside_window(boxlist, window, scope=None): - """Prunes bounding boxes that fall completely outside of the given window. - - The function clip_to_window prunes bounding boxes that fall - completely outside the window, but also clips any bounding boxes that - partially overflow. This function does not clip partially overflowing boxes. - - Args: - boxlist: a BoxList holding M_in boxes. - window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax] - of the window - scope: name scope. - - Returns: - pruned_corners: a tensor with shape [M_out, 4] where M_out <= M_in - valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes - in the input tensor. - """ - with tf.name_scope(scope, 'PruneCompleteleyOutsideWindow'): - y_min, x_min, y_max, x_max = tf.split( - value=boxlist.get(), num_or_size_splits=4, axis=1) - win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window) - coordinate_violations = tf.concat([ - tf.greater_equal(y_min, win_y_max), tf.greater_equal(x_min, win_x_max), - tf.less_equal(y_max, win_y_min), tf.less_equal(x_max, win_x_min) - ], 1) - valid_indices = tf.reshape( - tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))), [-1]) - return gather(boxlist, valid_indices), valid_indices - - -def intersection(boxlist1, boxlist2, scope=None): - """Compute pairwise intersection areas between boxes. 
- - Args: - boxlist1: BoxList holding N boxes - boxlist2: BoxList holding M boxes - scope: name scope. - - Returns: - a tensor with shape [N, M] representing pairwise intersections - """ - with tf.name_scope(scope, 'Intersection'): - y_min1, x_min1, y_max1, x_max1 = tf.split( - value=boxlist1.get(), num_or_size_splits=4, axis=1) - y_min2, x_min2, y_max2, x_max2 = tf.split( - value=boxlist2.get(), num_or_size_splits=4, axis=1) - all_pairs_min_ymax = tf.minimum(y_max1, tf.transpose(y_max2)) - all_pairs_max_ymin = tf.maximum(y_min1, tf.transpose(y_min2)) - intersect_heights = tf.maximum(0.0, all_pairs_min_ymax - all_pairs_max_ymin) - all_pairs_min_xmax = tf.minimum(x_max1, tf.transpose(x_max2)) - all_pairs_max_xmin = tf.maximum(x_min1, tf.transpose(x_min2)) - intersect_widths = tf.maximum(0.0, all_pairs_min_xmax - all_pairs_max_xmin) - return intersect_heights * intersect_widths - - -def matched_intersection(boxlist1, boxlist2, scope=None): - """Compute intersection areas between corresponding boxes in two boxlists. - - Args: - boxlist1: BoxList holding N boxes - boxlist2: BoxList holding N boxes - scope: name scope. - - Returns: - a tensor with shape [N] representing pairwise intersections - """ - with tf.name_scope(scope, 'MatchedIntersection'): - y_min1, x_min1, y_max1, x_max1 = tf.split( - value=boxlist1.get(), num_or_size_splits=4, axis=1) - y_min2, x_min2, y_max2, x_max2 = tf.split( - value=boxlist2.get(), num_or_size_splits=4, axis=1) - min_ymax = tf.minimum(y_max1, y_max2) - max_ymin = tf.maximum(y_min1, y_min2) - intersect_heights = tf.maximum(0.0, min_ymax - max_ymin) - min_xmax = tf.minimum(x_max1, x_max2) - max_xmin = tf.maximum(x_min1, x_min2) - intersect_widths = tf.maximum(0.0, min_xmax - max_xmin) - return tf.reshape(intersect_heights * intersect_widths, [-1]) - - -def iou(boxlist1, boxlist2, scope=None): - """Computes pairwise intersection-over-union between box collections. 
- - Args: - boxlist1: BoxList holding N boxes - boxlist2: BoxList holding M boxes - scope: name scope. - - Returns: - a tensor with shape [N, M] representing pairwise iou scores. - """ - with tf.name_scope(scope, 'IOU'): - intersections = intersection(boxlist1, boxlist2) - areas1 = area(boxlist1) - areas2 = area(boxlist2) - unions = ( - tf.expand_dims(areas1, 1) + tf.expand_dims(areas2, 0) - intersections) - return tf.where( - tf.equal(intersections, 0.0), - tf.zeros_like(intersections), tf.truediv(intersections, unions)) - - -def matched_iou(boxlist1, boxlist2, scope=None): - """Compute intersection-over-union between corresponding boxes in boxlists. - - Args: - boxlist1: BoxList holding N boxes - boxlist2: BoxList holding N boxes - scope: name scope. - - Returns: - a tensor with shape [N] representing pairwise iou scores. - """ - with tf.name_scope(scope, 'MatchedIOU'): - intersections = matched_intersection(boxlist1, boxlist2) - areas1 = area(boxlist1) - areas2 = area(boxlist2) - unions = areas1 + areas2 - intersections - return tf.where( - tf.equal(intersections, 0.0), - tf.zeros_like(intersections), tf.truediv(intersections, unions)) - - -def ioa(boxlist1, boxlist2, scope=None): - """Computes pairwise intersection-over-area between box collections. - - intersection-over-area (IOA) between two boxes box1 and box2 is defined as - their intersection area over box2's area. Note that ioa is not symmetric, - that is, ioa(box1, box2) != ioa(box2, box1). - - Args: - boxlist1: BoxList holding N boxes - boxlist2: BoxList holding M boxes - scope: name scope. - - Returns: - a tensor with shape [N, M] representing pairwise ioa scores. 
- """ - with tf.name_scope(scope, 'IOA'): - intersections = intersection(boxlist1, boxlist2) - areas = tf.expand_dims(area(boxlist2), 0) - return tf.truediv(intersections, areas) - - -def prune_non_overlapping_boxes( - boxlist1, boxlist2, min_overlap=0.0, scope=None): - """Prunes the boxes in boxlist1 that overlap less than thresh with boxlist2. - - For each box in boxlist1, we want its IOA to be more than minoverlap with - at least one of the boxes in boxlist2. If it does not, we remove it. - - Args: - boxlist1: BoxList holding N boxes. - boxlist2: BoxList holding M boxes. - min_overlap: Minimum required overlap between boxes, to count them as - overlapping. - scope: name scope. - - Returns: - new_boxlist1: A pruned boxlist with size [N', 4]. - keep_inds: A tensor with shape [N'] indexing kept bounding boxes in the - first input BoxList `boxlist1`. - """ - with tf.name_scope(scope, 'PruneNonOverlappingBoxes'): - ioa_ = ioa(boxlist2, boxlist1) # [M, N] tensor - ioa_ = tf.reduce_max(ioa_, reduction_indices=[0]) # [N] tensor - keep_bool = tf.greater_equal(ioa_, tf.constant(min_overlap)) - keep_inds = tf.squeeze(tf.where(keep_bool), squeeze_dims=[1]) - new_boxlist1 = gather(boxlist1, keep_inds) - return new_boxlist1, keep_inds - - -def prune_small_boxes(boxlist, min_side, scope=None): - """Prunes small boxes in the boxlist which have a side smaller than min_side. - - Args: - boxlist: BoxList holding N boxes. - min_side: Minimum width AND height of box to survive pruning. - scope: name scope. - - Returns: - A pruned boxlist. - """ - with tf.name_scope(scope, 'PruneSmallBoxes'): - height, width = height_width(boxlist) - is_valid = tf.logical_and(tf.greater_equal(width, min_side), - tf.greater_equal(height, min_side)) - return gather(boxlist, tf.reshape(tf.where(is_valid), [-1])) - - -def change_coordinate_frame(boxlist, window, scope=None): - """Change coordinate frame of the boxlist to be relative to window's frame. 
- - Given a window of the form [ymin, xmin, ymax, xmax], - changes bounding box coordinates from boxlist to be relative to this window - (e.g., the min corner maps to (0,0) and the max corner maps to (1,1)). - - An example use case is data augmentation: where we are given groundtruth - boxes (boxlist) and would like to randomly crop the image to some - window (window). In this case we need to change the coordinate frame of - each groundtruth box to be relative to this new window. - - Args: - boxlist: A BoxList object holding N boxes. - window: A rank 1 tensor [4]. - scope: name scope. - - Returns: - Returns a BoxList object with N boxes. - """ - with tf.name_scope(scope, 'ChangeCoordinateFrame'): - win_height = window[2] - window[0] - win_width = window[3] - window[1] - boxlist_new = scale(box_list.BoxList( - boxlist.get() - [window[0], window[1], window[0], window[1]]), - 1.0 / win_height, 1.0 / win_width) - boxlist_new = _copy_extra_fields(boxlist_new, boxlist) - return boxlist_new - - -def sq_dist(boxlist1, boxlist2, scope=None): - """Computes the pairwise squared distances between box corners. - - This op treats each box as if it were a point in a 4d Euclidean space and - computes pairwise squared distances. - - Mathematically, we are given two matrices of box coordinates X and Y, - where X(i,:) is the i'th row of X, containing the 4 numbers defining the - corners of the i'th box in boxlist1. Similarly Y(j,:) corresponds to - boxlist2. We compute - Z(i,j) = ||X(i,:) - Y(j,:)||^2 - = ||X(i,:)||^2 + ||Y(j,:)||^2 - 2 X(i,:)' * Y(j,:), - - Args: - boxlist1: BoxList holding N boxes - boxlist2: BoxList holding M boxes - scope: name scope. 
- - Returns: - a tensor with shape [N, M] representing pairwise distances - """ - with tf.name_scope(scope, 'SqDist'): - sqnorm1 = tf.reduce_sum(tf.square(boxlist1.get()), 1, keep_dims=True) - sqnorm2 = tf.reduce_sum(tf.square(boxlist2.get()), 1, keep_dims=True) - innerprod = tf.matmul(boxlist1.get(), boxlist2.get(), - transpose_a=False, transpose_b=True) - return sqnorm1 + tf.transpose(sqnorm2) - 2.0 * innerprod - - -def boolean_mask(boxlist, indicator, fields=None, scope=None): - """Select boxes from BoxList according to indicator and return new BoxList. - - `boolean_mask` returns the subset of boxes that are marked as "True" by the - indicator tensor. By default, `boolean_mask` returns boxes corresponding to - the input index list, as well as all additional fields stored in the boxlist - (indexing into the first dimension). However one can optionally only draw - from a subset of fields. - - Args: - boxlist: BoxList holding N boxes - indicator: a rank-1 boolean tensor - fields: (optional) list of fields to also gather from. If None (default), - all fields are gathered from. Pass an empty fields list to only gather - the box coordinates. - scope: name scope. - - Returns: - subboxlist: a BoxList corresponding to the subset of the input BoxList - specified by indicator - Raises: - ValueError: if `indicator` is not a rank-1 boolean tensor. 
- """ - with tf.name_scope(scope, 'BooleanMask'): - if indicator.shape.ndims != 1: - raise ValueError('indicator should have rank 1') - if indicator.dtype != tf.bool: - raise ValueError('indicator should be a boolean tensor') - subboxlist = box_list.BoxList(tf.boolean_mask(boxlist.get(), indicator)) - if fields is None: - fields = boxlist.get_extra_fields() - for field in fields: - if not boxlist.has_field(field): - raise ValueError('boxlist must contain all specified fields') - subfieldlist = tf.boolean_mask(boxlist.get_field(field), indicator) - subboxlist.add_field(field, subfieldlist) - return subboxlist - - -def gather(boxlist, indices, fields=None, scope=None): - """Gather boxes from BoxList according to indices and return new BoxList. - - By default, `gather` returns boxes corresponding to the input index list, as - well as all additional fields stored in the boxlist (indexing into the - first dimension). However one can optionally only gather from a - subset of fields. - - Args: - boxlist: BoxList holding N boxes - indices: a rank-1 tensor of type int32 / int64 - fields: (optional) list of fields to also gather from. If None (default), - all fields are gathered from. Pass an empty fields list to only gather - the box coordinates. - scope: name scope. 
- - Returns: - subboxlist: a BoxList corresponding to the subset of the input BoxList - specified by indices - Raises: - ValueError: if specified field is not contained in boxlist or if the - indices are not of type int32 - """ - with tf.name_scope(scope, 'Gather'): - if len(indices.shape.as_list()) != 1: - raise ValueError('indices should have rank 1') - if indices.dtype != tf.int32 and indices.dtype != tf.int64: - raise ValueError('indices should be an int32 / int64 tensor') - subboxlist = box_list.BoxList(tf.gather(boxlist.get(), indices)) - if fields is None: - fields = boxlist.get_extra_fields() - for field in fields: - if not boxlist.has_field(field): - raise ValueError('boxlist must contain all specified fields') - subfieldlist = tf.gather(boxlist.get_field(field), indices) - subboxlist.add_field(field, subfieldlist) - return subboxlist - - -def concatenate(boxlists, fields=None, scope=None): - """Concatenate list of BoxLists. - - This op concatenates a list of input BoxLists into a larger BoxList. It also - handles concatenation of BoxList fields as long as the field tensor shapes - are equal except for the first dimension. - - Args: - boxlists: list of BoxList objects - fields: optional list of fields to also concatenate. By default, all - fields from the first BoxList in the list are included in the - concatenation. - scope: name scope. 
- - Returns: - a BoxList with number of boxes equal to - sum([boxlist.num_boxes() for boxlist in BoxList]) - Raises: - ValueError: if boxlists is invalid (i.e., is not a list, is empty, or - contains non BoxList objects), or if requested fields are not contained in - all boxlists - """ - with tf.name_scope(scope, 'Concatenate'): - if not isinstance(boxlists, list): - raise ValueError('boxlists should be a list') - if not boxlists: - raise ValueError('boxlists should have nonzero length') - for boxlist in boxlists: - if not isinstance(boxlist, box_list.BoxList): - raise ValueError('all elements of boxlists should be BoxList objects') - concatenated = box_list.BoxList( - tf.concat([boxlist.get() for boxlist in boxlists], 0)) - if fields is None: - fields = boxlists[0].get_extra_fields() - for field in fields: - first_field_shape = boxlists[0].get_field(field).get_shape().as_list() - first_field_shape[0] = -1 - if None in first_field_shape: - raise ValueError('field %s must have fully defined shape except for the' - ' 0th dimension.' % field) - for boxlist in boxlists: - if not boxlist.has_field(field): - raise ValueError('boxlist must contain all requested fields') - field_shape = boxlist.get_field(field).get_shape().as_list() - field_shape[0] = -1 - if field_shape != first_field_shape: - raise ValueError('field %s must have same shape for all boxlists ' - 'except for the 0th dimension.' % field) - concatenated_field = tf.concat( - [boxlist.get_field(field) for boxlist in boxlists], 0) - concatenated.add_field(field, concatenated_field) - return concatenated - - -def sort_by_field(boxlist, field, order=SortOrder.descend, scope=None): - """Sort boxes and associated fields according to a scalar field. - - A common use case is reordering the boxes according to descending scores. - - Args: - boxlist: BoxList holding N boxes. - field: A BoxList field for sorting and reordering the BoxList. - order: (Optional) descend or ascend. Default is descend. - scope: name scope. 
- - Returns: - sorted_boxlist: A sorted BoxList with the field in the specified order. - - Raises: - ValueError: if specified field does not exist - ValueError: if the order is not either descend or ascend - """ - with tf.name_scope(scope, 'SortByField'): - if order != SortOrder.descend and order != SortOrder.ascend: - raise ValueError('Invalid sort order') - - field_to_sort = boxlist.get_field(field) - if len(field_to_sort.shape.as_list()) != 1: - raise ValueError('Field should have rank 1') - - num_boxes = boxlist.num_boxes() - num_entries = tf.size(field_to_sort) - length_assert = tf.Assert( - tf.equal(num_boxes, num_entries), - ['Incorrect field size: actual vs expected.', num_entries, num_boxes]) - - with tf.control_dependencies([length_assert]): - # TODO: Remove with tf.device when top_k operation runs - # correctly on GPU. - with tf.device('/cpu:0'): - _, sorted_indices = tf.nn.top_k(field_to_sort, num_boxes, sorted=True) - - if order == SortOrder.ascend: - sorted_indices = tf.reverse_v2(sorted_indices, [0]) - - return gather(boxlist, sorted_indices) - - -def visualize_boxes_in_image(image, boxlist, normalized=False, scope=None): - """Overlay bounding box list on image. - - Currently this visualization plots a 1 pixel thick red bounding box on top - of the image. Note that tf.image.draw_bounding_boxes essentially is - 1 indexed. - - Args: - image: an image tensor with shape [height, width, 3] - boxlist: a BoxList - normalized: (boolean) specify whether corners are to be interpreted - as absolute coordinates in image space or normalized with respect to the - image size. - scope: name scope. 
- - Returns: - image_and_boxes: an image tensor with shape [height, width, 3] - """ - with tf.name_scope(scope, 'VisualizeBoxesInImage'): - if not normalized: - height, width, _ = tf.unstack(tf.shape(image)) - boxlist = scale(boxlist, - 1.0 / tf.cast(height, tf.float32), - 1.0 / tf.cast(width, tf.float32)) - corners = tf.expand_dims(boxlist.get(), 0) - image = tf.expand_dims(image, 0) - return tf.squeeze(tf.image.draw_bounding_boxes(image, corners), [0]) - - -def filter_field_value_equals(boxlist, field, value, scope=None): - """Filter to keep only boxes with field entries equal to the given value. - - Args: - boxlist: BoxList holding N boxes. - field: field name for filtering. - value: scalar value. - scope: name scope. - - Returns: - a BoxList holding M boxes where M <= N - - Raises: - ValueError: if boxlist not a BoxList object or if it does not have - the specified field. - """ - with tf.name_scope(scope, 'FilterFieldValueEquals'): - if not isinstance(boxlist, box_list.BoxList): - raise ValueError('boxlist must be a BoxList') - if not boxlist.has_field(field): - raise ValueError('boxlist must contain the specified field') - filter_field = boxlist.get_field(field) - gather_index = tf.reshape(tf.where(tf.equal(filter_field, value)), [-1]) - return gather(boxlist, gather_index) - - -def filter_greater_than(boxlist, thresh, scope=None): - """Filter to keep only boxes with score exceeding a given threshold. - - This op keeps the collection of boxes whose corresponding scores are - greater than the input threshold. - - TODO: Change function name to filter_scores_greater_than - - Args: - boxlist: BoxList holding N boxes. Must contain a 'scores' field - representing detection scores. - thresh: scalar threshold - scope: name scope. 
- - Returns: - a BoxList holding M boxes where M <= N - - Raises: - ValueError: if boxlist not a BoxList object or if it does not - have a scores field - """ - with tf.name_scope(scope, 'FilterGreaterThan'): - if not isinstance(boxlist, box_list.BoxList): - raise ValueError('boxlist must be a BoxList') - if not boxlist.has_field('scores'): - raise ValueError('input boxlist must have \'scores\' field') - scores = boxlist.get_field('scores') - if len(scores.shape.as_list()) > 2: - raise ValueError('Scores should have rank 1 or 2') - if len(scores.shape.as_list()) == 2 and scores.shape.as_list()[1] != 1: - raise ValueError('Scores should have rank 1 or have shape ' - 'consistent with [None, 1]') - high_score_indices = tf.cast(tf.reshape( - tf.where(tf.greater(scores, thresh)), - [-1]), tf.int32) - return gather(boxlist, high_score_indices) - - -def non_max_suppression(boxlist, thresh, max_output_size, scope=None): - """Non maximum suppression. - - This op greedily selects a subset of detection bounding boxes, pruning - away boxes that have high IOU (intersection over union) overlap (> thresh) - with already selected boxes. Note that this only works for a single class --- - to apply NMS to multi-class predictions, use MultiClassNonMaxSuppression. - - Args: - boxlist: BoxList holding N boxes. Must contain a 'scores' field - representing detection scores. - thresh: scalar threshold - max_output_size: maximum number of retained boxes - scope: name scope. 
- - Returns: - a BoxList holding M boxes where M <= max_output_size - Raises: - ValueError: if thresh is not in [0, 1] - """ - with tf.name_scope(scope, 'NonMaxSuppression'): - if not 0 <= thresh <= 1.0: - raise ValueError('thresh must be between 0 and 1') - if not isinstance(boxlist, box_list.BoxList): - raise ValueError('boxlist must be a BoxList') - if not boxlist.has_field('scores'): - raise ValueError('input boxlist must have \'scores\' field') - selected_indices = tf.image.non_max_suppression( - boxlist.get(), boxlist.get_field('scores'), - max_output_size, iou_threshold=thresh) - return gather(boxlist, selected_indices) - - -def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from): - """Copies the extra fields of boxlist_to_copy_from to boxlist_to_copy_to. - - Args: - boxlist_to_copy_to: BoxList to which extra fields are copied. - boxlist_to_copy_from: BoxList from which fields are copied. - - Returns: - boxlist_to_copy_to with extra fields. - """ - for field in boxlist_to_copy_from.get_extra_fields(): - boxlist_to_copy_to.add_field(field, boxlist_to_copy_from.get_field(field)) - return boxlist_to_copy_to - - -def to_normalized_coordinates(boxlist, height, width, - check_range=True, scope=None): - """Converts absolute box coordinates to normalized coordinates in [0, 1]. - - Usually one uses the dynamic shape of the image or conv-layer tensor: - boxlist = box_list_ops.to_normalized_coordinates(boxlist, - tf.shape(images)[1], - tf.shape(images)[2]), - - This function raises an assertion failed error at graph execution time when - the maximum coordinate is smaller than 1.01 (which means that coordinates are - already normalized). The value 1.01 is to deal with small rounding errors. - - Args: - boxlist: BoxList with coordinates in terms of pixel-locations. - height: Maximum value for height of absolute box coordinates. - width: Maximum value for width of absolute box coordinates. - check_range: If True, checks if the coordinates are normalized or not. 
- scope: name scope. - - Returns: - boxlist with normalized coordinates in [0, 1]. - """ - with tf.name_scope(scope, 'ToNormalizedCoordinates'): - height = tf.cast(height, tf.float32) - width = tf.cast(width, tf.float32) - - if check_range: - max_val = tf.reduce_max(boxlist.get()) - max_assert = tf.Assert(tf.greater(max_val, 1.01), - ['max value is lower than 1.01: ', max_val]) - with tf.control_dependencies([max_assert]): - width = tf.identity(width) - - return scale(boxlist, 1 / height, 1 / width) - - -def to_absolute_coordinates(boxlist, - height, - width, - check_range=False, - maximum_normalized_coordinate=1.01, - scope=None): - """Converts normalized box coordinates to absolute pixel coordinates. - - This function raises an assertion failed error when the maximum box coordinate - value is larger than maximum_normalized_coordinate (in which case coordinates - are already absolute). - - Args: - boxlist: BoxList with coordinates in range [0, 1]. - height: Maximum value for height of absolute box coordinates. - width: Maximum value for width of absolute box coordinates. - check_range: If True, checks if the coordinates are normalized or not. - maximum_normalized_coordinate: Maximum coordinate value to be considered - as normalized, default to 1.01. - scope: name scope. - - Returns: - boxlist with absolute coordinates in terms of the image size. - - """ - with tf.name_scope(scope, 'ToAbsoluteCoordinates'): - height = tf.cast(height, tf.float32) - width = tf.cast(width, tf.float32) - - # Ensure range of input boxes is correct. 
- if check_range: - box_maximum = tf.reduce_max(boxlist.get()) - max_assert = tf.Assert( - tf.greater_equal(1.1, box_maximum), - ['maximum box coordinate value is larger ' - 'than 1.1: ', box_maximum]) - with tf.control_dependencies([max_assert]): - width = tf.identity(width) - - return scale(boxlist, height, width) - - -def refine_boxes_multi_class(pool_boxes, - num_classes, - nms_iou_thresh, - nms_max_detections, - voting_iou_thresh=0.5): - """Refines a pool of boxes using non max suppression and box voting. - - Box refinement is done independently for each class. - - Args: - pool_boxes: (BoxList) A collection of boxes to be refined. pool_boxes must - have a rank 1 'scores' field and a rank 1 'classes' field. - num_classes: (int scalar) Number of classes. - nms_iou_thresh: (float scalar) iou threshold for non max suppression (NMS). - nms_max_detections: (int scalar) maximum output size for NMS. - voting_iou_thresh: (float scalar) iou threshold for box voting. - - Returns: - BoxList of refined boxes. - - Raises: - ValueError: if - a) nms_iou_thresh or voting_iou_thresh is not in [0, 1]. - b) pool_boxes is not a BoxList. - c) pool_boxes does not have a scores and classes field. 
- """ - if not 0.0 <= nms_iou_thresh <= 1.0: - raise ValueError('nms_iou_thresh must be between 0 and 1') - if not 0.0 <= voting_iou_thresh <= 1.0: - raise ValueError('voting_iou_thresh must be between 0 and 1') - if not isinstance(pool_boxes, box_list.BoxList): - raise ValueError('pool_boxes must be a BoxList') - if not pool_boxes.has_field('scores'): - raise ValueError('pool_boxes must have a \'scores\' field') - if not pool_boxes.has_field('classes'): - raise ValueError('pool_boxes must have a \'classes\' field') - - refined_boxes = [] - for i in range(num_classes): - boxes_class = filter_field_value_equals(pool_boxes, 'classes', i) - refined_boxes_class = refine_boxes(boxes_class, nms_iou_thresh, - nms_max_detections, voting_iou_thresh) - refined_boxes.append(refined_boxes_class) - return sort_by_field(concatenate(refined_boxes), 'scores') - - -def refine_boxes(pool_boxes, - nms_iou_thresh, - nms_max_detections, - voting_iou_thresh=0.5): - """Refines a pool of boxes using non max suppression and box voting. - - Args: - pool_boxes: (BoxList) A collection of boxes to be refined. pool_boxes must - have a rank 1 'scores' field. - nms_iou_thresh: (float scalar) iou threshold for non max suppression (NMS). - nms_max_detections: (int scalar) maximum output size for NMS. - voting_iou_thresh: (float scalar) iou threshold for box voting. - - Returns: - BoxList of refined boxes. - - Raises: - ValueError: if - a) nms_iou_thresh or voting_iou_thresh is not in [0, 1]. - b) pool_boxes is not a BoxList. - c) pool_boxes does not have a scores field. 
- """ - if not 0.0 <= nms_iou_thresh <= 1.0: - raise ValueError('nms_iou_thresh must be between 0 and 1') - if not 0.0 <= voting_iou_thresh <= 1.0: - raise ValueError('voting_iou_thresh must be between 0 and 1') - if not isinstance(pool_boxes, box_list.BoxList): - raise ValueError('pool_boxes must be a BoxList') - if not pool_boxes.has_field('scores'): - raise ValueError('pool_boxes must have a \'scores\' field') - - nms_boxes = non_max_suppression( - pool_boxes, nms_iou_thresh, nms_max_detections) - return box_voting(nms_boxes, pool_boxes, voting_iou_thresh) - - -def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5): - """Performs box voting as described in S. Gidaris and N. Komodakis, ICCV 2015. - - Performs box voting as described in 'Object detection via a multi-region & - semantic segmentation-aware CNN model', Gidaris and Komodakis, ICCV 2015. For - each box 'B' in selected_boxes, we find the set 'S' of boxes in pool_boxes - with iou overlap >= iou_thresh. The location of B is set to the weighted - average location of boxes in S (scores are used for weighting). And the score - of B is set to the average score of boxes in S. - - Args: - selected_boxes: BoxList containing a subset of boxes in pool_boxes. These - boxes are usually selected from pool_boxes using non max suppression. - pool_boxes: BoxList containing a set of (possibly redundant) boxes. - iou_thresh: (float scalar) iou threshold for matching boxes in - selected_boxes and pool_boxes. - - Returns: - BoxList containing averaged locations and scores for each box in - selected_boxes. - - Raises: - ValueError: if - a) selected_boxes or pool_boxes is not a BoxList. - b) if iou_thresh is not in [0, 1]. - c) pool_boxes does not have a scores field. 
- """ - if not 0.0 <= iou_thresh <= 1.0: - raise ValueError('iou_thresh must be between 0 and 1') - if not isinstance(selected_boxes, box_list.BoxList): - raise ValueError('selected_boxes must be a BoxList') - if not isinstance(pool_boxes, box_list.BoxList): - raise ValueError('pool_boxes must be a BoxList') - if not pool_boxes.has_field('scores'): - raise ValueError('pool_boxes must have a \'scores\' field') - - iou_ = iou(selected_boxes, pool_boxes) - match_indicator = tf.to_float(tf.greater(iou_, iou_thresh)) - num_matches = tf.reduce_sum(match_indicator, 1) - # TODO: Handle the case where some boxes in selected_boxes do not - # match to any boxes in pool_boxes. For such boxes without any matches, we - # should return the original boxes without voting. - match_assert = tf.Assert( - tf.reduce_all(tf.greater(num_matches, 0)), - ['Each box in selected_boxes must match with at least one box ' - 'in pool_boxes.']) - - scores = tf.expand_dims(pool_boxes.get_field('scores'), 1) - scores_assert = tf.Assert( - tf.reduce_all(tf.greater_equal(scores, 0)), - ['Scores must be non negative.']) - - with tf.control_dependencies([scores_assert, match_assert]): - sum_scores = tf.matmul(match_indicator, scores) - averaged_scores = tf.reshape(sum_scores, [-1]) / num_matches - - box_locations = tf.matmul(match_indicator, - pool_boxes.get() * scores) / sum_scores - averaged_boxes = box_list.BoxList(box_locations) - _copy_extra_fields(averaged_boxes, selected_boxes) - averaged_boxes.add_field('scores', averaged_scores) - return averaged_boxes - - -def pad_or_clip_box_list(boxlist, num_boxes, scope=None): - """Pads or clips all fields of a BoxList. - - Args: - boxlist: A BoxList with arbitrary of number of boxes. - num_boxes: First num_boxes in boxlist are kept. - The fields are zero-padded if num_boxes is bigger than the - actual number of boxes. - scope: name scope. - - Returns: - BoxList with all fields padded or clipped. 
- """ - with tf.name_scope(scope, 'PadOrClipBoxList'): - subboxlist = box_list.BoxList(shape_utils.pad_or_clip_tensor( - boxlist.get(), num_boxes)) - for field in boxlist.get_extra_fields(): - subfield = shape_utils.pad_or_clip_tensor( - boxlist.get_field(field), num_boxes) - subboxlist.add_field(field, subfield) - return subboxlist diff --git a/object_detection/core/box_list_ops_test.py b/object_detection/core/box_list_ops_test.py deleted file mode 100644 index 467bb3c6..00000000 --- a/object_detection/core/box_list_ops_test.py +++ /dev/null @@ -1,962 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for object_detection.core.box_list_ops.""" -import numpy as np -import tensorflow as tf -from tensorflow.python.framework import errors - -from object_detection.core import box_list -from object_detection.core import box_list_ops - - -class BoxListOpsTest(tf.test.TestCase): - """Tests for common bounding box operations.""" - - def test_area(self): - corners = tf.constant([[0.0, 0.0, 10.0, 20.0], [1.0, 2.0, 3.0, 4.0]]) - exp_output = [200.0, 4.0] - boxes = box_list.BoxList(corners) - areas = box_list_ops.area(boxes) - with self.test_session() as sess: - areas_output = sess.run(areas) - self.assertAllClose(areas_output, exp_output) - - def test_height_width(self): - corners = tf.constant([[0.0, 0.0, 10.0, 20.0], [1.0, 2.0, 3.0, 4.0]]) - exp_output_heights = [10., 2.] - exp_output_widths = [20., 2.] - boxes = box_list.BoxList(corners) - heights, widths = box_list_ops.height_width(boxes) - with self.test_session() as sess: - output_heights, output_widths = sess.run([heights, widths]) - self.assertAllClose(output_heights, exp_output_heights) - self.assertAllClose(output_widths, exp_output_widths) - - def test_scale(self): - corners = tf.constant([[0, 0, 100, 200], [50, 120, 100, 140]], - dtype=tf.float32) - boxes = box_list.BoxList(corners) - boxes.add_field('extra_data', tf.constant([[1], [2]])) - - y_scale = tf.constant(1.0/100) - x_scale = tf.constant(1.0/200) - scaled_boxes = box_list_ops.scale(boxes, y_scale, x_scale) - exp_output = [[0, 0, 1, 1], [0.5, 0.6, 1.0, 0.7]] - with self.test_session() as sess: - scaled_corners_out = sess.run(scaled_boxes.get()) - self.assertAllClose(scaled_corners_out, exp_output) - extra_data_out = sess.run(scaled_boxes.get_field('extra_data')) - self.assertAllEqual(extra_data_out, [[1], [2]]) - - def test_clip_to_window_filter_boxes_which_fall_outside_the_window( - self): - window = tf.constant([0, 0, 9, 14], tf.float32) - corners = 
tf.constant([[5.0, 5.0, 6.0, 6.0], - [-1.0, -2.0, 4.0, 5.0], - [2.0, 3.0, 5.0, 9.0], - [0.0, 0.0, 9.0, 14.0], - [-100.0, -100.0, 300.0, 600.0], - [-10.0, -10.0, -9.0, -9.0]]) - boxes = box_list.BoxList(corners) - boxes.add_field('extra_data', tf.constant([[1], [2], [3], [4], [5], [6]])) - exp_output = [[5.0, 5.0, 6.0, 6.0], [0.0, 0.0, 4.0, 5.0], - [2.0, 3.0, 5.0, 9.0], [0.0, 0.0, 9.0, 14.0], - [0.0, 0.0, 9.0, 14.0]] - pruned = box_list_ops.clip_to_window( - boxes, window, filter_nonoverlapping=True) - with self.test_session() as sess: - pruned_output = sess.run(pruned.get()) - self.assertAllClose(pruned_output, exp_output) - extra_data_out = sess.run(pruned.get_field('extra_data')) - self.assertAllEqual(extra_data_out, [[1], [2], [3], [4], [5]]) - - def test_clip_to_window_without_filtering_boxes_which_fall_outside_the_window( - self): - window = tf.constant([0, 0, 9, 14], tf.float32) - corners = tf.constant([[5.0, 5.0, 6.0, 6.0], - [-1.0, -2.0, 4.0, 5.0], - [2.0, 3.0, 5.0, 9.0], - [0.0, 0.0, 9.0, 14.0], - [-100.0, -100.0, 300.0, 600.0], - [-10.0, -10.0, -9.0, -9.0]]) - boxes = box_list.BoxList(corners) - boxes.add_field('extra_data', tf.constant([[1], [2], [3], [4], [5], [6]])) - exp_output = [[5.0, 5.0, 6.0, 6.0], [0.0, 0.0, 4.0, 5.0], - [2.0, 3.0, 5.0, 9.0], [0.0, 0.0, 9.0, 14.0], - [0.0, 0.0, 9.0, 14.0], [0.0, 0.0, 0.0, 0.0]] - pruned = box_list_ops.clip_to_window( - boxes, window, filter_nonoverlapping=False) - with self.test_session() as sess: - pruned_output = sess.run(pruned.get()) - self.assertAllClose(pruned_output, exp_output) - extra_data_out = sess.run(pruned.get_field('extra_data')) - self.assertAllEqual(extra_data_out, [[1], [2], [3], [4], [5], [6]]) - - def test_prune_outside_window_filters_boxes_which_fall_outside_the_window( - self): - window = tf.constant([0, 0, 9, 14], tf.float32) - corners = tf.constant([[5.0, 5.0, 6.0, 6.0], - [-1.0, -2.0, 4.0, 5.0], - [2.0, 3.0, 5.0, 9.0], - [0.0, 0.0, 9.0, 14.0], - [-10.0, -10.0, -9.0, -9.0], - [-100.0, 
-100.0, 300.0, 600.0]]) - boxes = box_list.BoxList(corners) - boxes.add_field('extra_data', tf.constant([[1], [2], [3], [4], [5], [6]])) - exp_output = [[5.0, 5.0, 6.0, 6.0], - [2.0, 3.0, 5.0, 9.0], - [0.0, 0.0, 9.0, 14.0]] - pruned, keep_indices = box_list_ops.prune_outside_window(boxes, window) - with self.test_session() as sess: - pruned_output = sess.run(pruned.get()) - self.assertAllClose(pruned_output, exp_output) - keep_indices_out = sess.run(keep_indices) - self.assertAllEqual(keep_indices_out, [0, 2, 3]) - extra_data_out = sess.run(pruned.get_field('extra_data')) - self.assertAllEqual(extra_data_out, [[1], [3], [4]]) - - def test_prune_completely_outside_window(self): - window = tf.constant([0, 0, 9, 14], tf.float32) - corners = tf.constant([[5.0, 5.0, 6.0, 6.0], - [-1.0, -2.0, 4.0, 5.0], - [2.0, 3.0, 5.0, 9.0], - [0.0, 0.0, 9.0, 14.0], - [-10.0, -10.0, -9.0, -9.0], - [-100.0, -100.0, 300.0, 600.0]]) - boxes = box_list.BoxList(corners) - boxes.add_field('extra_data', tf.constant([[1], [2], [3], [4], [5], [6]])) - exp_output = [[5.0, 5.0, 6.0, 6.0], - [-1.0, -2.0, 4.0, 5.0], - [2.0, 3.0, 5.0, 9.0], - [0.0, 0.0, 9.0, 14.0], - [-100.0, -100.0, 300.0, 600.0]] - pruned, keep_indices = box_list_ops.prune_completely_outside_window(boxes, - window) - with self.test_session() as sess: - pruned_output = sess.run(pruned.get()) - self.assertAllClose(pruned_output, exp_output) - keep_indices_out = sess.run(keep_indices) - self.assertAllEqual(keep_indices_out, [0, 1, 2, 3, 5]) - extra_data_out = sess.run(pruned.get_field('extra_data')) - self.assertAllEqual(extra_data_out, [[1], [2], [3], [4], [6]]) - - def test_intersection(self): - corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) - corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], - [0.0, 0.0, 20.0, 20.0]]) - exp_output = [[2.0, 0.0, 6.0], [1.0, 0.0, 5.0]] - boxes1 = box_list.BoxList(corners1) - boxes2 = box_list.BoxList(corners2) - intersect = 
box_list_ops.intersection(boxes1, boxes2) - with self.test_session() as sess: - intersect_output = sess.run(intersect) - self.assertAllClose(intersect_output, exp_output) - - def test_matched_intersection(self): - corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) - corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0]]) - exp_output = [2.0, 0.0] - boxes1 = box_list.BoxList(corners1) - boxes2 = box_list.BoxList(corners2) - intersect = box_list_ops.matched_intersection(boxes1, boxes2) - with self.test_session() as sess: - intersect_output = sess.run(intersect) - self.assertAllClose(intersect_output, exp_output) - - def test_iou(self): - corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) - corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], - [0.0, 0.0, 20.0, 20.0]]) - exp_output = [[2.0 / 16.0, 0, 6.0 / 400.0], [1.0 / 16.0, 0.0, 5.0 / 400.0]] - boxes1 = box_list.BoxList(corners1) - boxes2 = box_list.BoxList(corners2) - iou = box_list_ops.iou(boxes1, boxes2) - with self.test_session() as sess: - iou_output = sess.run(iou) - self.assertAllClose(iou_output, exp_output) - - def test_matched_iou(self): - corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) - corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0]]) - exp_output = [2.0 / 16.0, 0] - boxes1 = box_list.BoxList(corners1) - boxes2 = box_list.BoxList(corners2) - iou = box_list_ops.matched_iou(boxes1, boxes2) - with self.test_session() as sess: - iou_output = sess.run(iou) - self.assertAllClose(iou_output, exp_output) - - def test_iouworks_on_empty_inputs(self): - corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) - corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], - [0.0, 0.0, 20.0, 20.0]]) - boxes1 = box_list.BoxList(corners1) - boxes2 = box_list.BoxList(corners2) - boxes_empty = box_list.BoxList(tf.zeros((0, 4))) - iou_empty_1 = box_list_ops.iou(boxes1, 
boxes_empty) - iou_empty_2 = box_list_ops.iou(boxes_empty, boxes2) - iou_empty_3 = box_list_ops.iou(boxes_empty, boxes_empty) - with self.test_session() as sess: - iou_output_1, iou_output_2, iou_output_3 = sess.run( - [iou_empty_1, iou_empty_2, iou_empty_3]) - self.assertAllEqual(iou_output_1.shape, (2, 0)) - self.assertAllEqual(iou_output_2.shape, (0, 3)) - self.assertAllEqual(iou_output_3.shape, (0, 0)) - - def test_ioa(self): - corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) - corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], - [0.0, 0.0, 20.0, 20.0]]) - exp_output_1 = [[2.0 / 12.0, 0, 6.0 / 400.0], - [1.0 / 12.0, 0.0, 5.0 / 400.0]] - exp_output_2 = [[2.0 / 6.0, 1.0 / 5.0], - [0, 0], - [6.0 / 6.0, 5.0 / 5.0]] - boxes1 = box_list.BoxList(corners1) - boxes2 = box_list.BoxList(corners2) - ioa_1 = box_list_ops.ioa(boxes1, boxes2) - ioa_2 = box_list_ops.ioa(boxes2, boxes1) - with self.test_session() as sess: - ioa_output_1, ioa_output_2 = sess.run([ioa_1, ioa_2]) - self.assertAllClose(ioa_output_1, exp_output_1) - self.assertAllClose(ioa_output_2, exp_output_2) - - def test_prune_non_overlapping_boxes(self): - corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) - corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], - [0.0, 0.0, 20.0, 20.0]]) - boxes1 = box_list.BoxList(corners1) - boxes2 = box_list.BoxList(corners2) - minoverlap = 0.5 - - exp_output_1 = boxes1 - exp_output_2 = box_list.BoxList(tf.constant(0.0, shape=[0, 4])) - output_1, keep_indices_1 = box_list_ops.prune_non_overlapping_boxes( - boxes1, boxes2, min_overlap=minoverlap) - output_2, keep_indices_2 = box_list_ops.prune_non_overlapping_boxes( - boxes2, boxes1, min_overlap=minoverlap) - with self.test_session() as sess: - (output_1_, keep_indices_1_, output_2_, keep_indices_2_, exp_output_1_, - exp_output_2_) = sess.run( - [output_1.get(), keep_indices_1, - output_2.get(), keep_indices_2, - exp_output_1.get(), 
exp_output_2.get()]) - self.assertAllClose(output_1_, exp_output_1_) - self.assertAllClose(output_2_, exp_output_2_) - self.assertAllEqual(keep_indices_1_, [0, 1]) - self.assertAllEqual(keep_indices_2_, []) - - def test_prune_small_boxes(self): - boxes = tf.constant([[4.0, 3.0, 7.0, 5.0], - [5.0, 6.0, 10.0, 7.0], - [3.0, 4.0, 6.0, 8.0], - [14.0, 14.0, 15.0, 15.0], - [0.0, 0.0, 20.0, 20.0]]) - exp_boxes = [[3.0, 4.0, 6.0, 8.0], - [0.0, 0.0, 20.0, 20.0]] - boxes = box_list.BoxList(boxes) - pruned_boxes = box_list_ops.prune_small_boxes(boxes, 3) - with self.test_session() as sess: - pruned_boxes = sess.run(pruned_boxes.get()) - self.assertAllEqual(pruned_boxes, exp_boxes) - - def test_prune_small_boxes_prunes_boxes_with_negative_side(self): - boxes = tf.constant([[4.0, 3.0, 7.0, 5.0], - [5.0, 6.0, 10.0, 7.0], - [3.0, 4.0, 6.0, 8.0], - [14.0, 14.0, 15.0, 15.0], - [0.0, 0.0, 20.0, 20.0], - [2.0, 3.0, 1.5, 7.0], # negative height - [2.0, 3.0, 5.0, 1.7]]) # negative width - exp_boxes = [[3.0, 4.0, 6.0, 8.0], - [0.0, 0.0, 20.0, 20.0]] - boxes = box_list.BoxList(boxes) - pruned_boxes = box_list_ops.prune_small_boxes(boxes, 3) - with self.test_session() as sess: - pruned_boxes = sess.run(pruned_boxes.get()) - self.assertAllEqual(pruned_boxes, exp_boxes) - - def test_change_coordinate_frame(self): - corners = tf.constant([[0.25, 0.5, 0.75, 0.75], [0.5, 0.0, 1.0, 1.0]]) - window = tf.constant([0.25, 0.25, 0.75, 0.75]) - boxes = box_list.BoxList(corners) - - expected_corners = tf.constant([[0, 0.5, 1.0, 1.0], [0.5, -0.5, 1.5, 1.5]]) - expected_boxes = box_list.BoxList(expected_corners) - output = box_list_ops.change_coordinate_frame(boxes, window) - - with self.test_session() as sess: - output_, expected_boxes_ = sess.run([output.get(), expected_boxes.get()]) - self.assertAllClose(output_, expected_boxes_) - - def test_ioaworks_on_empty_inputs(self): - corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) - corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 
14.0, 15.0, 15.0], - [0.0, 0.0, 20.0, 20.0]]) - boxes1 = box_list.BoxList(corners1) - boxes2 = box_list.BoxList(corners2) - boxes_empty = box_list.BoxList(tf.zeros((0, 4))) - ioa_empty_1 = box_list_ops.ioa(boxes1, boxes_empty) - ioa_empty_2 = box_list_ops.ioa(boxes_empty, boxes2) - ioa_empty_3 = box_list_ops.ioa(boxes_empty, boxes_empty) - with self.test_session() as sess: - ioa_output_1, ioa_output_2, ioa_output_3 = sess.run( - [ioa_empty_1, ioa_empty_2, ioa_empty_3]) - self.assertAllEqual(ioa_output_1.shape, (2, 0)) - self.assertAllEqual(ioa_output_2.shape, (0, 3)) - self.assertAllEqual(ioa_output_3.shape, (0, 0)) - - def test_pairwise_distances(self): - corners1 = tf.constant([[0.0, 0.0, 0.0, 0.0], - [1.0, 1.0, 0.0, 2.0]]) - corners2 = tf.constant([[3.0, 4.0, 1.0, 0.0], - [-4.0, 0.0, 0.0, 3.0], - [0.0, 0.0, 0.0, 0.0]]) - exp_output = [[26, 25, 0], [18, 27, 6]] - boxes1 = box_list.BoxList(corners1) - boxes2 = box_list.BoxList(corners2) - dist_matrix = box_list_ops.sq_dist(boxes1, boxes2) - with self.test_session() as sess: - dist_output = sess.run(dist_matrix) - self.assertAllClose(dist_output, exp_output) - - def test_boolean_mask(self): - corners = tf.constant( - [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]]) - indicator = tf.constant([True, False, True, False, True], tf.bool) - expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]] - boxes = box_list.BoxList(corners) - subset = box_list_ops.boolean_mask(boxes, indicator) - with self.test_session() as sess: - subset_output = sess.run(subset.get()) - self.assertAllClose(subset_output, expected_subset) - - def test_boolean_mask_with_field(self): - corners = tf.constant( - [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]]) - indicator = tf.constant([True, False, True, False, True], tf.bool) - weights = tf.constant([[.1], [.3], [.5], [.7], [.9]], tf.float32) - expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]] - expected_weights = [[.1], [.5], [.9]] - - boxes = box_list.BoxList(corners) - 
boxes.add_field('weights', weights) - subset = box_list_ops.boolean_mask(boxes, indicator, ['weights']) - with self.test_session() as sess: - subset_output, weights_output = sess.run( - [subset.get(), subset.get_field('weights')]) - self.assertAllClose(subset_output, expected_subset) - self.assertAllClose(weights_output, expected_weights) - - def test_gather(self): - corners = tf.constant( - [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]]) - indices = tf.constant([0, 2, 4], tf.int32) - expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]] - boxes = box_list.BoxList(corners) - subset = box_list_ops.gather(boxes, indices) - with self.test_session() as sess: - subset_output = sess.run(subset.get()) - self.assertAllClose(subset_output, expected_subset) - - def test_gather_with_field(self): - corners = tf.constant([4*[0.0], 4*[1.0], 4*[2.0], 4*[3.0], 4*[4.0]]) - indices = tf.constant([0, 2, 4], tf.int32) - weights = tf.constant([[.1], [.3], [.5], [.7], [.9]], tf.float32) - expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]] - expected_weights = [[.1], [.5], [.9]] - - boxes = box_list.BoxList(corners) - boxes.add_field('weights', weights) - subset = box_list_ops.gather(boxes, indices, ['weights']) - with self.test_session() as sess: - subset_output, weights_output = sess.run( - [subset.get(), subset.get_field('weights')]) - self.assertAllClose(subset_output, expected_subset) - self.assertAllClose(weights_output, expected_weights) - - def test_gather_with_invalid_field(self): - corners = tf.constant([4 * [0.0], 4 * [1.0]]) - indices = tf.constant([0, 1], tf.int32) - weights = tf.constant([[.1], [.3]], tf.float32) - - boxes = box_list.BoxList(corners) - boxes.add_field('weights', weights) - with self.assertRaises(ValueError): - box_list_ops.gather(boxes, indices, ['foo', 'bar']) - - def test_gather_with_invalid_inputs(self): - corners = tf.constant( - [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]]) - indices_float32 = tf.constant([0, 2, 4], tf.float32) - 
boxes = box_list.BoxList(corners) - with self.assertRaises(ValueError): - _ = box_list_ops.gather(boxes, indices_float32) - indices_2d = tf.constant([[0, 2, 4]], tf.int32) - boxes = box_list.BoxList(corners) - with self.assertRaises(ValueError): - _ = box_list_ops.gather(boxes, indices_2d) - - def test_gather_with_dynamic_indexing(self): - corners = tf.constant([4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0] - ]) - weights = tf.constant([.5, .3, .7, .1, .9], tf.float32) - indices = tf.reshape(tf.where(tf.greater(weights, 0.4)), [-1]) - expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]] - expected_weights = [.5, .7, .9] - - boxes = box_list.BoxList(corners) - boxes.add_field('weights', weights) - subset = box_list_ops.gather(boxes, indices, ['weights']) - with self.test_session() as sess: - subset_output, weights_output = sess.run([subset.get(), subset.get_field( - 'weights')]) - self.assertAllClose(subset_output, expected_subset) - self.assertAllClose(weights_output, expected_weights) - - def test_sort_by_field_ascending_order(self): - exp_corners = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9], - [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]] - exp_scores = [.95, .9, .75, .6, .5, .3] - exp_weights = [.2, .45, .6, .75, .8, .92] - shuffle = [2, 4, 0, 5, 1, 3] - corners = tf.constant([exp_corners[i] for i in shuffle], tf.float32) - boxes = box_list.BoxList(corners) - boxes.add_field('scores', tf.constant( - [exp_scores[i] for i in shuffle], tf.float32)) - boxes.add_field('weights', tf.constant( - [exp_weights[i] for i in shuffle], tf.float32)) - sort_by_weight = box_list_ops.sort_by_field( - boxes, - 'weights', - order=box_list_ops.SortOrder.ascend) - with self.test_session() as sess: - corners_out, scores_out, weights_out = sess.run([ - sort_by_weight.get(), - sort_by_weight.get_field('scores'), - sort_by_weight.get_field('weights')]) - self.assertAllClose(corners_out, exp_corners) - self.assertAllClose(scores_out, exp_scores) - 
self.assertAllClose(weights_out, exp_weights) - - def test_sort_by_field_descending_order(self): - exp_corners = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9], - [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]] - exp_scores = [.95, .9, .75, .6, .5, .3] - exp_weights = [.2, .45, .6, .75, .8, .92] - shuffle = [2, 4, 0, 5, 1, 3] - - corners = tf.constant([exp_corners[i] for i in shuffle], tf.float32) - boxes = box_list.BoxList(corners) - boxes.add_field('scores', tf.constant( - [exp_scores[i] for i in shuffle], tf.float32)) - boxes.add_field('weights', tf.constant( - [exp_weights[i] for i in shuffle], tf.float32)) - - sort_by_score = box_list_ops.sort_by_field(boxes, 'scores') - with self.test_session() as sess: - corners_out, scores_out, weights_out = sess.run([sort_by_score.get( - ), sort_by_score.get_field('scores'), sort_by_score.get_field('weights')]) - self.assertAllClose(corners_out, exp_corners) - self.assertAllClose(scores_out, exp_scores) - self.assertAllClose(weights_out, exp_weights) - - def test_sort_by_field_invalid_inputs(self): - corners = tf.constant([4 * [0.0], 4 * [0.5], 4 * [1.0], 4 * [2.0], 4 * - [3.0], 4 * [4.0]]) - misc = tf.constant([[.95, .9], [.5, .3]], tf.float32) - weights = tf.constant([.1, .2], tf.float32) - boxes = box_list.BoxList(corners) - boxes.add_field('misc', misc) - boxes.add_field('weights', weights) - - with self.test_session() as sess: - with self.assertRaises(ValueError): - box_list_ops.sort_by_field(boxes, 'area') - - with self.assertRaises(ValueError): - box_list_ops.sort_by_field(boxes, 'misc') - - with self.assertRaisesWithPredicateMatch(errors.InvalidArgumentError, - 'Incorrect field size'): - sess.run(box_list_ops.sort_by_field(boxes, 'weights').get()) - - def test_visualize_boxes_in_image(self): - image = tf.zeros((6, 4, 3)) - corners = tf.constant([[0, 0, 5, 3], - [0, 0, 3, 2]], tf.float32) - boxes = box_list.BoxList(corners) - image_and_boxes = box_list_ops.visualize_boxes_in_image(image, boxes) - 
image_and_boxes_bw = tf.to_float( - tf.greater(tf.reduce_sum(image_and_boxes, 2), 0.0)) - exp_result = [[1, 1, 1, 0], - [1, 1, 1, 0], - [1, 1, 1, 0], - [1, 0, 1, 0], - [1, 1, 1, 0], - [0, 0, 0, 0]] - with self.test_session() as sess: - output = sess.run(image_and_boxes_bw) - self.assertAllEqual(output.astype(int), exp_result) - - def test_filter_field_value_equals(self): - corners = tf.constant([[0, 0, 1, 1], - [0, 0.1, 1, 1.1], - [0, -0.1, 1, 0.9], - [0, 10, 1, 11], - [0, 10.1, 1, 11.1], - [0, 100, 1, 101]], tf.float32) - boxes = box_list.BoxList(corners) - boxes.add_field('classes', tf.constant([1, 2, 1, 2, 2, 1])) - exp_output1 = [[0, 0, 1, 1], [0, -0.1, 1, 0.9], [0, 100, 1, 101]] - exp_output2 = [[0, 0.1, 1, 1.1], [0, 10, 1, 11], [0, 10.1, 1, 11.1]] - - filtered_boxes1 = box_list_ops.filter_field_value_equals( - boxes, 'classes', 1) - filtered_boxes2 = box_list_ops.filter_field_value_equals( - boxes, 'classes', 2) - with self.test_session() as sess: - filtered_output1, filtered_output2 = sess.run([filtered_boxes1.get(), - filtered_boxes2.get()]) - self.assertAllClose(filtered_output1, exp_output1) - self.assertAllClose(filtered_output2, exp_output2) - - def test_filter_greater_than(self): - corners = tf.constant([[0, 0, 1, 1], - [0, 0.1, 1, 1.1], - [0, -0.1, 1, 0.9], - [0, 10, 1, 11], - [0, 10.1, 1, 11.1], - [0, 100, 1, 101]], tf.float32) - boxes = box_list.BoxList(corners) - boxes.add_field('scores', tf.constant([.1, .75, .9, .5, .5, .8])) - thresh = .6 - exp_output = [[0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9], [0, 100, 1, 101]] - - filtered_boxes = box_list_ops.filter_greater_than(boxes, thresh) - with self.test_session() as sess: - filtered_output = sess.run(filtered_boxes.get()) - self.assertAllClose(filtered_output, exp_output) - - def test_clip_box_list(self): - boxlist = box_list.BoxList( - tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5], - [0.6, 0.6, 0.8, 0.8], [0.2, 0.2, 0.3, 0.3]], tf.float32)) - boxlist.add_field('classes', tf.constant([0, 0, 1, 
1])) - boxlist.add_field('scores', tf.constant([0.75, 0.65, 0.3, 0.2])) - num_boxes = 2 - clipped_boxlist = box_list_ops.pad_or_clip_box_list(boxlist, num_boxes) - - expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]] - expected_classes = [0, 0] - expected_scores = [0.75, 0.65] - with self.test_session() as sess: - boxes_out, classes_out, scores_out = sess.run( - [clipped_boxlist.get(), clipped_boxlist.get_field('classes'), - clipped_boxlist.get_field('scores')]) - - self.assertAllClose(expected_boxes, boxes_out) - self.assertAllEqual(expected_classes, classes_out) - self.assertAllClose(expected_scores, scores_out) - - def test_pad_box_list(self): - boxlist = box_list.BoxList( - tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32)) - boxlist.add_field('classes', tf.constant([0, 1])) - boxlist.add_field('scores', tf.constant([0.75, 0.2])) - num_boxes = 4 - padded_boxlist = box_list_ops.pad_or_clip_box_list(boxlist, num_boxes) - - expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5], - [0, 0, 0, 0], [0, 0, 0, 0]] - expected_classes = [0, 1, 0, 0] - expected_scores = [0.75, 0.2, 0, 0] - with self.test_session() as sess: - boxes_out, classes_out, scores_out = sess.run( - [padded_boxlist.get(), padded_boxlist.get_field('classes'), - padded_boxlist.get_field('scores')]) - - self.assertAllClose(expected_boxes, boxes_out) - self.assertAllEqual(expected_classes, classes_out) - self.assertAllClose(expected_scores, scores_out) - - -class ConcatenateTest(tf.test.TestCase): - - def test_invalid_input_box_list_list(self): - with self.assertRaises(ValueError): - box_list_ops.concatenate(None) - with self.assertRaises(ValueError): - box_list_ops.concatenate([]) - with self.assertRaises(ValueError): - corners = tf.constant([[0, 0, 0, 0]], tf.float32) - boxlist = box_list.BoxList(corners) - box_list_ops.concatenate([boxlist, 2]) - - def test_concatenate_with_missing_fields(self): - corners1 = tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32) - 
scores1 = tf.constant([1.0, 2.1]) - corners2 = tf.constant([[0, 3, 1, 6], [2, 4, 3, 8]], tf.float32) - boxlist1 = box_list.BoxList(corners1) - boxlist1.add_field('scores', scores1) - boxlist2 = box_list.BoxList(corners2) - with self.assertRaises(ValueError): - box_list_ops.concatenate([boxlist1, boxlist2]) - - def test_concatenate_with_incompatible_field_shapes(self): - corners1 = tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32) - scores1 = tf.constant([1.0, 2.1]) - corners2 = tf.constant([[0, 3, 1, 6], [2, 4, 3, 8]], tf.float32) - scores2 = tf.constant([[1.0, 1.0], [2.1, 3.2]]) - boxlist1 = box_list.BoxList(corners1) - boxlist1.add_field('scores', scores1) - boxlist2 = box_list.BoxList(corners2) - boxlist2.add_field('scores', scores2) - with self.assertRaises(ValueError): - box_list_ops.concatenate([boxlist1, boxlist2]) - - def test_concatenate_is_correct(self): - corners1 = tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32) - scores1 = tf.constant([1.0, 2.1]) - corners2 = tf.constant([[0, 3, 1, 6], [2, 4, 3, 8], [1, 0, 5, 10]], - tf.float32) - scores2 = tf.constant([1.0, 2.1, 5.6]) - - exp_corners = [[0, 0, 0, 0], - [1, 2, 3, 4], - [0, 3, 1, 6], - [2, 4, 3, 8], - [1, 0, 5, 10]] - exp_scores = [1.0, 2.1, 1.0, 2.1, 5.6] - - boxlist1 = box_list.BoxList(corners1) - boxlist1.add_field('scores', scores1) - boxlist2 = box_list.BoxList(corners2) - boxlist2.add_field('scores', scores2) - result = box_list_ops.concatenate([boxlist1, boxlist2]) - with self.test_session() as sess: - corners_output, scores_output = sess.run( - [result.get(), result.get_field('scores')]) - self.assertAllClose(corners_output, exp_corners) - self.assertAllClose(scores_output, exp_scores) - - -class NonMaxSuppressionTest(tf.test.TestCase): - - def test_with_invalid_scores_field(self): - corners = tf.constant([[0, 0, 1, 1], - [0, 0.1, 1, 1.1], - [0, -0.1, 1, 0.9], - [0, 10, 1, 11], - [0, 10.1, 1, 11.1], - [0, 100, 1, 101]], tf.float32) - boxes = box_list.BoxList(corners) - 
boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5])) - iou_thresh = .5 - max_output_size = 3 - nms = box_list_ops.non_max_suppression( - boxes, iou_thresh, max_output_size) - with self.test_session() as sess: - with self.assertRaisesWithPredicateMatch( - errors.InvalidArgumentError, 'scores has incompatible shape'): - sess.run(nms.get()) - - def test_select_from_three_clusters(self): - corners = tf.constant([[0, 0, 1, 1], - [0, 0.1, 1, 1.1], - [0, -0.1, 1, 0.9], - [0, 10, 1, 11], - [0, 10.1, 1, 11.1], - [0, 100, 1, 101]], tf.float32) - boxes = box_list.BoxList(corners) - boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3])) - iou_thresh = .5 - max_output_size = 3 - - exp_nms = [[0, 10, 1, 11], - [0, 0, 1, 1], - [0, 100, 1, 101]] - nms = box_list_ops.non_max_suppression( - boxes, iou_thresh, max_output_size) - with self.test_session() as sess: - nms_output = sess.run(nms.get()) - self.assertAllClose(nms_output, exp_nms) - - def test_select_at_most_two_boxes_from_three_clusters(self): - corners = tf.constant([[0, 0, 1, 1], - [0, 0.1, 1, 1.1], - [0, -0.1, 1, 0.9], - [0, 10, 1, 11], - [0, 10.1, 1, 11.1], - [0, 100, 1, 101]], tf.float32) - boxes = box_list.BoxList(corners) - boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3])) - iou_thresh = .5 - max_output_size = 2 - - exp_nms = [[0, 10, 1, 11], - [0, 0, 1, 1]] - nms = box_list_ops.non_max_suppression( - boxes, iou_thresh, max_output_size) - with self.test_session() as sess: - nms_output = sess.run(nms.get()) - self.assertAllClose(nms_output, exp_nms) - - def test_select_at_most_thirty_boxes_from_three_clusters(self): - corners = tf.constant([[0, 0, 1, 1], - [0, 0.1, 1, 1.1], - [0, -0.1, 1, 0.9], - [0, 10, 1, 11], - [0, 10.1, 1, 11.1], - [0, 100, 1, 101]], tf.float32) - boxes = box_list.BoxList(corners) - boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3])) - iou_thresh = .5 - max_output_size = 30 - - exp_nms = [[0, 10, 1, 11], - [0, 0, 1, 1], - [0, 100, 1, 101]] - nms 
= box_list_ops.non_max_suppression( - boxes, iou_thresh, max_output_size) - with self.test_session() as sess: - nms_output = sess.run(nms.get()) - self.assertAllClose(nms_output, exp_nms) - - def test_select_single_box(self): - corners = tf.constant([[0, 0, 1, 1]], tf.float32) - boxes = box_list.BoxList(corners) - boxes.add_field('scores', tf.constant([.9])) - iou_thresh = .5 - max_output_size = 3 - - exp_nms = [[0, 0, 1, 1]] - nms = box_list_ops.non_max_suppression( - boxes, iou_thresh, max_output_size) - with self.test_session() as sess: - nms_output = sess.run(nms.get()) - self.assertAllClose(nms_output, exp_nms) - - def test_select_from_ten_identical_boxes(self): - corners = tf.constant(10 * [[0, 0, 1, 1]], tf.float32) - boxes = box_list.BoxList(corners) - boxes.add_field('scores', tf.constant(10 * [.9])) - iou_thresh = .5 - max_output_size = 3 - - exp_nms = [[0, 0, 1, 1]] - nms = box_list_ops.non_max_suppression( - boxes, iou_thresh, max_output_size) - with self.test_session() as sess: - nms_output = sess.run(nms.get()) - self.assertAllClose(nms_output, exp_nms) - - def test_copy_extra_fields(self): - corners = tf.constant([[0, 0, 1, 1], - [0, 0.1, 1, 1.1]], tf.float32) - boxes = box_list.BoxList(corners) - tensor1 = np.array([[1], [4]]) - tensor2 = np.array([[1, 1], [2, 2]]) - boxes.add_field('tensor1', tf.constant(tensor1)) - boxes.add_field('tensor2', tf.constant(tensor2)) - new_boxes = box_list.BoxList(tf.constant([[0, 0, 10, 10], - [1, 3, 5, 5]], tf.float32)) - new_boxes = box_list_ops._copy_extra_fields(new_boxes, boxes) - with self.test_session() as sess: - self.assertAllClose(tensor1, sess.run(new_boxes.get_field('tensor1'))) - self.assertAllClose(tensor2, sess.run(new_boxes.get_field('tensor2'))) - - -class CoordinatesConversionTest(tf.test.TestCase): - - def test_to_normalized_coordinates(self): - coordinates = tf.constant([[0, 0, 100, 100], - [25, 25, 75, 75]], tf.float32) - img = tf.ones((128, 100, 100, 3)) - boxlist = box_list.BoxList(coordinates) 
- normalized_boxlist = box_list_ops.to_normalized_coordinates( - boxlist, tf.shape(img)[1], tf.shape(img)[2]) - expected_boxes = [[0, 0, 1, 1], - [0.25, 0.25, 0.75, 0.75]] - - with self.test_session() as sess: - normalized_boxes = sess.run(normalized_boxlist.get()) - self.assertAllClose(normalized_boxes, expected_boxes) - - def test_to_normalized_coordinates_already_normalized(self): - coordinates = tf.constant([[0, 0, 1, 1], - [0.25, 0.25, 0.75, 0.75]], tf.float32) - img = tf.ones((128, 100, 100, 3)) - boxlist = box_list.BoxList(coordinates) - normalized_boxlist = box_list_ops.to_normalized_coordinates( - boxlist, tf.shape(img)[1], tf.shape(img)[2]) - - with self.test_session() as sess: - with self.assertRaisesOpError('assertion failed'): - sess.run(normalized_boxlist.get()) - - def test_to_absolute_coordinates(self): - coordinates = tf.constant([[0, 0, 1, 1], - [0.25, 0.25, 0.75, 0.75]], tf.float32) - img = tf.ones((128, 100, 100, 3)) - boxlist = box_list.BoxList(coordinates) - absolute_boxlist = box_list_ops.to_absolute_coordinates(boxlist, - tf.shape(img)[1], - tf.shape(img)[2]) - expected_boxes = [[0, 0, 100, 100], - [25, 25, 75, 75]] - - with self.test_session() as sess: - absolute_boxes = sess.run(absolute_boxlist.get()) - self.assertAllClose(absolute_boxes, expected_boxes) - - def test_to_absolute_coordinates_already_abolute(self): - coordinates = tf.constant([[0, 0, 100, 100], - [25, 25, 75, 75]], tf.float32) - img = tf.ones((128, 100, 100, 3)) - boxlist = box_list.BoxList(coordinates) - absolute_boxlist = box_list_ops.to_absolute_coordinates(boxlist, - tf.shape(img)[1], - tf.shape(img)[2]) - - with self.test_session() as sess: - with self.assertRaisesOpError('assertion failed'): - sess.run(absolute_boxlist.get()) - - def test_convert_to_normalized_and_back(self): - coordinates = np.random.uniform(size=(100, 4)) - coordinates = np.round(np.sort(coordinates) * 200) - coordinates[:, 2:4] += 1 - coordinates[99, :] = [0, 0, 201, 201] - img = tf.ones((128, 202, 
202, 3)) - - boxlist = box_list.BoxList(tf.constant(coordinates, tf.float32)) - boxlist = box_list_ops.to_normalized_coordinates(boxlist, - tf.shape(img)[1], - tf.shape(img)[2]) - boxlist = box_list_ops.to_absolute_coordinates(boxlist, - tf.shape(img)[1], - tf.shape(img)[2]) - - with self.test_session() as sess: - out = sess.run(boxlist.get()) - self.assertAllClose(out, coordinates) - - def test_convert_to_absolute_and_back(self): - coordinates = np.random.uniform(size=(100, 4)) - coordinates = np.sort(coordinates) - coordinates[99, :] = [0, 0, 1, 1] - img = tf.ones((128, 202, 202, 3)) - - boxlist = box_list.BoxList(tf.constant(coordinates, tf.float32)) - boxlist = box_list_ops.to_absolute_coordinates(boxlist, - tf.shape(img)[1], - tf.shape(img)[2]) - boxlist = box_list_ops.to_normalized_coordinates(boxlist, - tf.shape(img)[1], - tf.shape(img)[2]) - - with self.test_session() as sess: - out = sess.run(boxlist.get()) - self.assertAllClose(out, coordinates) - - -class BoxRefinementTest(tf.test.TestCase): - - def test_box_voting(self): - candidates = box_list.BoxList( - tf.constant([[0.1, 0.1, 0.4, 0.4], [0.6, 0.6, 0.8, 0.8]], tf.float32)) - candidates.add_field('ExtraField', tf.constant([1, 2])) - pool = box_list.BoxList( - tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5], - [0.6, 0.6, 0.8, 0.8]], tf.float32)) - pool.add_field('scores', tf.constant([0.75, 0.25, 0.3])) - averaged_boxes = box_list_ops.box_voting(candidates, pool) - expected_boxes = [[0.1, 0.1, 0.425, 0.425], [0.6, 0.6, 0.8, 0.8]] - expected_scores = [0.5, 0.3] - with self.test_session() as sess: - boxes_out, scores_out, extra_field_out = sess.run( - [averaged_boxes.get(), averaged_boxes.get_field('scores'), - averaged_boxes.get_field('ExtraField')]) - - self.assertAllClose(expected_boxes, boxes_out) - self.assertAllClose(expected_scores, scores_out) - self.assertAllEqual(extra_field_out, [1, 2]) - - def test_box_voting_fails_with_negative_scores(self): - candidates = box_list.BoxList( - 
tf.constant([[0.1, 0.1, 0.4, 0.4]], tf.float32)) - pool = box_list.BoxList(tf.constant([[0.1, 0.1, 0.4, 0.4]], tf.float32)) - pool.add_field('scores', tf.constant([-0.2])) - averaged_boxes = box_list_ops.box_voting(candidates, pool) - - with self.test_session() as sess: - with self.assertRaisesOpError('Scores must be non negative'): - sess.run([averaged_boxes.get()]) - - def test_box_voting_fails_when_unmatched(self): - candidates = box_list.BoxList( - tf.constant([[0.1, 0.1, 0.4, 0.4]], tf.float32)) - pool = box_list.BoxList(tf.constant([[0.6, 0.6, 0.8, 0.8]], tf.float32)) - pool.add_field('scores', tf.constant([0.2])) - averaged_boxes = box_list_ops.box_voting(candidates, pool) - - with self.test_session() as sess: - with self.assertRaisesOpError('Each box in selected_boxes must match ' - 'with at least one box in pool_boxes.'): - sess.run([averaged_boxes.get()]) - - def test_refine_boxes(self): - pool = box_list.BoxList( - tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5], - [0.6, 0.6, 0.8, 0.8]], tf.float32)) - pool.add_field('ExtraField', tf.constant([1, 2, 3])) - pool.add_field('scores', tf.constant([0.75, 0.25, 0.3])) - refined_boxes = box_list_ops.refine_boxes(pool, 0.5, 10) - - expected_boxes = [[0.1, 0.1, 0.425, 0.425], [0.6, 0.6, 0.8, 0.8]] - expected_scores = [0.5, 0.3] - with self.test_session() as sess: - boxes_out, scores_out, extra_field_out = sess.run( - [refined_boxes.get(), refined_boxes.get_field('scores'), - refined_boxes.get_field('ExtraField')]) - - self.assertAllClose(expected_boxes, boxes_out) - self.assertAllClose(expected_scores, scores_out) - self.assertAllEqual(extra_field_out, [1, 3]) - - def test_refine_boxes_multi_class(self): - pool = box_list.BoxList( - tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5], - [0.6, 0.6, 0.8, 0.8], [0.2, 0.2, 0.3, 0.3]], tf.float32)) - pool.add_field('classes', tf.constant([0, 0, 1, 1])) - pool.add_field('scores', tf.constant([0.75, 0.25, 0.3, 0.2])) - refined_boxes = 
box_list_ops.refine_boxes_multi_class(pool, 3, 0.5, 10) - - expected_boxes = [[0.1, 0.1, 0.425, 0.425], [0.6, 0.6, 0.8, 0.8], - [0.2, 0.2, 0.3, 0.3]] - expected_scores = [0.5, 0.3, 0.2] - with self.test_session() as sess: - boxes_out, scores_out, extra_field_out = sess.run( - [refined_boxes.get(), refined_boxes.get_field('scores'), - refined_boxes.get_field('classes')]) - - self.assertAllClose(expected_boxes, boxes_out) - self.assertAllClose(expected_scores, scores_out) - self.assertAllEqual(extra_field_out, [0, 1, 1]) - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/core/box_list_test.py b/object_detection/core/box_list_test.py deleted file mode 100644 index edc00ebb..00000000 --- a/object_detection/core/box_list_test.py +++ /dev/null @@ -1,134 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for object_detection.core.box_list.""" - -import tensorflow as tf - -from object_detection.core import box_list - - -class BoxListTest(tf.test.TestCase): - """Tests for BoxList class.""" - - def test_num_boxes(self): - data = tf.constant([[0, 0, 1, 1], [1, 1, 2, 3], [3, 4, 5, 5]], tf.float32) - expected_num_boxes = 3 - - boxes = box_list.BoxList(data) - with self.test_session() as sess: - num_boxes_output = sess.run(boxes.num_boxes()) - self.assertEquals(num_boxes_output, expected_num_boxes) - - def test_get_correct_center_coordinates_and_sizes(self): - boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]] - boxes = box_list.BoxList(tf.constant(boxes)) - centers_sizes = boxes.get_center_coordinates_and_sizes() - expected_centers_sizes = [[15, 0.35], [12.5, 0.25], [10, 0.3], [5, 0.3]] - with self.test_session() as sess: - centers_sizes_out = sess.run(centers_sizes) - self.assertAllClose(centers_sizes_out, expected_centers_sizes) - - def test_create_box_list_with_dynamic_shape(self): - data = tf.constant([[0, 0, 1, 1], [1, 1, 2, 3], [3, 4, 5, 5]], tf.float32) - indices = tf.reshape(tf.where(tf.greater([1, 0, 1], 0)), [-1]) - data = tf.gather(data, indices) - assert data.get_shape().as_list() == [None, 4] - expected_num_boxes = 2 - - boxes = box_list.BoxList(data) - with self.test_session() as sess: - num_boxes_output = sess.run(boxes.num_boxes()) - self.assertEquals(num_boxes_output, expected_num_boxes) - - def test_transpose_coordinates(self): - boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]] - boxes = box_list.BoxList(tf.constant(boxes)) - boxes.transpose_coordinates() - expected_corners = [[10.0, 10.0, 15.0, 20.0], [0.1, 0.2, 0.4, 0.5]] - with self.test_session() as sess: - corners_out = sess.run(boxes.get()) - self.assertAllClose(corners_out, expected_corners) - - def test_box_list_invalid_inputs(self): - data0 = tf.constant([[[0, 0, 1, 1], [3, 4, 5, 5]]], 
tf.float32) - data1 = tf.constant([[0, 0, 1], [1, 1, 2], [3, 4, 5]], tf.float32) - data2 = tf.constant([[0, 0, 1], [1, 1, 2], [3, 4, 5]], tf.int32) - - with self.assertRaises(ValueError): - _ = box_list.BoxList(data0) - with self.assertRaises(ValueError): - _ = box_list.BoxList(data1) - with self.assertRaises(ValueError): - _ = box_list.BoxList(data2) - - def test_num_boxes_static(self): - box_corners = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]] - boxes = box_list.BoxList(tf.constant(box_corners)) - self.assertEquals(boxes.num_boxes_static(), 2) - self.assertEquals(type(boxes.num_boxes_static()), int) - - def test_num_boxes_static_for_uninferrable_shape(self): - placeholder = tf.placeholder(tf.float32, shape=[None, 4]) - boxes = box_list.BoxList(placeholder) - self.assertEquals(boxes.num_boxes_static(), None) - - def test_as_tensor_dict(self): - boxlist = box_list.BoxList( - tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32)) - boxlist.add_field('classes', tf.constant([0, 1])) - boxlist.add_field('scores', tf.constant([0.75, 0.2])) - tensor_dict = boxlist.as_tensor_dict() - - expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]] - expected_classes = [0, 1] - expected_scores = [0.75, 0.2] - - with self.test_session() as sess: - tensor_dict_out = sess.run(tensor_dict) - self.assertAllEqual(3, len(tensor_dict_out)) - self.assertAllClose(expected_boxes, tensor_dict_out['boxes']) - self.assertAllEqual(expected_classes, tensor_dict_out['classes']) - self.assertAllClose(expected_scores, tensor_dict_out['scores']) - - def test_as_tensor_dict_with_features(self): - boxlist = box_list.BoxList( - tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32)) - boxlist.add_field('classes', tf.constant([0, 1])) - boxlist.add_field('scores', tf.constant([0.75, 0.2])) - tensor_dict = boxlist.as_tensor_dict(['boxes', 'classes', 'scores']) - - expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]] - expected_classes = [0, 1] - 
expected_scores = [0.75, 0.2] - - with self.test_session() as sess: - tensor_dict_out = sess.run(tensor_dict) - self.assertAllEqual(3, len(tensor_dict_out)) - self.assertAllClose(expected_boxes, tensor_dict_out['boxes']) - self.assertAllEqual(expected_classes, tensor_dict_out['classes']) - self.assertAllClose(expected_scores, tensor_dict_out['scores']) - - def test_as_tensor_dict_missing_field(self): - boxlist = box_list.BoxList( - tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32)) - boxlist.add_field('classes', tf.constant([0, 1])) - boxlist.add_field('scores', tf.constant([0.75, 0.2])) - with self.assertRaises(ValueError): - boxlist.as_tensor_dict(['foo', 'bar']) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/core/box_predictor.py b/object_detection/core/box_predictor.py deleted file mode 100644 index 8378a8ea..00000000 --- a/object_detection/core/box_predictor.py +++ /dev/null @@ -1,566 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Box predictor for object detectors. - -Box predictors are classes that take a high level -image feature map as input and produce two predictions, -(1) a tensor encoding box locations, and -(2) a tensor encoding classes for each box. - -These components are passed directly to loss functions -in our detection models. 
- -These modules are separated from the main model since the same -few box predictor architectures are shared across many models. -""" -from abc import abstractmethod -import tensorflow as tf -from object_detection.utils import ops -from object_detection.utils import shape_utils -from object_detection.utils import static_shape - -slim = tf.contrib.slim - -BOX_ENCODINGS = 'box_encodings' -CLASS_PREDICTIONS_WITH_BACKGROUND = 'class_predictions_with_background' -MASK_PREDICTIONS = 'mask_predictions' - - -class BoxPredictor(object): - """BoxPredictor.""" - - def __init__(self, is_training, num_classes): - """Constructor. - - Args: - is_training: Indicates whether the BoxPredictor is in training mode. - num_classes: number of classes. Note that num_classes *does not* - include the background category, so if groundtruth labels take values - in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the - assigned classification targets can range from {0,... K}). - """ - self._is_training = is_training - self._num_classes = num_classes - - @property - def num_classes(self): - return self._num_classes - - def predict(self, image_features, num_predictions_per_location, scope, - **params): - """Computes encoded object locations and corresponding confidences. - - Takes a high level image feature map as input and produce two predictions, - (1) a tensor encoding box locations, and - (2) a tensor encoding class scores for each corresponding box. - In this interface, we only assume that two tensors are returned as output - and do not assume anything about their shapes. - - Args: - image_features: A float tensor of shape [batch_size, height, width, - channels] containing features for a batch of images. - num_predictions_per_location: an integer representing the number of box - predictions to be made per spatial location in the feature map. - scope: Variable and Op scope name. - **params: Additional keyword arguments for specific implementations of - BoxPredictor. 
- - Returns: - A dictionary containing at least the following tensors. - box_encodings: A float tensor of shape - [batch_size, num_anchors, q, code_size] representing the location of - the objects, where q is 1 or the number of classes. - class_predictions_with_background: A float tensor of shape - [batch_size, num_anchors, num_classes + 1] representing the class - predictions for the proposals. - """ - with tf.variable_scope(scope): - return self._predict(image_features, num_predictions_per_location, - **params) - - # TODO: num_predictions_per_location could be moved to constructor. - # This is currently only used by ConvolutionalBoxPredictor. - @abstractmethod - def _predict(self, image_features, num_predictions_per_location, **params): - """Implementations must override this method. - - Args: - image_features: A float tensor of shape [batch_size, height, width, - channels] containing features for a batch of images. - num_predictions_per_location: an integer representing the number of box - predictions to be made per spatial location in the feature map. - **params: Additional keyword arguments for specific implementations of - BoxPredictor. - - Returns: - A dictionary containing at least the following tensors. - box_encodings: A float tensor of shape - [batch_size, num_anchors, q, code_size] representing the location of - the objects, where q is 1 or the number of classes. - class_predictions_with_background: A float tensor of shape - [batch_size, num_anchors, num_classes + 1] representing the class - predictions for the proposals. - """ - pass - - -class RfcnBoxPredictor(BoxPredictor): - """RFCN Box Predictor. - - Applies a position sensitve ROI pooling on position sensitive feature maps to - predict classes and refined locations. See https://arxiv.org/abs/1605.06409 - for details. - - This is used for the second stage of the RFCN meta architecture. 
Notice that - locations are *not* shared across classes, thus for each anchor, a separate - prediction is made for each class. - """ - - def __init__(self, - is_training, - num_classes, - conv_hyperparams, - num_spatial_bins, - depth, - crop_size, - box_code_size): - """Constructor. - - Args: - is_training: Indicates whether the BoxPredictor is in training mode. - num_classes: number of classes. Note that num_classes *does not* - include the background category, so if groundtruth labels take values - in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the - assigned classification targets can range from {0,... K}). - conv_hyperparams: Slim arg_scope with hyperparameters for conolutional - layers. - num_spatial_bins: A list of two integers `[spatial_bins_y, - spatial_bins_x]`. - depth: Target depth to reduce the input feature maps to. - crop_size: A list of two integers `[crop_height, crop_width]`. - box_code_size: Size of encoding for each box. - """ - super(RfcnBoxPredictor, self).__init__(is_training, num_classes) - self._conv_hyperparams = conv_hyperparams - self._num_spatial_bins = num_spatial_bins - self._depth = depth - self._crop_size = crop_size - self._box_code_size = box_code_size - - @property - def num_classes(self): - return self._num_classes - - def _predict(self, image_features, num_predictions_per_location, - proposal_boxes): - """Computes encoded object locations and corresponding confidences. - - Args: - image_features: A float tensor of shape [batch_size, height, width, - channels] containing features for a batch of images. - num_predictions_per_location: an integer representing the number of box - predictions to be made per spatial location in the feature map. - Currently, this must be set to 1, or an error will be raised. - proposal_boxes: A float tensor of shape [batch_size, num_proposals, - box_code_size]. 
- - Returns: - box_encodings: A float tensor of shape - [batch_size, 1, num_classes, code_size] representing the - location of the objects. - class_predictions_with_background: A float tensor of shape - [batch_size, 1, num_classes + 1] representing the class - predictions for the proposals. - Raises: - ValueError: if num_predictions_per_location is not 1. - """ - if num_predictions_per_location != 1: - raise ValueError('Currently RfcnBoxPredictor only supports ' - 'predicting a single box per class per location.') - - batch_size = tf.shape(proposal_boxes)[0] - num_boxes = tf.shape(proposal_boxes)[1] - def get_box_indices(proposals): - proposals_shape = proposals.get_shape().as_list() - if any(dim is None for dim in proposals_shape): - proposals_shape = tf.shape(proposals) - ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32) - multiplier = tf.expand_dims( - tf.range(start=0, limit=proposals_shape[0]), 1) - return tf.reshape(ones_mat * multiplier, [-1]) - - net = image_features - with slim.arg_scope(self._conv_hyperparams): - net = slim.conv2d(net, self._depth, [1, 1], scope='reduce_depth') - # Location predictions. - location_feature_map_depth = (self._num_spatial_bins[0] * - self._num_spatial_bins[1] * - self.num_classes * - self._box_code_size) - location_feature_map = slim.conv2d(net, location_feature_map_depth, - [1, 1], activation_fn=None, - scope='refined_locations') - box_encodings = ops.position_sensitive_crop_regions( - location_feature_map, - boxes=tf.reshape(proposal_boxes, [-1, self._box_code_size]), - box_ind=get_box_indices(proposal_boxes), - crop_size=self._crop_size, - num_spatial_bins=self._num_spatial_bins, - global_pool=True) - box_encodings = tf.squeeze(box_encodings, squeeze_dims=[1, 2]) - box_encodings = tf.reshape(box_encodings, - [batch_size * num_boxes, 1, self.num_classes, - self._box_code_size]) - - # Class predictions. - total_classes = self.num_classes + 1 # Account for background class. 
- class_feature_map_depth = (self._num_spatial_bins[0] * - self._num_spatial_bins[1] * - total_classes) - class_feature_map = slim.conv2d(net, class_feature_map_depth, [1, 1], - activation_fn=None, - scope='class_predictions') - class_predictions_with_background = ops.position_sensitive_crop_regions( - class_feature_map, - boxes=tf.reshape(proposal_boxes, [-1, self._box_code_size]), - box_ind=get_box_indices(proposal_boxes), - crop_size=self._crop_size, - num_spatial_bins=self._num_spatial_bins, - global_pool=True) - class_predictions_with_background = tf.squeeze( - class_predictions_with_background, squeeze_dims=[1, 2]) - class_predictions_with_background = tf.reshape( - class_predictions_with_background, - [batch_size * num_boxes, 1, total_classes]) - - return {BOX_ENCODINGS: box_encodings, - CLASS_PREDICTIONS_WITH_BACKGROUND: - class_predictions_with_background} - - -class MaskRCNNBoxPredictor(BoxPredictor): - """Mask R-CNN Box Predictor. - - See Mask R-CNN: He, K., Gkioxari, G., Dollar, P., & Girshick, R. (2017). - Mask R-CNN. arXiv preprint arXiv:1703.06870. - - This is used for the second stage of the Mask R-CNN detector where proposals - cropped from an image are arranged along the batch dimension of the input - image_features tensor. Notice that locations are *not* shared across classes, - thus for each anchor, a separate prediction is made for each class. - - In addition to predicting boxes and classes, optionally this class allows - predicting masks and/or keypoints inside detection boxes. - - Currently this box predictor makes per-class predictions; that is, each - anchor makes a separate box prediction for each class. - """ - - def __init__(self, - is_training, - num_classes, - fc_hyperparams, - use_dropout, - dropout_keep_prob, - box_code_size, - conv_hyperparams=None, - predict_instance_masks=False, - mask_height=14, - mask_width=14, - mask_prediction_conv_depth=256, - predict_keypoints=False): - """Constructor. 
- - Args: - is_training: Indicates whether the BoxPredictor is in training mode. - num_classes: number of classes. Note that num_classes *does not* - include the background category, so if groundtruth labels take values - in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the - assigned classification targets can range from {0,... K}). - fc_hyperparams: Slim arg_scope with hyperparameters for fully - connected ops. - use_dropout: Option to use dropout or not. Note that a single dropout - op is applied here prior to both box and class predictions, which stands - in contrast to the ConvolutionalBoxPredictor below. - dropout_keep_prob: Keep probability for dropout. - This is only used if use_dropout is True. - box_code_size: Size of encoding for each box. - conv_hyperparams: Slim arg_scope with hyperparameters for convolution - ops. - predict_instance_masks: Whether to predict object masks inside detection - boxes. - mask_height: Desired output mask height. The default value is 14. - mask_width: Desired output mask width. The default value is 14. - mask_prediction_conv_depth: The depth for the first conv2d_transpose op - applied to the image_features in the mask prediciton branch. - predict_keypoints: Whether to predict keypoints insde detection boxes. - - - Raises: - ValueError: If predict_instance_masks or predict_keypoints is true. 
- """ - super(MaskRCNNBoxPredictor, self).__init__(is_training, num_classes) - self._fc_hyperparams = fc_hyperparams - self._use_dropout = use_dropout - self._box_code_size = box_code_size - self._dropout_keep_prob = dropout_keep_prob - self._conv_hyperparams = conv_hyperparams - self._predict_instance_masks = predict_instance_masks - self._mask_height = mask_height - self._mask_width = mask_width - self._mask_prediction_conv_depth = mask_prediction_conv_depth - self._predict_keypoints = predict_keypoints - if self._predict_keypoints: - raise ValueError('Keypoint prediction is unimplemented.') - if ((self._predict_instance_masks or self._predict_keypoints) and - self._conv_hyperparams is None): - raise ValueError('`conv_hyperparams` must be provided when predicting ' - 'masks.') - - @property - def num_classes(self): - return self._num_classes - - def _predict(self, image_features, num_predictions_per_location): - """Computes encoded object locations and corresponding confidences. - - Flattens image_features and applies fully connected ops (with no - non-linearity) to predict box encodings and class predictions. In this - setting, anchors are not spatially arranged in any way and are assumed to - have been folded into the batch dimension. Thus we output 1 for the - anchors dimension. - - Also optionally predicts instance masks. - The mask prediction head is based on the Mask RCNN paper with the following - modifications: We replace the deconvolution layer with a bilinear resize - and a convolution. - - Args: - image_features: A float tensor of shape [batch_size, height, width, - channels] containing features for a batch of images. - num_predictions_per_location: an integer representing the number of box - predictions to be made per spatial location in the feature map. - Currently, this must be set to 1, or an error will be raised. - - Returns: - A dictionary containing the following tensors. 
- box_encodings: A float tensor of shape - [batch_size, 1, num_classes, code_size] representing the - location of the objects. - class_predictions_with_background: A float tensor of shape - [batch_size, 1, num_classes + 1] representing the class - predictions for the proposals. - If predict_masks is True the dictionary also contains: - instance_masks: A float tensor of shape - [batch_size, 1, num_classes, image_height, image_width] - If predict_keypoints is True the dictionary also contains: - keypoints: [batch_size, 1, num_keypoints, 2] - - Raises: - ValueError: if num_predictions_per_location is not 1. - """ - if num_predictions_per_location != 1: - raise ValueError('Currently FullyConnectedBoxPredictor only supports ' - 'predicting a single box per class per location.') - spatial_averaged_image_features = tf.reduce_mean(image_features, [1, 2], - keep_dims=True, - name='AvgPool') - flattened_image_features = slim.flatten(spatial_averaged_image_features) - if self._use_dropout: - flattened_image_features = slim.dropout(flattened_image_features, - keep_prob=self._dropout_keep_prob, - is_training=self._is_training) - with slim.arg_scope(self._fc_hyperparams): - box_encodings = slim.fully_connected( - flattened_image_features, - self._num_classes * self._box_code_size, - activation_fn=None, - scope='BoxEncodingPredictor') - class_predictions_with_background = slim.fully_connected( - flattened_image_features, - self._num_classes + 1, - activation_fn=None, - scope='ClassPredictor') - box_encodings = tf.reshape( - box_encodings, [-1, 1, self._num_classes, self._box_code_size]) - class_predictions_with_background = tf.reshape( - class_predictions_with_background, [-1, 1, self._num_classes + 1]) - - predictions_dict = { - BOX_ENCODINGS: box_encodings, - CLASS_PREDICTIONS_WITH_BACKGROUND: class_predictions_with_background - } - - if self._predict_instance_masks: - with slim.arg_scope(self._conv_hyperparams): - upsampled_features = tf.image.resize_bilinear( - 
image_features, - [self._mask_height, self._mask_width], - align_corners=True) - upsampled_features = slim.conv2d( - upsampled_features, - num_outputs=self._mask_prediction_conv_depth, - kernel_size=[2, 2]) - mask_predictions = slim.conv2d(upsampled_features, - num_outputs=self.num_classes, - activation_fn=None, - kernel_size=[3, 3]) - instance_masks = tf.expand_dims(tf.transpose(mask_predictions, - perm=[0, 3, 1, 2]), - axis=1, - name='MaskPredictor') - predictions_dict[MASK_PREDICTIONS] = instance_masks - return predictions_dict - - -class ConvolutionalBoxPredictor(BoxPredictor): - """Convolutional Box Predictor. - - Optionally add an intermediate 1x1 convolutional layer after features and - predict in parallel branches box_encodings and - class_predictions_with_background. - - Currently this box predictor assumes that predictions are "shared" across - classes --- that is each anchor makes box predictions which do not depend - on class. - """ - - def __init__(self, - is_training, - num_classes, - conv_hyperparams, - min_depth, - max_depth, - num_layers_before_predictor, - use_dropout, - dropout_keep_prob, - kernel_size, - box_code_size, - apply_sigmoid_to_scores=False, - class_prediction_bias_init=0.0): - """Constructor. - - Args: - is_training: Indicates whether the BoxPredictor is in training mode. - num_classes: number of classes. Note that num_classes *does not* - include the background category, so if groundtruth labels take values - in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the - assigned classification targets can range from {0,... K}). - conv_hyperparams: Slim arg_scope with hyperparameters for convolution ops. - min_depth: Minumum feature depth prior to predicting box encodings - and class predictions. - max_depth: Maximum feature depth prior to predicting box encodings - and class predictions. If max_depth is set to 0, no additional - feature map will be inserted before location and class predictions. 
- num_layers_before_predictor: Number of the additional conv layers before - the predictor. - use_dropout: Option to use dropout for class prediction or not. - dropout_keep_prob: Keep probability for dropout. - This is only used if use_dropout is True. - kernel_size: Size of final convolution kernel. If the - spatial resolution of the feature map is smaller than the kernel size, - then the kernel size is automatically set to be - min(feature_width, feature_height). - box_code_size: Size of encoding for each box. - apply_sigmoid_to_scores: if True, apply the sigmoid on the output - class_predictions. - class_prediction_bias_init: constant value to initialize bias of the last - conv2d layer before class prediction. - - Raises: - ValueError: if min_depth > max_depth. - """ - super(ConvolutionalBoxPredictor, self).__init__(is_training, num_classes) - if min_depth > max_depth: - raise ValueError('min_depth should be less than or equal to max_depth') - self._conv_hyperparams = conv_hyperparams - self._min_depth = min_depth - self._max_depth = max_depth - self._num_layers_before_predictor = num_layers_before_predictor - self._use_dropout = use_dropout - self._kernel_size = kernel_size - self._box_code_size = box_code_size - self._dropout_keep_prob = dropout_keep_prob - self._apply_sigmoid_to_scores = apply_sigmoid_to_scores - self._class_prediction_bias_init = class_prediction_bias_init - - def _predict(self, image_features, num_predictions_per_location): - """Computes encoded object locations and corresponding confidences. - - Args: - image_features: A float tensor of shape [batch_size, height, width, - channels] containing features for a batch of images. - num_predictions_per_location: an integer representing the number of box - predictions to be made per spatial location in the feature map. - - Returns: - A dictionary containing the following tensors. 
- box_encodings: A float tensor of shape [batch_size, num_anchors, 1, - code_size] representing the location of the objects, where - num_anchors = feat_height * feat_width * num_predictions_per_location - class_predictions_with_background: A float tensor of shape - [batch_size, num_anchors, num_classes + 1] representing the class - predictions for the proposals. - """ - # Add a slot for the background class. - num_class_slots = self.num_classes + 1 - net = image_features - with slim.arg_scope(self._conv_hyperparams), \ - slim.arg_scope([slim.dropout], is_training=self._is_training): - # Add additional conv layers before the class predictor. - features_depth = static_shape.get_depth(image_features.get_shape()) - depth = max(min(features_depth, self._max_depth), self._min_depth) - tf.logging.info('depth of additional conv before box predictor: {}'. - format(depth)) - if depth > 0 and self._num_layers_before_predictor > 0: - for i in range(self._num_layers_before_predictor): - net = slim.conv2d( - net, depth, [1, 1], scope='Conv2d_%d_1x1_%d' % (i, depth)) - with slim.arg_scope([slim.conv2d], activation_fn=None, - normalizer_fn=None, normalizer_params=None): - box_encodings = slim.conv2d( - net, num_predictions_per_location * self._box_code_size, - [self._kernel_size, self._kernel_size], - scope='BoxEncodingPredictor') - if self._use_dropout: - net = slim.dropout(net, keep_prob=self._dropout_keep_prob) - class_predictions_with_background = slim.conv2d( - net, num_predictions_per_location * num_class_slots, - [self._kernel_size, self._kernel_size], scope='ClassPredictor', - biases_initializer=tf.constant_initializer( - self._class_prediction_bias_init)) - if self._apply_sigmoid_to_scores: - class_predictions_with_background = tf.sigmoid( - class_predictions_with_background) - - combined_feature_map_shape = shape_utils.combined_static_and_dynamic_shape( - image_features) - box_encodings = tf.reshape( - box_encodings, tf.stack([combined_feature_map_shape[0], - 
combined_feature_map_shape[1] * - combined_feature_map_shape[2] * - num_predictions_per_location, - 1, self._box_code_size])) - class_predictions_with_background = tf.reshape( - class_predictions_with_background, - tf.stack([combined_feature_map_shape[0], - combined_feature_map_shape[1] * - combined_feature_map_shape[2] * - num_predictions_per_location, - num_class_slots])) - return {BOX_ENCODINGS: box_encodings, - CLASS_PREDICTIONS_WITH_BACKGROUND: - class_predictions_with_background} diff --git a/object_detection/core/box_predictor_test.py b/object_detection/core/box_predictor_test.py deleted file mode 100644 index e5e5a3c9..00000000 --- a/object_detection/core/box_predictor_test.py +++ /dev/null @@ -1,323 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for object_detection.core.box_predictor.""" - -import numpy as np -import tensorflow as tf - -from google.protobuf import text_format -from object_detection.builders import hyperparams_builder -from object_detection.core import box_predictor -from object_detection.protos import hyperparams_pb2 - - -class MaskRCNNBoxPredictorTest(tf.test.TestCase): - - def _build_arg_scope_with_hyperparams(self, - op_type=hyperparams_pb2.Hyperparams.FC): - hyperparams = hyperparams_pb2.Hyperparams() - hyperparams_text_proto = """ - activation: NONE - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - """ - text_format.Merge(hyperparams_text_proto, hyperparams) - hyperparams.op = op_type - return hyperparams_builder.build(hyperparams, is_training=True) - - def test_get_boxes_with_five_classes(self): - image_features = tf.random_uniform([2, 7, 7, 3], dtype=tf.float32) - mask_box_predictor = box_predictor.MaskRCNNBoxPredictor( - is_training=False, - num_classes=5, - fc_hyperparams=self._build_arg_scope_with_hyperparams(), - use_dropout=False, - dropout_keep_prob=0.5, - box_code_size=4, - ) - box_predictions = mask_box_predictor.predict( - image_features, num_predictions_per_location=1, scope='BoxPredictor') - box_encodings = box_predictions[box_predictor.BOX_ENCODINGS] - class_predictions_with_background = box_predictions[ - box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND] - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - (box_encodings_shape, - class_predictions_with_background_shape) = sess.run( - [tf.shape(box_encodings), - tf.shape(class_predictions_with_background)]) - self.assertAllEqual(box_encodings_shape, [2, 1, 5, 4]) - self.assertAllEqual(class_predictions_with_background_shape, [2, 1, 6]) - - def test_value_error_on_predict_instance_masks_with_no_conv_hyperparms(self): - with 
self.assertRaises(ValueError): - box_predictor.MaskRCNNBoxPredictor( - is_training=False, - num_classes=5, - fc_hyperparams=self._build_arg_scope_with_hyperparams(), - use_dropout=False, - dropout_keep_prob=0.5, - box_code_size=4, - predict_instance_masks=True) - - def test_get_instance_masks(self): - image_features = tf.random_uniform([2, 7, 7, 3], dtype=tf.float32) - mask_box_predictor = box_predictor.MaskRCNNBoxPredictor( - is_training=False, - num_classes=5, - fc_hyperparams=self._build_arg_scope_with_hyperparams(), - use_dropout=False, - dropout_keep_prob=0.5, - box_code_size=4, - conv_hyperparams=self._build_arg_scope_with_hyperparams( - op_type=hyperparams_pb2.Hyperparams.CONV), - predict_instance_masks=True) - box_predictions = mask_box_predictor.predict( - image_features, num_predictions_per_location=1, scope='BoxPredictor') - mask_predictions = box_predictions[box_predictor.MASK_PREDICTIONS] - self.assertListEqual([2, 1, 5, 14, 14], - mask_predictions.get_shape().as_list()) - - def test_do_not_return_instance_masks_and_keypoints_without_request(self): - image_features = tf.random_uniform([2, 7, 7, 3], dtype=tf.float32) - mask_box_predictor = box_predictor.MaskRCNNBoxPredictor( - is_training=False, - num_classes=5, - fc_hyperparams=self._build_arg_scope_with_hyperparams(), - use_dropout=False, - dropout_keep_prob=0.5, - box_code_size=4) - box_predictions = mask_box_predictor.predict( - image_features, num_predictions_per_location=1, scope='BoxPredictor') - self.assertEqual(len(box_predictions), 2) - self.assertTrue(box_predictor.BOX_ENCODINGS in box_predictions) - self.assertTrue(box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND - in box_predictions) - - def test_value_error_on_predict_keypoints(self): - with self.assertRaises(ValueError): - box_predictor.MaskRCNNBoxPredictor( - is_training=False, - num_classes=5, - fc_hyperparams=self._build_arg_scope_with_hyperparams(), - use_dropout=False, - dropout_keep_prob=0.5, - box_code_size=4, - 
predict_keypoints=True) - - -class RfcnBoxPredictorTest(tf.test.TestCase): - - def _build_arg_scope_with_conv_hyperparams(self): - conv_hyperparams = hyperparams_pb2.Hyperparams() - conv_hyperparams_text_proto = """ - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - """ - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams) - return hyperparams_builder.build(conv_hyperparams, is_training=True) - - def test_get_correct_box_encoding_and_class_prediction_shapes(self): - image_features = tf.random_uniform([4, 8, 8, 64], dtype=tf.float32) - proposal_boxes = tf.random_normal([4, 2, 4], dtype=tf.float32) - rfcn_box_predictor = box_predictor.RfcnBoxPredictor( - is_training=False, - num_classes=2, - conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(), - num_spatial_bins=[3, 3], - depth=4, - crop_size=[12, 12], - box_code_size=4 - ) - box_predictions = rfcn_box_predictor.predict( - image_features, num_predictions_per_location=1, scope='BoxPredictor', - proposal_boxes=proposal_boxes) - box_encodings = box_predictions[box_predictor.BOX_ENCODINGS] - class_predictions_with_background = box_predictions[ - box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND] - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - (box_encodings_shape, - class_predictions_shape) = sess.run( - [tf.shape(box_encodings), - tf.shape(class_predictions_with_background)]) - self.assertAllEqual(box_encodings_shape, [8, 1, 2, 4]) - self.assertAllEqual(class_predictions_shape, [8, 1, 3]) - - -class ConvolutionalBoxPredictorTest(tf.test.TestCase): - - def _build_arg_scope_with_conv_hyperparams(self): - conv_hyperparams = hyperparams_pb2.Hyperparams() - conv_hyperparams_text_proto = """ - activation: RELU_6 - regularizer { - l2_regularizer { - } - } - initializer { - truncated_normal_initializer { - } - } - """ - text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams) - return 
hyperparams_builder.build(conv_hyperparams, is_training=True) - - def test_get_boxes_for_five_aspect_ratios_per_location(self): - image_features = tf.random_uniform([4, 8, 8, 64], dtype=tf.float32) - conv_box_predictor = box_predictor.ConvolutionalBoxPredictor( - is_training=False, - num_classes=0, - conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(), - min_depth=0, - max_depth=32, - num_layers_before_predictor=1, - use_dropout=True, - dropout_keep_prob=0.8, - kernel_size=1, - box_code_size=4 - ) - box_predictions = conv_box_predictor.predict( - image_features, num_predictions_per_location=5, scope='BoxPredictor') - box_encodings = box_predictions[box_predictor.BOX_ENCODINGS] - objectness_predictions = box_predictions[ - box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND] - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - (box_encodings_shape, - objectness_predictions_shape) = sess.run( - [tf.shape(box_encodings), tf.shape(objectness_predictions)]) - self.assertAllEqual(box_encodings_shape, [4, 320, 1, 4]) - self.assertAllEqual(objectness_predictions_shape, [4, 320, 1]) - - def test_get_boxes_for_one_aspect_ratio_per_location(self): - image_features = tf.random_uniform([4, 8, 8, 64], dtype=tf.float32) - conv_box_predictor = box_predictor.ConvolutionalBoxPredictor( - is_training=False, - num_classes=0, - conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(), - min_depth=0, - max_depth=32, - num_layers_before_predictor=1, - use_dropout=True, - dropout_keep_prob=0.8, - kernel_size=1, - box_code_size=4 - ) - box_predictions = conv_box_predictor.predict( - image_features, num_predictions_per_location=1, scope='BoxPredictor') - box_encodings = box_predictions[box_predictor.BOX_ENCODINGS] - objectness_predictions = box_predictions[ - box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND] - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - 
(box_encodings_shape, - objectness_predictions_shape) = sess.run( - [tf.shape(box_encodings), tf.shape(objectness_predictions)]) - self.assertAllEqual(box_encodings_shape, [4, 64, 1, 4]) - self.assertAllEqual(objectness_predictions_shape, [4, 64, 1]) - - def test_get_multi_class_predictions_for_five_aspect_ratios_per_location( - self): - num_classes_without_background = 6 - image_features = tf.random_uniform([4, 8, 8, 64], dtype=tf.float32) - conv_box_predictor = box_predictor.ConvolutionalBoxPredictor( - is_training=False, - num_classes=num_classes_without_background, - conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(), - min_depth=0, - max_depth=32, - num_layers_before_predictor=1, - use_dropout=True, - dropout_keep_prob=0.8, - kernel_size=1, - box_code_size=4 - ) - box_predictions = conv_box_predictor.predict( - image_features, - num_predictions_per_location=5, - scope='BoxPredictor') - box_encodings = box_predictions[box_predictor.BOX_ENCODINGS] - class_predictions_with_background = box_predictions[ - box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND] - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - (box_encodings_shape, class_predictions_with_background_shape - ) = sess.run([ - tf.shape(box_encodings), tf.shape(class_predictions_with_background)]) - self.assertAllEqual(box_encodings_shape, [4, 320, 1, 4]) - self.assertAllEqual(class_predictions_with_background_shape, - [4, 320, num_classes_without_background+1]) - - def test_get_boxes_for_five_aspect_ratios_per_location_fully_convolutional( - self): - image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64]) - conv_box_predictor = box_predictor.ConvolutionalBoxPredictor( - is_training=False, - num_classes=0, - conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(), - min_depth=0, - max_depth=32, - num_layers_before_predictor=1, - use_dropout=True, - dropout_keep_prob=0.8, - kernel_size=1, - box_code_size=4 - ) - 
box_predictions = conv_box_predictor.predict( - image_features, num_predictions_per_location=5, scope='BoxPredictor') - box_encodings = box_predictions[box_predictor.BOX_ENCODINGS] - objectness_predictions = box_predictions[ - box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND] - init_op = tf.global_variables_initializer() - - resolution = 32 - expected_num_anchors = resolution*resolution*5 - with self.test_session() as sess: - sess.run(init_op) - (box_encodings_shape, - objectness_predictions_shape) = sess.run( - [tf.shape(box_encodings), tf.shape(objectness_predictions)], - feed_dict={image_features: - np.random.rand(4, resolution, resolution, 64)}) - self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 1, 4]) - self.assertAllEqual(objectness_predictions_shape, - [4, expected_num_anchors, 1]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/core/data_decoder.py b/object_detection/core/data_decoder.py deleted file mode 100644 index 9ae18c1f..00000000 --- a/object_detection/core/data_decoder.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Interface for data decoders. - -Data decoders decode the input data and return a dictionary of tensors keyed by -the entries in core.reader.Fields. 
-""" -from abc import ABCMeta -from abc import abstractmethod - - -class DataDecoder(object): - """Interface for data decoders.""" - __metaclass__ = ABCMeta - - @abstractmethod - def decode(self, data): - """Return a single image and associated labels. - - Args: - data: a string tensor holding a serialized protocol buffer corresponding - to data for a single image. - - Returns: - tensor_dict: a dictionary containing tensors. Possible keys are defined in - reader.Fields. - """ - pass diff --git a/object_detection/core/data_parser.py b/object_detection/core/data_parser.py deleted file mode 100644 index 3dac4de2..00000000 --- a/object_detection/core/data_parser.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Interface for data parsers. - -Data parser parses input data and returns a dictionary of numpy arrays -keyed by the entries in standard_fields.py. Since the parser parses records -to numpy arrays (materialized tensors) directly, it is used to read data for -evaluation/visualization; to parse the data during training, DataDecoder should -be used. 
-""" -from abc import ABCMeta -from abc import abstractmethod - - -class DataToNumpyParser(object): - __metaclass__ = ABCMeta - - @abstractmethod - def parse(self, input_data): - """Parses input and returns a numpy array or a dictionary of numpy arrays. - - Args: - input_data: an input data - - Returns: - A numpy array or a dictionary of numpy arrays or None, if input - cannot be parsed. - """ - pass diff --git a/object_detection/core/keypoint_ops.py b/object_detection/core/keypoint_ops.py deleted file mode 100644 index e520845f..00000000 --- a/object_detection/core/keypoint_ops.py +++ /dev/null @@ -1,282 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Keypoint operations. - -Keypoints are represented as tensors of shape [num_instances, num_keypoints, 2], -where the last dimension holds rank 2 tensors of the form [y, x] representing -the coordinates of the keypoint. -""" -import numpy as np -import tensorflow as tf - - -def scale(keypoints, y_scale, x_scale, scope=None): - """Scales keypoint coordinates in x and y dimensions. - - Args: - keypoints: a tensor of shape [num_instances, num_keypoints, 2] - y_scale: (float) scalar tensor - x_scale: (float) scalar tensor - scope: name scope. 
- - Returns: - new_keypoints: a tensor of shape [num_instances, num_keypoints, 2] - """ - with tf.name_scope(scope, 'Scale'): - y_scale = tf.cast(y_scale, tf.float32) - x_scale = tf.cast(x_scale, tf.float32) - new_keypoints = keypoints * [[[y_scale, x_scale]]] - return new_keypoints - - -def clip_to_window(keypoints, window, scope=None): - """Clips keypoints to a window. - - This op clips any input keypoints to a window. - - Args: - keypoints: a tensor of shape [num_instances, num_keypoints, 2] - window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max] - window to which the op should clip the keypoints. - scope: name scope. - - Returns: - new_keypoints: a tensor of shape [num_instances, num_keypoints, 2] - """ - with tf.name_scope(scope, 'ClipToWindow'): - y, x = tf.split(value=keypoints, num_or_size_splits=2, axis=2) - win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window) - y = tf.maximum(tf.minimum(y, win_y_max), win_y_min) - x = tf.maximum(tf.minimum(x, win_x_max), win_x_min) - new_keypoints = tf.concat([y, x], 2) - return new_keypoints - - -def prune_outside_window(keypoints, window, scope=None): - """Prunes keypoints that fall outside a given window. - - This function replaces keypoints that fall outside the given window with nan. - See also clip_to_window which clips any keypoints that fall outside the given - window. - - Args: - keypoints: a tensor of shape [num_instances, num_keypoints, 2] - window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max] - window outside of which the op should prune the keypoints. - scope: name scope. 
- - Returns: - new_keypoints: a tensor of shape [num_instances, num_keypoints, 2] - """ - with tf.name_scope(scope, 'PruneOutsideWindow'): - y, x = tf.split(value=keypoints, num_or_size_splits=2, axis=2) - win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window) - - valid_indices = tf.logical_and( - tf.logical_and(y >= win_y_min, y <= win_y_max), - tf.logical_and(x >= win_x_min, x <= win_x_max)) - - new_y = tf.where(valid_indices, y, np.nan * tf.ones_like(y)) - new_x = tf.where(valid_indices, x, np.nan * tf.ones_like(x)) - new_keypoints = tf.concat([new_y, new_x], 2) - - return new_keypoints - - -def change_coordinate_frame(keypoints, window, scope=None): - """Changes coordinate frame of the keypoints to be relative to window's frame. - - Given a window of the form [y_min, x_min, y_max, x_max], changes keypoint - coordinates from keypoints of shape [num_instances, num_keypoints, 2] - to be relative to this window. - - An example use case is data augmentation: where we are given groundtruth - keypoints and would like to randomly crop the image to some window. In this - case we need to change the coordinate frame of each groundtruth keypoint to be - relative to this new window. - - Args: - keypoints: a tensor of shape [num_instances, num_keypoints, 2] - window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max] - window we should change the coordinate frame to. - scope: name scope. - - Returns: - new_keypoints: a tensor of shape [num_instances, num_keypoints, 2] - """ - with tf.name_scope(scope, 'ChangeCoordinateFrame'): - win_height = window[2] - window[0] - win_width = window[3] - window[1] - new_keypoints = scale(keypoints - [window[0], window[1]], 1.0 / win_height, - 1.0 / win_width) - return new_keypoints - - -def to_normalized_coordinates(keypoints, height, width, - check_range=True, scope=None): - """Converts absolute keypoint coordinates to normalized coordinates in [0, 1]. 
- - Usually one uses the dynamic shape of the image or conv-layer tensor: - keypoints = keypoint_ops.to_normalized_coordinates(keypoints, - tf.shape(images)[1], - tf.shape(images)[2]), - - This function raises an assertion failed error at graph execution time when - the maximum coordinate is smaller than 1.01 (which means that coordinates are - already normalized). The value 1.01 is to deal with small rounding errors. - - Args: - keypoints: A tensor of shape [num_instances, num_keypoints, 2]. - height: Maximum value for y coordinate of absolute keypoint coordinates. - width: Maximum value for x coordinate of absolute keypoint coordinates. - check_range: If True, checks if the coordinates are normalized. - scope: name scope. - - Returns: - tensor of shape [num_instances, num_keypoints, 2] with normalized - coordinates in [0, 1]. - """ - with tf.name_scope(scope, 'ToNormalizedCoordinates'): - height = tf.cast(height, tf.float32) - width = tf.cast(width, tf.float32) - - if check_range: - max_val = tf.reduce_max(keypoints) - max_assert = tf.Assert(tf.greater(max_val, 1.01), - ['max value is lower than 1.01: ', max_val]) - with tf.control_dependencies([max_assert]): - width = tf.identity(width) - - return scale(keypoints, 1.0 / height, 1.0 / width) - - -def to_absolute_coordinates(keypoints, height, width, - check_range=True, scope=None): - """Converts normalized keypoint coordinates to absolute pixel coordinates. - - This function raises an assertion failed error when the maximum keypoint - coordinate value is larger than 1.01 (in which case coordinates are already - absolute). - - Args: - keypoints: A tensor of shape [num_instances, num_keypoints, 2] - height: Maximum value for y coordinate of absolute keypoint coordinates. - width: Maximum value for x coordinate of absolute keypoint coordinates. - check_range: If True, checks if the coordinates are normalized or not. - scope: name scope. 
- - Returns: - tensor of shape [num_instances, num_keypoints, 2] with absolute coordinates - in terms of the image size. - - """ - with tf.name_scope(scope, 'ToAbsoluteCoordinates'): - height = tf.cast(height, tf.float32) - width = tf.cast(width, tf.float32) - - # Ensure range of input keypoints is correct. - if check_range: - max_val = tf.reduce_max(keypoints) - max_assert = tf.Assert(tf.greater_equal(1.01, max_val), - ['maximum keypoint coordinate value is larger ' - 'than 1.01: ', max_val]) - with tf.control_dependencies([max_assert]): - width = tf.identity(width) - - return scale(keypoints, height, width) - - -def flip_horizontal(keypoints, flip_point, flip_permutation, scope=None): - """Flips the keypoints horizontally around the flip_point. - - This operation flips the x coordinate for each keypoint around the flip_point - and also permutes the keypoints in a manner specified by flip_permutation. - - Args: - keypoints: a tensor of shape [num_instances, num_keypoints, 2] - flip_point: (float) scalar tensor representing the x coordinate to flip the - keypoints around. - flip_permutation: rank 1 int32 tensor containing the keypoint flip - permutation. This specifies the mapping from original keypoint indices - to the flipped keypoint indices. This is used primarily for keypoints - that are not reflection invariant. E.g. Suppose there are 3 keypoints - representing ['head', 'right_eye', 'left_eye'], then a logical choice for - flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye' - and 'right_eye' after a horizontal flip. - scope: name scope. 
- - Returns: - new_keypoints: a tensor of shape [num_instances, num_keypoints, 2] - """ - with tf.name_scope(scope, 'FlipHorizontal'): - keypoints = tf.transpose(keypoints, [1, 0, 2]) - keypoints = tf.gather(keypoints, flip_permutation) - v, u = tf.split(value=keypoints, num_or_size_splits=2, axis=2) - u = flip_point * 2.0 - u - new_keypoints = tf.concat([v, u], 2) - new_keypoints = tf.transpose(new_keypoints, [1, 0, 2]) - return new_keypoints - - -def flip_vertical(keypoints, flip_point, flip_permutation, scope=None): - """Flips the keypoints vertically around the flip_point. - - This operation flips the y coordinate for each keypoint around the flip_point - and also permutes the keypoints in a manner specified by flip_permutation. - - Args: - keypoints: a tensor of shape [num_instances, num_keypoints, 2] - flip_point: (float) scalar tensor representing the y coordinate to flip the - keypoints around. - flip_permutation: rank 1 int32 tensor containing the keypoint flip - permutation. This specifies the mapping from original keypoint indices - to the flipped keypoint indices. This is used primarily for keypoints - that are not reflection invariant. E.g. Suppose there are 3 keypoints - representing ['head', 'right_eye', 'left_eye'], then a logical choice for - flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye' - and 'right_eye' after a horizontal flip. - scope: name scope. - - Returns: - new_keypoints: a tensor of shape [num_instances, num_keypoints, 2] - """ - with tf.name_scope(scope, 'FlipVertical'): - keypoints = tf.transpose(keypoints, [1, 0, 2]) - keypoints = tf.gather(keypoints, flip_permutation) - v, u = tf.split(value=keypoints, num_or_size_splits=2, axis=2) - v = flip_point * 2.0 - v - new_keypoints = tf.concat([v, u], 2) - new_keypoints = tf.transpose(new_keypoints, [1, 0, 2]) - return new_keypoints - - -def rot90(keypoints, scope=None): - """Rotates the keypoints counter-clockwise by 90 degrees. 
- - Args: - keypoints: a tensor of shape [num_instances, num_keypoints, 2] - scope: name scope. - - Returns: - new_keypoints: a tensor of shape [num_instances, num_keypoints, 2] - """ - with tf.name_scope(scope, 'Rot90'): - keypoints = tf.transpose(keypoints, [1, 0, 2]) - v, u = tf.split(value=keypoints[:, :, ::-1], num_or_size_splits=2, axis=2) - v = 1.0 - v - new_keypoints = tf.concat([v, u], 2) - new_keypoints = tf.transpose(new_keypoints, [1, 0, 2]) - return new_keypoints diff --git a/object_detection/core/keypoint_ops_test.py b/object_detection/core/keypoint_ops_test.py deleted file mode 100644 index 1c09c55a..00000000 --- a/object_detection/core/keypoint_ops_test.py +++ /dev/null @@ -1,200 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for object_detection.core.keypoint_ops.""" -import numpy as np -import tensorflow as tf - -from object_detection.core import keypoint_ops - - -class KeypointOpsTest(tf.test.TestCase): - """Tests for common keypoint operations.""" - - def test_scale(self): - keypoints = tf.constant([ - [[0.0, 0.0], [100.0, 200.0]], - [[50.0, 120.0], [100.0, 140.0]] - ]) - y_scale = tf.constant(1.0 / 100) - x_scale = tf.constant(1.0 / 200) - - expected_keypoints = tf.constant([ - [[0., 0.], [1.0, 1.0]], - [[0.5, 0.6], [1.0, 0.7]] - ]) - output = keypoint_ops.scale(keypoints, y_scale, x_scale) - - with self.test_session() as sess: - output_, expected_keypoints_ = sess.run([output, expected_keypoints]) - self.assertAllClose(output_, expected_keypoints_) - - def test_clip_to_window(self): - keypoints = tf.constant([ - [[0.25, 0.5], [0.75, 0.75]], - [[0.5, 0.0], [1.0, 1.0]] - ]) - window = tf.constant([0.25, 0.25, 0.75, 0.75]) - - expected_keypoints = tf.constant([ - [[0.25, 0.5], [0.75, 0.75]], - [[0.5, 0.25], [0.75, 0.75]] - ]) - output = keypoint_ops.clip_to_window(keypoints, window) - - with self.test_session() as sess: - output_, expected_keypoints_ = sess.run([output, expected_keypoints]) - self.assertAllClose(output_, expected_keypoints_) - - def test_prune_outside_window(self): - keypoints = tf.constant([ - [[0.25, 0.5], [0.75, 0.75]], - [[0.5, 0.0], [1.0, 1.0]] - ]) - window = tf.constant([0.25, 0.25, 0.75, 0.75]) - - expected_keypoints = tf.constant([[[0.25, 0.5], [0.75, 0.75]], - [[np.nan, np.nan], [np.nan, np.nan]]]) - output = keypoint_ops.prune_outside_window(keypoints, window) - - with self.test_session() as sess: - output_, expected_keypoints_ = sess.run([output, expected_keypoints]) - self.assertAllClose(output_, expected_keypoints_) - - def test_change_coordinate_frame(self): - keypoints = tf.constant([ - [[0.25, 0.5], [0.75, 0.75]], - [[0.5, 0.0], [1.0, 1.0]] - ]) - window = 
tf.constant([0.25, 0.25, 0.75, 0.75]) - - expected_keypoints = tf.constant([ - [[0, 0.5], [1.0, 1.0]], - [[0.5, -0.5], [1.5, 1.5]] - ]) - output = keypoint_ops.change_coordinate_frame(keypoints, window) - - with self.test_session() as sess: - output_, expected_keypoints_ = sess.run([output, expected_keypoints]) - self.assertAllClose(output_, expected_keypoints_) - - def test_to_normalized_coordinates(self): - keypoints = tf.constant([ - [[10., 30.], [30., 45.]], - [[20., 0.], [40., 60.]] - ]) - output = keypoint_ops.to_normalized_coordinates( - keypoints, 40, 60) - expected_keypoints = tf.constant([ - [[0.25, 0.5], [0.75, 0.75]], - [[0.5, 0.0], [1.0, 1.0]] - ]) - - with self.test_session() as sess: - output_, expected_keypoints_ = sess.run([output, expected_keypoints]) - self.assertAllClose(output_, expected_keypoints_) - - def test_to_normalized_coordinates_already_normalized(self): - keypoints = tf.constant([ - [[0.25, 0.5], [0.75, 0.75]], - [[0.5, 0.0], [1.0, 1.0]] - ]) - output = keypoint_ops.to_normalized_coordinates( - keypoints, 40, 60) - - with self.test_session() as sess: - with self.assertRaisesOpError('assertion failed'): - sess.run(output) - - def test_to_absolute_coordinates(self): - keypoints = tf.constant([ - [[0.25, 0.5], [0.75, 0.75]], - [[0.5, 0.0], [1.0, 1.0]] - ]) - output = keypoint_ops.to_absolute_coordinates( - keypoints, 40, 60) - expected_keypoints = tf.constant([ - [[10., 30.], [30., 45.]], - [[20., 0.], [40., 60.]] - ]) - - with self.test_session() as sess: - output_, expected_keypoints_ = sess.run([output, expected_keypoints]) - self.assertAllClose(output_, expected_keypoints_) - - def test_to_absolute_coordinates_already_absolute(self): - keypoints = tf.constant([ - [[10., 30.], [30., 45.]], - [[20., 0.], [40., 60.]] - ]) - output = keypoint_ops.to_absolute_coordinates( - keypoints, 40, 60) - - with self.test_session() as sess: - with self.assertRaisesOpError('assertion failed'): - sess.run(output) - - def test_flip_horizontal(self): - 
keypoints = tf.constant([ - [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]], - [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]] - ]) - flip_permutation = [0, 2, 1] - - expected_keypoints = tf.constant([ - [[0.1, 0.9], [0.3, 0.7], [0.2, 0.8]], - [[0.4, 0.6], [0.6, 0.4], [0.5, 0.5]], - ]) - output = keypoint_ops.flip_horizontal(keypoints, 0.5, flip_permutation) - - with self.test_session() as sess: - output_, expected_keypoints_ = sess.run([output, expected_keypoints]) - self.assertAllClose(output_, expected_keypoints_) - - def test_flip_vertical(self): - keypoints = tf.constant([ - [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]], - [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]] - ]) - flip_permutation = [0, 2, 1] - - expected_keypoints = tf.constant([ - [[0.9, 0.1], [0.7, 0.3], [0.8, 0.2]], - [[0.6, 0.4], [0.4, 0.6], [0.5, 0.5]], - ]) - output = keypoint_ops.flip_vertical(keypoints, 0.5, flip_permutation) - - with self.test_session() as sess: - output_, expected_keypoints_ = sess.run([output, expected_keypoints]) - self.assertAllClose(output_, expected_keypoints_) - - def test_rot90(self): - keypoints = tf.constant([ - [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]], - [[0.4, 0.6], [0.5, 0.6], [0.6, 0.7]] - ]) - expected_keypoints = tf.constant([ - [[0.9, 0.1], [0.8, 0.2], [0.7, 0.3]], - [[0.4, 0.4], [0.4, 0.5], [0.3, 0.6]], - ]) - output = keypoint_ops.rot90(keypoints) - - with self.test_session() as sess: - output_, expected_keypoints_ = sess.run([output, expected_keypoints]) - self.assertAllClose(output_, expected_keypoints_) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/core/losses.py b/object_detection/core/losses.py deleted file mode 100644 index b8478c15..00000000 --- a/object_detection/core/losses.py +++ /dev/null @@ -1,621 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Classification and regression loss functions for object detection. - -Localization losses: - * WeightedL2LocalizationLoss - * WeightedSmoothL1LocalizationLoss - * WeightedIOULocalizationLoss - -Classification losses: - * WeightedSigmoidClassificationLoss - * WeightedSoftmaxClassificationLoss - * BootstrappedSigmoidClassificationLoss -""" -from abc import ABCMeta -from abc import abstractmethod - -import tensorflow as tf - -from object_detection.core import box_list -from object_detection.core import box_list_ops -from object_detection.utils import ops - -slim = tf.contrib.slim - - -class Loss(object): - """Abstract base class for loss functions.""" - __metaclass__ = ABCMeta - - def __call__(self, - prediction_tensor, - target_tensor, - ignore_nan_targets=False, - scope=None, - **params): - """Call the loss function. - - Args: - prediction_tensor: a tensor representing predicted quantities. - target_tensor: a tensor representing regression or classification targets. - ignore_nan_targets: whether to ignore nan targets in the loss computation. - E.g. can be used if the target tensor is missing groundtruth data that - shouldn't be factored into the loss. - scope: Op scope name. Defaults to 'Loss' if None. - **params: Additional keyword arguments for specific implementations of - the Loss. - - Returns: - loss: a tensor representing the value of the loss function. 
- """ - with tf.name_scope(scope, 'Loss', - [prediction_tensor, target_tensor, params]) as scope: - if ignore_nan_targets: - target_tensor = tf.where(tf.is_nan(target_tensor), - prediction_tensor, - target_tensor) - return self._compute_loss(prediction_tensor, target_tensor, **params) - - @abstractmethod - def _compute_loss(self, prediction_tensor, target_tensor, **params): - """Method to be overridden by implementations. - - Args: - prediction_tensor: a tensor representing predicted quantities - target_tensor: a tensor representing regression or classification targets - **params: Additional keyword arguments for specific implementations of - the Loss. - - Returns: - loss: a tensor representing the value of the loss function - """ - pass - - -class WeightedL2LocalizationLoss(Loss): - """L2 localization loss function with anchorwise output support. - - Loss[b,a] = .5 * ||weights[b,a] * (prediction[b,a,:] - target[b,a,:])||^2 - """ - - def __init__(self, anchorwise_output=False): - """Constructor. - - Args: - anchorwise_output: Outputs loss per anchor. (default False) - - """ - self._anchorwise_output = anchorwise_output - - def _compute_loss(self, prediction_tensor, target_tensor, weights): - """Compute loss function. - - Args: - prediction_tensor: A float tensor of shape [batch_size, num_anchors, - code_size] representing the (encoded) predicted locations of objects. 
- target_tensor: A float tensor of shape [batch_size, num_anchors, - code_size] representing the regression targets - weights: a float tensor of shape [batch_size, num_anchors] - - Returns: - loss: a (scalar) tensor representing the value of the loss function - or a float tensor of shape [batch_size, num_anchors] - """ - weighted_diff = (prediction_tensor - target_tensor) * tf.expand_dims( - weights, 2) - square_diff = 0.5 * tf.square(weighted_diff) - if self._anchorwise_output: - return tf.reduce_sum(square_diff, 2) - return tf.reduce_sum(square_diff) - - -class WeightedSmoothL1LocalizationLoss(Loss): - """Smooth L1 localization loss function. - - The smooth L1_loss is defined elementwise as .5 x^2 if |x|<1 and |x|-.5 - otherwise, where x is the difference between predictions and target. - - See also Equation (3) in the Fast R-CNN paper by Ross Girshick (ICCV 2015) - """ - - def __init__(self, anchorwise_output=False): - """Constructor. - - Args: - anchorwise_output: Outputs loss per anchor. (default False) - - """ - self._anchorwise_output = anchorwise_output - - def _compute_loss(self, prediction_tensor, target_tensor, weights): - """Compute loss function. - - Args: - prediction_tensor: A float tensor of shape [batch_size, num_anchors, - code_size] representing the (encoded) predicted locations of objects. 
- target_tensor: A float tensor of shape [batch_size, num_anchors, - code_size] representing the regression targets - weights: a float tensor of shape [batch_size, num_anchors] - - Returns: - loss: a (scalar) tensor representing the value of the loss function - """ - diff = prediction_tensor - target_tensor - abs_diff = tf.abs(diff) - abs_diff_lt_1 = tf.less(abs_diff, 1) - anchorwise_smooth_l1norm = tf.reduce_sum( - tf.where(abs_diff_lt_1, 0.5 * tf.square(abs_diff), abs_diff - 0.5), - 2) * weights - if self._anchorwise_output: - return anchorwise_smooth_l1norm - return tf.reduce_sum(anchorwise_smooth_l1norm) - - -class WeightedIOULocalizationLoss(Loss): - """IOU localization loss function. - - Sums the IOU for corresponding pairs of predicted/groundtruth boxes - and for each pair assign a loss of 1 - IOU. We then compute a weighted - sum over all pairs which is returned as the total loss. - """ - - def _compute_loss(self, prediction_tensor, target_tensor, weights): - """Compute loss function. - - Args: - prediction_tensor: A float tensor of shape [batch_size, num_anchors, 4] - representing the decoded predicted boxes - target_tensor: A float tensor of shape [batch_size, num_anchors, 4] - representing the decoded target boxes - weights: a float tensor of shape [batch_size, num_anchors] - - Returns: - loss: a (scalar) tensor representing the value of the loss function - """ - predicted_boxes = box_list.BoxList(tf.reshape(prediction_tensor, [-1, 4])) - target_boxes = box_list.BoxList(tf.reshape(target_tensor, [-1, 4])) - per_anchor_iou_loss = 1.0 - box_list_ops.matched_iou(predicted_boxes, - target_boxes) - return tf.reduce_sum(tf.reshape(weights, [-1]) * per_anchor_iou_loss) - - -class WeightedSigmoidClassificationLoss(Loss): - """Sigmoid cross entropy classification loss function.""" - - def __init__(self, anchorwise_output=False): - """Constructor. - - Args: - anchorwise_output: Outputs loss per anchor. 
(default False) - - """ - self._anchorwise_output = anchorwise_output - - def _compute_loss(self, - prediction_tensor, - target_tensor, - weights, - class_indices=None): - """Compute loss function. - - Args: - prediction_tensor: A float tensor of shape [batch_size, num_anchors, - num_classes] representing the predicted logits for each class - target_tensor: A float tensor of shape [batch_size, num_anchors, - num_classes] representing one-hot encoded classification targets - weights: a float tensor of shape [batch_size, num_anchors] - class_indices: (Optional) A 1-D integer tensor of class indices. - If provided, computes loss only for the specified class indices. - - Returns: - loss: a (scalar) tensor representing the value of the loss function - or a float tensor of shape [batch_size, num_anchors] - """ - weights = tf.expand_dims(weights, 2) - if class_indices is not None: - weights *= tf.reshape( - ops.indices_to_dense_vector(class_indices, - tf.shape(prediction_tensor)[2]), - [1, 1, -1]) - per_entry_cross_ent = (tf.nn.sigmoid_cross_entropy_with_logits( - labels=target_tensor, logits=prediction_tensor)) - if self._anchorwise_output: - return tf.reduce_sum(per_entry_cross_ent * weights, 2) - return tf.reduce_sum(per_entry_cross_ent * weights) - - -class SigmoidFocalClassificationLoss(Loss): - """Sigmoid focal cross entropy loss. - - Focal loss down-weights well classified examples and focusses on the hard - examples. See https://arxiv.org/pdf/1708.02002.pdf for the loss definition. - """ - - def __init__(self, anchorwise_output=False, gamma=2.0, alpha=0.25): - """Constructor. - - Args: - anchorwise_output: Outputs loss per anchor. (default False) - gamma: exponent of the modulating factor (1 - p_t) ^ gamma. - alpha: optional alpha weighting factor to balance positives vs negatives. 
- """ - self._anchorwise_output = anchorwise_output - self._alpha = alpha - self._gamma = gamma - - def _compute_loss(self, - prediction_tensor, - target_tensor, - weights, - class_indices=None): - """Compute loss function. - - Args: - prediction_tensor: A float tensor of shape [batch_size, num_anchors, - num_classes] representing the predicted logits for each class - target_tensor: A float tensor of shape [batch_size, num_anchors, - num_classes] representing one-hot encoded classification targets - weights: a float tensor of shape [batch_size, num_anchors] - class_indices: (Optional) A 1-D integer tensor of class indices. - If provided, computes loss only for the specified class indices. - - Returns: - loss: a (scalar) tensor representing the value of the loss function - or a float tensor of shape [batch_size, num_anchors] - """ - weights = tf.expand_dims(weights, 2) - if class_indices is not None: - weights *= tf.reshape( - ops.indices_to_dense_vector(class_indices, - tf.shape(prediction_tensor)[2]), - [1, 1, -1]) - per_entry_cross_ent = (tf.nn.sigmoid_cross_entropy_with_logits( - labels=target_tensor, logits=prediction_tensor)) - prediction_probabilities = tf.sigmoid(prediction_tensor) - p_t = ((target_tensor * prediction_probabilities) + - ((1 - target_tensor) * (1 - prediction_probabilities))) - modulating_factor = 1.0 - if self._gamma: - modulating_factor = tf.pow(1.0 - p_t, self._gamma) - alpha_weight_factor = 1.0 - if self._alpha is not None: - alpha_weight_factor = (target_tensor * self._alpha + - (1 - target_tensor) * (1 - self._alpha)) - focal_cross_entropy_loss = (modulating_factor * alpha_weight_factor * - per_entry_cross_ent) - if self._anchorwise_output: - return tf.reduce_sum(focal_cross_entropy_loss * weights, 2) - return tf.reduce_sum(focal_cross_entropy_loss * weights) - - -class WeightedSoftmaxClassificationLoss(Loss): - """Softmax loss function.""" - - def __init__(self, anchorwise_output=False, logit_scale=1.0): - """Constructor. 
- - Args: - anchorwise_output: Whether to output loss per anchor (default False) - logit_scale: When this value is high, the prediction is "diffused" and - when this value is low, the prediction is made peakier. - (default 1.0) - - """ - self._anchorwise_output = anchorwise_output - self._logit_scale = logit_scale - - def _compute_loss(self, prediction_tensor, target_tensor, weights): - """Compute loss function. - - Args: - prediction_tensor: A float tensor of shape [batch_size, num_anchors, - num_classes] representing the predicted logits for each class - target_tensor: A float tensor of shape [batch_size, num_anchors, - num_classes] representing one-hot encoded classification targets - weights: a float tensor of shape [batch_size, num_anchors] - - Returns: - loss: a (scalar) tensor representing the value of the loss function - """ - num_classes = prediction_tensor.get_shape().as_list()[-1] - prediction_tensor = tf.divide( - prediction_tensor, self._logit_scale, name='scale_logit') - per_row_cross_ent = (tf.nn.softmax_cross_entropy_with_logits( - labels=tf.reshape(target_tensor, [-1, num_classes]), - logits=tf.reshape(prediction_tensor, [-1, num_classes]))) - if self._anchorwise_output: - return tf.reshape(per_row_cross_ent, tf.shape(weights)) * weights - return tf.reduce_sum(per_row_cross_ent * tf.reshape(weights, [-1])) - - -class BootstrappedSigmoidClassificationLoss(Loss): - """Bootstrapped sigmoid cross entropy classification loss function. - - This loss uses a convex combination of training labels and the current model's - predictions as training targets in the classification loss. The idea is that - as the model improves over time, its predictions can be trusted more and we - can use these predictions to mitigate the damage of noisy/incorrect labels, - because incorrect labels are likely to be eventually highly inconsistent with - other stimuli predicted to have the same label by the model. 
- - In "soft" bootstrapping, we use all predicted class probabilities, whereas in - "hard" bootstrapping, we use the single class favored by the model. - - See also Training Deep Neural Networks On Noisy Labels with Bootstrapping by - Reed et al. (ICLR 2015). - """ - - def __init__(self, alpha, bootstrap_type='soft', anchorwise_output=False): - """Constructor. - - Args: - alpha: a float32 scalar tensor between 0 and 1 representing interpolation - weight - bootstrap_type: set to either 'hard' or 'soft' (default) - anchorwise_output: Outputs loss per anchor. (default False) - - Raises: - ValueError: if bootstrap_type is not either 'hard' or 'soft' - """ - if bootstrap_type != 'hard' and bootstrap_type != 'soft': - raise ValueError('Unrecognized bootstrap_type: must be one of ' - '\'hard\' or \'soft.\'') - self._alpha = alpha - self._bootstrap_type = bootstrap_type - self._anchorwise_output = anchorwise_output - - def _compute_loss(self, prediction_tensor, target_tensor, weights): - """Compute loss function. 
- - Args: - prediction_tensor: A float tensor of shape [batch_size, num_anchors, - num_classes] representing the predicted logits for each class - target_tensor: A float tensor of shape [batch_size, num_anchors, - num_classes] representing one-hot encoded classification targets - weights: a float tensor of shape [batch_size, num_anchors] - - Returns: - loss: a (scalar) tensor representing the value of the loss function - or a float tensor of shape [batch_size, num_anchors] - """ - if self._bootstrap_type == 'soft': - bootstrap_target_tensor = self._alpha * target_tensor + ( - 1.0 - self._alpha) * tf.sigmoid(prediction_tensor) - else: - bootstrap_target_tensor = self._alpha * target_tensor + ( - 1.0 - self._alpha) * tf.cast( - tf.sigmoid(prediction_tensor) > 0.5, tf.float32) - per_entry_cross_ent = (tf.nn.sigmoid_cross_entropy_with_logits( - labels=bootstrap_target_tensor, logits=prediction_tensor)) - if self._anchorwise_output: - return tf.reduce_sum(per_entry_cross_ent * tf.expand_dims(weights, 2), 2) - return tf.reduce_sum(per_entry_cross_ent * tf.expand_dims(weights, 2)) - - -class HardExampleMiner(object): - """Hard example mining for regions in a list of images. - - Implements hard example mining to select a subset of regions to be - back-propagated. For each image, selects the regions with highest losses, - subject to the condition that a newly selected region cannot have - an IOU > iou_threshold with any of the previously selected regions. - This can be achieved by re-using a greedy non-maximum suppression algorithm. - A constraint on the number of negatives mined per positive region can also be - enforced. - - Reference papers: "Training Region-based Object Detectors with Online - Hard Example Mining" (CVPR 2016) by Srivastava et al., and - "SSD: Single Shot MultiBox Detector" (ECCV 2016) by Liu et al. 
- """ - - def __init__(self, - num_hard_examples=64, - iou_threshold=0.7, - loss_type='both', - cls_loss_weight=0.05, - loc_loss_weight=0.06, - max_negatives_per_positive=None, - min_negatives_per_image=0): - """Constructor. - - The hard example mining implemented by this class can replicate the behavior - in the two aforementioned papers (Srivastava et al., and Liu et al). - To replicate the A2 paper (Srivastava et al), num_hard_examples is set - to a fixed parameter (64 by default) and iou_threshold is set to .7 for - running non-max-suppression the predicted boxes prior to hard mining. - In order to replicate the SSD paper (Liu et al), num_hard_examples should - be set to None, max_negatives_per_positive should be 3 and iou_threshold - should be 1.0 (in order to effectively turn off NMS). - - Args: - num_hard_examples: maximum number of hard examples to be - selected per image (prior to enforcing max negative to positive ratio - constraint). If set to None, all examples obtained after NMS are - considered. - iou_threshold: minimum intersection over union for an example - to be discarded during NMS. - loss_type: use only classification losses ('cls', default), - localization losses ('loc') or both losses ('both'). - In the last case, cls_loss_weight and loc_loss_weight are used to - compute weighted sum of the two losses. - cls_loss_weight: weight for classification loss. - loc_loss_weight: weight for location loss. - max_negatives_per_positive: maximum number of negatives to retain for - each positive anchor. By default, num_negatives_per_positive is None, - which means that we do not enforce a prespecified negative:positive - ratio. Note also that num_negatives_per_positives can be a float - (and will be converted to be a float even if it is passed in otherwise). - min_negatives_per_image: minimum number of negative anchors to sample for - a given image. 
Setting this to a positive number allows sampling - negatives in an image without any positive anchors and thus not biased - towards at least one detection per image. - """ - self._num_hard_examples = num_hard_examples - self._iou_threshold = iou_threshold - self._loss_type = loss_type - self._cls_loss_weight = cls_loss_weight - self._loc_loss_weight = loc_loss_weight - self._max_negatives_per_positive = max_negatives_per_positive - self._min_negatives_per_image = min_negatives_per_image - if self._max_negatives_per_positive is not None: - self._max_negatives_per_positive = float(self._max_negatives_per_positive) - self._num_positives_list = None - self._num_negatives_list = None - - def __call__(self, - location_losses, - cls_losses, - decoded_boxlist_list, - match_list=None): - """Computes localization and classification losses after hard mining. - - Args: - location_losses: a float tensor of shape [num_images, num_anchors] - representing anchorwise localization losses. - cls_losses: a float tensor of shape [num_images, num_anchors] - representing anchorwise classification losses. - decoded_boxlist_list: a list of decoded BoxList representing location - predictions for each image. - match_list: an optional list of matcher.Match objects encoding the match - between anchors and groundtruth boxes for each image of the batch, - with rows of the Match objects corresponding to groundtruth boxes - and columns corresponding to anchors. Match objects in match_list are - used to reference which anchors are positive, negative or ignored. If - self._max_negatives_per_positive exists, these are then used to enforce - a prespecified negative to positive ratio. - - Returns: - mined_location_loss: a float scalar with sum of localization losses from - selected hard examples. - mined_cls_loss: a float scalar with sum of classification losses from - selected hard examples. 
- Raises: - ValueError: if location_losses, cls_losses and decoded_boxlist_list do - not have compatible shapes (i.e., they must correspond to the same - number of images). - ValueError: if match_list is specified but its length does not match - len(decoded_boxlist_list). - """ - mined_location_losses = [] - mined_cls_losses = [] - location_losses = tf.unstack(location_losses) - cls_losses = tf.unstack(cls_losses) - num_images = len(decoded_boxlist_list) - if not match_list: - match_list = num_images * [None] - if not len(location_losses) == len(decoded_boxlist_list) == len(cls_losses): - raise ValueError('location_losses, cls_losses and decoded_boxlist_list ' - 'do not have compatible shapes.') - if not isinstance(match_list, list): - raise ValueError('match_list must be a list.') - if len(match_list) != len(decoded_boxlist_list): - raise ValueError('match_list must either be None or have ' - 'length=len(decoded_boxlist_list).') - num_positives_list = [] - num_negatives_list = [] - for ind, detection_boxlist in enumerate(decoded_boxlist_list): - box_locations = detection_boxlist.get() - match = match_list[ind] - image_losses = cls_losses[ind] - if self._loss_type == 'loc': - image_losses = location_losses[ind] - elif self._loss_type == 'both': - image_losses *= self._cls_loss_weight - image_losses += location_losses[ind] * self._loc_loss_weight - if self._num_hard_examples is not None: - num_hard_examples = self._num_hard_examples - else: - num_hard_examples = detection_boxlist.num_boxes() - selected_indices = tf.image.non_max_suppression( - box_locations, image_losses, num_hard_examples, self._iou_threshold) - if self._max_negatives_per_positive is not None and match: - (selected_indices, num_positives, - num_negatives) = self._subsample_selection_to_desired_neg_pos_ratio( - selected_indices, match, self._max_negatives_per_positive, - self._min_negatives_per_image) - num_positives_list.append(num_positives) - num_negatives_list.append(num_negatives) - 
mined_location_losses.append( - tf.reduce_sum(tf.gather(location_losses[ind], selected_indices))) - mined_cls_losses.append( - tf.reduce_sum(tf.gather(cls_losses[ind], selected_indices))) - location_loss = tf.reduce_sum(tf.stack(mined_location_losses)) - cls_loss = tf.reduce_sum(tf.stack(mined_cls_losses)) - if match and self._max_negatives_per_positive: - self._num_positives_list = num_positives_list - self._num_negatives_list = num_negatives_list - return (location_loss, cls_loss) - - def summarize(self): - """Summarize the number of positives and negatives after mining.""" - if self._num_positives_list and self._num_negatives_list: - avg_num_positives = tf.reduce_mean(tf.to_float(self._num_positives_list)) - avg_num_negatives = tf.reduce_mean(tf.to_float(self._num_negatives_list)) - tf.summary.scalar('HardExampleMiner/NumPositives', avg_num_positives) - tf.summary.scalar('HardExampleMiner/NumNegatives', avg_num_negatives) - - def _subsample_selection_to_desired_neg_pos_ratio(self, - indices, - match, - max_negatives_per_positive, - min_negatives_per_image=0): - """Subsample a collection of selected indices to a desired neg:pos ratio. - - This function takes a subset of M indices (indexing into a large anchor - collection of N anchors where M=0, - meaning that column i is matched with row match_results[i]. - (2) match_results[i]=-1, meaning that column i is not matched. - (3) match_results[i]=-2, meaning that column i is ignored. - - Raises: - ValueError: if match_results does not have rank 1 or is not an - integer int32 scalar tensor - """ - if match_results.shape.ndims != 1: - raise ValueError('match_results should have rank 1') - if match_results.dtype != tf.int32: - raise ValueError('match_results should be an int32 or int64 scalar ' - 'tensor') - self._match_results = match_results - - @property - def match_results(self): - """The accessor for match results. - - Returns: - the tensor which encodes the match results. 
- """ - return self._match_results - - def matched_column_indices(self): - """Returns column indices that match to some row. - - The indices returned by this op are always sorted in increasing order. - - Returns: - column_indices: int32 tensor of shape [K] with column indices. - """ - return self._reshape_and_cast(tf.where(tf.greater(self._match_results, -1))) - - def matched_column_indicator(self): - """Returns column indices that are matched. - - Returns: - column_indices: int32 tensor of shape [K] with column indices. - """ - return tf.greater_equal(self._match_results, 0) - - def num_matched_columns(self): - """Returns number (int32 scalar tensor) of matched columns.""" - return tf.size(self.matched_column_indices()) - - def unmatched_column_indices(self): - """Returns column indices that do not match any row. - - The indices returned by this op are always sorted in increasing order. - - Returns: - column_indices: int32 tensor of shape [K] with column indices. - """ - return self._reshape_and_cast(tf.where(tf.equal(self._match_results, -1))) - - def unmatched_column_indicator(self): - """Returns column indices that are unmatched. - - Returns: - column_indices: int32 tensor of shape [K] with column indices. - """ - return tf.equal(self._match_results, -1) - - def num_unmatched_columns(self): - """Returns number (int32 scalar tensor) of unmatched columns.""" - return tf.size(self.unmatched_column_indices()) - - def ignored_column_indices(self): - """Returns column indices that are ignored (neither Matched nor Unmatched). - - The indices returned by this op are always sorted in increasing order. - - Returns: - column_indices: int32 tensor of shape [K] with column indices. - """ - return self._reshape_and_cast(tf.where(self.ignored_column_indicator())) - - def ignored_column_indicator(self): - """Returns boolean column indicator where True means the colum is ignored. - - Returns: - column_indicator: boolean vector which is True for all ignored column - indices. 
- """ - return tf.equal(self._match_results, -2) - - def num_ignored_columns(self): - """Returns number (int32 scalar tensor) of matched columns.""" - return tf.size(self.ignored_column_indices()) - - def unmatched_or_ignored_column_indices(self): - """Returns column indices that are unmatched or ignored. - - The indices returned by this op are always sorted in increasing order. - - Returns: - column_indices: int32 tensor of shape [K] with column indices. - """ - return self._reshape_and_cast(tf.where(tf.greater(0, self._match_results))) - - def matched_row_indices(self): - """Returns row indices that match some column. - - The indices returned by this op are ordered so as to be in correspondence - with the output of matched_column_indicator(). For example if - self.matched_column_indicator() is [0,2], and self.matched_row_indices() is - [7, 3], then we know that column 0 was matched to row 7 and column 2 was - matched to row 3. - - Returns: - row_indices: int32 tensor of shape [K] with row indices. - """ - return self._reshape_and_cast( - tf.gather(self._match_results, self.matched_column_indices())) - - def _reshape_and_cast(self, t): - return tf.cast(tf.reshape(t, [-1]), tf.int32) - - -class Matcher(object): - """Abstract base class for matcher. - """ - __metaclass__ = ABCMeta - - def match(self, similarity_matrix, scope=None, **params): - """Computes matches among row and column indices and returns the result. - - Computes matches among the row and column indices based on the similarity - matrix and optional arguments. - - Args: - similarity_matrix: Float tensor of shape [N, M] with pairwise similarity - where higher value means more similar. - scope: Op scope name. Defaults to 'Match' if None. - **params: Additional keyword arguments for specific implementations of - the Matcher. - - Returns: - A Match object with the results of matching. 
- """ - with tf.name_scope(scope, 'Match', [similarity_matrix, params]) as scope: - return Match(self._match(similarity_matrix, **params)) - - @abstractmethod - def _match(self, similarity_matrix, **params): - """Method to be overriden by implementations. - - Args: - similarity_matrix: Float tensor of shape [N, M] with pairwise similarity - where higher value means more similar. - **params: Additional keyword arguments for specific implementations of - the Matcher. - - Returns: - match_results: Integer tensor of shape [M]: match_results[i]>=0 means - that column i is matched to row match_results[i], match_results[i]=-1 - means that the column is not matched. match_results[i]=-2 means that - the column is ignored (usually this happens when there is a very weak - match which one neither wants as positive nor negative example). - """ - pass diff --git a/object_detection/core/matcher_test.py b/object_detection/core/matcher_test.py deleted file mode 100644 index 7054015f..00000000 --- a/object_detection/core/matcher_test.py +++ /dev/null @@ -1,150 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for object_detection.core.matcher.""" -import numpy as np -import tensorflow as tf - -from object_detection.core import matcher - - -class AnchorMatcherTest(tf.test.TestCase): - - def test_get_correct_matched_columnIndices(self): - match_results = tf.constant([3, 1, -1, 0, -1, 5, -2]) - match = matcher.Match(match_results) - expected_column_indices = [0, 1, 3, 5] - matched_column_indices = match.matched_column_indices() - self.assertEquals(matched_column_indices.dtype, tf.int32) - with self.test_session() as sess: - matched_column_indices = sess.run(matched_column_indices) - self.assertAllEqual(matched_column_indices, expected_column_indices) - - def test_get_correct_counts(self): - match_results = tf.constant([3, 1, -1, 0, -1, 5, -2]) - match = matcher.Match(match_results) - exp_num_matched_columns = 4 - exp_num_unmatched_columns = 2 - exp_num_ignored_columns = 1 - num_matched_columns = match.num_matched_columns() - num_unmatched_columns = match.num_unmatched_columns() - num_ignored_columns = match.num_ignored_columns() - self.assertEquals(num_matched_columns.dtype, tf.int32) - self.assertEquals(num_unmatched_columns.dtype, tf.int32) - self.assertEquals(num_ignored_columns.dtype, tf.int32) - with self.test_session() as sess: - (num_matched_columns_out, num_unmatched_columns_out, - num_ignored_columns_out) = sess.run( - [num_matched_columns, num_unmatched_columns, num_ignored_columns]) - self.assertAllEqual(num_matched_columns_out, exp_num_matched_columns) - self.assertAllEqual(num_unmatched_columns_out, exp_num_unmatched_columns) - self.assertAllEqual(num_ignored_columns_out, exp_num_ignored_columns) - - def testGetCorrectUnmatchedColumnIndices(self): - match_results = tf.constant([3, 1, -1, 0, -1, 5, -2]) - match = matcher.Match(match_results) - expected_column_indices = [2, 4] - unmatched_column_indices = match.unmatched_column_indices() - 
self.assertEquals(unmatched_column_indices.dtype, tf.int32) - with self.test_session() as sess: - unmatched_column_indices = sess.run(unmatched_column_indices) - self.assertAllEqual(unmatched_column_indices, expected_column_indices) - - def testGetCorrectMatchedRowIndices(self): - match_results = tf.constant([3, 1, -1, 0, -1, 5, -2]) - match = matcher.Match(match_results) - expected_row_indices = [3, 1, 0, 5] - matched_row_indices = match.matched_row_indices() - self.assertEquals(matched_row_indices.dtype, tf.int32) - with self.test_session() as sess: - matched_row_inds = sess.run(matched_row_indices) - self.assertAllEqual(matched_row_inds, expected_row_indices) - - def test_get_correct_ignored_column_indices(self): - match_results = tf.constant([3, 1, -1, 0, -1, 5, -2]) - match = matcher.Match(match_results) - expected_column_indices = [6] - ignored_column_indices = match.ignored_column_indices() - self.assertEquals(ignored_column_indices.dtype, tf.int32) - with self.test_session() as sess: - ignored_column_indices = sess.run(ignored_column_indices) - self.assertAllEqual(ignored_column_indices, expected_column_indices) - - def test_get_correct_matched_column_indicator(self): - match_results = tf.constant([3, 1, -1, 0, -1, 5, -2]) - match = matcher.Match(match_results) - expected_column_indicator = [True, True, False, True, False, True, False] - matched_column_indicator = match.matched_column_indicator() - self.assertEquals(matched_column_indicator.dtype, tf.bool) - with self.test_session() as sess: - matched_column_indicator = sess.run(matched_column_indicator) - self.assertAllEqual(matched_column_indicator, expected_column_indicator) - - def test_get_correct_unmatched_column_indicator(self): - match_results = tf.constant([3, 1, -1, 0, -1, 5, -2]) - match = matcher.Match(match_results) - expected_column_indicator = [False, False, True, False, True, False, False] - unmatched_column_indicator = match.unmatched_column_indicator() - 
self.assertEquals(unmatched_column_indicator.dtype, tf.bool) - with self.test_session() as sess: - unmatched_column_indicator = sess.run(unmatched_column_indicator) - self.assertAllEqual(unmatched_column_indicator, expected_column_indicator) - - def test_get_correct_ignored_column_indicator(self): - match_results = tf.constant([3, 1, -1, 0, -1, 5, -2]) - match = matcher.Match(match_results) - expected_column_indicator = [False, False, False, False, False, False, True] - ignored_column_indicator = match.ignored_column_indicator() - self.assertEquals(ignored_column_indicator.dtype, tf.bool) - with self.test_session() as sess: - ignored_column_indicator = sess.run(ignored_column_indicator) - self.assertAllEqual(ignored_column_indicator, expected_column_indicator) - - def test_get_correct_unmatched_ignored_column_indices(self): - match_results = tf.constant([3, 1, -1, 0, -1, 5, -2]) - match = matcher.Match(match_results) - expected_column_indices = [2, 4, 6] - unmatched_ignored_column_indices = (match. 
- unmatched_or_ignored_column_indices()) - self.assertEquals(unmatched_ignored_column_indices.dtype, tf.int32) - with self.test_session() as sess: - unmatched_ignored_column_indices = sess.run( - unmatched_ignored_column_indices) - self.assertAllEqual(unmatched_ignored_column_indices, - expected_column_indices) - - def test_all_columns_accounted_for(self): - # Note: deliberately setting to small number so not always - # all possibilities appear (matched, unmatched, ignored) - num_matches = 10 - match_results = tf.random_uniform( - [num_matches], minval=-2, maxval=5, dtype=tf.int32) - match = matcher.Match(match_results) - matched_column_indices = match.matched_column_indices() - unmatched_column_indices = match.unmatched_column_indices() - ignored_column_indices = match.ignored_column_indices() - with self.test_session() as sess: - matched, unmatched, ignored = sess.run([ - matched_column_indices, unmatched_column_indices, - ignored_column_indices - ]) - all_indices = np.hstack((matched, unmatched, ignored)) - all_indices_sorted = np.sort(all_indices) - self.assertAllEqual(all_indices_sorted, - np.arange(num_matches, dtype=np.int32)) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/core/minibatch_sampler.py b/object_detection/core/minibatch_sampler.py deleted file mode 100644 index dc622221..00000000 --- a/object_detection/core/minibatch_sampler.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Base minibatch sampler module. - -The job of the minibatch_sampler is to subsample a minibatch based on some -criterion. - -The main function call is: - subsample(indicator, batch_size, **params). -Indicator is a 1d boolean tensor where True denotes which examples can be -sampled. It returns a boolean indicator where True denotes an example has been -sampled.. - -Subclasses should implement the Subsample function and can make use of the -@staticmethod SubsampleIndicator. -""" - -from abc import ABCMeta -from abc import abstractmethod - -import tensorflow as tf - -from object_detection.utils import ops - - -class MinibatchSampler(object): - """Abstract base class for subsampling minibatches.""" - __metaclass__ = ABCMeta - - def __init__(self): - """Constructs a minibatch sampler.""" - pass - - @abstractmethod - def subsample(self, indicator, batch_size, **params): - """Returns subsample of entries in indicator. - - Args: - indicator: boolean tensor of shape [N] whose True entries can be sampled. - batch_size: desired batch size. - **params: additional keyword arguments for specific implementations of - the MinibatchSampler. - - Returns: - sample_indicator: boolean tensor of shape [N] whose True entries have been - sampled. If sum(indicator) >= batch_size, sum(is_sampled) = batch_size - """ - pass - - @staticmethod - def subsample_indicator(indicator, num_samples): - """Subsample indicator vector. - - Given a boolean indicator vector with M elements set to `True`, the function - assigns all but `num_samples` of these previously `True` elements to - `False`. If `num_samples` is greater than M, the original indicator vector - is returned. - - Args: - indicator: a 1-dimensional boolean tensor indicating which elements - are allowed to be sampled and which are not. 
- num_samples: int32 scalar tensor - - Returns: - a boolean tensor with the same shape as input (indicator) tensor - """ - indices = tf.where(indicator) - indices = tf.random_shuffle(indices) - indices = tf.reshape(indices, [-1]) - - num_samples = tf.minimum(tf.size(indices), num_samples) - selected_indices = tf.slice(indices, [0], tf.reshape(num_samples, [1])) - - selected_indicator = ops.indices_to_dense_vector(selected_indices, - tf.shape(indicator)[0]) - - return tf.equal(selected_indicator, 1) diff --git a/object_detection/core/minibatch_sampler_test.py b/object_detection/core/minibatch_sampler_test.py deleted file mode 100644 index 7420ae5d..00000000 --- a/object_detection/core/minibatch_sampler_test.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for google3.research.vale.object_detection.minibatch_sampler.""" - -import numpy as np -import tensorflow as tf - -from object_detection.core import minibatch_sampler - - -class MinibatchSamplerTest(tf.test.TestCase): - - def test_subsample_indicator_when_more_true_elements_than_num_samples(self): - np_indicator = [True, False, True, False, True, True, False] - indicator = tf.constant(np_indicator) - samples = minibatch_sampler.MinibatchSampler.subsample_indicator( - indicator, 3) - with self.test_session() as sess: - samples_out = sess.run(samples) - self.assertTrue(np.sum(samples_out), 3) - self.assertAllEqual(samples_out, - np.logical_and(samples_out, np_indicator)) - - def test_subsample_when_more_true_elements_than_num_samples_no_shape(self): - np_indicator = [True, False, True, False, True, True, False] - indicator = tf.placeholder(tf.bool) - feed_dict = {indicator: np_indicator} - - samples = minibatch_sampler.MinibatchSampler.subsample_indicator( - indicator, 3) - with self.test_session() as sess: - samples_out = sess.run(samples, feed_dict=feed_dict) - self.assertTrue(np.sum(samples_out), 3) - self.assertAllEqual(samples_out, - np.logical_and(samples_out, np_indicator)) - - def test_subsample_indicator_when_less_true_elements_than_num_samples(self): - np_indicator = [True, False, True, False, True, True, False] - indicator = tf.constant(np_indicator) - samples = minibatch_sampler.MinibatchSampler.subsample_indicator( - indicator, 5) - with self.test_session() as sess: - samples_out = sess.run(samples) - self.assertTrue(np.sum(samples_out), 4) - self.assertAllEqual(samples_out, - np.logical_and(samples_out, np_indicator)) - - def test_subsample_indicator_when_num_samples_is_zero(self): - np_indicator = [True, False, True, False, True, True, False] - indicator = tf.constant(np_indicator) - samples_none = minibatch_sampler.MinibatchSampler.subsample_indicator( - 
indicator, 0) - with self.test_session() as sess: - samples_none_out = sess.run(samples_none) - self.assertAllEqual( - np.zeros_like(samples_none_out, dtype=bool), - samples_none_out) - - def test_subsample_indicator_when_indicator_all_false(self): - indicator_empty = tf.zeros([0], dtype=tf.bool) - samples_empty = minibatch_sampler.MinibatchSampler.subsample_indicator( - indicator_empty, 4) - with self.test_session() as sess: - samples_empty_out = sess.run(samples_empty) - self.assertEqual(0, samples_empty_out.size) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/core/model.py b/object_detection/core/model.py deleted file mode 100644 index 08843944..00000000 --- a/object_detection/core/model.py +++ /dev/null @@ -1,265 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Abstract detection model. - -This file defines a generic base class for detection models. Programs that are -designed to work with arbitrary detection models should only depend on this -class. We intend for the functions in this class to follow tensor-in/tensor-out -design, thus all functions have tensors or lists/dictionaries holding tensors as -inputs and outputs. 
- -Abstractly, detection models predict output tensors given input images -which can be passed to a loss function at training time or passed to a -postprocessing function at eval time. The computation graphs at a high level -consequently look as follows: - -Training time: -inputs (images tensor) -> preprocess -> predict -> loss -> outputs (loss tensor) - -Evaluation time: -inputs (images tensor) -> preprocess -> predict -> postprocess - -> outputs (boxes tensor, scores tensor, classes tensor, num_detections tensor) - -DetectionModels must thus implement four functions (1) preprocess, (2) predict, -(3) postprocess and (4) loss. DetectionModels should make no assumptions about -the input size or aspect ratio --- they are responsible for doing any -resize/reshaping necessary (see docstring for the preprocess function). -Output classes are always integers in the range [0, num_classes). Any mapping -of these integers to semantic labels is to be handled outside of this class. - -By default, DetectionModels produce bounding box detections; However, we support -a handful of auxiliary annotations associated with each bounding box, namely, -instance masks and keypoints. -""" -from abc import ABCMeta -from abc import abstractmethod - -from object_detection.core import standard_fields as fields - - -class DetectionModel(object): - """Abstract base class for detection models.""" - __metaclass__ = ABCMeta - - def __init__(self, num_classes): - """Constructor. - - Args: - num_classes: number of classes. Note that num_classes *does not* include - background categories that might be implicitly be predicted in various - implementations. - """ - self._num_classes = num_classes - self._groundtruth_lists = {} - - @property - def num_classes(self): - return self._num_classes - - def groundtruth_lists(self, field): - """Access list of groundtruth tensors. 
- - Args: - field: a string key, options are - fields.BoxListFields.{boxes,classes,masks,keypoints} - - Returns: - a list of tensors holding groundtruth information (see also - provide_groundtruth function below), with one entry for each image in the - batch. - Raises: - RuntimeError: if the field has not been provided via provide_groundtruth. - """ - if field not in self._groundtruth_lists: - raise RuntimeError('Groundtruth tensor %s has not been provided', field) - return self._groundtruth_lists[field] - - def groundtruth_has_field(self, field): - """Determines whether the groundtruth includes the given field. - - Args: - field: a string key, options are - fields.BoxListFields.{boxes,classes,masks,keypoints} - - Returns: - True if the groundtruth includes the given field, False otherwise. - """ - return field in self._groundtruth_lists - - @abstractmethod - def preprocess(self, inputs): - """Input preprocessing. - - To be overridden by implementations. - - This function is responsible for any scaling/shifting of input values that - is necessary prior to running the detector on an input image. - It is also responsible for any resizing that might be necessary as images - are assumed to arrive in arbitrary sizes. While this function could - conceivably be part of the predict method (below), it is often convenient - to keep these separate --- for example, we may want to preprocess on one - device, place onto a queue, and let another device (e.g., the GPU) handle - prediction. - - A few important notes about the preprocess function: - + We assume that this operation does not have any trainable variables nor - does it affect the groundtruth annotations in any way (thus data - augmentation operations such as random cropping should be performed - externally). - + There is no assumption that the batchsize in this function is the same as - the batch size in the predict function. 
In fact, we recommend calling the - preprocess function prior to calling any batching operations (which should - happen outside of the model) and thus assuming that batch sizes are equal - to 1 in the preprocess function. - + There is also no explicit assumption that the output resolutions - must be fixed across inputs --- this is to support "fully convolutional" - settings in which input images can have different shapes/resolutions. - - Args: - inputs: a [batch, height_in, width_in, channels] float32 tensor - representing a batch of images with values between 0 and 255.0. - - Returns: - preprocessed_inputs: a [batch, height_out, width_out, channels] float32 - tensor representing a batch of images. - """ - pass - - @abstractmethod - def predict(self, preprocessed_inputs): - """Predict prediction tensors from inputs tensor. - - Outputs of this function can be passed to loss or postprocess functions. - - Args: - preprocessed_inputs: a [batch, height, width, channels] float32 tensor - representing a batch of images. - - Returns: - prediction_dict: a dictionary holding prediction tensors to be - passed to the Loss or Postprocess functions. - """ - pass - - @abstractmethod - def postprocess(self, prediction_dict, **params): - """Convert predicted output tensors to final detections. - - Outputs adhere to the following conventions: - * Classes are integers in [0, num_classes); background classes are removed - and the first non-background class is mapped to 0. If the model produces - class-agnostic detections, then no output is produced for classes. - * Boxes are to be interpreted as being in [y_min, x_min, y_max, x_max] - format and normalized relative to the image window. - * `num_detections` is provided for settings where detections are padded to a - fixed number of boxes. - * We do not specifically assume any kind of probabilistic interpretation - of the scores --- the only important thing is their relative ordering. 
- Thus implementations of the postprocess function are free to output - logits, probabilities, calibrated probabilities, or anything else. - - Args: - prediction_dict: a dictionary holding prediction tensors. - **params: Additional keyword arguments for specific implementations of - DetectionModel. - - Returns: - detections: a dictionary containing the following fields - detection_boxes: [batch, max_detections, 4] - detection_scores: [batch, max_detections] - detection_classes: [batch, max_detections] - (If a model is producing class-agnostic detections, this field may be - missing) - instance_masks: [batch, max_detections, image_height, image_width] - (optional) - keypoints: [batch, max_detections, num_keypoints, 2] (optional) - num_detections: [batch] - """ - pass - - @abstractmethod - def loss(self, prediction_dict): - """Compute scalar loss tensors with respect to provided groundtruth. - - Calling this function requires that groundtruth tensors have been - provided via the provide_groundtruth function. - - Args: - prediction_dict: a dictionary holding predicted tensors - - Returns: - a dictionary mapping strings (loss names) to scalar tensors representing - loss values. - """ - pass - - def provide_groundtruth(self, - groundtruth_boxes_list, - groundtruth_classes_list, - groundtruth_masks_list=None, - groundtruth_keypoints_list=None): - """Provide groundtruth tensors. - - Args: - groundtruth_boxes_list: a list of 2-D tf.float32 tensors of shape - [num_boxes, 4] containing coordinates of the groundtruth boxes. - Groundtruth boxes are provided in [y_min, x_min, y_max, x_max] - format and assumed to be normalized and clipped - relative to the image window with y_min <= y_max and x_min <= x_max. - groundtruth_classes_list: a list of 2-D tf.float32 one-hot (or k-hot) - tensors of shape [num_boxes, num_classes] containing the class targets - with the 0th index assumed to map to the first non-background class. 
- groundtruth_masks_list: a list of 3-D tf.float32 tensors of - shape [num_boxes, height_in, width_in] containing instance - masks with values in {0, 1}. If None, no masks are provided. - Mask resolution `height_in`x`width_in` must agree with the resolution - of the input image tensor provided to the `preprocess` function. - groundtruth_keypoints_list: a list of 3-D tf.float32 tensors of - shape [num_boxes, num_keypoints, 2] containing keypoints. - Keypoints are assumed to be provided in normalized coordinates and - missing keypoints should be encoded as NaN. - """ - self._groundtruth_lists[fields.BoxListFields.boxes] = groundtruth_boxes_list - self._groundtruth_lists[ - fields.BoxListFields.classes] = groundtruth_classes_list - if groundtruth_masks_list: - self._groundtruth_lists[ - fields.BoxListFields.masks] = groundtruth_masks_list - if groundtruth_keypoints_list: - self._groundtruth_lists[ - fields.BoxListFields.keypoints] = groundtruth_keypoints_list - - @abstractmethod - def restore_map(self, from_detection_checkpoint=True): - """Returns a map of variables to load from a foreign checkpoint. - - Returns a map of variable names to load from a checkpoint to variables in - the model graph. This enables the model to initialize based on weights from - another task. For example, the feature extractor variables from a - classification model can be used to bootstrap training of an object - detector. When loading from an object detection model, the checkpoint model - should have the same parameters as this detection model with exception of - the num_classes parameter. - - Args: - from_detection_checkpoint: whether to restore from a full detection - checkpoint (with compatible variable names) or to restore from a - classification checkpoint for initialization prior to training. - - Returns: - A dict mapping variable names (to load from a checkpoint) to variables in - the model graph. 
- """ - pass diff --git a/object_detection/core/post_processing.py b/object_detection/core/post_processing.py deleted file mode 100644 index d34f0683..00000000 --- a/object_detection/core/post_processing.py +++ /dev/null @@ -1,395 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Post-processing operations on detected boxes.""" - -import tensorflow as tf - -from object_detection.core import box_list -from object_detection.core import box_list_ops -from object_detection.core import standard_fields as fields - - -def multiclass_non_max_suppression(boxes, - scores, - score_thresh, - iou_thresh, - max_size_per_class, - max_total_size=0, - clip_window=None, - change_coordinate_frame=False, - masks=None, - additional_fields=None, - scope=None): - """Multi-class version of non maximum suppression. - - This op greedily selects a subset of detection bounding boxes, pruning - away boxes that have high IOU (intersection over union) overlap (> thresh) - with already selected boxes. It operates independently for each class for - which scores are provided (via the scores field of the input box_list), - pruning boxes with score less than a provided threshold prior to - applying NMS. 
- - Please note that this operation is performed on *all* classes, therefore any - background classes should be removed prior to calling this function. - - Args: - boxes: A [k, q, 4] float32 tensor containing k detections. `q` can be either - number of classes or 1 depending on whether a separate box is predicted - per class. - scores: A [k, num_classes] float32 tensor containing the scores for each of - the k detections. - score_thresh: scalar threshold for score (low scoring boxes are removed). - iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap - with previously selected boxes are removed). - max_size_per_class: maximum number of retained boxes per class. - max_total_size: maximum number of boxes retained over all classes. By - default returns all boxes retained after capping boxes per class. - clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max] - representing the window to clip and normalize boxes to before performing - non-max suppression. - change_coordinate_frame: Whether to normalize coordinates after clipping - relative to clip_window (this can only be set to True if a clip_window - is provided) - masks: (optional) a [k, q, mask_height, mask_width] float32 tensor - containing box masks. `q` can be either number of classes or 1 depending - on whether a separate mask is predicted per class. - additional_fields: (optional) If not None, a dictionary that maps keys to - tensors whose first dimensions are all of size `k`. After non-maximum - suppression, all tensors corresponding to the selected boxes will be - added to resulting BoxList. - scope: name scope. - - Returns: - a BoxList holding M boxes with a rank-1 scores field representing - corresponding scores for each box with scores sorted in decreasing order - and a rank-1 classes field representing a class label for each box. - - Raises: - ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have - a valid scores field. 
- """ - if not 0 <= iou_thresh <= 1.0: - raise ValueError('iou_thresh must be between 0 and 1') - if scores.shape.ndims != 2: - raise ValueError('scores field must be of rank 2') - if scores.shape[1].value is None: - raise ValueError('scores must have statically defined second ' - 'dimension') - if boxes.shape.ndims != 3: - raise ValueError('boxes must be of rank 3.') - if not (boxes.shape[1].value == scores.shape[1].value or - boxes.shape[1].value == 1): - raise ValueError('second dimension of boxes must be either 1 or equal ' - 'to the second dimension of scores') - if boxes.shape[2].value != 4: - raise ValueError('last dimension of boxes must be of size 4.') - if change_coordinate_frame and clip_window is None: - raise ValueError('if change_coordinate_frame is True, then a clip_window' - 'must be specified.') - - with tf.name_scope(scope, 'MultiClassNonMaxSuppression'): - num_boxes = tf.shape(boxes)[0] - num_scores = tf.shape(scores)[0] - num_classes = scores.get_shape()[1] - - length_assert = tf.Assert( - tf.equal(num_boxes, num_scores), - ['Incorrect scores field length: actual vs expected.', - num_scores, num_boxes]) - - selected_boxes_list = [] - per_class_boxes_list = tf.unstack(boxes, axis=1) - if masks is not None: - per_class_masks_list = tf.unstack(masks, axis=1) - boxes_ids = (range(num_classes) if len(per_class_boxes_list) > 1 - else [0] * num_classes) - for class_idx, boxes_idx in zip(range(num_classes), boxes_ids): - per_class_boxes = per_class_boxes_list[boxes_idx] - boxlist_and_class_scores = box_list.BoxList(per_class_boxes) - with tf.control_dependencies([length_assert]): - class_scores = tf.reshape( - tf.slice(scores, [0, class_idx], tf.stack([num_scores, 1])), [-1]) - boxlist_and_class_scores.add_field(fields.BoxListFields.scores, - class_scores) - if masks is not None: - per_class_masks = per_class_masks_list[boxes_idx] - boxlist_and_class_scores.add_field(fields.BoxListFields.masks, - per_class_masks) - if additional_fields is not None: - 
for key, tensor in additional_fields.items(): - boxlist_and_class_scores.add_field(key, tensor) - boxlist_filtered = box_list_ops.filter_greater_than( - boxlist_and_class_scores, score_thresh) - if clip_window is not None: - boxlist_filtered = box_list_ops.clip_to_window( - boxlist_filtered, clip_window) - if change_coordinate_frame: - boxlist_filtered = box_list_ops.change_coordinate_frame( - boxlist_filtered, clip_window) - max_selection_size = tf.minimum(max_size_per_class, - boxlist_filtered.num_boxes()) - selected_indices = tf.image.non_max_suppression( - boxlist_filtered.get(), - boxlist_filtered.get_field(fields.BoxListFields.scores), - max_selection_size, - iou_threshold=iou_thresh) - nms_result = box_list_ops.gather(boxlist_filtered, selected_indices) - nms_result.add_field( - fields.BoxListFields.classes, (tf.zeros_like( - nms_result.get_field(fields.BoxListFields.scores)) + class_idx)) - selected_boxes_list.append(nms_result) - selected_boxes = box_list_ops.concatenate(selected_boxes_list) - sorted_boxes = box_list_ops.sort_by_field(selected_boxes, - fields.BoxListFields.scores) - if max_total_size: - max_total_size = tf.minimum(max_total_size, - sorted_boxes.num_boxes()) - sorted_boxes = box_list_ops.gather(sorted_boxes, - tf.range(max_total_size)) - return sorted_boxes - - -def batch_multiclass_non_max_suppression(boxes, - scores, - score_thresh, - iou_thresh, - max_size_per_class, - max_total_size=0, - clip_window=None, - change_coordinate_frame=False, - num_valid_boxes=None, - masks=None, - additional_fields=None, - scope=None, - parallel_iterations=32): - """Multi-class version of non maximum suppression that operates on a batch. - - This op is similar to `multiclass_non_max_suppression` but operates on a batch - of boxes and scores. See documentation for `multiclass_non_max_suppression` - for details. - - Args: - boxes: A [batch_size, num_anchors, q, 4] float32 tensor containing - detections. 
If `q` is 1 then same boxes are used for all classes - otherwise, if `q` is equal to number of classes, class-specific boxes - are used. - scores: A [batch_size, num_anchors, num_classes] float32 tensor containing - the scores for each of the `num_anchors` detections. - score_thresh: scalar threshold for score (low scoring boxes are removed). - iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap - with previously selected boxes are removed). - max_size_per_class: maximum number of retained boxes per class. - max_total_size: maximum number of boxes retained over all classes. By - default returns all boxes retained after capping boxes per class. - clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max] - representing the window to clip boxes to before performing non-max - suppression. - change_coordinate_frame: Whether to normalize coordinates after clipping - relative to clip_window (this can only be set to True if a clip_window - is provided) - num_valid_boxes: (optional) a Tensor of type `int32`. A 1-D tensor of shape - [batch_size] representing the number of valid boxes to be considered - for each image in the batch. This parameter allows for ignoring zero - paddings. - masks: (optional) a [batch_size, num_anchors, q, mask_height, mask_width] - float32 tensor containing box masks. `q` can be either number of classes - or 1 depending on whether a separate mask is predicted per class. - additional_fields: (optional) If not None, a dictionary that maps keys to - tensors whose dimensions are [batch_size, num_anchors, ...]. - scope: tf scope name. - parallel_iterations: (optional) number of batch items to process in - parallel. - - Returns: - 'nmsed_boxes': A [batch_size, max_detections, 4] float32 tensor - containing the non-max suppressed boxes. - 'nmsed_scores': A [batch_size, max_detections] float32 tensor containing - the scores for the boxes. 
- 'nmsed_classes': A [batch_size, max_detections] float32 tensor - containing the class for boxes. - 'nmsed_masks': (optional) a - [batch_size, max_detections, mask_height, mask_width] float32 tensor - containing masks for each selected box. This is set to None if input - `masks` is None. - 'nmsed_additional_fields': (optional) a dictionary of - [batch_size, max_detections, ...] float32 tensors corresponding to the - tensors specified in the input `additional_fields`. This is not returned - if input `additional_fields` is None. - 'num_detections': A [batch_size] int32 tensor indicating the number of - valid detections per batch item. Only the top num_detections[i] entries in - nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The rest of the - entries are zero paddings. - - Raises: - ValueError: if `q` in boxes.shape is not 1 or not equal to number of - classes as inferred from scores.shape. - """ - q = boxes.shape[2].value - num_classes = scores.shape[2].value - if q != 1 and q != num_classes: - raise ValueError('third dimension of boxes must be either 1 or equal ' - 'to the third dimension of scores') - - original_masks = masks - original_additional_fields = additional_fields - with tf.name_scope(scope, 'BatchMultiClassNonMaxSuppression'): - boxes_shape = boxes.shape - batch_size = boxes_shape[0].value - num_anchors = boxes_shape[1].value - - if batch_size is None: - batch_size = tf.shape(boxes)[0] - if num_anchors is None: - num_anchors = tf.shape(boxes)[1] - - # If num valid boxes aren't provided, create one and mark all boxes as - # valid. - if num_valid_boxes is None: - num_valid_boxes = tf.ones([batch_size], dtype=tf.int32) * num_anchors - - # If masks aren't provided, create dummy masks so we can only have one copy - # of _single_image_nms_fn and discard the dummy masks after map_fn. 
- if masks is None: - masks_shape = tf.stack([batch_size, num_anchors, 1, 0, 0]) - masks = tf.zeros(masks_shape) - - if additional_fields is None: - additional_fields = {} - - def _single_image_nms_fn(args): - """Runs NMS on a single image and returns padded output. - - Args: - args: A list of tensors consisting of the following: - per_image_boxes - A [num_anchors, q, 4] float32 tensor containing - detections. If `q` is 1 then same boxes are used for all classes - otherwise, if `q` is equal to number of classes, class-specific - boxes are used. - per_image_scores - A [num_anchors, num_classes] float32 tensor - containing the scores for each of the `num_anchors` detections. - per_image_masks - A [num_anchors, q, mask_height, mask_width] float32 - tensor containing box masks. `q` can be either number of classes - or 1 depending on whether a separate mask is predicted per class. - per_image_additional_fields - (optional) A variable number of float32 - tensors each with size [num_anchors, ...]. - per_image_num_valid_boxes - A tensor of type `int32`. A 1-D tensor of - shape [batch_size] representing the number of valid boxes to be - considered for each image in the batch. This parameter allows for - ignoring zero paddings. - - Returns: - 'nmsed_boxes': A [max_detections, 4] float32 tensor containing the - non-max suppressed boxes. - 'nmsed_scores': A [max_detections] float32 tensor containing the scores - for the boxes. - 'nmsed_classes': A [max_detections] float32 tensor containing the class - for boxes. - 'nmsed_masks': (optional) a [max_detections, mask_height, mask_width] - float32 tensor containing masks for each selected box. This is set to - None if input `masks` is None. - 'nmsed_additional_fields': (optional) A variable number of float32 - tensors each with size [max_detections, ...] corresponding to the - input `per_image_additional_fields`. - 'num_detections': A [batch_size] int32 tensor indicating the number of - valid detections per batch item. 
Only the top num_detections[i] - entries in nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The - rest of the entries are zero paddings. - """ - per_image_boxes = args[0] - per_image_scores = args[1] - per_image_masks = args[2] - per_image_additional_fields = { - key: value - for key, value in zip(additional_fields, args[3:-1]) - } - per_image_num_valid_boxes = args[-1] - per_image_boxes = tf.reshape( - tf.slice(per_image_boxes, 3 * [0], - tf.stack([per_image_num_valid_boxes, -1, -1])), [-1, q, 4]) - per_image_scores = tf.reshape( - tf.slice(per_image_scores, [0, 0], - tf.stack([per_image_num_valid_boxes, -1])), - [-1, num_classes]) - per_image_masks = tf.reshape( - tf.slice(per_image_masks, 4 * [0], - tf.stack([per_image_num_valid_boxes, -1, -1, -1])), - [-1, q, per_image_masks.shape[2].value, - per_image_masks.shape[3].value]) - if per_image_additional_fields is not None: - for key, tensor in per_image_additional_fields.items(): - additional_field_shape = tensor.get_shape() - additional_field_dim = len(additional_field_shape) - per_image_additional_fields[key] = tf.reshape( - tf.slice(per_image_additional_fields[key], - additional_field_dim * [0], - tf.stack([per_image_num_valid_boxes] + - (additional_field_dim - 1) * [-1])), - [-1] + [dim.value for dim in additional_field_shape[1:]]) - nmsed_boxlist = multiclass_non_max_suppression( - per_image_boxes, - per_image_scores, - score_thresh, - iou_thresh, - max_size_per_class, - max_total_size, - clip_window=clip_window, - change_coordinate_frame=change_coordinate_frame, - masks=per_image_masks, - additional_fields=per_image_additional_fields) - padded_boxlist = box_list_ops.pad_or_clip_box_list(nmsed_boxlist, - max_total_size) - num_detections = nmsed_boxlist.num_boxes() - nmsed_boxes = padded_boxlist.get() - nmsed_scores = padded_boxlist.get_field(fields.BoxListFields.scores) - nmsed_classes = padded_boxlist.get_field(fields.BoxListFields.classes) - nmsed_masks = 
padded_boxlist.get_field(fields.BoxListFields.masks) - nmsed_additional_fields = [ - padded_boxlist.get_field(key) for key in per_image_additional_fields - ] - return ([nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks] + - nmsed_additional_fields + [num_detections]) - - num_additional_fields = 0 - if additional_fields is not None: - num_additional_fields = len(additional_fields) - num_nmsed_outputs = 4 + num_additional_fields - - batch_outputs = tf.map_fn( - _single_image_nms_fn, - elems=([boxes, scores, masks] + list(additional_fields.values()) + - [num_valid_boxes]), - dtype=(num_nmsed_outputs * [tf.float32] + [tf.int32]), - parallel_iterations=parallel_iterations) - - batch_nmsed_boxes = batch_outputs[0] - batch_nmsed_scores = batch_outputs[1] - batch_nmsed_classes = batch_outputs[2] - batch_nmsed_masks = batch_outputs[3] - batch_nmsed_additional_fields = { - key: value - for key, value in zip(additional_fields, batch_outputs[4:-1]) - } - batch_num_detections = batch_outputs[-1] - - if original_masks is None: - batch_nmsed_masks = None - - if original_additional_fields is None: - batch_nmsed_additional_fields = None - - return (batch_nmsed_boxes, batch_nmsed_scores, batch_nmsed_classes, - batch_nmsed_masks, batch_nmsed_additional_fields, - batch_num_detections) diff --git a/object_detection/core/post_processing_test.py b/object_detection/core/post_processing_test.py deleted file mode 100644 index 542f8e18..00000000 --- a/object_detection/core/post_processing_test.py +++ /dev/null @@ -1,959 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for tensorflow_models.object_detection.core.post_processing.""" -import numpy as np -import tensorflow as tf -from object_detection.core import post_processing -from object_detection.core import standard_fields as fields - - -class MulticlassNonMaxSuppressionTest(tf.test.TestCase): - - def test_with_invalid_scores_size(self): - boxes = tf.constant([[[0, 0, 1, 1]], - [[0, 0.1, 1, 1.1]], - [[0, -0.1, 1, 0.9]], - [[0, 10, 1, 11]], - [[0, 10.1, 1, 11.1]], - [[0, 100, 1, 101]]], tf.float32) - scores = tf.constant([[.9], [.75], [.6], [.95], [.5]]) - iou_thresh = .5 - score_thresh = 0.6 - max_output_size = 3 - nms = post_processing.multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, max_output_size) - with self.test_session() as sess: - with self.assertRaisesWithPredicateMatch( - tf.errors.InvalidArgumentError, 'Incorrect scores field length'): - sess.run(nms.get()) - - def test_multiclass_nms_select_with_shared_boxes(self): - boxes = tf.constant([[[0, 0, 1, 1]], - [[0, 0.1, 1, 1.1]], - [[0, -0.1, 1, 0.9]], - [[0, 10, 1, 11]], - [[0, 10.1, 1, 11.1]], - [[0, 100, 1, 101]], - [[0, 1000, 1, 1002]], - [[0, 1000, 1, 1002.1]]], tf.float32) - scores = tf.constant([[.9, 0.01], [.75, 0.05], - [.6, 0.01], [.95, 0], - [.5, 0.01], [.3, 0.01], - [.01, .85], [.01, .5]]) - score_thresh = 0.1 - iou_thresh = .5 - max_output_size = 4 - - exp_nms_corners = [[0, 10, 1, 11], - [0, 0, 1, 1], - [0, 1000, 1, 1002], - [0, 100, 1, 101]] - exp_nms_scores = [.95, .9, .85, .3] 
- exp_nms_classes = [0, 0, 1, 0] - - nms = post_processing.multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, max_output_size) - with self.test_session() as sess: - nms_corners_output, nms_scores_output, nms_classes_output = sess.run( - [nms.get(), nms.get_field(fields.BoxListFields.scores), - nms.get_field(fields.BoxListFields.classes)]) - self.assertAllClose(nms_corners_output, exp_nms_corners) - self.assertAllClose(nms_scores_output, exp_nms_scores) - self.assertAllClose(nms_classes_output, exp_nms_classes) - - def test_multiclass_nms_select_with_shared_boxes_given_keypoints(self): - boxes = tf.constant([[[0, 0, 1, 1]], - [[0, 0.1, 1, 1.1]], - [[0, -0.1, 1, 0.9]], - [[0, 10, 1, 11]], - [[0, 10.1, 1, 11.1]], - [[0, 100, 1, 101]], - [[0, 1000, 1, 1002]], - [[0, 1000, 1, 1002.1]]], tf.float32) - scores = tf.constant([[.9, 0.01], [.75, 0.05], - [.6, 0.01], [.95, 0], - [.5, 0.01], [.3, 0.01], - [.01, .85], [.01, .5]]) - num_keypoints = 6 - keypoints = tf.tile( - tf.reshape(tf.range(8), [8, 1, 1]), - [1, num_keypoints, 2]) - score_thresh = 0.1 - iou_thresh = .5 - max_output_size = 4 - - exp_nms_corners = [[0, 10, 1, 11], - [0, 0, 1, 1], - [0, 1000, 1, 1002], - [0, 100, 1, 101]] - exp_nms_scores = [.95, .9, .85, .3] - exp_nms_classes = [0, 0, 1, 0] - exp_nms_keypoints_tensor = tf.tile( - tf.reshape(tf.constant([3, 0, 6, 5], dtype=tf.float32), [4, 1, 1]), - [1, num_keypoints, 2]) - - nms = post_processing.multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, max_output_size, - additional_fields={ - fields.BoxListFields.keypoints: keypoints}) - - with self.test_session() as sess: - (nms_corners_output, - nms_scores_output, - nms_classes_output, - nms_keypoints, - exp_nms_keypoints) = sess.run([ - nms.get(), - nms.get_field(fields.BoxListFields.scores), - nms.get_field(fields.BoxListFields.classes), - nms.get_field(fields.BoxListFields.keypoints), - exp_nms_keypoints_tensor - ]) - self.assertAllClose(nms_corners_output, 
exp_nms_corners) - self.assertAllClose(nms_scores_output, exp_nms_scores) - self.assertAllClose(nms_classes_output, exp_nms_classes) - self.assertAllEqual(nms_keypoints, exp_nms_keypoints) - - def test_multiclass_nms_with_shared_boxes_given_keypoint_heatmaps(self): - boxes = tf.constant([[[0, 0, 1, 1]], - [[0, 0.1, 1, 1.1]], - [[0, -0.1, 1, 0.9]], - [[0, 10, 1, 11]], - [[0, 10.1, 1, 11.1]], - [[0, 100, 1, 101]], - [[0, 1000, 1, 1002]], - [[0, 1000, 1, 1002.1]]], tf.float32) - - scores = tf.constant([[.9, 0.01], [.75, 0.05], - [.6, 0.01], [.95, 0], - [.5, 0.01], [.3, 0.01], - [.01, .85], [.01, .5]]) - - num_boxes = tf.shape(boxes)[0] - heatmap_height = 5 - heatmap_width = 5 - num_keypoints = 17 - keypoint_heatmaps = tf.ones( - [num_boxes, heatmap_height, heatmap_width, num_keypoints], - dtype=tf.float32) - - score_thresh = 0.1 - iou_thresh = .5 - max_output_size = 4 - exp_nms_corners = [[0, 10, 1, 11], - [0, 0, 1, 1], - [0, 1000, 1, 1002], - [0, 100, 1, 101]] - - exp_nms_scores = [.95, .9, .85, .3] - exp_nms_classes = [0, 0, 1, 0] - exp_nms_keypoint_heatmaps = np.ones( - (4, heatmap_height, heatmap_width, num_keypoints), dtype=np.float32) - - nms = post_processing.multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, max_output_size, - additional_fields={ - fields.BoxListFields.keypoint_heatmaps: keypoint_heatmaps}) - - with self.test_session() as sess: - (nms_corners_output, - nms_scores_output, - nms_classes_output, - nms_keypoint_heatmaps) = sess.run( - [nms.get(), - nms.get_field(fields.BoxListFields.scores), - nms.get_field(fields.BoxListFields.classes), - nms.get_field(fields.BoxListFields.keypoint_heatmaps)]) - - self.assertAllClose(nms_corners_output, exp_nms_corners) - self.assertAllClose(nms_scores_output, exp_nms_scores) - self.assertAllClose(nms_classes_output, exp_nms_classes) - self.assertAllEqual(nms_keypoint_heatmaps, exp_nms_keypoint_heatmaps) - - def test_multiclass_nms_with_additional_fields(self): - boxes = tf.constant([[[0, 
0, 1, 1]], - [[0, 0.1, 1, 1.1]], - [[0, -0.1, 1, 0.9]], - [[0, 10, 1, 11]], - [[0, 10.1, 1, 11.1]], - [[0, 100, 1, 101]], - [[0, 1000, 1, 1002]], - [[0, 1000, 1, 1002.1]]], tf.float32) - - scores = tf.constant([[.9, 0.01], [.75, 0.05], - [.6, 0.01], [.95, 0], - [.5, 0.01], [.3, 0.01], - [.01, .85], [.01, .5]]) - - coarse_boxes_key = 'coarse_boxes' - coarse_boxes = tf.constant([[0.1, 0.1, 1.1, 1.1], - [0.1, 0.2, 1.1, 1.2], - [0.1, -0.2, 1.1, 1.0], - [0.1, 10.1, 1.1, 11.1], - [0.1, 10.2, 1.1, 11.2], - [0.1, 100.1, 1.1, 101.1], - [0.1, 1000.1, 1.1, 1002.1], - [0.1, 1000.1, 1.1, 1002.2]], tf.float32) - - score_thresh = 0.1 - iou_thresh = .5 - max_output_size = 4 - - exp_nms_corners = np.array([[0, 10, 1, 11], - [0, 0, 1, 1], - [0, 1000, 1, 1002], - [0, 100, 1, 101]], dtype=np.float32) - - exp_nms_coarse_corners = np.array([[0.1, 10.1, 1.1, 11.1], - [0.1, 0.1, 1.1, 1.1], - [0.1, 1000.1, 1.1, 1002.1], - [0.1, 100.1, 1.1, 101.1]], - dtype=np.float32) - - exp_nms_scores = [.95, .9, .85, .3] - exp_nms_classes = [0, 0, 1, 0] - - nms = post_processing.multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, max_output_size, - additional_fields={coarse_boxes_key: coarse_boxes}) - - with self.test_session() as sess: - (nms_corners_output, - nms_scores_output, - nms_classes_output, - nms_coarse_corners) = sess.run( - [nms.get(), - nms.get_field(fields.BoxListFields.scores), - nms.get_field(fields.BoxListFields.classes), - nms.get_field(coarse_boxes_key)]) - - self.assertAllClose(nms_corners_output, exp_nms_corners) - self.assertAllClose(nms_scores_output, exp_nms_scores) - self.assertAllClose(nms_classes_output, exp_nms_classes) - self.assertAllEqual(nms_coarse_corners, exp_nms_coarse_corners) - - def test_multiclass_nms_select_with_shared_boxes_given_masks(self): - boxes = tf.constant([[[0, 0, 1, 1]], - [[0, 0.1, 1, 1.1]], - [[0, -0.1, 1, 0.9]], - [[0, 10, 1, 11]], - [[0, 10.1, 1, 11.1]], - [[0, 100, 1, 101]], - [[0, 1000, 1, 1002]], - [[0, 1000, 1, 1002.1]]], 
tf.float32) - scores = tf.constant([[.9, 0.01], [.75, 0.05], - [.6, 0.01], [.95, 0], - [.5, 0.01], [.3, 0.01], - [.01, .85], [.01, .5]]) - num_classes = 2 - mask_height = 3 - mask_width = 3 - masks = tf.tile( - tf.reshape(tf.range(8), [8, 1, 1, 1]), - [1, num_classes, mask_height, mask_width]) - score_thresh = 0.1 - iou_thresh = .5 - max_output_size = 4 - - exp_nms_corners = [[0, 10, 1, 11], - [0, 0, 1, 1], - [0, 1000, 1, 1002], - [0, 100, 1, 101]] - exp_nms_scores = [.95, .9, .85, .3] - exp_nms_classes = [0, 0, 1, 0] - exp_nms_masks_tensor = tf.tile( - tf.reshape(tf.constant([3, 0, 6, 5], dtype=tf.float32), [4, 1, 1]), - [1, mask_height, mask_width]) - - nms = post_processing.multiclass_non_max_suppression(boxes, scores, - score_thresh, - iou_thresh, - max_output_size, - masks=masks) - with self.test_session() as sess: - (nms_corners_output, - nms_scores_output, - nms_classes_output, - nms_masks, - exp_nms_masks) = sess.run([nms.get(), - nms.get_field(fields.BoxListFields.scores), - nms.get_field(fields.BoxListFields.classes), - nms.get_field(fields.BoxListFields.masks), - exp_nms_masks_tensor]) - self.assertAllClose(nms_corners_output, exp_nms_corners) - self.assertAllClose(nms_scores_output, exp_nms_scores) - self.assertAllClose(nms_classes_output, exp_nms_classes) - self.assertAllEqual(nms_masks, exp_nms_masks) - - def test_multiclass_nms_select_with_clip_window(self): - boxes = tf.constant([[[0, 0, 10, 10]], - [[1, 1, 11, 11]]], tf.float32) - scores = tf.constant([[.9], [.75]]) - clip_window = tf.constant([5, 4, 8, 7], tf.float32) - score_thresh = 0.0 - iou_thresh = 0.5 - max_output_size = 100 - - exp_nms_corners = [[5, 4, 8, 7]] - exp_nms_scores = [.9] - exp_nms_classes = [0] - - nms = post_processing.multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, max_output_size, - clip_window=clip_window) - with self.test_session() as sess: - nms_corners_output, nms_scores_output, nms_classes_output = sess.run( - [nms.get(), 
nms.get_field(fields.BoxListFields.scores), - nms.get_field(fields.BoxListFields.classes)]) - self.assertAllClose(nms_corners_output, exp_nms_corners) - self.assertAllClose(nms_scores_output, exp_nms_scores) - self.assertAllClose(nms_classes_output, exp_nms_classes) - - def test_multiclass_nms_select_with_clip_window_change_coordinate_frame(self): - boxes = tf.constant([[[0, 0, 10, 10]], - [[1, 1, 11, 11]]], tf.float32) - scores = tf.constant([[.9], [.75]]) - clip_window = tf.constant([5, 4, 8, 7], tf.float32) - score_thresh = 0.0 - iou_thresh = 0.5 - max_output_size = 100 - - exp_nms_corners = [[0, 0, 1, 1]] - exp_nms_scores = [.9] - exp_nms_classes = [0] - - nms = post_processing.multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, max_output_size, - clip_window=clip_window, change_coordinate_frame=True) - with self.test_session() as sess: - nms_corners_output, nms_scores_output, nms_classes_output = sess.run( - [nms.get(), nms.get_field(fields.BoxListFields.scores), - nms.get_field(fields.BoxListFields.classes)]) - self.assertAllClose(nms_corners_output, exp_nms_corners) - self.assertAllClose(nms_scores_output, exp_nms_scores) - self.assertAllClose(nms_classes_output, exp_nms_classes) - - def test_multiclass_nms_select_with_per_class_cap(self): - boxes = tf.constant([[[0, 0, 1, 1]], - [[0, 0.1, 1, 1.1]], - [[0, -0.1, 1, 0.9]], - [[0, 10, 1, 11]], - [[0, 10.1, 1, 11.1]], - [[0, 100, 1, 101]], - [[0, 1000, 1, 1002]], - [[0, 1000, 1, 1002.1]]], tf.float32) - scores = tf.constant([[.9, 0.01], [.75, 0.05], - [.6, 0.01], [.95, 0], - [.5, 0.01], [.3, 0.01], - [.01, .85], [.01, .5]]) - score_thresh = 0.1 - iou_thresh = .5 - max_size_per_class = 2 - - exp_nms_corners = [[0, 10, 1, 11], - [0, 0, 1, 1], - [0, 1000, 1, 1002]] - exp_nms_scores = [.95, .9, .85] - exp_nms_classes = [0, 0, 1] - - nms = post_processing.multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, max_size_per_class) - with self.test_session() as sess: - 
nms_corners_output, nms_scores_output, nms_classes_output = sess.run( - [nms.get(), nms.get_field(fields.BoxListFields.scores), - nms.get_field(fields.BoxListFields.classes)]) - self.assertAllClose(nms_corners_output, exp_nms_corners) - self.assertAllClose(nms_scores_output, exp_nms_scores) - self.assertAllClose(nms_classes_output, exp_nms_classes) - - def test_multiclass_nms_select_with_total_cap(self): - boxes = tf.constant([[[0, 0, 1, 1]], - [[0, 0.1, 1, 1.1]], - [[0, -0.1, 1, 0.9]], - [[0, 10, 1, 11]], - [[0, 10.1, 1, 11.1]], - [[0, 100, 1, 101]], - [[0, 1000, 1, 1002]], - [[0, 1000, 1, 1002.1]]], tf.float32) - scores = tf.constant([[.9, 0.01], [.75, 0.05], - [.6, 0.01], [.95, 0], - [.5, 0.01], [.3, 0.01], - [.01, .85], [.01, .5]]) - score_thresh = 0.1 - iou_thresh = .5 - max_size_per_class = 4 - max_total_size = 2 - - exp_nms_corners = [[0, 10, 1, 11], - [0, 0, 1, 1]] - exp_nms_scores = [.95, .9] - exp_nms_classes = [0, 0] - - nms = post_processing.multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, max_size_per_class, - max_total_size) - with self.test_session() as sess: - nms_corners_output, nms_scores_output, nms_classes_output = sess.run( - [nms.get(), nms.get_field(fields.BoxListFields.scores), - nms.get_field(fields.BoxListFields.classes)]) - self.assertAllClose(nms_corners_output, exp_nms_corners) - self.assertAllClose(nms_scores_output, exp_nms_scores) - self.assertAllClose(nms_classes_output, exp_nms_classes) - - def test_multiclass_nms_threshold_then_select_with_shared_boxes(self): - boxes = tf.constant([[[0, 0, 1, 1]], - [[0, 0.1, 1, 1.1]], - [[0, -0.1, 1, 0.9]], - [[0, 10, 1, 11]], - [[0, 10.1, 1, 11.1]], - [[0, 100, 1, 101]], - [[0, 1000, 1, 1002]], - [[0, 1000, 1, 1002.1]]], tf.float32) - scores = tf.constant([[.9], [.75], [.6], [.95], [.5], [.3], [.01], [.01]]) - score_thresh = 0.1 - iou_thresh = .5 - max_output_size = 3 - - exp_nms = [[0, 10, 1, 11], - [0, 0, 1, 1], - [0, 100, 1, 101]] - nms = 
post_processing.multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, max_output_size) - with self.test_session() as sess: - nms_output = sess.run(nms.get()) - self.assertAllClose(nms_output, exp_nms) - - def test_multiclass_nms_select_with_separate_boxes(self): - boxes = tf.constant([[[0, 0, 1, 1], [0, 0, 4, 5]], - [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]], - [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]], - [[0, 10, 1, 11], [0, 10, 1, 11]], - [[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]], - [[0, 100, 1, 101], [0, 100, 1, 101]], - [[0, 1000, 1, 1002], [0, 999, 2, 1004]], - [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]], - tf.float32) - scores = tf.constant([[.9, 0.01], [.75, 0.05], - [.6, 0.01], [.95, 0], - [.5, 0.01], [.3, 0.01], - [.01, .85], [.01, .5]]) - score_thresh = 0.1 - iou_thresh = .5 - max_output_size = 4 - - exp_nms_corners = [[0, 10, 1, 11], - [0, 0, 1, 1], - [0, 999, 2, 1004], - [0, 100, 1, 101]] - exp_nms_scores = [.95, .9, .85, .3] - exp_nms_classes = [0, 0, 1, 0] - - nms = post_processing.multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, max_output_size) - with self.test_session() as sess: - nms_corners_output, nms_scores_output, nms_classes_output = sess.run( - [nms.get(), nms.get_field(fields.BoxListFields.scores), - nms.get_field(fields.BoxListFields.classes)]) - self.assertAllClose(nms_corners_output, exp_nms_corners) - self.assertAllClose(nms_scores_output, exp_nms_scores) - self.assertAllClose(nms_classes_output, exp_nms_classes) - - def test_batch_multiclass_nms_with_batch_size_1(self): - boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]], - [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]], - [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]], - [[0, 10, 1, 11], [0, 10, 1, 11]], - [[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]], - [[0, 100, 1, 101], [0, 100, 1, 101]], - [[0, 1000, 1, 1002], [0, 999, 2, 1004]], - [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]], - tf.float32) - scores = tf.constant([[[.9, 0.01], [.75, 0.05], - [.6, 0.01], [.95, 0], 
- [.5, 0.01], [.3, 0.01], - [.01, .85], [.01, .5]]]) - score_thresh = 0.1 - iou_thresh = .5 - max_output_size = 4 - - exp_nms_corners = [[[0, 10, 1, 11], - [0, 0, 1, 1], - [0, 999, 2, 1004], - [0, 100, 1, 101]]] - exp_nms_scores = [[.95, .9, .85, .3]] - exp_nms_classes = [[0, 0, 1, 0]] - - (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, - nmsed_additional_fields, num_detections - ) = post_processing.batch_multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, - max_size_per_class=max_output_size, max_total_size=max_output_size) - - self.assertIsNone(nmsed_masks) - self.assertIsNone(nmsed_additional_fields) - - with self.test_session() as sess: - (nmsed_boxes, nmsed_scores, nmsed_classes, - num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes, - num_detections]) - self.assertAllClose(nmsed_boxes, exp_nms_corners) - self.assertAllClose(nmsed_scores, exp_nms_scores) - self.assertAllClose(nmsed_classes, exp_nms_classes) - self.assertEqual(num_detections, [4]) - - def test_batch_multiclass_nms_with_batch_size_2(self): - boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]], - [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]], - [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]], - [[0, 10, 1, 11], [0, 10, 1, 11]]], - [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]], - [[0, 100, 1, 101], [0, 100, 1, 101]], - [[0, 1000, 1, 1002], [0, 999, 2, 1004]], - [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]], - tf.float32) - scores = tf.constant([[[.9, 0.01], [.75, 0.05], - [.6, 0.01], [.95, 0]], - [[.5, 0.01], [.3, 0.01], - [.01, .85], [.01, .5]]]) - score_thresh = 0.1 - iou_thresh = .5 - max_output_size = 4 - - exp_nms_corners = np.array([[[0, 10, 1, 11], - [0, 0, 1, 1], - [0, 0, 0, 0], - [0, 0, 0, 0]], - [[0, 999, 2, 1004], - [0, 10.1, 1, 11.1], - [0, 100, 1, 101], - [0, 0, 0, 0]]]) - exp_nms_scores = np.array([[.95, .9, 0, 0], - [.85, .5, .3, 0]]) - exp_nms_classes = np.array([[0, 0, 0, 0], - [1, 0, 0, 0]]) - - (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, - 
nmsed_additional_fields, num_detections - ) = post_processing.batch_multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, - max_size_per_class=max_output_size, max_total_size=max_output_size) - - self.assertIsNone(nmsed_masks) - self.assertIsNone(nmsed_additional_fields) - # Check static shapes - self.assertAllEqual(nmsed_boxes.shape.as_list(), - exp_nms_corners.shape) - self.assertAllEqual(nmsed_scores.shape.as_list(), - exp_nms_scores.shape) - self.assertAllEqual(nmsed_classes.shape.as_list(), - exp_nms_classes.shape) - self.assertEqual(num_detections.shape.as_list(), [2]) - - with self.test_session() as sess: - (nmsed_boxes, nmsed_scores, nmsed_classes, - num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes, - num_detections]) - self.assertAllClose(nmsed_boxes, exp_nms_corners) - self.assertAllClose(nmsed_scores, exp_nms_scores) - self.assertAllClose(nmsed_classes, exp_nms_classes) - self.assertAllClose(num_detections, [2, 3]) - - def test_batch_multiclass_nms_with_masks(self): - boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]], - [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]], - [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]], - [[0, 10, 1, 11], [0, 10, 1, 11]]], - [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]], - [[0, 100, 1, 101], [0, 100, 1, 101]], - [[0, 1000, 1, 1002], [0, 999, 2, 1004]], - [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]], - tf.float32) - scores = tf.constant([[[.9, 0.01], [.75, 0.05], - [.6, 0.01], [.95, 0]], - [[.5, 0.01], [.3, 0.01], - [.01, .85], [.01, .5]]]) - masks = tf.constant([[[[[0, 1], [2, 3]], [[1, 2], [3, 4]]], - [[[2, 3], [4, 5]], [[3, 4], [5, 6]]], - [[[4, 5], [6, 7]], [[5, 6], [7, 8]]], - [[[6, 7], [8, 9]], [[7, 8], [9, 10]]]], - [[[[8, 9], [10, 11]], [[9, 10], [11, 12]]], - [[[10, 11], [12, 13]], [[11, 12], [13, 14]]], - [[[12, 13], [14, 15]], [[13, 14], [15, 16]]], - [[[14, 15], [16, 17]], [[15, 16], [17, 18]]]]], - tf.float32) - score_thresh = 0.1 - iou_thresh = .5 - max_output_size = 4 - - exp_nms_corners = 
np.array([[[0, 10, 1, 11], - [0, 0, 1, 1], - [0, 0, 0, 0], - [0, 0, 0, 0]], - [[0, 999, 2, 1004], - [0, 10.1, 1, 11.1], - [0, 100, 1, 101], - [0, 0, 0, 0]]]) - exp_nms_scores = np.array([[.95, .9, 0, 0], - [.85, .5, .3, 0]]) - exp_nms_classes = np.array([[0, 0, 0, 0], - [1, 0, 0, 0]]) - exp_nms_masks = np.array([[[[6, 7], [8, 9]], - [[0, 1], [2, 3]], - [[0, 0], [0, 0]], - [[0, 0], [0, 0]]], - [[[13, 14], [15, 16]], - [[8, 9], [10, 11]], - [[10, 11], [12, 13]], - [[0, 0], [0, 0]]]]) - - (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, - nmsed_additional_fields, num_detections - ) = post_processing.batch_multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, - max_size_per_class=max_output_size, max_total_size=max_output_size, - masks=masks) - - self.assertIsNone(nmsed_additional_fields) - # Check static shapes - self.assertAllEqual(nmsed_boxes.shape.as_list(), exp_nms_corners.shape) - self.assertAllEqual(nmsed_scores.shape.as_list(), exp_nms_scores.shape) - self.assertAllEqual(nmsed_classes.shape.as_list(), exp_nms_classes.shape) - self.assertAllEqual(nmsed_masks.shape.as_list(), exp_nms_masks.shape) - self.assertEqual(num_detections.shape.as_list(), [2]) - - with self.test_session() as sess: - (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, - num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes, - nmsed_masks, num_detections]) - - self.assertAllClose(nmsed_boxes, exp_nms_corners) - self.assertAllClose(nmsed_scores, exp_nms_scores) - self.assertAllClose(nmsed_classes, exp_nms_classes) - self.assertAllClose(num_detections, [2, 3]) - self.assertAllClose(nmsed_masks, exp_nms_masks) - - def test_batch_multiclass_nms_with_additional_fields(self): - boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]], - [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]], - [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]], - [[0, 10, 1, 11], [0, 10, 1, 11]]], - [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]], - [[0, 100, 1, 101], [0, 100, 1, 101]], - [[0, 1000, 1, 
1002], [0, 999, 2, 1004]], - [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]], - tf.float32) - scores = tf.constant([[[.9, 0.01], [.75, 0.05], - [.6, 0.01], [.95, 0]], - [[.5, 0.01], [.3, 0.01], - [.01, .85], [.01, .5]]]) - additional_fields = { - 'keypoints': tf.constant( - [[[[6, 7], [8, 9]], - [[0, 1], [2, 3]], - [[0, 0], [0, 0]], - [[0, 0], [0, 0]]], - [[[13, 14], [15, 16]], - [[8, 9], [10, 11]], - [[10, 11], [12, 13]], - [[0, 0], [0, 0]]]], - tf.float32) - } - score_thresh = 0.1 - iou_thresh = .5 - max_output_size = 4 - - exp_nms_corners = np.array([[[0, 10, 1, 11], - [0, 0, 1, 1], - [0, 0, 0, 0], - [0, 0, 0, 0]], - [[0, 999, 2, 1004], - [0, 10.1, 1, 11.1], - [0, 100, 1, 101], - [0, 0, 0, 0]]]) - exp_nms_scores = np.array([[.95, .9, 0, 0], - [.85, .5, .3, 0]]) - exp_nms_classes = np.array([[0, 0, 0, 0], - [1, 0, 0, 0]]) - exp_nms_additional_fields = { - 'keypoints': np.array([[[[0, 0], [0, 0]], - [[6, 7], [8, 9]], - [[0, 0], [0, 0]], - [[0, 0], [0, 0]]], - [[[10, 11], [12, 13]], - [[13, 14], [15, 16]], - [[8, 9], [10, 11]], - [[0, 0], [0, 0]]]]) - } - - (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, - nmsed_additional_fields, num_detections - ) = post_processing.batch_multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, - max_size_per_class=max_output_size, max_total_size=max_output_size, - additional_fields=additional_fields) - - self.assertIsNone(nmsed_masks) - # Check static shapes - self.assertAllEqual(nmsed_boxes.shape.as_list(), exp_nms_corners.shape) - self.assertAllEqual(nmsed_scores.shape.as_list(), exp_nms_scores.shape) - self.assertAllEqual(nmsed_classes.shape.as_list(), exp_nms_classes.shape) - self.assertEqual(len(nmsed_additional_fields), - len(exp_nms_additional_fields)) - for key in exp_nms_additional_fields: - self.assertAllEqual(nmsed_additional_fields[key].shape.as_list(), - exp_nms_additional_fields[key].shape) - self.assertEqual(num_detections.shape.as_list(), [2]) - - with self.test_session() as sess: - 
(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_additional_fields, - num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes, - nmsed_additional_fields, num_detections]) - - self.assertAllClose(nmsed_boxes, exp_nms_corners) - self.assertAllClose(nmsed_scores, exp_nms_scores) - self.assertAllClose(nmsed_classes, exp_nms_classes) - for key in exp_nms_additional_fields: - self.assertAllClose(nmsed_additional_fields[key], - exp_nms_additional_fields[key]) - self.assertAllClose(num_detections, [2, 3]) - - def test_batch_multiclass_nms_with_dynamic_batch_size(self): - boxes_placeholder = tf.placeholder(tf.float32, shape=(None, None, 2, 4)) - scores_placeholder = tf.placeholder(tf.float32, shape=(None, None, 2)) - masks_placeholder = tf.placeholder(tf.float32, shape=(None, None, 2, 2, 2)) - - boxes = np.array([[[[0, 0, 1, 1], [0, 0, 4, 5]], - [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]], - [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]], - [[0, 10, 1, 11], [0, 10, 1, 11]]], - [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]], - [[0, 100, 1, 101], [0, 100, 1, 101]], - [[0, 1000, 1, 1002], [0, 999, 2, 1004]], - [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]]) - scores = np.array([[[.9, 0.01], [.75, 0.05], - [.6, 0.01], [.95, 0]], - [[.5, 0.01], [.3, 0.01], - [.01, .85], [.01, .5]]]) - masks = np.array([[[[[0, 1], [2, 3]], [[1, 2], [3, 4]]], - [[[2, 3], [4, 5]], [[3, 4], [5, 6]]], - [[[4, 5], [6, 7]], [[5, 6], [7, 8]]], - [[[6, 7], [8, 9]], [[7, 8], [9, 10]]]], - [[[[8, 9], [10, 11]], [[9, 10], [11, 12]]], - [[[10, 11], [12, 13]], [[11, 12], [13, 14]]], - [[[12, 13], [14, 15]], [[13, 14], [15, 16]]], - [[[14, 15], [16, 17]], [[15, 16], [17, 18]]]]]) - score_thresh = 0.1 - iou_thresh = .5 - max_output_size = 4 - - exp_nms_corners = np.array([[[0, 10, 1, 11], - [0, 0, 1, 1], - [0, 0, 0, 0], - [0, 0, 0, 0]], - [[0, 999, 2, 1004], - [0, 10.1, 1, 11.1], - [0, 100, 1, 101], - [0, 0, 0, 0]]]) - exp_nms_scores = np.array([[.95, .9, 0, 0], - [.85, .5, .3, 0]]) - exp_nms_classes = np.array([[0, 0, 
0, 0], - [1, 0, 0, 0]]) - exp_nms_masks = np.array([[[[6, 7], [8, 9]], - [[0, 1], [2, 3]], - [[0, 0], [0, 0]], - [[0, 0], [0, 0]]], - [[[13, 14], [15, 16]], - [[8, 9], [10, 11]], - [[10, 11], [12, 13]], - [[0, 0], [0, 0]]]]) - - (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, - nmsed_additional_fields, num_detections - ) = post_processing.batch_multiclass_non_max_suppression( - boxes_placeholder, scores_placeholder, score_thresh, iou_thresh, - max_size_per_class=max_output_size, max_total_size=max_output_size, - masks=masks_placeholder) - - self.assertIsNone(nmsed_additional_fields) - # Check static shapes - self.assertAllEqual(nmsed_boxes.shape.as_list(), [None, 4, 4]) - self.assertAllEqual(nmsed_scores.shape.as_list(), [None, 4]) - self.assertAllEqual(nmsed_classes.shape.as_list(), [None, 4]) - self.assertAllEqual(nmsed_masks.shape.as_list(), [None, 4, 2, 2]) - self.assertEqual(num_detections.shape.as_list(), [None]) - - with self.test_session() as sess: - (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, - num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes, - nmsed_masks, num_detections], - feed_dict={boxes_placeholder: boxes, - scores_placeholder: scores, - masks_placeholder: masks}) - self.assertAllClose(nmsed_boxes, exp_nms_corners) - self.assertAllClose(nmsed_scores, exp_nms_scores) - self.assertAllClose(nmsed_classes, exp_nms_classes) - self.assertAllClose(num_detections, [2, 3]) - self.assertAllClose(nmsed_masks, exp_nms_masks) - - def test_batch_multiclass_nms_with_masks_and_num_valid_boxes(self): - boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]], - [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]], - [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]], - [[0, 10, 1, 11], [0, 10, 1, 11]]], - [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]], - [[0, 100, 1, 101], [0, 100, 1, 101]], - [[0, 1000, 1, 1002], [0, 999, 2, 1004]], - [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]], - tf.float32) - scores = tf.constant([[[.9, 0.01], [.75, 0.05], - [.6, 0.01], [.95, 
0]], - [[.5, 0.01], [.3, 0.01], - [.01, .85], [.01, .5]]]) - masks = tf.constant([[[[[0, 1], [2, 3]], [[1, 2], [3, 4]]], - [[[2, 3], [4, 5]], [[3, 4], [5, 6]]], - [[[4, 5], [6, 7]], [[5, 6], [7, 8]]], - [[[6, 7], [8, 9]], [[7, 8], [9, 10]]]], - [[[[8, 9], [10, 11]], [[9, 10], [11, 12]]], - [[[10, 11], [12, 13]], [[11, 12], [13, 14]]], - [[[12, 13], [14, 15]], [[13, 14], [15, 16]]], - [[[14, 15], [16, 17]], [[15, 16], [17, 18]]]]], - tf.float32) - num_valid_boxes = tf.constant([1, 1], tf.int32) - score_thresh = 0.1 - iou_thresh = .5 - max_output_size = 4 - - exp_nms_corners = [[[0, 0, 1, 1], - [0, 0, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 0]], - [[0, 10.1, 1, 11.1], - [0, 0, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 0]]] - exp_nms_scores = [[.9, 0, 0, 0], - [.5, 0, 0, 0]] - exp_nms_classes = [[0, 0, 0, 0], - [0, 0, 0, 0]] - exp_nms_masks = [[[[0, 1], [2, 3]], - [[0, 0], [0, 0]], - [[0, 0], [0, 0]], - [[0, 0], [0, 0]]], - [[[8, 9], [10, 11]], - [[0, 0], [0, 0]], - [[0, 0], [0, 0]], - [[0, 0], [0, 0]]]] - - (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, - nmsed_additional_fields, num_detections - ) = post_processing.batch_multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, - max_size_per_class=max_output_size, max_total_size=max_output_size, - num_valid_boxes=num_valid_boxes, masks=masks) - - self.assertIsNone(nmsed_additional_fields) - - with self.test_session() as sess: - (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, - num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes, - nmsed_masks, num_detections]) - self.assertAllClose(nmsed_boxes, exp_nms_corners) - self.assertAllClose(nmsed_scores, exp_nms_scores) - self.assertAllClose(nmsed_classes, exp_nms_classes) - self.assertAllClose(num_detections, [1, 1]) - self.assertAllClose(nmsed_masks, exp_nms_masks) - - def test_batch_multiclass_nms_with_additional_fields_and_num_valid_boxes( - self): - boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]], - [[0, 0.1, 1, 1.1], [0, 0.1, 
2, 1.1]], - [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]], - [[0, 10, 1, 11], [0, 10, 1, 11]]], - [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]], - [[0, 100, 1, 101], [0, 100, 1, 101]], - [[0, 1000, 1, 1002], [0, 999, 2, 1004]], - [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]], - tf.float32) - scores = tf.constant([[[.9, 0.01], [.75, 0.05], - [.6, 0.01], [.95, 0]], - [[.5, 0.01], [.3, 0.01], - [.01, .85], [.01, .5]]]) - additional_fields = { - 'keypoints': tf.constant( - [[[[6, 7], [8, 9]], - [[0, 1], [2, 3]], - [[0, 0], [0, 0]], - [[0, 0], [0, 0]]], - [[[13, 14], [15, 16]], - [[8, 9], [10, 11]], - [[10, 11], [12, 13]], - [[0, 0], [0, 0]]]], - tf.float32) - } - num_valid_boxes = tf.constant([1, 1], tf.int32) - score_thresh = 0.1 - iou_thresh = .5 - max_output_size = 4 - - exp_nms_corners = [[[0, 0, 1, 1], - [0, 0, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 0]], - [[0, 10.1, 1, 11.1], - [0, 0, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 0]]] - exp_nms_scores = [[.9, 0, 0, 0], - [.5, 0, 0, 0]] - exp_nms_classes = [[0, 0, 0, 0], - [0, 0, 0, 0]] - exp_nms_additional_fields = { - 'keypoints': np.array([[[[6, 7], [8, 9]], - [[0, 0], [0, 0]], - [[0, 0], [0, 0]], - [[0, 0], [0, 0]]], - [[[13, 14], [15, 16]], - [[0, 0], [0, 0]], - [[0, 0], [0, 0]], - [[0, 0], [0, 0]]]]) - } - - (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, - nmsed_additional_fields, num_detections - ) = post_processing.batch_multiclass_non_max_suppression( - boxes, scores, score_thresh, iou_thresh, - max_size_per_class=max_output_size, max_total_size=max_output_size, - num_valid_boxes=num_valid_boxes, - additional_fields=additional_fields) - - self.assertIsNone(nmsed_masks) - - with self.test_session() as sess: - (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_additional_fields, - num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes, - nmsed_additional_fields, num_detections]) - - self.assertAllClose(nmsed_boxes, exp_nms_corners) - self.assertAllClose(nmsed_scores, exp_nms_scores) - 
self.assertAllClose(nmsed_classes, exp_nms_classes) - for key in exp_nms_additional_fields: - self.assertAllClose(nmsed_additional_fields[key], - exp_nms_additional_fields[key]) - self.assertAllClose(num_detections, [1, 1]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/core/prefetcher.py b/object_detection/core/prefetcher.py deleted file mode 100644 index e690c599..00000000 --- a/object_detection/core/prefetcher.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Provides functions to prefetch tensors to feed into models.""" -import tensorflow as tf - - -def prefetch(tensor_dict, capacity): - """Creates a prefetch queue for tensors. - - Creates a FIFO queue to asynchronously enqueue tensor_dicts and returns a - dequeue op that evaluates to a tensor_dict. This function is useful in - prefetching preprocessed tensors so that the data is readily available for - consumers. - - Example input pipeline when you don't need batching: - ---------------------------------------------------- - key, string_tensor = slim.parallel_reader.parallel_read(...) - tensor_dict = decoder.decode(string_tensor) - tensor_dict = preprocessor.preprocess(tensor_dict, ...) 
- prefetch_queue = prefetcher.prefetch(tensor_dict, capacity=20) - tensor_dict = prefetch_queue.dequeue() - outputs = Model(tensor_dict) - ... - ---------------------------------------------------- - - For input pipelines with batching, refer to core/batcher.py - - Args: - tensor_dict: a dictionary of tensors to prefetch. - capacity: the size of the prefetch queue. - - Returns: - a FIFO prefetcher queue - """ - names = list(tensor_dict.keys()) - dtypes = [t.dtype for t in tensor_dict.values()] - shapes = [t.get_shape() for t in tensor_dict.values()] - prefetch_queue = tf.PaddingFIFOQueue(capacity, dtypes=dtypes, - shapes=shapes, - names=names, - name='prefetch_queue') - enqueue_op = prefetch_queue.enqueue(tensor_dict) - tf.train.queue_runner.add_queue_runner(tf.train.queue_runner.QueueRunner( - prefetch_queue, [enqueue_op])) - tf.summary.scalar('queue/%s/fraction_of_%d_full' % (prefetch_queue.name, - capacity), - tf.to_float(prefetch_queue.size()) * (1. / capacity)) - return prefetch_queue diff --git a/object_detection/core/prefetcher_test.py b/object_detection/core/prefetcher_test.py deleted file mode 100644 index 63f557e3..00000000 --- a/object_detection/core/prefetcher_test.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for object_detection.core.prefetcher.""" -import tensorflow as tf - -from object_detection.core import prefetcher - -slim = tf.contrib.slim - - -class PrefetcherTest(tf.test.TestCase): - - def test_prefetch_tensors_with_fully_defined_shapes(self): - with self.test_session() as sess: - batch_size = 10 - image_size = 32 - num_batches = 5 - examples = tf.Variable(tf.constant(0, dtype=tf.int64)) - counter = examples.count_up_to(num_batches) - image = tf.random_normal([batch_size, image_size, - image_size, 3], - dtype=tf.float32, - name='images') - label = tf.random_uniform([batch_size, 1], 0, 10, - dtype=tf.int32, name='labels') - - prefetch_queue = prefetcher.prefetch(tensor_dict={'counter': counter, - 'image': image, - 'label': label}, - capacity=100) - tensor_dict = prefetch_queue.dequeue() - - self.assertAllEqual(tensor_dict['image'].get_shape().as_list(), - [batch_size, image_size, image_size, 3]) - self.assertAllEqual(tensor_dict['label'].get_shape().as_list(), - [batch_size, 1]) - - tf.initialize_all_variables().run() - with slim.queues.QueueRunners(sess): - for _ in range(num_batches): - results = sess.run(tensor_dict) - self.assertEquals(results['image'].shape, - (batch_size, image_size, image_size, 3)) - self.assertEquals(results['label'].shape, (batch_size, 1)) - with self.assertRaises(tf.errors.OutOfRangeError): - sess.run(tensor_dict) - - def test_prefetch_tensors_with_partially_defined_shapes(self): - with self.test_session() as sess: - batch_size = 10 - image_size = 32 - num_batches = 5 - examples = tf.Variable(tf.constant(0, dtype=tf.int64)) - counter = examples.count_up_to(num_batches) - image = tf.random_normal([batch_size, - tf.Variable(image_size), - tf.Variable(image_size), 3], - dtype=tf.float32, - name='image') - image.set_shape([batch_size, None, None, 3]) - label = tf.random_uniform([batch_size, tf.Variable(1)], 0, - 10, dtype=tf.int32, name='label') - 
label.set_shape([batch_size, None]) - - prefetch_queue = prefetcher.prefetch(tensor_dict={'counter': counter, - 'image': image, - 'label': label}, - capacity=100) - tensor_dict = prefetch_queue.dequeue() - - self.assertAllEqual(tensor_dict['image'].get_shape().as_list(), - [batch_size, None, None, 3]) - self.assertAllEqual(tensor_dict['label'].get_shape().as_list(), - [batch_size, None]) - - tf.initialize_all_variables().run() - with slim.queues.QueueRunners(sess): - for _ in range(num_batches): - results = sess.run(tensor_dict) - self.assertEquals(results['image'].shape, - (batch_size, image_size, image_size, 3)) - self.assertEquals(results['label'].shape, (batch_size, 1)) - with self.assertRaises(tf.errors.OutOfRangeError): - sess.run(tensor_dict) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/core/preprocessor.py b/object_detection/core/preprocessor.py deleted file mode 100644 index 33435f7b..00000000 --- a/object_detection/core/preprocessor.py +++ /dev/null @@ -1,2562 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Preprocess images and bounding boxes for detection. 
- -We perform two sets of operations in preprocessing stage: -(a) operations that are applied to both training and testing data, -(b) operations that are applied only to training data for the purpose of - data augmentation. - -A preprocessing function receives a set of inputs, -e.g. an image and bounding boxes, -performs an operation on them, and returns them. -Some examples are: randomly cropping the image, randomly mirroring the image, - randomly changing the brightness, contrast, hue and - randomly jittering the bounding boxes. - -The preprocess function receives a tensor_dict which is a dictionary that maps -different field names to their tensors. For example, -tensor_dict[fields.InputDataFields.image] holds the image tensor. -The image is a rank 4 tensor: [1, height, width, channels] with -dtype=tf.float32. The groundtruth_boxes is a rank 2 tensor: [N, 4] where -in each row there is a box with [ymin xmin ymax xmax]. -Boxes are in normalized coordinates meaning -their coordinate values range in [0, 1] - -Important Note: In tensor_dict, images is a rank 4 tensor, but preprocessing -functions receive a rank 3 tensor for processing the image. Thus, inside the -preprocess function we squeeze the image to become a rank 3 tensor and then -we pass it to the functions. At the end of the preprocess we expand the image -back to rank 4. -""" - -import sys -import tensorflow as tf - -from tensorflow.python.ops import control_flow_ops - -from object_detection.core import box_list -from object_detection.core import box_list_ops -from object_detection.core import keypoint_ops -from object_detection.core import standard_fields as fields - - -def _apply_with_random_selector(x, func, num_cases): - """Computes func(x, sel), with sel sampled from [0...num_cases-1]. - - Args: - x: input Tensor. - func: Python function to apply. - num_cases: Python int32, number of cases to sample sel from. 
- - Returns: - The result of func(x, sel), where func receives the value of the - selector as a python integer, but sel is sampled dynamically. - """ - rand_sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32) - # Pass the real x only to one of the func calls. - return control_flow_ops.merge([func( - control_flow_ops.switch(x, tf.equal(rand_sel, case))[1], case) - for case in range(num_cases)])[0] - - -def _apply_with_random_selector_tuples(x, func, num_cases): - """Computes func(x, sel), with sel sampled from [0...num_cases-1]. - - Args: - x: A tuple of input tensors. - func: Python function to apply. - num_cases: Python int32, number of cases to sample sel from. - - Returns: - The result of func(x, sel), where func receives the value of the - selector as a python integer, but sel is sampled dynamically. - """ - num_inputs = len(x) - rand_sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32) - # Pass the real x only to one of the func calls. - - tuples = [list() for t in x] - for case in range(num_cases): - new_x = [control_flow_ops.switch(t, tf.equal(rand_sel, case))[1] for t in x] - output = func(tuple(new_x), case) - for j in range(num_inputs): - tuples[j].append(output[j]) - - for i in range(num_inputs): - tuples[i] = control_flow_ops.merge(tuples[i])[0] - return tuple(tuples) - - -def _random_integer(minval, maxval, seed): - """Returns a random 0-D tensor between minval and maxval. - - Args: - minval: minimum value of the random tensor. - maxval: maximum value of the random tensor. - seed: random seed. - - Returns: - A random 0-D tensor between minval and maxval. - """ - return tf.random_uniform( - [], minval=minval, maxval=maxval, dtype=tf.int32, seed=seed) - - -def normalize_image(image, original_minval, original_maxval, target_minval, - target_maxval): - """Normalizes pixel values in the image. - - Moves the pixel values from the current [original_minval, original_maxval] - range to a the [target_minval, target_maxval] range. 
- - Args: - image: rank 3 float32 tensor containing 1 - image -> [height, width, channels]. - original_minval: current image minimum value. - original_maxval: current image maximum value. - target_minval: target image minimum value. - target_maxval: target image maximum value. - - Returns: - image: image which is the same shape as input image. - """ - with tf.name_scope('NormalizeImage', values=[image]): - original_minval = float(original_minval) - original_maxval = float(original_maxval) - target_minval = float(target_minval) - target_maxval = float(target_maxval) - image = tf.to_float(image) - image = tf.subtract(image, original_minval) - image = tf.multiply(image, (target_maxval - target_minval) / - (original_maxval - original_minval)) - image = tf.add(image, target_minval) - return image - - -def retain_boxes_above_threshold(boxes, - labels, - label_scores, - masks=None, - keypoints=None, - threshold=0.0): - """Retains boxes whose label score is above a given threshold. - - If the label score for a box is missing (represented by NaN), the box is - retained. The boxes that don't pass the threshold will not appear in the - returned tensor. - - Args: - boxes: float32 tensor of shape [num_instance, 4] representing boxes - location in normalized coordinates. - labels: rank 1 int32 tensor of shape [num_instance] containing the object - classes. - label_scores: float32 tensor of shape [num_instance] representing the - score for each box. - masks: (optional) rank 3 float32 tensor with shape - [num_instances, height, width] containing instance masks. The masks are of - the same height, width as the input `image`. - keypoints: (optional) rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2]. The keypoints are in y-x normalized - coordinates. - threshold: scalar python float. 
- - Returns: - retained_boxes: [num_retained_instance, 4] - retianed_labels: [num_retained_instance] - retained_label_scores: [num_retained_instance] - - If masks, or keypoints are not None, the function also returns: - - retained_masks: [num_retained_instance, height, width] - retained_keypoints: [num_retained_instance, num_keypoints, 2] - """ - with tf.name_scope('RetainBoxesAboveThreshold', - values=[boxes, labels, label_scores]): - indices = tf.where( - tf.logical_or(label_scores > threshold, tf.is_nan(label_scores))) - indices = tf.squeeze(indices, axis=1) - retained_boxes = tf.gather(boxes, indices) - retained_labels = tf.gather(labels, indices) - retained_label_scores = tf.gather(label_scores, indices) - result = [retained_boxes, retained_labels, retained_label_scores] - - if masks is not None: - retained_masks = tf.gather(masks, indices) - result.append(retained_masks) - - if keypoints is not None: - retained_keypoints = tf.gather(keypoints, indices) - result.append(retained_keypoints) - - return result - - -def _flip_boxes_left_right(boxes): - """Left-right flip the boxes. - - Args: - boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. - - Returns: - Flipped boxes. - """ - ymin, xmin, ymax, xmax = tf.split(value=boxes, num_or_size_splits=4, axis=1) - flipped_xmin = tf.subtract(1.0, xmax) - flipped_xmax = tf.subtract(1.0, xmin) - flipped_boxes = tf.concat([ymin, flipped_xmin, ymax, flipped_xmax], 1) - return flipped_boxes - - -def _flip_boxes_up_down(boxes): - """Up-down flip the boxes. - - Args: - boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. - - Returns: - Flipped boxes. 
- """ - ymin, xmin, ymax, xmax = tf.split(value=boxes, num_or_size_splits=4, axis=1) - flipped_ymin = tf.subtract(1.0, ymax) - flipped_ymax = tf.subtract(1.0, ymin) - flipped_boxes = tf.concat([flipped_ymin, xmin, flipped_ymax, xmax], 1) - return flipped_boxes - - -def _rot90_boxes(boxes): - """Rotate boxes counter-clockwise by 90 degrees. - - Args: - boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. - - Returns: - Rotated boxes. - """ - ymin, xmin, ymax, xmax = tf.split(value=boxes, num_or_size_splits=4, axis=1) - rotated_ymin = tf.subtract(1.0, xmax) - rotated_ymax = tf.subtract(1.0, xmin) - rotated_xmin = ymin - rotated_xmax = ymax - rotated_boxes = tf.concat( - [rotated_ymin, rotated_xmin, rotated_ymax, rotated_xmax], 1) - return rotated_boxes - - -def _flip_masks_left_right(masks): - """Left-right flip masks. - - Args: - masks: rank 3 float32 tensor with shape - [num_instances, height, width] representing instance masks. - - Returns: - flipped masks: rank 3 float32 tensor with shape - [num_instances, height, width] representing instance masks. - """ - return masks[:, :, ::-1] - - -def _flip_masks_up_down(masks): - """Up-down flip masks. - - Args: - masks: rank 3 float32 tensor with shape - [num_instances, height, width] representing instance masks. - - Returns: - flipped masks: rank 3 float32 tensor with shape - [num_instances, height, width] representing instance masks. - """ - return masks[:, ::-1, :] - - -def _rot90_masks(masks): - """Rotate masks counter-clockwise by 90 degrees. - - Args: - masks: rank 3 float32 tensor with shape - [num_instances, height, width] representing instance masks. - - Returns: - rotated masks: rank 3 float32 tensor with shape - [num_instances, height, width] representing instance masks. 
- """ - masks = tf.transpose(masks, [0, 2, 1]) - return masks[:, ::-1, :] - - -def random_horizontal_flip(image, - boxes=None, - masks=None, - keypoints=None, - keypoint_flip_permutation=None, - seed=None): - """Randomly flips the image and detections horizontally. - - The probability of flipping the image is 50%. - - Args: - image: rank 3 float32 tensor with shape [height, width, channels]. - boxes: (optional) rank 2 float32 tensor with shape [N, 4] - containing the bounding boxes. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. - masks: (optional) rank 3 float32 tensor with shape - [num_instances, height, width] containing instance masks. The masks - are of the same height, width as the input `image`. - keypoints: (optional) rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2]. The keypoints are in y-x - normalized coordinates. - keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip - permutation. - seed: random seed - - Returns: - image: image which is the same shape as input image. - - If boxes, masks, keypoints, and keypoint_flip_permutation are not None, - the function also returns the following tensors. - - boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - masks: rank 3 float32 tensor with shape [num_instances, height, width] - containing instance masks. - keypoints: rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2] - - Raises: - ValueError: if keypoints are provided but keypoint_flip_permutation is not. 
- """ - - def _flip_image(image): - # flip image - image_flipped = tf.image.flip_left_right(image) - return image_flipped - - if keypoints is not None and keypoint_flip_permutation is None: - raise ValueError( - 'keypoints are provided but keypoints_flip_permutation is not provided') - - with tf.name_scope('RandomHorizontalFlip', values=[image, boxes]): - result = [] - # random variable defining whether to do flip or not - do_a_flip_random = tf.greater(tf.random_uniform([], seed=seed), 0.5) - - # flip image - image = tf.cond(do_a_flip_random, lambda: _flip_image(image), lambda: image) - result.append(image) - - # flip boxes - if boxes is not None: - boxes = tf.cond(do_a_flip_random, lambda: _flip_boxes_left_right(boxes), - lambda: boxes) - result.append(boxes) - - # flip masks - if masks is not None: - masks = tf.cond(do_a_flip_random, lambda: _flip_masks_left_right(masks), - lambda: masks) - result.append(masks) - - # flip keypoints - if keypoints is not None and keypoint_flip_permutation is not None: - permutation = keypoint_flip_permutation - keypoints = tf.cond( - do_a_flip_random, - lambda: keypoint_ops.flip_horizontal(keypoints, 0.5, permutation), - lambda: keypoints) - result.append(keypoints) - - return tuple(result) - - -def random_vertical_flip(image, - boxes=None, - masks=None, - keypoints=None, - keypoint_flip_permutation=None, - seed=None): - """Randomly flips the image and detections vertically. - - The probability of flipping the image is 50%. - - Args: - image: rank 3 float32 tensor with shape [height, width, channels]. - boxes: (optional) rank 2 float32 tensor with shape [N, 4] - containing the bounding boxes. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. - masks: (optional) rank 3 float32 tensor with shape - [num_instances, height, width] containing instance masks. The masks - are of the same height, width as the input `image`. 
- keypoints: (optional) rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2]. The keypoints are in y-x - normalized coordinates. - keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip - permutation. - seed: random seed - - Returns: - image: image which is the same shape as input image. - - If boxes, masks, keypoints, and keypoint_flip_permutation are not None, - the function also returns the following tensors. - - boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - masks: rank 3 float32 tensor with shape [num_instances, height, width] - containing instance masks. - keypoints: rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2] - - Raises: - ValueError: if keypoints are provided but keypoint_flip_permutation is not. - """ - - def _flip_image(image): - # flip image - image_flipped = tf.image.flip_up_down(image) - return image_flipped - - if keypoints is not None and keypoint_flip_permutation is None: - raise ValueError( - 'keypoints are provided but keypoints_flip_permutation is not provided') - - with tf.name_scope('RandomVerticalFlip', values=[image, boxes]): - result = [] - # random variable defining whether to do flip or not - do_a_flip_random = tf.greater(tf.random_uniform([], seed=seed), 0.5) - - # flip image - image = tf.cond(do_a_flip_random, lambda: _flip_image(image), lambda: image) - result.append(image) - - # flip boxes - if boxes is not None: - boxes = tf.cond(do_a_flip_random, lambda: _flip_boxes_up_down(boxes), - lambda: boxes) - result.append(boxes) - - # flip masks - if masks is not None: - masks = tf.cond(do_a_flip_random, lambda: _flip_masks_up_down(masks), - lambda: masks) - result.append(masks) - - # flip keypoints - if keypoints is not None and keypoint_flip_permutation is not None: - permutation = keypoint_flip_permutation - keypoints = tf.cond( - do_a_flip_random, - lambda: 
keypoint_ops.flip_vertical(keypoints, 0.5, permutation), - lambda: keypoints) - result.append(keypoints) - - return tuple(result) - - -def random_rotation90(image, - boxes=None, - masks=None, - keypoints=None, - seed=None): - """Randomly rotates the image and detections 90 degrees counter-clockwise. - - The probability of rotating the image is 50%. This can be combined with - random_horizontal_flip and random_vertical_flip to produce an output with a - uniform distribution of the eight possible 90 degree rotation / reflection - combinations. - - Args: - image: rank 3 float32 tensor with shape [height, width, channels]. - boxes: (optional) rank 2 float32 tensor with shape [N, 4] - containing the bounding boxes. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. - masks: (optional) rank 3 float32 tensor with shape - [num_instances, height, width] containing instance masks. The masks - are of the same height, width as the input `image`. - keypoints: (optional) rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2]. The keypoints are in y-x - normalized coordinates. - seed: random seed - - Returns: - image: image which is the same shape as input image. - - If boxes, masks, and keypoints, are not None, - the function also returns the following tensors. - - boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - masks: rank 3 float32 tensor with shape [num_instances, height, width] - containing instance masks. 
- keypoints: rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2] - """ - - def _rot90_image(image): - # flip image - image_rotated = tf.image.rot90(image) - return image_rotated - - with tf.name_scope('RandomRotation90', values=[image, boxes]): - result = [] - - # random variable defining whether to rotate by 90 degrees or not - do_a_rot90_random = tf.greater(tf.random_uniform([], seed=seed), 0.5) - - # flip image - image = tf.cond(do_a_rot90_random, lambda: _rot90_image(image), - lambda: image) - result.append(image) - - # flip boxes - if boxes is not None: - boxes = tf.cond(do_a_rot90_random, lambda: _rot90_boxes(boxes), - lambda: boxes) - result.append(boxes) - - # flip masks - if masks is not None: - masks = tf.cond(do_a_rot90_random, lambda: _rot90_masks(masks), - lambda: masks) - result.append(masks) - - # flip keypoints - if keypoints is not None: - keypoints = tf.cond( - do_a_rot90_random, - lambda: keypoint_ops.rot90(keypoints), - lambda: keypoints) - result.append(keypoints) - - return tuple(result) - - -def random_pixel_value_scale(image, minval=0.9, maxval=1.1, seed=None): - """Scales each value in the pixels of the image. - - This function scales each pixel independent of the other ones. - For each value in image tensor, draws a random number between - minval and maxval and multiples the values with them. - - Args: - image: rank 3 float32 tensor contains 1 image -> [height, width, channels] - with pixel values varying between [0, 1]. - minval: lower ratio of scaling pixel values. - maxval: upper ratio of scaling pixel values. - seed: random seed. - - Returns: - image: image which is the same shape as input image. 
- """ - with tf.name_scope('RandomPixelValueScale', values=[image]): - color_coef = tf.random_uniform( - tf.shape(image), - minval=minval, - maxval=maxval, - dtype=tf.float32, - seed=seed) - image = tf.multiply(image, color_coef) - image = tf.clip_by_value(image, 0.0, 1.0) - - return image - - -def random_image_scale(image, - masks=None, - min_scale_ratio=0.5, - max_scale_ratio=2.0, - seed=None): - """Scales the image size. - - Args: - image: rank 3 float32 tensor contains 1 image -> [height, width, channels]. - masks: (optional) rank 3 float32 tensor containing masks with - size [height, width, num_masks]. The value is set to None if there are no - masks. - min_scale_ratio: minimum scaling ratio. - max_scale_ratio: maximum scaling ratio. - seed: random seed. - - Returns: - image: image which is the same rank as input image. - masks: If masks is not none, resized masks which are the same rank as input - masks will be returned. - """ - with tf.name_scope('RandomImageScale', values=[image]): - result = [] - image_shape = tf.shape(image) - image_height = image_shape[0] - image_width = image_shape[1] - size_coef = tf.random_uniform([], - minval=min_scale_ratio, - maxval=max_scale_ratio, - dtype=tf.float32, seed=seed) - image_newysize = tf.to_int32( - tf.multiply(tf.to_float(image_height), size_coef)) - image_newxsize = tf.to_int32( - tf.multiply(tf.to_float(image_width), size_coef)) - image = tf.image.resize_images( - image, [image_newysize, image_newxsize], align_corners=True) - result.append(image) - if masks: - masks = tf.image.resize_nearest_neighbor( - masks, [image_newysize, image_newxsize], align_corners=True) - result.append(masks) - return tuple(result) - - -def random_rgb_to_gray(image, probability=0.1, seed=None): - """Changes the image from RGB to Grayscale with the given probability. - - Args: - image: rank 3 float32 tensor contains 1 image -> [height, width, channels] - with pixel values varying between [0, 1]. 
- probability: the probability of returning a grayscale image. - The probability should be a number between [0, 1]. - seed: random seed. - - Returns: - image: image which is the same shape as input image. - """ - def _image_to_gray(image): - image_gray1 = tf.image.rgb_to_grayscale(image) - image_gray3 = tf.image.grayscale_to_rgb(image_gray1) - return image_gray3 - - with tf.name_scope('RandomRGBtoGray', values=[image]): - # random variable defining whether to do flip or not - do_gray_random = tf.random_uniform([], seed=seed) - - image = tf.cond( - tf.greater(do_gray_random, probability), lambda: image, - lambda: _image_to_gray(image)) - - return image - - -def random_adjust_brightness(image, max_delta=0.2): - """Randomly adjusts brightness. - - Makes sure the output image is still between 0 and 1. - - Args: - image: rank 3 float32 tensor contains 1 image -> [height, width, channels] - with pixel values varying between [0, 1]. - max_delta: how much to change the brightness. A value between [0, 1). - - Returns: - image: image which is the same shape as input image. - boxes: boxes which is the same shape as input boxes. - """ - with tf.name_scope('RandomAdjustBrightness', values=[image]): - image = tf.image.random_brightness(image, max_delta) - image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0) - return image - - -def random_adjust_contrast(image, min_delta=0.8, max_delta=1.25): - """Randomly adjusts contrast. - - Makes sure the output image is still between 0 and 1. - - Args: - image: rank 3 float32 tensor contains 1 image -> [height, width, channels] - with pixel values varying between [0, 1]. - min_delta: see max_delta. - max_delta: how much to change the contrast. Contrast will change with a - value between min_delta and max_delta. This value will be - multiplied to the current contrast of the image. - - Returns: - image: image which is the same shape as input image. 
- """ - with tf.name_scope('RandomAdjustContrast', values=[image]): - image = tf.image.random_contrast(image, min_delta, max_delta) - image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0) - return image - - -def random_adjust_hue(image, max_delta=0.02): - """Randomly adjusts hue. - - Makes sure the output image is still between 0 and 1. - - Args: - image: rank 3 float32 tensor contains 1 image -> [height, width, channels] - with pixel values varying between [0, 1]. - max_delta: change hue randomly with a value between 0 and max_delta. - - Returns: - image: image which is the same shape as input image. - """ - with tf.name_scope('RandomAdjustHue', values=[image]): - image = tf.image.random_hue(image, max_delta) - image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0) - return image - - -def random_adjust_saturation(image, min_delta=0.8, max_delta=1.25): - """Randomly adjusts saturation. - - Makes sure the output image is still between 0 and 1. - - Args: - image: rank 3 float32 tensor contains 1 image -> [height, width, channels] - with pixel values varying between [0, 1]. - min_delta: see max_delta. - max_delta: how much to change the saturation. Saturation will change with a - value between min_delta and max_delta. This value will be - multiplied to the current saturation of the image. - - Returns: - image: image which is the same shape as input image. - """ - with tf.name_scope('RandomAdjustSaturation', values=[image]): - image = tf.image.random_saturation(image, min_delta, max_delta) - image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0) - return image - - -def random_distort_color(image, color_ordering=0): - """Randomly distorts color. - - Randomly distorts color using a combination of brightness, hue, contrast - and saturation changes. Makes sure the output image is still between 0 and 1. 
- - Args: - image: rank 3 float32 tensor contains 1 image -> [height, width, channels] - with pixel values varying between [0, 1]. - color_ordering: Python int, a type of distortion (valid values: 0, 1). - - Returns: - image: image which is the same shape as input image. - - Raises: - ValueError: if color_ordering is not in {0, 1}. - """ - with tf.name_scope('RandomDistortColor', values=[image]): - if color_ordering == 0: - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - elif color_ordering == 1: - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - else: - raise ValueError('color_ordering must be in {0, 1}') - - # The random_* ops do not necessarily clamp. - image = tf.clip_by_value(image, 0.0, 1.0) - return image - - -def random_jitter_boxes(boxes, ratio=0.05, seed=None): - """Randomly jitter boxes in image. - - Args: - boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. - ratio: The ratio of the box width and height that the corners can jitter. - For example if the width is 100 pixels and ratio is 0.05, - the corners can jitter up to 5 pixels in the x direction. - seed: random seed. - - Returns: - boxes: boxes which is the same shape as input boxes. - """ - def random_jitter_box(box, ratio, seed): - """Randomly jitter box. - - Args: - box: bounding box [1, 1, 4]. - ratio: max ratio between jittered box and original box, - a number between [0, 0.5]. - seed: random seed. - - Returns: - jittered_box: jittered box. 
- """ - rand_numbers = tf.random_uniform( - [1, 1, 4], minval=-ratio, maxval=ratio, dtype=tf.float32, seed=seed) - box_width = tf.subtract(box[0, 0, 3], box[0, 0, 1]) - box_height = tf.subtract(box[0, 0, 2], box[0, 0, 0]) - hw_coefs = tf.stack([box_height, box_width, box_height, box_width]) - hw_rand_coefs = tf.multiply(hw_coefs, rand_numbers) - jittered_box = tf.add(box, hw_rand_coefs) - jittered_box = tf.clip_by_value(jittered_box, 0.0, 1.0) - return jittered_box - - with tf.name_scope('RandomJitterBoxes', values=[boxes]): - # boxes are [N, 4]. Lets first make them [N, 1, 1, 4] - boxes_shape = tf.shape(boxes) - boxes = tf.expand_dims(boxes, 1) - boxes = tf.expand_dims(boxes, 2) - - distorted_boxes = tf.map_fn( - lambda x: random_jitter_box(x, ratio, seed), boxes, dtype=tf.float32) - - distorted_boxes = tf.reshape(distorted_boxes, boxes_shape) - - return distorted_boxes - - -def _strict_random_crop_image(image, - boxes, - labels, - label_scores=None, - masks=None, - keypoints=None, - min_object_covered=1.0, - aspect_ratio_range=(0.75, 1.33), - area_range=(0.1, 1.0), - overlap_thresh=0.3): - """Performs random crop. - - Note: boxes will be clipped to the crop. Keypoint coordinates that are - outside the crop will be set to NaN, which is consistent with the original - keypoint encoding for non-existing keypoints. This function always crops - the image and is supposed to be used by `random_crop_image` function which - sometimes returns image unchanged. - - Args: - image: rank 3 float32 tensor containing 1 image -> [height, width, channels] - with pixel values varying between [0, 1]. - boxes: rank 2 float32 tensor containing the bounding boxes with shape - [num_instances, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. - labels: rank 1 int32 tensor containing the object classes. 
- label_scores: (optional) float32 tensor of shape [num_instances] - representing the score for each box. - masks: (optional) rank 3 float32 tensor with shape - [num_instances, height, width] containing instance masks. The masks - are of the same height, width as the input `image`. - keypoints: (optional) rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2]. The keypoints are in y-x - normalized coordinates. - min_object_covered: the cropped image must cover at least this fraction of - at least one of the input bounding boxes. - aspect_ratio_range: allowed range for aspect ratio of cropped image. - area_range: allowed range for area ratio between cropped image and the - original image. - overlap_thresh: minimum overlap thresh with new cropped - image to keep the box. - - Returns: - image: image which is the same rank as input image. - boxes: boxes which is the same rank as input boxes. - Boxes are in normalized form. - labels: new labels. - - If label_scores, masks, or keypoints is not None, the function also returns: - label_scores: rank 1 float32 tensor with shape [num_instances]. - masks: rank 3 float32 tensor with shape [num_instances, height, width] - containing instance masks. - keypoints: rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2] - """ - with tf.name_scope('RandomCropImage', values=[image, boxes]): - image_shape = tf.shape(image) - - # boxes are [N, 4]. Lets first make them [N, 1, 4]. 
- boxes_expanded = tf.expand_dims( - tf.clip_by_value( - boxes, clip_value_min=0.0, clip_value_max=1.0), 1) - - sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( - image_shape, - bounding_boxes=boxes_expanded, - min_object_covered=min_object_covered, - aspect_ratio_range=aspect_ratio_range, - area_range=area_range, - max_attempts=100, - use_image_if_no_bounding_boxes=True) - - im_box_begin, im_box_size, im_box = sample_distorted_bounding_box - - new_image = tf.slice(image, im_box_begin, im_box_size) - new_image.set_shape([None, None, image.get_shape()[2]]) - - # [1, 4] - im_box_rank2 = tf.squeeze(im_box, squeeze_dims=[0]) - # [4] - im_box_rank1 = tf.squeeze(im_box) - - boxlist = box_list.BoxList(boxes) - boxlist.add_field('labels', labels) - - if label_scores is not None: - boxlist.add_field('label_scores', label_scores) - - im_boxlist = box_list.BoxList(im_box_rank2) - - # remove boxes that are outside cropped image - boxlist, inside_window_ids = box_list_ops.prune_completely_outside_window( - boxlist, im_box_rank1) - - # remove boxes that are outside image - overlapping_boxlist, keep_ids = box_list_ops.prune_non_overlapping_boxes( - boxlist, im_boxlist, overlap_thresh) - - # change the coordinate of the remaining boxes - new_labels = overlapping_boxlist.get_field('labels') - new_boxlist = box_list_ops.change_coordinate_frame(overlapping_boxlist, - im_box_rank1) - new_boxes = new_boxlist.get() - new_boxes = tf.clip_by_value( - new_boxes, clip_value_min=0.0, clip_value_max=1.0) - - result = [new_image, new_boxes, new_labels] - - if label_scores is not None: - new_label_scores = overlapping_boxlist.get_field('label_scores') - result.append(new_label_scores) - - if masks is not None: - masks_of_boxes_inside_window = tf.gather(masks, inside_window_ids) - masks_of_boxes_completely_inside_window = tf.gather( - masks_of_boxes_inside_window, keep_ids) - masks_box_begin = [0, im_box_begin[0], im_box_begin[1]] - masks_box_size = [-1, im_box_size[0], 
im_box_size[1]] - new_masks = tf.slice( - masks_of_boxes_completely_inside_window, - masks_box_begin, masks_box_size) - result.append(new_masks) - - if keypoints is not None: - keypoints_of_boxes_inside_window = tf.gather(keypoints, inside_window_ids) - keypoints_of_boxes_completely_inside_window = tf.gather( - keypoints_of_boxes_inside_window, keep_ids) - new_keypoints = keypoint_ops.change_coordinate_frame( - keypoints_of_boxes_completely_inside_window, im_box_rank1) - new_keypoints = keypoint_ops.prune_outside_window(new_keypoints, - [0.0, 0.0, 1.0, 1.0]) - result.append(new_keypoints) - - return tuple(result) - - -def random_crop_image(image, - boxes, - labels, - label_scores=None, - masks=None, - keypoints=None, - min_object_covered=1.0, - aspect_ratio_range=(0.75, 1.33), - area_range=(0.1, 1.0), - overlap_thresh=0.3, - random_coef=0.0, - seed=None): - """Randomly crops the image. - - Given the input image and its bounding boxes, this op randomly - crops a subimage. Given a user-provided set of input constraints, - the crop window is resampled until it satisfies these constraints. - If within 100 trials it is unable to find a valid crop, the original - image is returned. See the Args section for a description of the input - constraints. Both input boxes and returned Boxes are in normalized - form (e.g., lie in the unit square [0, 1]). - This function will return the original image with probability random_coef. - - Note: boxes will be clipped to the crop. Keypoint coordinates that are - outside the crop will be set to NaN, which is consistent with the original - keypoint encoding for non-existing keypoints. - - Args: - image: rank 3 float32 tensor contains 1 image -> [height, width, channels] - with pixel values varying between [0, 1]. - boxes: rank 2 float32 tensor containing the bounding boxes with shape - [num_instances, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. 
- labels: rank 1 int32 tensor containing the object classes. - label_scores: (optional) float32 tensor of shape [num_instances]. - representing the score for each box. - masks: (optional) rank 3 float32 tensor with shape - [num_instances, height, width] containing instance masks. The masks - are of the same height, width as the input `image`. - keypoints: (optional) rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2]. The keypoints are in y-x - normalized coordinates. - min_object_covered: the cropped image must cover at least this fraction of - at least one of the input bounding boxes. - aspect_ratio_range: allowed range for aspect ratio of cropped image. - area_range: allowed range for area ratio between cropped image and the - original image. - overlap_thresh: minimum overlap thresh with new cropped - image to keep the box. - random_coef: a random coefficient that defines the chance of getting the - original image. If random_coef is 0, we will always get the - cropped image, and if it is 1.0, we will always get the - original image. - seed: random seed. - - Returns: - image: Image shape will be [new_height, new_width, channels]. - boxes: boxes which is the same rank as input boxes. Boxes are in normalized - form. - labels: new labels. - - If label_scores, masks, or keypoints are not None, the function also - returns: - label_scores: new scores. - masks: rank 3 float32 tensor with shape [num_instances, height, width] - containing instance masks. - keypoints: rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2] - """ - - def strict_random_crop_image_fn(): - return _strict_random_crop_image( - image, - boxes, - labels, - label_scores=label_scores, - masks=masks, - keypoints=keypoints, - min_object_covered=min_object_covered, - aspect_ratio_range=aspect_ratio_range, - area_range=area_range, - overlap_thresh=overlap_thresh) - - # avoids tf.cond to make faster RCNN training on borg. See b/140057645. 
- if random_coef < sys.float_info.min: - result = strict_random_crop_image_fn() - else: - do_a_crop_random = tf.random_uniform([], seed=seed) - do_a_crop_random = tf.greater(do_a_crop_random, random_coef) - - outputs = [image, boxes, labels] - - if label_scores is not None: - outputs.append(label_scores) - if masks is not None: - outputs.append(masks) - if keypoints is not None: - outputs.append(keypoints) - - result = tf.cond(do_a_crop_random, strict_random_crop_image_fn, - lambda: tuple(outputs)) - return result - - -def random_pad_image(image, - boxes, - min_image_size=None, - max_image_size=None, - pad_color=None, - seed=None): - """Randomly pads the image. - - This function randomly pads the image with zeros. The final size of the - padded image will be between min_image_size and max_image_size. - if min_image_size is smaller than the input image size, min_image_size will - be set to the input image size. The same for max_image_size. The input image - will be located at a uniformly random location inside the padded image. - The relative location of the boxes to the original image will remain the same. - - Args: - image: rank 3 float32 tensor containing 1 image -> [height, width, channels] - with pixel values varying between [0, 1]. - boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. - min_image_size: a tensor of size [min_height, min_width], type tf.int32. - If passed as None, will be set to image size - [height, width]. - max_image_size: a tensor of size [max_height, max_width], type tf.int32. - If passed as None, will be set to twice the - image [height * 2, width * 2]. - pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32. - if set as None, it will be set to average color of the input - image. - - seed: random seed. 
- - Returns: - image: Image shape will be [new_height, new_width, channels]. - boxes: boxes which is the same rank as input boxes. Boxes are in normalized - form. - """ - if pad_color is None: - pad_color = tf.reduce_mean(image, axis=[0, 1]) - - image_shape = tf.shape(image) - image_height = image_shape[0] - image_width = image_shape[1] - - if max_image_size is None: - max_image_size = tf.stack([image_height * 2, image_width * 2]) - max_image_size = tf.maximum(max_image_size, - tf.stack([image_height, image_width])) - - if min_image_size is None: - min_image_size = tf.stack([image_height, image_width]) - min_image_size = tf.maximum(min_image_size, - tf.stack([image_height, image_width])) - - target_height = tf.cond( - max_image_size[0] > min_image_size[0], - lambda: _random_integer(min_image_size[0], max_image_size[0], seed), - lambda: max_image_size[0]) - - target_width = tf.cond( - max_image_size[1] > min_image_size[1], - lambda: _random_integer(min_image_size[1], max_image_size[1], seed), - lambda: max_image_size[1]) - - offset_height = tf.cond( - target_height > image_height, - lambda: _random_integer(0, target_height - image_height, seed), - lambda: tf.constant(0, dtype=tf.int32)) - - offset_width = tf.cond( - target_width > image_width, - lambda: _random_integer(0, target_width - image_width, seed), - lambda: tf.constant(0, dtype=tf.int32)) - - new_image = tf.image.pad_to_bounding_box( - image, - offset_height=offset_height, - offset_width=offset_width, - target_height=target_height, - target_width=target_width) - - # Setting color of the padded pixels - image_ones = tf.ones_like(image) - image_ones_padded = tf.image.pad_to_bounding_box( - image_ones, - offset_height=offset_height, - offset_width=offset_width, - target_height=target_height, - target_width=target_width) - image_color_padded = (1.0 - image_ones_padded) * pad_color - new_image += image_color_padded - - # setting boxes - new_window = tf.to_float( - tf.stack([ - -offset_height, -offset_width, 
target_height - offset_height, - target_width - offset_width - ])) - new_window /= tf.to_float( - tf.stack([image_height, image_width, image_height, image_width])) - boxlist = box_list.BoxList(boxes) - new_boxlist = box_list_ops.change_coordinate_frame(boxlist, new_window) - new_boxes = new_boxlist.get() - - return new_image, new_boxes - - -def random_crop_pad_image(image, - boxes, - labels, - label_scores=None, - min_object_covered=1.0, - aspect_ratio_range=(0.75, 1.33), - area_range=(0.1, 1.0), - overlap_thresh=0.3, - random_coef=0.0, - min_padded_size_ratio=(1.0, 1.0), - max_padded_size_ratio=(2.0, 2.0), - pad_color=None, - seed=None): - """Randomly crops and pads the image. - - Given an input image and its bounding boxes, this op first randomly crops - the image and then randomly pads the image with background values. Parameters - min_padded_size_ratio and max_padded_size_ratio, determine the range of the - final output image size. Specifically, the final image size will have a size - in the range of min_padded_size_ratio * tf.shape(image) and - max_padded_size_ratio * tf.shape(image). Note that these ratios are with - respect to the size of the original image, so we can't capture the same - effect easily by independently applying RandomCropImage - followed by RandomPadImage. - - Args: - image: rank 3 float32 tensor containing 1 image -> [height, width, channels] - with pixel values varying between [0, 1]. - boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. - labels: rank 1 int32 tensor containing the object classes. - label_scores: rank 1 float32 containing the label scores. - min_object_covered: the cropped image must cover at least this fraction of - at least one of the input bounding boxes. - aspect_ratio_range: allowed range for aspect ratio of cropped image. 
- area_range: allowed range for area ratio between cropped image and the - original image. - overlap_thresh: minimum overlap thresh with new cropped - image to keep the box. - random_coef: a random coefficient that defines the chance of getting the - original image. If random_coef is 0, we will always get the - cropped image, and if it is 1.0, we will always get the - original image. - min_padded_size_ratio: min ratio of padded image height and width to the - input image's height and width. - max_padded_size_ratio: max ratio of padded image height and width to the - input image's height and width. - pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32. - if set as None, it will be set to average color of the randomly - cropped image. - seed: random seed. - - Returns: - padded_image: padded image. - padded_boxes: boxes which is the same rank as input boxes. Boxes are in - normalized form. - cropped_labels: cropped labels. - if label_scores is not None also returns: - cropped_label_scores: cropped label scores. 
- """ - image_size = tf.shape(image) - image_height = image_size[0] - image_width = image_size[1] - result = random_crop_image( - image=image, - boxes=boxes, - labels=labels, - label_scores=label_scores, - min_object_covered=min_object_covered, - aspect_ratio_range=aspect_ratio_range, - area_range=area_range, - overlap_thresh=overlap_thresh, - random_coef=random_coef, - seed=seed) - - cropped_image, cropped_boxes, cropped_labels = result[:3] - - min_image_size = tf.to_int32( - tf.to_float(tf.stack([image_height, image_width])) * - min_padded_size_ratio) - max_image_size = tf.to_int32( - tf.to_float(tf.stack([image_height, image_width])) * - max_padded_size_ratio) - - padded_image, padded_boxes = random_pad_image( - cropped_image, - cropped_boxes, - min_image_size=min_image_size, - max_image_size=max_image_size, - pad_color=pad_color, - seed=seed) - - cropped_padded_output = (padded_image, padded_boxes, cropped_labels) - - if label_scores is not None: - cropped_label_scores = result[3] - cropped_padded_output += (cropped_label_scores,) - - return cropped_padded_output - - -def random_crop_to_aspect_ratio(image, - boxes, - labels, - label_scores=None, - masks=None, - keypoints=None, - aspect_ratio=1.0, - overlap_thresh=0.3, - seed=None): - """Randomly crops an image to the specified aspect ratio. - - Randomly crops the a portion of the image such that the crop is of the - specified aspect ratio, and the crop is as large as possible. If the specified - aspect ratio is larger than the aspect ratio of the image, this op will - randomly remove rows from the top and bottom of the image. If the specified - aspect ratio is less than the aspect ratio of the image, this op will randomly - remove cols from the left and right of the image. If the specified aspect - ratio is the same as the aspect ratio of the image, this op will return the - image. - - Args: - image: rank 3 float32 tensor contains 1 image -> [height, width, channels] - with pixel values varying between [0, 1]. 
- boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. - labels: rank 1 int32 tensor containing the object classes. - label_scores: (optional) float32 tensor of shape [num_instances] - representing the score for each box. - masks: (optional) rank 3 float32 tensor with shape - [num_instances, height, width] containing instance masks. The masks - are of the same height, width as the input `image`. - keypoints: (optional) rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2]. The keypoints are in y-x - normalized coordinates. - aspect_ratio: the aspect ratio of cropped image. - overlap_thresh: minimum overlap thresh with new cropped - image to keep the box. - seed: random seed. - - Returns: - image: image which is the same rank as input image. - boxes: boxes which is the same rank as input boxes. - Boxes are in normalized form. - labels: new labels. - - If label_scores, masks, or keypoints is not None, the function also returns: - label_scores: new label scores. - masks: rank 3 float32 tensor with shape [num_instances, height, width] - containing instance masks. - keypoints: rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2] - - Raises: - ValueError: If image is not a 3D tensor. 
- """ - if len(image.get_shape()) != 3: - raise ValueError('Image should be 3D tensor') - - with tf.name_scope('RandomCropToAspectRatio', values=[image]): - image_shape = tf.shape(image) - orig_height = image_shape[0] - orig_width = image_shape[1] - orig_aspect_ratio = tf.to_float(orig_width) / tf.to_float(orig_height) - new_aspect_ratio = tf.constant(aspect_ratio, dtype=tf.float32) - def target_height_fn(): - return tf.to_int32(tf.round(tf.to_float(orig_width) / new_aspect_ratio)) - - target_height = tf.cond(orig_aspect_ratio >= new_aspect_ratio, - lambda: orig_height, target_height_fn) - - def target_width_fn(): - return tf.to_int32(tf.round(tf.to_float(orig_height) * new_aspect_ratio)) - - target_width = tf.cond(orig_aspect_ratio <= new_aspect_ratio, - lambda: orig_width, target_width_fn) - - # either offset_height = 0 and offset_width is randomly chosen from - # [0, offset_width - target_width), or else offset_width = 0 and - # offset_height is randomly chosen from [0, offset_height - target_height) - offset_height = _random_integer(0, orig_height - target_height + 1, seed) - offset_width = _random_integer(0, orig_width - target_width + 1, seed) - new_image = tf.image.crop_to_bounding_box( - image, offset_height, offset_width, target_height, target_width) - - im_box = tf.stack([ - tf.to_float(offset_height) / tf.to_float(orig_height), - tf.to_float(offset_width) / tf.to_float(orig_width), - tf.to_float(offset_height + target_height) / tf.to_float(orig_height), - tf.to_float(offset_width + target_width) / tf.to_float(orig_width) - ]) - - boxlist = box_list.BoxList(boxes) - boxlist.add_field('labels', labels) - - if label_scores is not None: - boxlist.add_field('label_scores', label_scores) - - im_boxlist = box_list.BoxList(tf.expand_dims(im_box, 0)) - - # remove boxes whose overlap with the image is less than overlap_thresh - overlapping_boxlist, keep_ids = box_list_ops.prune_non_overlapping_boxes( - boxlist, im_boxlist, overlap_thresh) - - # change the 
coordinate of the remaining boxes - new_labels = overlapping_boxlist.get_field('labels') - new_boxlist = box_list_ops.change_coordinate_frame(overlapping_boxlist, - im_box) - new_boxlist = box_list_ops.clip_to_window(new_boxlist, - tf.constant([0.0, 0.0, 1.0, 1.0], - tf.float32)) - new_boxes = new_boxlist.get() - - result = [new_image, new_boxes, new_labels] - - if label_scores is not None: - new_label_scores = overlapping_boxlist.get_field('label_scores') - result.append(new_label_scores) - - if masks is not None: - masks_inside_window = tf.gather(masks, keep_ids) - masks_box_begin = tf.stack([0, offset_height, offset_width]) - masks_box_size = tf.stack([-1, target_height, target_width]) - new_masks = tf.slice(masks_inside_window, masks_box_begin, masks_box_size) - result.append(new_masks) - - if keypoints is not None: - keypoints_inside_window = tf.gather(keypoints, keep_ids) - new_keypoints = keypoint_ops.change_coordinate_frame( - keypoints_inside_window, im_box) - new_keypoints = keypoint_ops.prune_outside_window(new_keypoints, - [0.0, 0.0, 1.0, 1.0]) - result.append(new_keypoints) - - return tuple(result) - - -def random_pad_to_aspect_ratio(image, - boxes, - masks=None, - keypoints=None, - aspect_ratio=1.0, - min_padded_size_ratio=(1.0, 1.0), - max_padded_size_ratio=(2.0, 2.0), - seed=None): - """Randomly zero pads an image to the specified aspect ratio. - - Pads the image so that the resulting image will have the specified aspect - ratio without scaling less than the min_padded_size_ratio or more than the - max_padded_size_ratio. If the min_padded_size_ratio or max_padded_size_ratio - is lower than what is possible to maintain the aspect ratio, then this method - will use the least padding to achieve the specified aspect ratio. - - Args: - image: rank 3 float32 tensor contains 1 image -> [height, width, channels] - with pixel values varying between [0, 1]. - boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. 
- Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. - masks: (optional) rank 3 float32 tensor with shape - [num_instances, height, width] containing instance masks. The masks - are of the same height, width as the input `image`. - keypoints: (optional) rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2]. The keypoints are in y-x - normalized coordinates. - aspect_ratio: aspect ratio of the final image. - min_padded_size_ratio: min ratio of padded image height and width to the - input image's height and width. - max_padded_size_ratio: max ratio of padded image height and width to the - input image's height and width. - seed: random seed. - - Returns: - image: image which is the same rank as input image. - boxes: boxes which is the same rank as input boxes. - Boxes are in normalized form. - labels: new labels. - - If label_scores, masks, or keypoints is not None, the function also returns: - label_scores: new label scores. - masks: rank 3 float32 tensor with shape [num_instances, height, width] - containing instance masks. - keypoints: rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2] - - Raises: - ValueError: If image is not a 3D tensor. 
- """ - if len(image.get_shape()) != 3: - raise ValueError('Image should be 3D tensor') - - with tf.name_scope('RandomPadToAspectRatio', values=[image]): - image_shape = tf.shape(image) - image_height = tf.to_float(image_shape[0]) - image_width = tf.to_float(image_shape[1]) - image_aspect_ratio = image_width / image_height - new_aspect_ratio = tf.constant(aspect_ratio, dtype=tf.float32) - target_height = tf.cond( - image_aspect_ratio <= new_aspect_ratio, - lambda: image_height, - lambda: image_width / new_aspect_ratio) - target_width = tf.cond( - image_aspect_ratio >= new_aspect_ratio, - lambda: image_width, - lambda: image_height * new_aspect_ratio) - - min_height = tf.maximum( - min_padded_size_ratio[0] * image_height, target_height) - min_width = tf.maximum( - min_padded_size_ratio[1] * image_width, target_width) - max_height = tf.maximum( - max_padded_size_ratio[0] * image_height, target_height) - max_width = tf.maximum( - max_padded_size_ratio[1] * image_width, target_width) - - min_scale = tf.maximum(min_height / target_height, min_width / target_width) - max_scale = tf.minimum(max_height / target_height, max_width / target_width) - scale = tf.random_uniform([], min_scale, max_scale, seed=seed) - - target_height = scale * target_height - target_width = scale * target_width - - new_image = tf.image.pad_to_bounding_box( - image, 0, 0, tf.to_int32(target_height), tf.to_int32(target_width)) - - im_box = tf.stack([ - 0.0, - 0.0, - target_height / image_height, - target_width / image_width - ]) - boxlist = box_list.BoxList(boxes) - new_boxlist = box_list_ops.change_coordinate_frame(boxlist, im_box) - new_boxes = new_boxlist.get() - - result = [new_image, new_boxes] - - if masks is not None: - new_masks = tf.expand_dims(masks, -1) - new_masks = tf.image.pad_to_bounding_box(new_masks, 0, 0, - tf.to_int32(target_height), - tf.to_int32(target_width)) - new_masks = tf.squeeze(new_masks, [-1]) - result.append(new_masks) - - if keypoints is not None: - new_keypoints = 
keypoint_ops.change_coordinate_frame(keypoints, im_box) - result.append(new_keypoints) - - return tuple(result) - - -def random_black_patches(image, - max_black_patches=10, - probability=0.5, - size_to_image_ratio=0.1, - random_seed=None): - """Randomly adds some black patches to the image. - - This op adds up to max_black_patches square black patches of a fixed size - to the image where size is specified via the size_to_image_ratio parameter. - - Args: - image: rank 3 float32 tensor containing 1 image -> [height, width, channels] - with pixel values varying between [0, 1]. - max_black_patches: number of times that the function tries to add a - black box to the image. - probability: at each try, what is the chance of adding a box. - size_to_image_ratio: Determines the ratio of the size of the black patches - to the size of the image. - box_size = size_to_image_ratio * - min(image_width, image_height) - random_seed: random seed. - - Returns: - image - """ - def add_black_patch_to_image(image): - """Function for adding one patch to the image. 
- - Args: - image: image - - Returns: - image with a randomly added black box - """ - image_shape = tf.shape(image) - image_height = image_shape[0] - image_width = image_shape[1] - box_size = tf.to_int32( - tf.multiply( - tf.minimum(tf.to_float(image_height), tf.to_float(image_width)), - size_to_image_ratio)) - normalized_y_min = tf.random_uniform( - [], minval=0.0, maxval=(1.0 - size_to_image_ratio), seed=random_seed) - normalized_x_min = tf.random_uniform( - [], minval=0.0, maxval=(1.0 - size_to_image_ratio), seed=random_seed) - y_min = tf.to_int32(normalized_y_min * tf.to_float(image_height)) - x_min = tf.to_int32(normalized_x_min * tf.to_float(image_width)) - black_box = tf.ones([box_size, box_size, 3], dtype=tf.float32) - mask = 1.0 - tf.image.pad_to_bounding_box(black_box, y_min, x_min, - image_height, image_width) - image = tf.multiply(image, mask) - return image - - with tf.name_scope('RandomBlackPatchInImage', values=[image]): - for _ in range(max_black_patches): - random_prob = tf.random_uniform( - [], minval=0.0, maxval=1.0, dtype=tf.float32, seed=random_seed) - image = tf.cond( - tf.greater(random_prob, probability), lambda: image, - lambda: add_black_patch_to_image(image)) - - return image - - -def image_to_float(image): - """Used in Faster R-CNN. Casts image pixel values to float. - - Args: - image: input image which might be in tf.uint8 or sth else format - - Returns: - image: image in tf.float32 format. - """ - with tf.name_scope('ImageToFloat', values=[image]): - image = tf.to_float(image) - return image - - -def random_resize_method(image, target_size): - """Uses a random resize method to resize the image to target size. - - Args: - image: a rank 3 tensor. - target_size: a list of [target_height, target_width] - - Returns: - resized image. 
- """ - - resized_image = _apply_with_random_selector( - image, - lambda x, method: tf.image.resize_images(x, target_size, method), - num_cases=4) - - return resized_image - - -def _compute_new_static_size(image, min_dimension, max_dimension): - """Compute new static shape for resize_to_range method.""" - image_shape = image.get_shape().as_list() - orig_height = image_shape[0] - orig_width = image_shape[1] - orig_min_dim = min(orig_height, orig_width) - # Calculates the larger of the possible sizes - large_scale_factor = min_dimension / float(orig_min_dim) - # Scaling orig_(height|width) by large_scale_factor will make the smaller - # dimension equal to min_dimension, save for floating point rounding errors. - # For reasonably-sized images, taking the nearest integer will reliably - # eliminate this error. - large_height = int(round(orig_height * large_scale_factor)) - large_width = int(round(orig_width * large_scale_factor)) - large_size = [large_height, large_width] - if max_dimension: - # Calculates the smaller of the possible sizes, use that if the larger - # is too big. - orig_max_dim = max(orig_height, orig_width) - small_scale_factor = max_dimension / float(orig_max_dim) - # Scaling orig_(height|width) by small_scale_factor will make the larger - # dimension equal to max_dimension, save for floating point rounding - # errors. For reasonably-sized images, taking the nearest integer will - # reliably eliminate this error. 
- small_height = int(round(orig_height * small_scale_factor)) - small_width = int(round(orig_width * small_scale_factor)) - small_size = [small_height, small_width] - new_size = large_size - if max(large_size) > max_dimension: - new_size = small_size - else: - new_size = large_size - return tf.constant(new_size) - - -def _compute_new_dynamic_size(image, min_dimension, max_dimension): - """Compute new dynamic shape for resize_to_range method.""" - image_shape = tf.shape(image) - orig_height = tf.to_float(image_shape[0]) - orig_width = tf.to_float(image_shape[1]) - orig_min_dim = tf.minimum(orig_height, orig_width) - # Calculates the larger of the possible sizes - min_dimension = tf.constant(min_dimension, dtype=tf.float32) - large_scale_factor = min_dimension / orig_min_dim - # Scaling orig_(height|width) by large_scale_factor will make the smaller - # dimension equal to min_dimension, save for floating point rounding errors. - # For reasonably-sized images, taking the nearest integer will reliably - # eliminate this error. - large_height = tf.to_int32(tf.round(orig_height * large_scale_factor)) - large_width = tf.to_int32(tf.round(orig_width * large_scale_factor)) - large_size = tf.stack([large_height, large_width]) - if max_dimension: - # Calculates the smaller of the possible sizes, use that if the larger - # is too big. - orig_max_dim = tf.maximum(orig_height, orig_width) - max_dimension = tf.constant(max_dimension, dtype=tf.float32) - small_scale_factor = max_dimension / orig_max_dim - # Scaling orig_(height|width) by small_scale_factor will make the larger - # dimension equal to max_dimension, save for floating point rounding - # errors. For reasonably-sized images, taking the nearest integer will - # reliably eliminate this error. 
- small_height = tf.to_int32(tf.round(orig_height * small_scale_factor)) - small_width = tf.to_int32(tf.round(orig_width * small_scale_factor)) - small_size = tf.stack([small_height, small_width]) - new_size = tf.cond( - tf.to_float(tf.reduce_max(large_size)) > max_dimension, - lambda: small_size, lambda: large_size) - else: - new_size = large_size - return new_size - - -def resize_to_range(image, - masks=None, - min_dimension=None, - max_dimension=None, - method=tf.image.ResizeMethod.BILINEAR, - align_corners=False): - """Resizes an image so its dimensions are within the provided value. - - The output size can be described by two cases: - 1. If the image can be rescaled so its minimum dimension is equal to the - provided value without the other dimension exceeding max_dimension, - then do so. - 2. Otherwise, resize so the largest dimension is equal to max_dimension. - - Args: - image: A 3D tensor of shape [height, width, channels] - masks: (optional) rank 3 float32 tensor with shape - [num_instances, height, width] containing instance masks. - min_dimension: (optional) (scalar) desired size of the smaller image - dimension. - max_dimension: (optional) (scalar) maximum allowed size - of the larger image dimension. - method: (optional) interpolation method used in resizing. Defaults to - BILINEAR. - align_corners: bool. If true, exactly align all 4 corners of the input - and output. Defaults to False. - - Returns: - A 3D tensor of shape [new_height, new_width, channels], - where the image has been resized (with bilinear interpolation) so that - min(new_height, new_width) == min_dimension or - max(new_height, new_width) == max_dimension. - - If masks is not None, also outputs masks: - A 3D tensor of shape [num_instances, new_height, new_width] - - Raises: - ValueError: if the image is not a 3D tensor. 
- """ - if len(image.get_shape()) != 3: - raise ValueError('Image should be 3D tensor') - - with tf.name_scope('ResizeToRange', values=[image, min_dimension]): - if image.get_shape().is_fully_defined(): - new_size = _compute_new_static_size(image, min_dimension, max_dimension) - else: - new_size = _compute_new_dynamic_size(image, min_dimension, max_dimension) - new_image = tf.image.resize_images( - image, new_size, method=method, align_corners=align_corners) - - result = new_image - if masks is not None: - new_masks = tf.expand_dims(masks, 3) - new_masks = tf.image.resize_nearest_neighbor( - new_masks, new_size, align_corners=align_corners) - new_masks = tf.squeeze(new_masks, 3) - result = [new_image, new_masks] - - return result - - -# TODO: Make sure the static shapes are preserved. -def resize_to_min_dimension(image, masks=None, min_dimension=600): - """Resizes image and masks given the min size maintaining the aspect ratio. - - If one of the image dimensions is smaller that min_dimension, it will scale - the image such that its smallest dimension is equal to min_dimension. - Otherwise, will keep the image size as is. - - Args: - image: a tensor of size [height, width, channels]. - masks: (optional) a tensors of size [num_instances, height, width]. - min_dimension: minimum image dimension. - - Returns: - a tuple containing the following: - Resized image. A tensor of size [new_height, new_width, channels]. - (optional) Resized masks. A tensor of - size [num_instances, new_height, new_width]. - - Raises: - ValueError: if the image is not a 3D tensor. 
- """ - if len(image.get_shape()) != 3: - raise ValueError('Image should be 3D tensor') - - with tf.name_scope('ResizeGivenMinDimension', values=[image, min_dimension]): - image_height = tf.shape(image)[0] - image_width = tf.shape(image)[1] - min_image_dimension = tf.minimum(image_height, image_width) - min_target_dimension = tf.maximum(min_image_dimension, min_dimension) - target_ratio = tf.to_float(min_target_dimension) / tf.to_float( - min_image_dimension) - target_height = tf.to_int32(tf.to_float(image_height) * target_ratio) - target_width = tf.to_int32(tf.to_float(image_width) * target_ratio) - image = tf.image.resize_bilinear( - tf.expand_dims(image, axis=0), - size=[target_height, target_width], - align_corners=True) - result = tf.squeeze(image, axis=0) - if masks is not None: - masks = tf.image.resize_nearest_neighbor( - tf.expand_dims(masks, axis=3), - size=[target_height, target_width], - align_corners=True) - result = (result, tf.squeeze(masks, axis=3)) - return result - - -def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None): - """Scales boxes from normalized to pixel coordinates. - - Args: - image: A 3D float32 tensor of shape [height, width, channels]. - boxes: A 2D float32 tensor of shape [num_boxes, 4] containing the bounding - boxes in normalized coordinates. Each row is of the form - [ymin, xmin, ymax, xmax]. - keypoints: (optional) rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2]. The keypoints are in y-x normalized - coordinates. - - Returns: - image: unchanged input image. - scaled_boxes: a 2D float32 tensor of shape [num_boxes, 4] containing the - bounding boxes in pixel coordinates. - scaled_keypoints: a 3D float32 tensor with shape - [num_instances, num_keypoints, 2] containing the keypoints in pixel - coordinates. 
- """ - boxlist = box_list.BoxList(boxes) - image_height = tf.shape(image)[0] - image_width = tf.shape(image)[1] - scaled_boxes = box_list_ops.scale(boxlist, image_height, image_width).get() - result = [image, scaled_boxes] - if keypoints is not None: - scaled_keypoints = keypoint_ops.scale(keypoints, image_height, image_width) - result.append(scaled_keypoints) - return tuple(result) - - -# pylint: disable=g-doc-return-or-yield -def resize_image(image, - masks=None, - new_height=600, - new_width=1024, - method=tf.image.ResizeMethod.BILINEAR, - align_corners=False): - """See `tf.image.resize_images` for detailed doc.""" - with tf.name_scope( - 'ResizeImage', - values=[image, new_height, new_width, method, align_corners]): - new_image = tf.image.resize_images( - image, [new_height, new_width], - method=method, - align_corners=align_corners) - result = new_image - if masks is not None: - num_instances = tf.shape(masks)[0] - new_size = tf.constant([new_height, new_width], dtype=tf.int32) - def resize_masks_branch(): - new_masks = tf.expand_dims(masks, 3) - new_masks = tf.image.resize_nearest_neighbor( - new_masks, new_size, align_corners=align_corners) - new_masks = tf.squeeze(new_masks, axis=3) - return new_masks - - def reshape_masks_branch(): - new_masks = tf.reshape(masks, [0, new_size[0], new_size[1]]) - return new_masks - - masks = tf.cond(num_instances > 0, resize_masks_branch, - reshape_masks_branch) - result = [new_image, masks] - - return result - - -def subtract_channel_mean(image, means=None): - """Normalizes an image by subtracting a mean from each channel. - - Args: - image: A 3D tensor of shape [height, width, channels] - means: float list containing a mean for each channel - Returns: - normalized_images: a tensor of shape [height, width, channels] - Raises: - ValueError: if images is not a 4D tensor or if the number of means is not - equal to the number of channels. 
- """ - with tf.name_scope('SubtractChannelMean', values=[image, means]): - if len(image.get_shape()) != 3: - raise ValueError('Input must be of size [height, width, channels]') - if len(means) != image.get_shape()[-1]: - raise ValueError('len(means) must match the number of channels') - return image - [[means]] - - -def one_hot_encoding(labels, num_classes=None): - """One-hot encodes the multiclass labels. - - Example usage: - labels = tf.constant([1, 4], dtype=tf.int32) - one_hot = OneHotEncoding(labels, num_classes=5) - one_hot.eval() # evaluates to [0, 1, 0, 0, 1] - - Args: - labels: A tensor of shape [None] corresponding to the labels. - num_classes: Number of classes in the dataset. - Returns: - onehot_labels: a tensor of shape [num_classes] corresponding to the one hot - encoding of the labels. - Raises: - ValueError: if num_classes is not specified. - """ - with tf.name_scope('OneHotEncoding', values=[labels]): - if num_classes is None: - raise ValueError('num_classes must be specified') - - labels = tf.one_hot(labels, num_classes, 1, 0) - return tf.reduce_max(labels, 0) - - -def rgb_to_gray(image): - """Converts a 3 channel RGB image to a 1 channel grayscale image. - - Args: - image: Rank 3 float32 tensor containing 1 image -> [height, width, 3] - with pixel values varying between [0, 1]. - - Returns: - image: A single channel grayscale image -> [image, height, 1]. - """ - return tf.image.rgb_to_grayscale(image) - - -def ssd_random_crop(image, - boxes, - labels, - label_scores=None, - masks=None, - keypoints=None, - min_object_covered=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0), - aspect_ratio_range=((0.5, 2.0),) * 7, - area_range=((0.1, 1.0),) * 7, - overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0), - random_coef=(0.15,) * 7, - seed=None): - """Random crop preprocessing with default parameters as in SSD paper. - - Liu et al., SSD: Single shot multibox detector. - For further information on random crop preprocessing refer to RandomCrop - function above. 
- - Args: - image: rank 3 float32 tensor contains 1 image -> [height, width, channels] - with pixel values varying between [0, 1]. - boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. - labels: rank 1 int32 tensor containing the object classes. - label_scores: rank 1 float32 tensor containing the scores. - masks: (optional) rank 3 float32 tensor with shape - [num_instances, height, width] containing instance masks. The masks - are of the same height, width as the input `image`. - keypoints: (optional) rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2]. The keypoints are in y-x - normalized coordinates. - min_object_covered: the cropped image must cover at least this fraction of - at least one of the input bounding boxes. - aspect_ratio_range: allowed range for aspect ratio of cropped image. - area_range: allowed range for area ratio between cropped image and the - original image. - overlap_thresh: minimum overlap thresh with new cropped - image to keep the box. - random_coef: a random coefficient that defines the chance of getting the - original image. If random_coef is 0, we will always get the - cropped image, and if it is 1.0, we will always get the - original image. - seed: random seed. - - Returns: - image: image which is the same rank as input image. - boxes: boxes which is the same rank as input boxes. - Boxes are in normalized form. - labels: new labels. - - If label_scores, masks, or keypoints is not None, the function also returns: - label_scores: new label scores. - masks: rank 3 float32 tensor with shape [num_instances, height, width] - containing instance masks. - keypoints: rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2] - """ - - def random_crop_selector(selected_result, index): - """Applies random_crop_image to selected result. 
- - Args: - selected_result: A tuple containing image, boxes, labels, keypoints (if - not None), and masks (if not None). - index: The index that was randomly selected. - - Returns: A tuple containing image, boxes, labels, keypoints (if not None), - and masks (if not None). - """ - i = 3 - image, boxes, labels = selected_result[:i] - selected_label_scores = None - selected_masks = None - selected_keypoints = None - if label_scores is not None: - selected_label_scores = selected_result[i] - i += 1 - if masks is not None: - selected_masks = selected_result[i] - i += 1 - if keypoints is not None: - selected_keypoints = selected_result[i] - - return random_crop_image( - image=image, - boxes=boxes, - labels=labels, - label_scores=selected_label_scores, - masks=selected_masks, - keypoints=selected_keypoints, - min_object_covered=min_object_covered[index], - aspect_ratio_range=aspect_ratio_range[index], - area_range=area_range[index], - overlap_thresh=overlap_thresh[index], - random_coef=random_coef[index], - seed=seed) - - result = _apply_with_random_selector_tuples( - tuple( - t for t in (image, boxes, labels, label_scores, masks, keypoints) - if t is not None), - random_crop_selector, - num_cases=len(min_object_covered)) - return result - - -def ssd_random_crop_pad(image, - boxes, - labels, - label_scores=None, - min_object_covered=(0.1, 0.3, 0.5, 0.7, 0.9, 1.0), - aspect_ratio_range=((0.5, 2.0),) * 6, - area_range=((0.1, 1.0),) * 6, - overlap_thresh=(0.1, 0.3, 0.5, 0.7, 0.9, 1.0), - random_coef=(0.15,) * 6, - min_padded_size_ratio=((1.0, 1.0),) * 6, - max_padded_size_ratio=((2.0, 2.0),) * 6, - pad_color=(None,) * 6, - seed=None): - """Random crop preprocessing with default parameters as in SSD paper. - - Liu et al., SSD: Single shot multibox detector. - For further information on random crop preprocessing refer to RandomCrop - function above. 
- - Args: - image: rank 3 float32 tensor containing 1 image -> [height, width, channels] - with pixel values varying between [0, 1]. - boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. - labels: rank 1 int32 tensor containing the object classes. - label_scores: float32 tensor of shape [num_instances] representing the - score for each box. - min_object_covered: the cropped image must cover at least this fraction of - at least one of the input bounding boxes. - aspect_ratio_range: allowed range for aspect ratio of cropped image. - area_range: allowed range for area ratio between cropped image and the - original image. - overlap_thresh: minimum overlap thresh with new cropped - image to keep the box. - random_coef: a random coefficient that defines the chance of getting the - original image. If random_coef is 0, we will always get the - cropped image, and if it is 1.0, we will always get the - original image. - min_padded_size_ratio: min ratio of padded image height and width to the - input image's height and width. - max_padded_size_ratio: max ratio of padded image height and width to the - input image's height and width. - pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32. - if set as None, it will be set to average color of the randomly - cropped image. - seed: random seed. - - Returns: - image: Image shape will be [new_height, new_width, channels]. - boxes: boxes which is the same rank as input boxes. Boxes are in normalized - form. - new_labels: new labels. - new_label_scores: new label scores. 
- """ - - def random_crop_pad_selector(image_boxes_labels, index): - i = 3 - image, boxes, labels = image_boxes_labels[:i] - selected_label_scores = None - if label_scores is not None: - selected_label_scores = image_boxes_labels[i] - - return random_crop_pad_image( - image, - boxes, - labels, - selected_label_scores, - min_object_covered=min_object_covered[index], - aspect_ratio_range=aspect_ratio_range[index], - area_range=area_range[index], - overlap_thresh=overlap_thresh[index], - random_coef=random_coef[index], - min_padded_size_ratio=min_padded_size_ratio[index], - max_padded_size_ratio=max_padded_size_ratio[index], - pad_color=pad_color[index], - seed=seed) - - return _apply_with_random_selector_tuples( - tuple(t for t in (image, boxes, labels, label_scores) if t is not None), - random_crop_pad_selector, - num_cases=len(min_object_covered)) - - -def ssd_random_crop_fixed_aspect_ratio( - image, - boxes, - labels, - label_scores=None, - masks=None, - keypoints=None, - min_object_covered=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0), - aspect_ratio=1.0, - area_range=((0.1, 1.0),) * 7, - overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0), - random_coef=(0.15,) * 7, - seed=None): - """Random crop preprocessing with default parameters as in SSD paper. - - Liu et al., SSD: Single shot multibox detector. - For further information on random crop preprocessing refer to RandomCrop - function above. - - The only difference is that the aspect ratio of the crops are fixed. - - Args: - image: rank 3 float32 tensor contains 1 image -> [height, width, channels] - with pixel values varying between [0, 1]. - boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. - labels: rank 1 int32 tensor containing the object classes. - label_scores: (optional) float32 tensor of shape [num_instances] - representing the score for each box. 
- masks: (optional) rank 3 float32 tensor with shape - [num_instances, height, width] containing instance masks. The masks - are of the same height, width as the input `image`. - keypoints: (optional) rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2]. The keypoints are in y-x - normalized coordinates. - min_object_covered: the cropped image must cover at least this fraction of - at least one of the input bounding boxes. - aspect_ratio: aspect ratio of the cropped image. - area_range: allowed range for area ratio between cropped image and the - original image. - overlap_thresh: minimum overlap thresh with new cropped - image to keep the box. - random_coef: a random coefficient that defines the chance of getting the - original image. If random_coef is 0, we will always get the - cropped image, and if it is 1.0, we will always get the - original image. - seed: random seed. - - Returns: - image: image which is the same rank as input image. - boxes: boxes which is the same rank as input boxes. - Boxes are in normalized form. - labels: new labels. - - If masks or keypoints is not None, the function also returns: - - masks: rank 3 float32 tensor with shape [num_instances, height, width] - containing instance masks. 
- keypoints: rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2] - """ - aspect_ratio_range = ((aspect_ratio, aspect_ratio),) * len(area_range) - - crop_result = ssd_random_crop( - image, boxes, labels, label_scores, masks, keypoints, min_object_covered, - aspect_ratio_range, area_range, overlap_thresh, random_coef, seed) - i = 3 - new_image, new_boxes, new_labels = crop_result[:i] - new_label_scores = None - new_masks = None - new_keypoints = None - if label_scores is not None: - new_label_scores = crop_result[i] - i += 1 - if masks is not None: - new_masks = crop_result[i] - i += 1 - if keypoints is not None: - new_keypoints = crop_result[i] - result = random_crop_to_aspect_ratio( - new_image, - new_boxes, - new_labels, - new_label_scores, - new_masks, - new_keypoints, - aspect_ratio=aspect_ratio, - seed=seed) - - return result - - -def ssd_random_crop_pad_fixed_aspect_ratio( - image, - boxes, - labels, - label_scores=None, - masks=None, - keypoints=None, - min_object_covered=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0), - aspect_ratio=1.0, - aspect_ratio_range=((0.5, 2.0),) * 7, - area_range=((0.1, 1.0),) * 7, - overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0), - random_coef=(0.15,) * 7, - min_padded_size_ratio=(1.0, 1.0), - max_padded_size_ratio=(2.0, 2.0), - seed=None): - """Random crop and pad preprocessing with default parameters as in SSD paper. - - Liu et al., SSD: Single shot multibox detector. - For further information on random crop preprocessing refer to RandomCrop - function above. - - The only difference is that after the initial crop, images are zero-padded - to a fixed aspect ratio instead of being resized to that aspect ratio. - - Args: - image: rank 3 float32 tensor contains 1 image -> [height, width, channels] - with pixel values varying between [0, 1]. - boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. 
- Each row is in the form of [ymin, xmin, ymax, xmax]. - labels: rank 1 int32 tensor containing the object classes. - label_scores: (optional) float32 tensor of shape [num_instances] - representing the score for each box. - masks: (optional) rank 3 float32 tensor with shape - [num_instances, height, width] containing instance masks. The masks - are of the same height, width as the input `image`. - keypoints: (optional) rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2]. The keypoints are in y-x - normalized coordinates. - min_object_covered: the cropped image must cover at least this fraction of - at least one of the input bounding boxes. - aspect_ratio: the final aspect ratio to pad to. - aspect_ratio_range: allowed range for aspect ratio of cropped image. - area_range: allowed range for area ratio between cropped image and the - original image. - overlap_thresh: minimum overlap thresh with new cropped - image to keep the box. - random_coef: a random coefficient that defines the chance of getting the - original image. If random_coef is 0, we will always get the - cropped image, and if it is 1.0, we will always get the - original image. - min_padded_size_ratio: min ratio of padded image height and width to the - input image's height and width. - max_padded_size_ratio: max ratio of padded image height and width to the - input image's height and width. - seed: random seed. - - Returns: - image: image which is the same rank as input image. - boxes: boxes which is the same rank as input boxes. - Boxes are in normalized form. - labels: new labels. - - If masks or keypoints is not None, the function also returns: - - masks: rank 3 float32 tensor with shape [num_instances, height, width] - containing instance masks. 
- keypoints: rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2] - """ - crop_result = ssd_random_crop( - image, boxes, labels, label_scores, masks, keypoints, min_object_covered, - aspect_ratio_range, area_range, overlap_thresh, random_coef, seed) - i = 3 - new_image, new_boxes, new_labels = crop_result[:i] - new_label_scores = None - new_masks = None - new_keypoints = None - if label_scores is not None: - new_label_scores = crop_result[i] - i += 1 - if masks is not None: - new_masks = crop_result[i] - i += 1 - if keypoints is not None: - new_keypoints = crop_result[i] - result = random_pad_to_aspect_ratio( - new_image, - new_boxes, - new_masks, - new_keypoints, - aspect_ratio=aspect_ratio, - min_padded_size_ratio=min_padded_size_ratio, - max_padded_size_ratio=max_padded_size_ratio, - seed=seed) - - result = list(result) - if new_label_scores is not None: - result.insert(2, new_label_scores) - result.insert(2, new_labels) - result = tuple(result) - - return result - - -def get_default_func_arg_map(include_label_scores=False, - include_instance_masks=False, - include_keypoints=False): - """Returns the default mapping from a preprocessor function to its args. - - Args: - include_label_scores: If True, preprocessing functions will modify the - label scores, too. - include_instance_masks: If True, preprocessing functions will modify the - instance masks, too. - include_keypoints: If True, preprocessing functions will modify the - keypoints, too. - - Returns: - A map from preprocessing functions to the arguments they receive. 
- """ - groundtruth_label_scores = None - if include_label_scores: - groundtruth_label_scores = (fields.InputDataFields.groundtruth_label_scores) - - groundtruth_instance_masks = None - if include_instance_masks: - groundtruth_instance_masks = ( - fields.InputDataFields.groundtruth_instance_masks) - - groundtruth_keypoints = None - if include_keypoints: - groundtruth_keypoints = fields.InputDataFields.groundtruth_keypoints - - prep_func_arg_map = { - normalize_image: (fields.InputDataFields.image,), - random_horizontal_flip: ( - fields.InputDataFields.image, - fields.InputDataFields.groundtruth_boxes, - groundtruth_instance_masks, - groundtruth_keypoints,), - random_vertical_flip: ( - fields.InputDataFields.image, - fields.InputDataFields.groundtruth_boxes, - groundtruth_instance_masks, - groundtruth_keypoints,), - random_rotation90: ( - fields.InputDataFields.image, - fields.InputDataFields.groundtruth_boxes, - groundtruth_instance_masks, - groundtruth_keypoints,), - random_pixel_value_scale: (fields.InputDataFields.image,), - random_image_scale: ( - fields.InputDataFields.image, - groundtruth_instance_masks,), - random_rgb_to_gray: (fields.InputDataFields.image,), - random_adjust_brightness: (fields.InputDataFields.image,), - random_adjust_contrast: (fields.InputDataFields.image,), - random_adjust_hue: (fields.InputDataFields.image,), - random_adjust_saturation: (fields.InputDataFields.image,), - random_distort_color: (fields.InputDataFields.image,), - random_jitter_boxes: (fields.InputDataFields.groundtruth_boxes,), - random_crop_image: ( - fields.InputDataFields.image, - fields.InputDataFields.groundtruth_boxes, - fields.InputDataFields.groundtruth_classes, - groundtruth_label_scores, - groundtruth_instance_masks, - groundtruth_keypoints,), - random_pad_image: (fields.InputDataFields.image, - fields.InputDataFields.groundtruth_boxes), - random_crop_pad_image: (fields.InputDataFields.image, - fields.InputDataFields.groundtruth_boxes, - 
fields.InputDataFields.groundtruth_classes, - groundtruth_label_scores), - random_crop_to_aspect_ratio: ( - fields.InputDataFields.image, - fields.InputDataFields.groundtruth_boxes, - fields.InputDataFields.groundtruth_classes, - groundtruth_label_scores, - groundtruth_instance_masks, - groundtruth_keypoints,), - random_pad_to_aspect_ratio: ( - fields.InputDataFields.image, - fields.InputDataFields.groundtruth_boxes, - groundtruth_instance_masks, - groundtruth_keypoints,), - random_black_patches: (fields.InputDataFields.image,), - retain_boxes_above_threshold: ( - fields.InputDataFields.groundtruth_boxes, - fields.InputDataFields.groundtruth_classes, - groundtruth_label_scores, - groundtruth_instance_masks, - groundtruth_keypoints,), - image_to_float: (fields.InputDataFields.image,), - random_resize_method: (fields.InputDataFields.image,), - resize_to_range: ( - fields.InputDataFields.image, - groundtruth_instance_masks,), - resize_to_min_dimension: ( - fields.InputDataFields.image, - groundtruth_instance_masks,), - scale_boxes_to_pixel_coordinates: ( - fields.InputDataFields.image, - fields.InputDataFields.groundtruth_boxes, - groundtruth_keypoints,), - resize_image: ( - fields.InputDataFields.image, - groundtruth_instance_masks,), - subtract_channel_mean: (fields.InputDataFields.image,), - one_hot_encoding: (fields.InputDataFields.groundtruth_image_classes,), - rgb_to_gray: (fields.InputDataFields.image,), - ssd_random_crop: ( - fields.InputDataFields.image, - fields.InputDataFields.groundtruth_boxes, - fields.InputDataFields.groundtruth_classes, - groundtruth_label_scores, - groundtruth_instance_masks, - groundtruth_keypoints,), - ssd_random_crop_pad: (fields.InputDataFields.image, - fields.InputDataFields.groundtruth_boxes, - fields.InputDataFields.groundtruth_classes, - groundtruth_label_scores), - ssd_random_crop_fixed_aspect_ratio: ( - fields.InputDataFields.image, - fields.InputDataFields.groundtruth_boxes, - fields.InputDataFields.groundtruth_classes, - 
groundtruth_label_scores, - groundtruth_instance_masks, - groundtruth_keypoints,), - ssd_random_crop_pad_fixed_aspect_ratio: ( - fields.InputDataFields.image, - fields.InputDataFields.groundtruth_boxes, - fields.InputDataFields.groundtruth_classes, - groundtruth_label_scores, - groundtruth_instance_masks, - groundtruth_keypoints,), - } - - return prep_func_arg_map - - -def preprocess(tensor_dict, preprocess_options, func_arg_map=None): - """Preprocess images and bounding boxes. - - Various types of preprocessing (to be implemented) based on the - preprocess_options dictionary e.g. "crop image" (affects image and possibly - boxes), "white balance image" (affects only image), etc. If self._options - is None, no preprocessing is done. - - Args: - tensor_dict: dictionary that contains images, boxes, and can contain other - things as well. - images-> rank 4 float32 tensor contains - 1 image -> [1, height, width, 3]. - with pixel values varying between [0, 1] - boxes-> rank 2 float32 tensor containing - the bounding boxes -> [N, 4]. - Boxes are in normalized form meaning - their coordinates vary between [0, 1]. - Each row is in the form - of [ymin, xmin, ymax, xmax]. - preprocess_options: It is a list of tuples, where each tuple contains a - function and a dictionary that contains arguments and - their values. - func_arg_map: mapping from preprocessing functions to arguments that they - expect to receive and return. - - Returns: - tensor_dict: which contains the preprocessed images, bounding boxes, etc. - - Raises: - ValueError: (a) If the functions passed to Preprocess - are not in func_arg_map. - (b) If the arguments that a function needs - do not exist in tensor_dict. 
- (c) If image in tensor_dict is not rank 4 - """ - if func_arg_map is None: - func_arg_map = get_default_func_arg_map() - - # changes the images to image (rank 4 to rank 3) since the functions - # receive rank 3 tensor for image - if fields.InputDataFields.image in tensor_dict: - images = tensor_dict[fields.InputDataFields.image] - if len(images.get_shape()) != 4: - raise ValueError('images in tensor_dict should be rank 4') - image = tf.squeeze(images, squeeze_dims=[0]) - tensor_dict[fields.InputDataFields.image] = image - - # Preprocess inputs based on preprocess_options - for option in preprocess_options: - func, params = option - if func not in func_arg_map: - raise ValueError('The function %s does not exist in func_arg_map' % - (func.__name__)) - arg_names = func_arg_map[func] - for a in arg_names: - if a is not None and a not in tensor_dict: - raise ValueError('The function %s requires argument %s' % - (func.__name__, a)) - - def get_arg(key): - return tensor_dict[key] if key is not None else None - - args = [get_arg(a) for a in arg_names] - results = func(*args, **params) - if not isinstance(results, (list, tuple)): - results = (results,) - # Removes None args since the return values will not contain those. - arg_names = [arg_name for arg_name in arg_names if arg_name is not None] - for res, arg_name in zip(results, arg_names): - tensor_dict[arg_name] = res - - # changes the image to images (rank 3 to rank 4) to be compatible to what - # we received in the first place - if fields.InputDataFields.image in tensor_dict: - image = tensor_dict[fields.InputDataFields.image] - images = tf.expand_dims(image, 0) - tensor_dict[fields.InputDataFields.image] = images - - return tensor_dict diff --git a/object_detection/core/preprocessor_test.py b/object_detection/core/preprocessor_test.py deleted file mode 100644 index a163bea0..00000000 --- a/object_detection/core/preprocessor_test.py +++ /dev/null @@ -1,2288 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. 
All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.core.preprocessor.""" - -import numpy as np -import six - -import tensorflow as tf - -from object_detection.core import preprocessor -from object_detection.core import standard_fields as fields - -if six.PY2: - import mock # pylint: disable=g-import-not-at-top -else: - from unittest import mock # pylint: disable=g-import-not-at-top - - -class PreprocessorTest(tf.test.TestCase): - - def createColorfulTestImage(self): - ch255 = tf.fill([1, 100, 200, 1], tf.constant(255, dtype=tf.uint8)) - ch128 = tf.fill([1, 100, 200, 1], tf.constant(128, dtype=tf.uint8)) - ch0 = tf.fill([1, 100, 200, 1], tf.constant(0, dtype=tf.uint8)) - imr = tf.concat([ch255, ch0, ch0], 3) - img = tf.concat([ch255, ch255, ch0], 3) - imb = tf.concat([ch255, ch0, ch255], 3) - imw = tf.concat([ch128, ch128, ch128], 3) - imu = tf.concat([imr, img], 2) - imd = tf.concat([imb, imw], 2) - im = tf.concat([imu, imd], 1) - return im - - def createTestImages(self): - images_r = tf.constant([[[128, 128, 128, 128], [0, 0, 128, 128], - [0, 128, 128, 128], [192, 192, 128, 128]]], - dtype=tf.uint8) - images_r = tf.expand_dims(images_r, 3) - images_g = tf.constant([[[0, 0, 128, 128], [0, 0, 128, 128], - [0, 128, 192, 192], [192, 192, 128, 192]]], - dtype=tf.uint8) - images_g = tf.expand_dims(images_g, 3) - images_b = tf.constant([[[128, 
128, 192, 0], [0, 0, 128, 192], - [0, 128, 128, 0], [192, 192, 192, 128]]], - dtype=tf.uint8) - images_b = tf.expand_dims(images_b, 3) - images = tf.concat([images_r, images_g, images_b], 3) - return images - - def createEmptyTestBoxes(self): - boxes = tf.constant([[]], dtype=tf.float32) - return boxes - - def createTestBoxes(self): - boxes = tf.constant( - [[0.0, 0.25, 0.75, 1.0], [0.25, 0.5, 0.75, 1.0]], dtype=tf.float32) - return boxes - - def createTestLabelScores(self): - return tf.constant([1.0, 0.5], dtype=tf.float32) - - def createTestLabelScoresWithMissingScore(self): - return tf.constant([0.5, np.nan], dtype=tf.float32) - - def createTestMasks(self): - mask = np.array([ - [[255.0, 0.0, 0.0], - [255.0, 0.0, 0.0], - [255.0, 0.0, 0.0]], - [[255.0, 255.0, 0.0], - [255.0, 255.0, 0.0], - [255.0, 255.0, 0.0]]]) - return tf.constant(mask, dtype=tf.float32) - - def createTestKeypoints(self): - keypoints = np.array([ - [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]], - [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]], - ]) - return tf.constant(keypoints, dtype=tf.float32) - - def createTestKeypointsInsideCrop(self): - keypoints = np.array([ - [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]], - [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]], - ]) - return tf.constant(keypoints, dtype=tf.float32) - - def createTestKeypointsOutsideCrop(self): - keypoints = np.array([ - [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]], - [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]], - ]) - return tf.constant(keypoints, dtype=tf.float32) - - def createKeypointFlipPermutation(self): - return np.array([0, 2, 1], dtype=np.int32) - - def createTestLabels(self): - labels = tf.constant([1, 2], dtype=tf.int32) - return labels - - def createTestBoxesOutOfImage(self): - boxes = tf.constant( - [[-0.1, 0.25, 0.75, 1], [0.25, 0.5, 0.75, 1.1]], dtype=tf.float32) - return boxes - - def expectedImagesAfterNormalization(self): - images_r = tf.constant([[[0, 0, 0, 0], [-1, -1, 0, 0], - [-1, 0, 0, 0], [0.5, 0.5, 0, 0]]], - dtype=tf.float32) - images_r = 
tf.expand_dims(images_r, 3) - images_g = tf.constant([[[-1, -1, 0, 0], [-1, -1, 0, 0], - [-1, 0, 0.5, 0.5], [0.5, 0.5, 0, 0.5]]], - dtype=tf.float32) - images_g = tf.expand_dims(images_g, 3) - images_b = tf.constant([[[0, 0, 0.5, -1], [-1, -1, 0, 0.5], - [-1, 0, 0, -1], [0.5, 0.5, 0.5, 0]]], - dtype=tf.float32) - images_b = tf.expand_dims(images_b, 3) - images = tf.concat([images_r, images_g, images_b], 3) - return images - - def expectedMaxImageAfterColorScale(self): - images_r = tf.constant([[[0.1, 0.1, 0.1, 0.1], [-0.9, -0.9, 0.1, 0.1], - [-0.9, 0.1, 0.1, 0.1], [0.6, 0.6, 0.1, 0.1]]], - dtype=tf.float32) - images_r = tf.expand_dims(images_r, 3) - images_g = tf.constant([[[-0.9, -0.9, 0.1, 0.1], [-0.9, -0.9, 0.1, 0.1], - [-0.9, 0.1, 0.6, 0.6], [0.6, 0.6, 0.1, 0.6]]], - dtype=tf.float32) - images_g = tf.expand_dims(images_g, 3) - images_b = tf.constant([[[0.1, 0.1, 0.6, -0.9], [-0.9, -0.9, 0.1, 0.6], - [-0.9, 0.1, 0.1, -0.9], [0.6, 0.6, 0.6, 0.1]]], - dtype=tf.float32) - images_b = tf.expand_dims(images_b, 3) - images = tf.concat([images_r, images_g, images_b], 3) - return images - - def expectedMinImageAfterColorScale(self): - images_r = tf.constant([[[-0.1, -0.1, -0.1, -0.1], [-1, -1, -0.1, -0.1], - [-1, -0.1, -0.1, -0.1], [0.4, 0.4, -0.1, -0.1]]], - dtype=tf.float32) - images_r = tf.expand_dims(images_r, 3) - images_g = tf.constant([[[-1, -1, -0.1, -0.1], [-1, -1, -0.1, -0.1], - [-1, -0.1, 0.4, 0.4], [0.4, 0.4, -0.1, 0.4]]], - dtype=tf.float32) - images_g = tf.expand_dims(images_g, 3) - images_b = tf.constant([[[-0.1, -0.1, 0.4, -1], [-1, -1, -0.1, 0.4], - [-1, -0.1, -0.1, -1], [0.4, 0.4, 0.4, -0.1]]], - dtype=tf.float32) - images_b = tf.expand_dims(images_b, 3) - images = tf.concat([images_r, images_g, images_b], 3) - return images - - def expectedImagesAfterLeftRightFlip(self): - images_r = tf.constant([[[0, 0, 0, 0], [0, 0, -1, -1], - [0, 0, 0, -1], [0, 0, 0.5, 0.5]]], - dtype=tf.float32) - images_r = tf.expand_dims(images_r, 3) - images_g = 
tf.constant([[[0, 0, -1, -1], [0, 0, -1, -1], - [0.5, 0.5, 0, -1], [0.5, 0, 0.5, 0.5]]], - dtype=tf.float32) - images_g = tf.expand_dims(images_g, 3) - images_b = tf.constant([[[-1, 0.5, 0, 0], [0.5, 0, -1, -1], - [-1, 0, 0, -1], [0, 0.5, 0.5, 0.5]]], - dtype=tf.float32) - images_b = tf.expand_dims(images_b, 3) - images = tf.concat([images_r, images_g, images_b], 3) - return images - - def expectedImagesAfterUpDownFlip(self): - images_r = tf.constant([[[0.5, 0.5, 0, 0], [-1, 0, 0, 0], - [-1, -1, 0, 0], [0, 0, 0, 0]]], - dtype=tf.float32) - images_r = tf.expand_dims(images_r, 3) - images_g = tf.constant([[[0.5, 0.5, 0, 0.5], [-1, 0, 0.5, 0.5], - [-1, -1, 0, 0], [-1, -1, 0, 0]]], - dtype=tf.float32) - images_g = tf.expand_dims(images_g, 3) - images_b = tf.constant([[[0.5, 0.5, 0.5, 0], [-1, 0, 0, -1], - [-1, -1, 0, 0.5], [0, 0, 0.5, -1]]], - dtype=tf.float32) - images_b = tf.expand_dims(images_b, 3) - images = tf.concat([images_r, images_g, images_b], 3) - return images - - def expectedImagesAfterRot90(self): - images_r = tf.constant([[[0, 0, 0, 0], [0, 0, 0, 0], - [0, -1, 0, 0.5], [0, -1, -1, 0.5]]], - dtype=tf.float32) - images_r = tf.expand_dims(images_r, 3) - images_g = tf.constant([[[0, 0, 0.5, 0.5], [0, 0, 0.5, 0], - [-1, -1, 0, 0.5], [-1, -1, -1, 0.5]]], - dtype=tf.float32) - images_g = tf.expand_dims(images_g, 3) - images_b = tf.constant([[[-1, 0.5, -1, 0], [0.5, 0, 0, 0.5], - [0, -1, 0, 0.5], [0, -1, -1, 0.5]]], - dtype=tf.float32) - images_b = tf.expand_dims(images_b, 3) - images = tf.concat([images_r, images_g, images_b], 3) - return images - - def expectedBoxesAfterLeftRightFlip(self): - boxes = tf.constant([[0.0, 0.0, 0.75, 0.75], [0.25, 0.0, 0.75, 0.5]], - dtype=tf.float32) - return boxes - - def expectedBoxesAfterUpDownFlip(self): - boxes = tf.constant([[0.25, 0.25, 1.0, 1.0], [0.25, 0.5, 0.75, 1.0]], - dtype=tf.float32) - return boxes - - def expectedBoxesAfterRot90(self): - boxes = tf.constant( - [[0.0, 0.0, 0.75, 0.75], [0.0, 0.25, 0.5, 0.75]], 
dtype=tf.float32) - return boxes - - def expectedMasksAfterLeftRightFlip(self): - mask = np.array([ - [[0.0, 0.0, 255.0], - [0.0, 0.0, 255.0], - [0.0, 0.0, 255.0]], - [[0.0, 255.0, 255.0], - [0.0, 255.0, 255.0], - [0.0, 255.0, 255.0]]]) - return tf.constant(mask, dtype=tf.float32) - - def expectedMasksAfterUpDownFlip(self): - mask = np.array([ - [[255.0, 0.0, 0.0], - [255.0, 0.0, 0.0], - [255.0, 0.0, 0.0]], - [[255.0, 255.0, 0.0], - [255.0, 255.0, 0.0], - [255.0, 255.0, 0.0]]]) - return tf.constant(mask, dtype=tf.float32) - - def expectedMasksAfterRot90(self): - mask = np.array([ - [[0.0, 0.0, 0.0], - [0.0, 0.0, 0.0], - [255.0, 255.0, 255.0]], - [[0.0, 0.0, 0.0], - [255.0, 255.0, 255.0], - [255.0, 255.0, 255.0]]]) - return tf.constant(mask, dtype=tf.float32) - - def expectedLabelScoresAfterThresholding(self): - return tf.constant([1.0], dtype=tf.float32) - - def expectedBoxesAfterThresholding(self): - return tf.constant([[0.0, 0.25, 0.75, 1.0]], dtype=tf.float32) - - def expectedLabelsAfterThresholding(self): - return tf.constant([1], dtype=tf.float32) - - def expectedMasksAfterThresholding(self): - mask = np.array([ - [[255.0, 0.0, 0.0], - [255.0, 0.0, 0.0], - [255.0, 0.0, 0.0]]]) - return tf.constant(mask, dtype=tf.float32) - - def expectedKeypointsAfterThresholding(self): - keypoints = np.array([ - [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]] - ]) - return tf.constant(keypoints, dtype=tf.float32) - - def expectedLabelScoresAfterThresholdingWithMissingScore(self): - return tf.constant([np.nan], dtype=tf.float32) - - def expectedBoxesAfterThresholdingWithMissingScore(self): - return tf.constant([[0.25, 0.5, 0.75, 1]], dtype=tf.float32) - - def expectedLabelsAfterThresholdingWithMissingScore(self): - return tf.constant([2], dtype=tf.float32) - - def testNormalizeImage(self): - preprocess_options = [(preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 256, - 'target_minval': -1, - 'target_maxval': 1 - })] - images = self.createTestImages() - 
tensor_dict = {fields.InputDataFields.image: images} - tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options) - images = tensor_dict[fields.InputDataFields.image] - images_expected = self.expectedImagesAfterNormalization() - - with self.test_session() as sess: - (images_, images_expected_) = sess.run( - [images, images_expected]) - images_shape_ = images_.shape - images_expected_shape_ = images_expected_.shape - expected_shape = [1, 4, 4, 3] - self.assertAllEqual(images_expected_shape_, images_shape_) - self.assertAllEqual(images_shape_, expected_shape) - self.assertAllClose(images_, images_expected_) - - def testRetainBoxesAboveThreshold(self): - boxes = self.createTestBoxes() - labels = self.createTestLabels() - label_scores = self.createTestLabelScores() - (retained_boxes, retained_labels, - retained_label_scores) = preprocessor.retain_boxes_above_threshold( - boxes, labels, label_scores, threshold=0.6) - with self.test_session() as sess: - (retained_boxes_, retained_labels_, retained_label_scores_, - expected_retained_boxes_, expected_retained_labels_, - expected_retained_label_scores_) = sess.run([ - retained_boxes, retained_labels, retained_label_scores, - self.expectedBoxesAfterThresholding(), - self.expectedLabelsAfterThresholding(), - self.expectedLabelScoresAfterThresholding()]) - self.assertAllClose( - retained_boxes_, expected_retained_boxes_) - self.assertAllClose( - retained_labels_, expected_retained_labels_) - self.assertAllClose( - retained_label_scores_, expected_retained_label_scores_) - - def testRetainBoxesAboveThresholdWithMasks(self): - boxes = self.createTestBoxes() - labels = self.createTestLabels() - label_scores = self.createTestLabelScores() - masks = self.createTestMasks() - _, _, _, retained_masks = preprocessor.retain_boxes_above_threshold( - boxes, labels, label_scores, masks, threshold=0.6) - with self.test_session() as sess: - retained_masks_, expected_retained_masks_ = sess.run([ - retained_masks, 
self.expectedMasksAfterThresholding()]) - - self.assertAllClose( - retained_masks_, expected_retained_masks_) - - def testRetainBoxesAboveThresholdWithKeypoints(self): - boxes = self.createTestBoxes() - labels = self.createTestLabels() - label_scores = self.createTestLabelScores() - keypoints = self.createTestKeypoints() - (_, _, _, retained_keypoints) = preprocessor.retain_boxes_above_threshold( - boxes, labels, label_scores, keypoints=keypoints, threshold=0.6) - with self.test_session() as sess: - (retained_keypoints_, - expected_retained_keypoints_) = sess.run([ - retained_keypoints, - self.expectedKeypointsAfterThresholding()]) - - self.assertAllClose( - retained_keypoints_, expected_retained_keypoints_) - - def testRetainBoxesAboveThresholdWithMissingScore(self): - boxes = self.createTestBoxes() - labels = self.createTestLabels() - label_scores = self.createTestLabelScoresWithMissingScore() - (retained_boxes, retained_labels, - retained_label_scores) = preprocessor.retain_boxes_above_threshold( - boxes, labels, label_scores, threshold=0.6) - with self.test_session() as sess: - (retained_boxes_, retained_labels_, retained_label_scores_, - expected_retained_boxes_, expected_retained_labels_, - expected_retained_label_scores_) = sess.run([ - retained_boxes, retained_labels, retained_label_scores, - self.expectedBoxesAfterThresholdingWithMissingScore(), - self.expectedLabelsAfterThresholdingWithMissingScore(), - self.expectedLabelScoresAfterThresholdingWithMissingScore()]) - self.assertAllClose( - retained_boxes_, expected_retained_boxes_) - self.assertAllClose( - retained_labels_, expected_retained_labels_) - self.assertAllClose( - retained_label_scores_, expected_retained_label_scores_) - - def testFlipBoxesLeftRight(self): - boxes = self.createTestBoxes() - flipped_boxes = preprocessor._flip_boxes_left_right(boxes) - expected_boxes = self.expectedBoxesAfterLeftRightFlip() - with self.test_session() as sess: - flipped_boxes, expected_boxes = 
sess.run([flipped_boxes, expected_boxes]) - self.assertAllEqual(flipped_boxes.flatten(), expected_boxes.flatten()) - - def testFlipBoxesUpDown(self): - boxes = self.createTestBoxes() - flipped_boxes = preprocessor._flip_boxes_up_down(boxes) - expected_boxes = self.expectedBoxesAfterUpDownFlip() - with self.test_session() as sess: - flipped_boxes, expected_boxes = sess.run([flipped_boxes, expected_boxes]) - self.assertAllEqual(flipped_boxes.flatten(), expected_boxes.flatten()) - - def testRot90Boxes(self): - boxes = self.createTestBoxes() - rotated_boxes = preprocessor._rot90_boxes(boxes) - expected_boxes = self.expectedBoxesAfterRot90() - with self.test_session() as sess: - rotated_boxes, expected_boxes = sess.run([rotated_boxes, expected_boxes]) - self.assertAllEqual(rotated_boxes.flatten(), expected_boxes.flatten()) - - def testFlipMasksLeftRight(self): - test_mask = self.createTestMasks() - flipped_mask = preprocessor._flip_masks_left_right(test_mask) - expected_mask = self.expectedMasksAfterLeftRightFlip() - with self.test_session() as sess: - flipped_mask, expected_mask = sess.run([flipped_mask, expected_mask]) - self.assertAllEqual(flipped_mask.flatten(), expected_mask.flatten()) - - def testFlipMasksUpDown(self): - test_mask = self.createTestMasks() - flipped_mask = preprocessor._flip_masks_up_down(test_mask) - expected_mask = self.expectedMasksAfterUpDownFlip() - with self.test_session() as sess: - flipped_mask, expected_mask = sess.run([flipped_mask, expected_mask]) - self.assertAllEqual(flipped_mask.flatten(), expected_mask.flatten()) - - def testRot90Masks(self): - test_mask = self.createTestMasks() - rotated_mask = preprocessor._rot90_masks(test_mask) - expected_mask = self.expectedMasksAfterRot90() - with self.test_session() as sess: - rotated_mask, expected_mask = sess.run([rotated_mask, expected_mask]) - self.assertAllEqual(rotated_mask.flatten(), expected_mask.flatten()) - - def testRandomHorizontalFlip(self): - preprocess_options = 
[(preprocessor.random_horizontal_flip, {})] - images = self.expectedImagesAfterNormalization() - boxes = self.createTestBoxes() - tensor_dict = {fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes} - images_expected1 = self.expectedImagesAfterLeftRightFlip() - boxes_expected1 = self.expectedBoxesAfterLeftRightFlip() - images_expected2 = images - boxes_expected2 = boxes - tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options) - images = tensor_dict[fields.InputDataFields.image] - boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] - - boxes_diff1 = tf.squared_difference(boxes, boxes_expected1) - boxes_diff2 = tf.squared_difference(boxes, boxes_expected2) - boxes_diff = tf.multiply(boxes_diff1, boxes_diff2) - boxes_diff_expected = tf.zeros_like(boxes_diff) - - images_diff1 = tf.squared_difference(images, images_expected1) - images_diff2 = tf.squared_difference(images, images_expected2) - images_diff = tf.multiply(images_diff1, images_diff2) - images_diff_expected = tf.zeros_like(images_diff) - - with self.test_session() as sess: - (images_diff_, images_diff_expected_, boxes_diff_, - boxes_diff_expected_) = sess.run([images_diff, images_diff_expected, - boxes_diff, boxes_diff_expected]) - self.assertAllClose(boxes_diff_, boxes_diff_expected_) - self.assertAllClose(images_diff_, images_diff_expected_) - - def testRandomHorizontalFlipWithEmptyBoxes(self): - preprocess_options = [(preprocessor.random_horizontal_flip, {})] - images = self.expectedImagesAfterNormalization() - boxes = self.createEmptyTestBoxes() - tensor_dict = {fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes} - images_expected1 = self.expectedImagesAfterLeftRightFlip() - boxes_expected = self.createEmptyTestBoxes() - images_expected2 = images - tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options) - images = tensor_dict[fields.InputDataFields.image] - boxes = 
tensor_dict[fields.InputDataFields.groundtruth_boxes] - - images_diff1 = tf.squared_difference(images, images_expected1) - images_diff2 = tf.squared_difference(images, images_expected2) - images_diff = tf.multiply(images_diff1, images_diff2) - images_diff_expected = tf.zeros_like(images_diff) - - with self.test_session() as sess: - (images_diff_, images_diff_expected_, boxes_, - boxes_expected_) = sess.run([images_diff, images_diff_expected, boxes, - boxes_expected]) - self.assertAllClose(boxes_, boxes_expected_) - self.assertAllClose(images_diff_, images_diff_expected_) - - def testRunRandomHorizontalFlipWithMaskAndKeypoints(self): - preprocess_options = [(preprocessor.random_horizontal_flip, {})] - image_height = 3 - image_width = 3 - images = tf.random_uniform([1, image_height, image_width, 3]) - boxes = self.createTestBoxes() - masks = self.createTestMasks() - keypoints = self.createTestKeypoints() - keypoint_flip_permutation = self.createKeypointFlipPermutation() - tensor_dict = { - fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_instance_masks: masks, - fields.InputDataFields.groundtruth_keypoints: keypoints - } - preprocess_options = [ - (preprocessor.random_horizontal_flip, - {'keypoint_flip_permutation': keypoint_flip_permutation})] - preprocessor_arg_map = preprocessor.get_default_func_arg_map( - include_instance_masks=True, include_keypoints=True) - tensor_dict = preprocessor.preprocess( - tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map) - boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] - masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks] - keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints] - with self.test_session() as sess: - boxes, masks, keypoints = sess.run([boxes, masks, keypoints]) - self.assertTrue(boxes is not None) - self.assertTrue(masks is not None) - self.assertTrue(keypoints is not None) - - 
def testRandomVerticalFlip(self): - preprocess_options = [(preprocessor.random_vertical_flip, {})] - images = self.expectedImagesAfterNormalization() - boxes = self.createTestBoxes() - tensor_dict = {fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes} - images_expected1 = self.expectedImagesAfterUpDownFlip() - boxes_expected1 = self.expectedBoxesAfterUpDownFlip() - images_expected2 = images - boxes_expected2 = boxes - tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options) - images = tensor_dict[fields.InputDataFields.image] - boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] - - boxes_diff1 = tf.squared_difference(boxes, boxes_expected1) - boxes_diff2 = tf.squared_difference(boxes, boxes_expected2) - boxes_diff = tf.multiply(boxes_diff1, boxes_diff2) - boxes_diff_expected = tf.zeros_like(boxes_diff) - - images_diff1 = tf.squared_difference(images, images_expected1) - images_diff2 = tf.squared_difference(images, images_expected2) - images_diff = tf.multiply(images_diff1, images_diff2) - images_diff_expected = tf.zeros_like(images_diff) - - with self.test_session() as sess: - (images_diff_, images_diff_expected_, boxes_diff_, - boxes_diff_expected_) = sess.run([images_diff, images_diff_expected, - boxes_diff, boxes_diff_expected]) - self.assertAllClose(boxes_diff_, boxes_diff_expected_) - self.assertAllClose(images_diff_, images_diff_expected_) - - def testRandomVerticalFlipWithEmptyBoxes(self): - preprocess_options = [(preprocessor.random_vertical_flip, {})] - images = self.expectedImagesAfterNormalization() - boxes = self.createEmptyTestBoxes() - tensor_dict = {fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes} - images_expected1 = self.expectedImagesAfterUpDownFlip() - boxes_expected = self.createEmptyTestBoxes() - images_expected2 = images - tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options) - images = 
tensor_dict[fields.InputDataFields.image] - boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] - - images_diff1 = tf.squared_difference(images, images_expected1) - images_diff2 = tf.squared_difference(images, images_expected2) - images_diff = tf.multiply(images_diff1, images_diff2) - images_diff_expected = tf.zeros_like(images_diff) - - with self.test_session() as sess: - (images_diff_, images_diff_expected_, boxes_, - boxes_expected_) = sess.run([images_diff, images_diff_expected, boxes, - boxes_expected]) - self.assertAllClose(boxes_, boxes_expected_) - self.assertAllClose(images_diff_, images_diff_expected_) - - def testRunRandomVerticalFlipWithMaskAndKeypoints(self): - preprocess_options = [(preprocessor.random_vertical_flip, {})] - image_height = 3 - image_width = 3 - images = tf.random_uniform([1, image_height, image_width, 3]) - boxes = self.createTestBoxes() - masks = self.createTestMasks() - keypoints = self.createTestKeypoints() - keypoint_flip_permutation = self.createKeypointFlipPermutation() - tensor_dict = { - fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_instance_masks: masks, - fields.InputDataFields.groundtruth_keypoints: keypoints - } - preprocess_options = [ - (preprocessor.random_vertical_flip, - {'keypoint_flip_permutation': keypoint_flip_permutation})] - preprocessor_arg_map = preprocessor.get_default_func_arg_map( - include_instance_masks=True, include_keypoints=True) - tensor_dict = preprocessor.preprocess( - tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map) - boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] - masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks] - keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints] - with self.test_session() as sess: - boxes, masks, keypoints = sess.run([boxes, masks, keypoints]) - self.assertTrue(boxes is not None) - self.assertTrue(masks is not None) 
- self.assertTrue(keypoints is not None) - - def testRandomRotation90(self): - preprocess_options = [(preprocessor.random_rotation90, {})] - images = self.expectedImagesAfterNormalization() - boxes = self.createTestBoxes() - tensor_dict = {fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes} - images_expected1 = self.expectedImagesAfterRot90() - boxes_expected1 = self.expectedBoxesAfterRot90() - images_expected2 = images - boxes_expected2 = boxes - tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options) - images = tensor_dict[fields.InputDataFields.image] - boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] - - boxes_diff1 = tf.squared_difference(boxes, boxes_expected1) - boxes_diff2 = tf.squared_difference(boxes, boxes_expected2) - boxes_diff = tf.multiply(boxes_diff1, boxes_diff2) - boxes_diff_expected = tf.zeros_like(boxes_diff) - - images_diff1 = tf.squared_difference(images, images_expected1) - images_diff2 = tf.squared_difference(images, images_expected2) - images_diff = tf.multiply(images_diff1, images_diff2) - images_diff_expected = tf.zeros_like(images_diff) - - with self.test_session() as sess: - (images_diff_, images_diff_expected_, boxes_diff_, - boxes_diff_expected_) = sess.run([images_diff, images_diff_expected, - boxes_diff, boxes_diff_expected]) - self.assertAllClose(boxes_diff_, boxes_diff_expected_) - self.assertAllClose(images_diff_, images_diff_expected_) - - def testRandomRotation90WithEmptyBoxes(self): - preprocess_options = [(preprocessor.random_rotation90, {})] - images = self.expectedImagesAfterNormalization() - boxes = self.createEmptyTestBoxes() - tensor_dict = {fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes} - images_expected1 = self.expectedImagesAfterRot90() - boxes_expected = self.createEmptyTestBoxes() - images_expected2 = images - tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options) - images = 
tensor_dict[fields.InputDataFields.image] - boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] - - images_diff1 = tf.squared_difference(images, images_expected1) - images_diff2 = tf.squared_difference(images, images_expected2) - images_diff = tf.multiply(images_diff1, images_diff2) - images_diff_expected = tf.zeros_like(images_diff) - - with self.test_session() as sess: - (images_diff_, images_diff_expected_, boxes_, - boxes_expected_) = sess.run([images_diff, images_diff_expected, boxes, - boxes_expected]) - self.assertAllClose(boxes_, boxes_expected_) - self.assertAllClose(images_diff_, images_diff_expected_) - - def testRunRandomRotation90WithMaskAndKeypoints(self): - preprocess_options = [(preprocessor.random_rotation90, {})] - image_height = 3 - image_width = 3 - images = tf.random_uniform([1, image_height, image_width, 3]) - boxes = self.createTestBoxes() - masks = self.createTestMasks() - keypoints = self.createTestKeypoints() - tensor_dict = { - fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_instance_masks: masks, - fields.InputDataFields.groundtruth_keypoints: keypoints - } - preprocessor_arg_map = preprocessor.get_default_func_arg_map( - include_instance_masks=True, include_keypoints=True) - tensor_dict = preprocessor.preprocess( - tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map) - boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] - masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks] - keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints] - with self.test_session() as sess: - boxes, masks, keypoints = sess.run([boxes, masks, keypoints]) - self.assertTrue(boxes is not None) - self.assertTrue(masks is not None) - self.assertTrue(keypoints is not None) - - def testRandomPixelValueScale(self): - preprocessing_options = [] - preprocessing_options.append((preprocessor.normalize_image, { - 'original_minval': 
0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })) - preprocessing_options.append((preprocessor.random_pixel_value_scale, {})) - images = self.createTestImages() - tensor_dict = {fields.InputDataFields.image: images} - tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) - images_min = tf.to_float(images) * 0.9 / 255.0 - images_max = tf.to_float(images) * 1.1 / 255.0 - images = tensor_dict[fields.InputDataFields.image] - values_greater = tf.greater_equal(images, images_min) - values_less = tf.less_equal(images, images_max) - values_true = tf.fill([1, 4, 4, 3], True) - with self.test_session() as sess: - (values_greater_, values_less_, values_true_) = sess.run( - [values_greater, values_less, values_true]) - self.assertAllClose(values_greater_, values_true_) - self.assertAllClose(values_less_, values_true_) - - def testRandomImageScale(self): - preprocess_options = [(preprocessor.random_image_scale, {})] - images_original = self.createTestImages() - tensor_dict = {fields.InputDataFields.image: images_original} - tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options) - images_scaled = tensor_dict[fields.InputDataFields.image] - images_original_shape = tf.shape(images_original) - images_scaled_shape = tf.shape(images_scaled) - with self.test_session() as sess: - (images_original_shape_, images_scaled_shape_) = sess.run( - [images_original_shape, images_scaled_shape]) - self.assertTrue( - images_original_shape_[1] * 0.5 <= images_scaled_shape_[1]) - self.assertTrue( - images_original_shape_[1] * 2.0 >= images_scaled_shape_[1]) - self.assertTrue( - images_original_shape_[2] * 0.5 <= images_scaled_shape_[2]) - self.assertTrue( - images_original_shape_[2] * 2.0 >= images_scaled_shape_[2]) - - def testRandomRGBtoGray(self): - preprocess_options = [(preprocessor.random_rgb_to_gray, {})] - images_original = self.createTestImages() - tensor_dict = {fields.InputDataFields.image: images_original} - tensor_dict = 
preprocessor.preprocess(tensor_dict, preprocess_options) - images_gray = tensor_dict[fields.InputDataFields.image] - images_gray_r, images_gray_g, images_gray_b = tf.split( - value=images_gray, num_or_size_splits=3, axis=3) - images_r, images_g, images_b = tf.split( - value=images_original, num_or_size_splits=3, axis=3) - images_r_diff1 = tf.squared_difference(tf.to_float(images_r), - tf.to_float(images_gray_r)) - images_r_diff2 = tf.squared_difference(tf.to_float(images_gray_r), - tf.to_float(images_gray_g)) - images_r_diff = tf.multiply(images_r_diff1, images_r_diff2) - images_g_diff1 = tf.squared_difference(tf.to_float(images_g), - tf.to_float(images_gray_g)) - images_g_diff2 = tf.squared_difference(tf.to_float(images_gray_g), - tf.to_float(images_gray_b)) - images_g_diff = tf.multiply(images_g_diff1, images_g_diff2) - images_b_diff1 = tf.squared_difference(tf.to_float(images_b), - tf.to_float(images_gray_b)) - images_b_diff2 = tf.squared_difference(tf.to_float(images_gray_b), - tf.to_float(images_gray_r)) - images_b_diff = tf.multiply(images_b_diff1, images_b_diff2) - image_zero1 = tf.constant(0, dtype=tf.float32, shape=[1, 4, 4, 1]) - with self.test_session() as sess: - (images_r_diff_, images_g_diff_, images_b_diff_, image_zero1_) = sess.run( - [images_r_diff, images_g_diff, images_b_diff, image_zero1]) - self.assertAllClose(images_r_diff_, image_zero1_) - self.assertAllClose(images_g_diff_, image_zero1_) - self.assertAllClose(images_b_diff_, image_zero1_) - - def testRandomAdjustBrightness(self): - preprocessing_options = [] - preprocessing_options.append((preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })) - preprocessing_options.append((preprocessor.random_adjust_brightness, {})) - images_original = self.createTestImages() - tensor_dict = {fields.InputDataFields.image: images_original} - tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) - images_bright = 
tensor_dict[fields.InputDataFields.image] - image_original_shape = tf.shape(images_original) - image_bright_shape = tf.shape(images_bright) - with self.test_session() as sess: - (image_original_shape_, image_bright_shape_) = sess.run( - [image_original_shape, image_bright_shape]) - self.assertAllEqual(image_original_shape_, image_bright_shape_) - - def testRandomAdjustContrast(self): - preprocessing_options = [] - preprocessing_options.append((preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })) - preprocessing_options.append((preprocessor.random_adjust_contrast, {})) - images_original = self.createTestImages() - tensor_dict = {fields.InputDataFields.image: images_original} - tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) - images_contrast = tensor_dict[fields.InputDataFields.image] - image_original_shape = tf.shape(images_original) - image_contrast_shape = tf.shape(images_contrast) - with self.test_session() as sess: - (image_original_shape_, image_contrast_shape_) = sess.run( - [image_original_shape, image_contrast_shape]) - self.assertAllEqual(image_original_shape_, image_contrast_shape_) - - def testRandomAdjustHue(self): - preprocessing_options = [] - preprocessing_options.append((preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })) - preprocessing_options.append((preprocessor.random_adjust_hue, {})) - images_original = self.createTestImages() - tensor_dict = {fields.InputDataFields.image: images_original} - tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) - images_hue = tensor_dict[fields.InputDataFields.image] - image_original_shape = tf.shape(images_original) - image_hue_shape = tf.shape(images_hue) - with self.test_session() as sess: - (image_original_shape_, image_hue_shape_) = sess.run( - [image_original_shape, image_hue_shape]) - 
self.assertAllEqual(image_original_shape_, image_hue_shape_) - - def testRandomDistortColor(self): - preprocessing_options = [] - preprocessing_options.append((preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })) - preprocessing_options.append((preprocessor.random_distort_color, {})) - images_original = self.createTestImages() - images_original_shape = tf.shape(images_original) - tensor_dict = {fields.InputDataFields.image: images_original} - tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) - images_distorted_color = tensor_dict[fields.InputDataFields.image] - images_distorted_color_shape = tf.shape(images_distorted_color) - with self.test_session() as sess: - (images_original_shape_, images_distorted_color_shape_) = sess.run( - [images_original_shape, images_distorted_color_shape]) - self.assertAllEqual(images_original_shape_, images_distorted_color_shape_) - - def testRandomJitterBoxes(self): - preprocessing_options = [] - preprocessing_options.append((preprocessor.random_jitter_boxes, {})) - boxes = self.createTestBoxes() - boxes_shape = tf.shape(boxes) - tensor_dict = {fields.InputDataFields.groundtruth_boxes: boxes} - tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) - distorted_boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] - distorted_boxes_shape = tf.shape(distorted_boxes) - - with self.test_session() as sess: - (boxes_shape_, distorted_boxes_shape_) = sess.run( - [boxes_shape, distorted_boxes_shape]) - self.assertAllEqual(boxes_shape_, distorted_boxes_shape_) - - def testRandomCropImage(self): - preprocessing_options = [] - preprocessing_options.append((preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })) - preprocessing_options.append((preprocessor.random_crop_image, {})) - images = self.createTestImages() - boxes = self.createTestBoxes() 
- labels = self.createTestLabels() - tensor_dict = { - fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - } - distorted_tensor_dict = preprocessor.preprocess(tensor_dict, - preprocessing_options) - distorted_images = distorted_tensor_dict[fields.InputDataFields.image] - distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - boxes_rank = tf.rank(boxes) - distorted_boxes_rank = tf.rank(distorted_boxes) - images_rank = tf.rank(images) - distorted_images_rank = tf.rank(distorted_images) - self.assertEqual(3, distorted_images.get_shape()[3]) - - with self.test_session() as sess: - (boxes_rank_, distorted_boxes_rank_, images_rank_, - distorted_images_rank_) = sess.run([ - boxes_rank, distorted_boxes_rank, images_rank, distorted_images_rank - ]) - self.assertAllEqual(boxes_rank_, distorted_boxes_rank_) - self.assertAllEqual(images_rank_, distorted_images_rank_) - - def testRandomCropImageGrayscale(self): - preprocessing_options = [(preprocessor.rgb_to_gray, {}), - (preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1, - }), - (preprocessor.random_crop_image, {})] - images = self.createTestImages() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - tensor_dict = { - fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - } - distorted_tensor_dict = preprocessor.preprocess( - tensor_dict, preprocessing_options) - distorted_images = distorted_tensor_dict[fields.InputDataFields.image] - distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - boxes_rank = tf.rank(boxes) - distorted_boxes_rank = tf.rank(distorted_boxes) - images_rank = tf.rank(images) - distorted_images_rank = tf.rank(distorted_images) - self.assertEqual(1, 
distorted_images.get_shape()[3]) - - with self.test_session() as sess: - session_results = sess.run([ - boxes_rank, distorted_boxes_rank, images_rank, distorted_images_rank - ]) - (boxes_rank_, distorted_boxes_rank_, images_rank_, - distorted_images_rank_) = session_results - self.assertAllEqual(boxes_rank_, distorted_boxes_rank_) - self.assertAllEqual(images_rank_, distorted_images_rank_) - - def testRandomCropImageWithBoxOutOfImage(self): - preprocessing_options = [] - preprocessing_options.append((preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })) - preprocessing_options.append((preprocessor.random_crop_image, {})) - images = self.createTestImages() - boxes = self.createTestBoxesOutOfImage() - labels = self.createTestLabels() - tensor_dict = { - fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - } - distorted_tensor_dict = preprocessor.preprocess(tensor_dict, - preprocessing_options) - distorted_images = distorted_tensor_dict[fields.InputDataFields.image] - distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - boxes_rank = tf.rank(boxes) - distorted_boxes_rank = tf.rank(distorted_boxes) - images_rank = tf.rank(images) - distorted_images_rank = tf.rank(distorted_images) - - with self.test_session() as sess: - (boxes_rank_, distorted_boxes_rank_, images_rank_, - distorted_images_rank_) = sess.run( - [boxes_rank, distorted_boxes_rank, images_rank, - distorted_images_rank]) - self.assertAllEqual(boxes_rank_, distorted_boxes_rank_) - self.assertAllEqual(images_rank_, distorted_images_rank_) - - def testRandomCropImageWithRandomCoefOne(self): - preprocessing_options = [(preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })] - - images = self.createTestImages() - boxes = 
self.createTestBoxes() - labels = self.createTestLabels() - label_scores = self.createTestLabelScores() - tensor_dict = { - fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - fields.InputDataFields.groundtruth_label_scores: label_scores - } - tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) - images = tensor_dict[fields.InputDataFields.image] - - preprocessing_options = [(preprocessor.random_crop_image, { - 'random_coef': 1.0 - })] - distorted_tensor_dict = preprocessor.preprocess(tensor_dict, - preprocessing_options) - - distorted_images = distorted_tensor_dict[fields.InputDataFields.image] - distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - distorted_labels = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_classes] - distorted_label_scores = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_label_scores] - boxes_shape = tf.shape(boxes) - distorted_boxes_shape = tf.shape(distorted_boxes) - images_shape = tf.shape(images) - distorted_images_shape = tf.shape(distorted_images) - - with self.test_session() as sess: - (boxes_shape_, distorted_boxes_shape_, images_shape_, - distorted_images_shape_, images_, distorted_images_, - boxes_, distorted_boxes_, labels_, distorted_labels_, - label_scores_, distorted_label_scores_) = sess.run( - [boxes_shape, distorted_boxes_shape, images_shape, - distorted_images_shape, images, distorted_images, - boxes, distorted_boxes, labels, distorted_labels, - label_scores, distorted_label_scores]) - self.assertAllEqual(boxes_shape_, distorted_boxes_shape_) - self.assertAllEqual(images_shape_, distorted_images_shape_) - self.assertAllClose(images_, distorted_images_) - self.assertAllClose(boxes_, distorted_boxes_) - self.assertAllEqual(labels_, distorted_labels_) - self.assertAllEqual(label_scores_, distorted_label_scores_) - - def 
testRandomCropWithMockSampleDistortedBoundingBox(self): - preprocessing_options = [(preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })] - - images = self.createColorfulTestImage() - boxes = tf.constant([[0.1, 0.1, 0.8, 0.3], - [0.2, 0.4, 0.75, 0.75], - [0.3, 0.1, 0.4, 0.7]], dtype=tf.float32) - labels = tf.constant([1, 7, 11], dtype=tf.int32) - - tensor_dict = { - fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - } - tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) - images = tensor_dict[fields.InputDataFields.image] - - preprocessing_options = [(preprocessor.random_crop_image, {})] - with mock.patch.object( - tf.image, - 'sample_distorted_bounding_box') as mock_sample_distorted_bounding_box: - mock_sample_distorted_bounding_box.return_value = (tf.constant( - [6, 143, 0], dtype=tf.int32), tf.constant( - [190, 237, -1], dtype=tf.int32), tf.constant( - [[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32)) - - distorted_tensor_dict = preprocessor.preprocess(tensor_dict, - preprocessing_options) - - distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - distorted_labels = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_classes] - expected_boxes = tf.constant([[0.178947, 0.07173, 0.75789469, 0.66244733], - [0.28421, 0.0, 0.38947365, 0.57805908]], - dtype=tf.float32) - expected_labels = tf.constant([7, 11], dtype=tf.int32) - - with self.test_session() as sess: - (distorted_boxes_, distorted_labels_, - expected_boxes_, expected_labels_) = sess.run( - [distorted_boxes, distorted_labels, - expected_boxes, expected_labels]) - self.assertAllClose(distorted_boxes_, expected_boxes_) - self.assertAllEqual(distorted_labels_, expected_labels_) - - def testStrictRandomCropImageWithLabelScores(self): - image = self.createColorfulTestImage()[0] - 
boxes = self.createTestBoxes() - labels = self.createTestLabels() - label_scores = self.createTestLabelScores() - with mock.patch.object( - tf.image, - 'sample_distorted_bounding_box' - ) as mock_sample_distorted_bounding_box: - mock_sample_distorted_bounding_box.return_value = ( - tf.constant([6, 143, 0], dtype=tf.int32), - tf.constant([190, 237, -1], dtype=tf.int32), - tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32)) - new_image, new_boxes, new_labels, new_label_scores = ( - preprocessor._strict_random_crop_image( - image, boxes, labels, label_scores)) - with self.test_session() as sess: - new_image, new_boxes, new_labels, new_label_scores = ( - sess.run( - [new_image, new_boxes, new_labels, new_label_scores]) - ) - - expected_boxes = np.array( - [[0.0, 0.0, 0.75789469, 1.0], - [0.23157893, 0.24050637, 0.75789469, 1.0]], dtype=np.float32) - self.assertAllEqual(new_image.shape, [190, 237, 3]) - self.assertAllEqual(new_label_scores, [1.0, 0.5]) - self.assertAllClose( - new_boxes.flatten(), expected_boxes.flatten()) - - def testStrictRandomCropImageWithMasks(self): - image = self.createColorfulTestImage()[0] - boxes = self.createTestBoxes() - labels = self.createTestLabels() - masks = tf.random_uniform([2, 200, 400], dtype=tf.float32) - with mock.patch.object( - tf.image, - 'sample_distorted_bounding_box' - ) as mock_sample_distorted_bounding_box: - mock_sample_distorted_bounding_box.return_value = ( - tf.constant([6, 143, 0], dtype=tf.int32), - tf.constant([190, 237, -1], dtype=tf.int32), - tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32)) - new_image, new_boxes, new_labels, new_masks = ( - preprocessor._strict_random_crop_image( - image, boxes, labels, masks=masks)) - with self.test_session() as sess: - new_image, new_boxes, new_labels, new_masks = sess.run( - [new_image, new_boxes, new_labels, new_masks]) - expected_boxes = np.array( - [[0.0, 0.0, 0.75789469, 1.0], - [0.23157893, 0.24050637, 0.75789469, 1.0]], dtype=np.float32) - 
self.assertAllEqual(new_image.shape, [190, 237, 3]) - self.assertAllEqual(new_masks.shape, [2, 190, 237]) - self.assertAllClose( - new_boxes.flatten(), expected_boxes.flatten()) - - def testStrictRandomCropImageWithKeypoints(self): - image = self.createColorfulTestImage()[0] - boxes = self.createTestBoxes() - labels = self.createTestLabels() - keypoints = self.createTestKeypoints() - with mock.patch.object( - tf.image, - 'sample_distorted_bounding_box' - ) as mock_sample_distorted_bounding_box: - mock_sample_distorted_bounding_box.return_value = ( - tf.constant([6, 143, 0], dtype=tf.int32), - tf.constant([190, 237, -1], dtype=tf.int32), - tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32)) - new_image, new_boxes, new_labels, new_keypoints = ( - preprocessor._strict_random_crop_image( - image, boxes, labels, keypoints=keypoints)) - with self.test_session() as sess: - new_image, new_boxes, new_labels, new_keypoints = sess.run( - [new_image, new_boxes, new_labels, new_keypoints]) - - expected_boxes = np.array([ - [0.0, 0.0, 0.75789469, 1.0], - [0.23157893, 0.24050637, 0.75789469, 1.0],], dtype=np.float32) - expected_keypoints = np.array([ - [[np.nan, np.nan], - [np.nan, np.nan], - [np.nan, np.nan]], - [[0.38947368, 0.07173], - [0.49473682, 0.24050637], - [0.60000002, 0.40928277]] - ], dtype=np.float32) - self.assertAllEqual(new_image.shape, [190, 237, 3]) - self.assertAllClose( - new_boxes.flatten(), expected_boxes.flatten()) - self.assertAllClose( - new_keypoints.flatten(), expected_keypoints.flatten()) - - def testRunRandomCropImageWithMasks(self): - image = self.createColorfulTestImage() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - masks = tf.random_uniform([2, 200, 400], dtype=tf.float32) - - tensor_dict = { - fields.InputDataFields.image: image, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - fields.InputDataFields.groundtruth_instance_masks: masks, - } - - 
preprocessor_arg_map = preprocessor.get_default_func_arg_map( - include_instance_masks=True) - - preprocessing_options = [(preprocessor.random_crop_image, {})] - - with mock.patch.object( - tf.image, - 'sample_distorted_bounding_box' - ) as mock_sample_distorted_bounding_box: - mock_sample_distorted_bounding_box.return_value = ( - tf.constant([6, 143, 0], dtype=tf.int32), - tf.constant([190, 237, -1], dtype=tf.int32), - tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32)) - distorted_tensor_dict = preprocessor.preprocess( - tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map) - distorted_image = distorted_tensor_dict[fields.InputDataFields.image] - distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - distorted_labels = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_classes] - distorted_masks = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_instance_masks] - with self.test_session() as sess: - (distorted_image_, distorted_boxes_, distorted_labels_, - distorted_masks_) = sess.run( - [distorted_image, distorted_boxes, distorted_labels, - distorted_masks]) - - expected_boxes = np.array([ - [0.0, 0.0, 0.75789469, 1.0], - [0.23157893, 0.24050637, 0.75789469, 1.0], - ], dtype=np.float32) - self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3]) - self.assertAllEqual(distorted_masks_.shape, [2, 190, 237]) - self.assertAllEqual(distorted_labels_, [1, 2]) - self.assertAllClose( - distorted_boxes_.flatten(), expected_boxes.flatten()) - - def testRunRandomCropImageWithKeypointsInsideCrop(self): - image = self.createColorfulTestImage() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - keypoints = self.createTestKeypointsInsideCrop() - - tensor_dict = { - fields.InputDataFields.image: image, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - fields.InputDataFields.groundtruth_keypoints: keypoints - } - - 
preprocessor_arg_map = preprocessor.get_default_func_arg_map( - include_keypoints=True) - - preprocessing_options = [(preprocessor.random_crop_image, {})] - - with mock.patch.object( - tf.image, - 'sample_distorted_bounding_box' - ) as mock_sample_distorted_bounding_box: - mock_sample_distorted_bounding_box.return_value = ( - tf.constant([6, 143, 0], dtype=tf.int32), - tf.constant([190, 237, -1], dtype=tf.int32), - tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32)) - distorted_tensor_dict = preprocessor.preprocess( - tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map) - distorted_image = distorted_tensor_dict[fields.InputDataFields.image] - distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - distorted_labels = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_classes] - distorted_keypoints = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_keypoints] - with self.test_session() as sess: - (distorted_image_, distorted_boxes_, distorted_labels_, - distorted_keypoints_) = sess.run( - [distorted_image, distorted_boxes, distorted_labels, - distorted_keypoints]) - - expected_boxes = np.array([ - [0.0, 0.0, 0.75789469, 1.0], - [0.23157893, 0.24050637, 0.75789469, 1.0], - ], dtype=np.float32) - expected_keypoints = np.array([ - [[0.38947368, 0.07173], - [0.49473682, 0.24050637], - [0.60000002, 0.40928277]], - [[0.38947368, 0.07173], - [0.49473682, 0.24050637], - [0.60000002, 0.40928277]] - ]) - self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3]) - self.assertAllEqual(distorted_labels_, [1, 2]) - self.assertAllClose( - distorted_boxes_.flatten(), expected_boxes.flatten()) - self.assertAllClose( - distorted_keypoints_.flatten(), expected_keypoints.flatten()) - - def testRunRandomCropImageWithKeypointsOutsideCrop(self): - image = self.createColorfulTestImage() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - keypoints = self.createTestKeypointsOutsideCrop() - - 
tensor_dict = { - fields.InputDataFields.image: image, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - fields.InputDataFields.groundtruth_keypoints: keypoints - } - - preprocessor_arg_map = preprocessor.get_default_func_arg_map( - include_keypoints=True) - - preprocessing_options = [(preprocessor.random_crop_image, {})] - - with mock.patch.object( - tf.image, - 'sample_distorted_bounding_box' - ) as mock_sample_distorted_bounding_box: - mock_sample_distorted_bounding_box.return_value = ( - tf.constant([6, 143, 0], dtype=tf.int32), - tf.constant([190, 237, -1], dtype=tf.int32), - tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32)) - distorted_tensor_dict = preprocessor.preprocess( - tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map) - distorted_image = distorted_tensor_dict[fields.InputDataFields.image] - distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - distorted_labels = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_classes] - distorted_keypoints = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_keypoints] - with self.test_session() as sess: - (distorted_image_, distorted_boxes_, distorted_labels_, - distorted_keypoints_) = sess.run( - [distorted_image, distorted_boxes, distorted_labels, - distorted_keypoints]) - - expected_boxes = np.array([ - [0.0, 0.0, 0.75789469, 1.0], - [0.23157893, 0.24050637, 0.75789469, 1.0], - ], dtype=np.float32) - expected_keypoints = np.array([ - [[np.nan, np.nan], - [np.nan, np.nan], - [np.nan, np.nan]], - [[np.nan, np.nan], - [np.nan, np.nan], - [np.nan, np.nan]], - ]) - self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3]) - self.assertAllEqual(distorted_labels_, [1, 2]) - self.assertAllClose( - distorted_boxes_.flatten(), expected_boxes.flatten()) - self.assertAllClose( - distorted_keypoints_.flatten(), expected_keypoints.flatten()) - - def 
testRunRetainBoxesAboveThreshold(self): - boxes = self.createTestBoxes() - labels = self.createTestLabels() - label_scores = self.createTestLabelScores() - - tensor_dict = { - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - fields.InputDataFields.groundtruth_label_scores: label_scores - } - - preprocessing_options = [ - (preprocessor.retain_boxes_above_threshold, {'threshold': 0.6}) - ] - preprocessor_arg_map = preprocessor.get_default_func_arg_map( - include_label_scores=True) - retained_tensor_dict = preprocessor.preprocess( - tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map) - retained_boxes = retained_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - retained_labels = retained_tensor_dict[ - fields.InputDataFields.groundtruth_classes] - retained_label_scores = retained_tensor_dict[ - fields.InputDataFields.groundtruth_label_scores] - - with self.test_session() as sess: - (retained_boxes_, retained_labels_, - retained_label_scores_, expected_retained_boxes_, - expected_retained_labels_, expected_retained_label_scores_) = sess.run( - [retained_boxes, retained_labels, retained_label_scores, - self.expectedBoxesAfterThresholding(), - self.expectedLabelsAfterThresholding(), - self.expectedLabelScoresAfterThresholding()]) - - self.assertAllClose(retained_boxes_, expected_retained_boxes_) - self.assertAllClose(retained_labels_, expected_retained_labels_) - self.assertAllClose( - retained_label_scores_, expected_retained_label_scores_) - - def testRunRetainBoxesAboveThresholdWithMasks(self): - boxes = self.createTestBoxes() - labels = self.createTestLabels() - label_scores = self.createTestLabelScores() - masks = self.createTestMasks() - - tensor_dict = { - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - fields.InputDataFields.groundtruth_label_scores: label_scores, - fields.InputDataFields.groundtruth_instance_masks: masks - } 
- - preprocessor_arg_map = preprocessor.get_default_func_arg_map( - include_label_scores=True, - include_instance_masks=True) - - preprocessing_options = [ - (preprocessor.retain_boxes_above_threshold, {'threshold': 0.6}) - ] - - retained_tensor_dict = preprocessor.preprocess( - tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map) - retained_masks = retained_tensor_dict[ - fields.InputDataFields.groundtruth_instance_masks] - - with self.test_session() as sess: - (retained_masks_, expected_masks_) = sess.run( - [retained_masks, - self.expectedMasksAfterThresholding()]) - self.assertAllClose(retained_masks_, expected_masks_) - - def testRunRetainBoxesAboveThresholdWithKeypoints(self): - boxes = self.createTestBoxes() - labels = self.createTestLabels() - label_scores = self.createTestLabelScores() - keypoints = self.createTestKeypoints() - - tensor_dict = { - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - fields.InputDataFields.groundtruth_label_scores: label_scores, - fields.InputDataFields.groundtruth_keypoints: keypoints - } - - preprocessor_arg_map = preprocessor.get_default_func_arg_map( - include_label_scores=True, - include_keypoints=True) - - preprocessing_options = [ - (preprocessor.retain_boxes_above_threshold, {'threshold': 0.6}) - ] - - retained_tensor_dict = preprocessor.preprocess( - tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map) - retained_keypoints = retained_tensor_dict[ - fields.InputDataFields.groundtruth_keypoints] - - with self.test_session() as sess: - (retained_keypoints_, expected_keypoints_) = sess.run( - [retained_keypoints, - self.expectedKeypointsAfterThresholding()]) - self.assertAllClose(retained_keypoints_, expected_keypoints_) - - def testRunRandomCropToAspectRatioWithMasks(self): - image = self.createColorfulTestImage() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - masks = tf.random_uniform([2, 200, 400], 
dtype=tf.float32) - - tensor_dict = { - fields.InputDataFields.image: image, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - fields.InputDataFields.groundtruth_instance_masks: masks - } - - preprocessor_arg_map = preprocessor.get_default_func_arg_map( - include_instance_masks=True) - - preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, {})] - - with mock.patch.object(preprocessor, - '_random_integer') as mock_random_integer: - mock_random_integer.return_value = tf.constant(0, dtype=tf.int32) - distorted_tensor_dict = preprocessor.preprocess( - tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map) - distorted_image = distorted_tensor_dict[fields.InputDataFields.image] - distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - distorted_labels = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_classes] - distorted_masks = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_instance_masks] - with self.test_session() as sess: - (distorted_image_, distorted_boxes_, distorted_labels_, - distorted_masks_) = sess.run([ - distorted_image, distorted_boxes, distorted_labels, distorted_masks - ]) - - expected_boxes = np.array([0.0, 0.5, 0.75, 1.0], dtype=np.float32) - self.assertAllEqual(distorted_image_.shape, [1, 200, 200, 3]) - self.assertAllEqual(distorted_labels_, [1]) - self.assertAllClose(distorted_boxes_.flatten(), - expected_boxes.flatten()) - self.assertAllEqual(distorted_masks_.shape, [1, 200, 200]) - - def testRunRandomCropToAspectRatioWithKeypoints(self): - image = self.createColorfulTestImage() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - keypoints = self.createTestKeypoints() - - tensor_dict = { - fields.InputDataFields.image: image, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - fields.InputDataFields.groundtruth_keypoints: keypoints - } 
- - preprocessor_arg_map = preprocessor.get_default_func_arg_map( - include_keypoints=True) - - preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, {})] - - with mock.patch.object(preprocessor, - '_random_integer') as mock_random_integer: - mock_random_integer.return_value = tf.constant(0, dtype=tf.int32) - distorted_tensor_dict = preprocessor.preprocess( - tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map) - distorted_image = distorted_tensor_dict[fields.InputDataFields.image] - distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - distorted_labels = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_classes] - distorted_keypoints = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_keypoints] - with self.test_session() as sess: - (distorted_image_, distorted_boxes_, distorted_labels_, - distorted_keypoints_) = sess.run([ - distorted_image, distorted_boxes, distorted_labels, - distorted_keypoints - ]) - - expected_boxes = np.array([0.0, 0.5, 0.75, 1.0], dtype=np.float32) - expected_keypoints = np.array( - [[0.1, 0.2], [0.2, 0.4], [0.3, 0.6]], dtype=np.float32) - self.assertAllEqual(distorted_image_.shape, [1, 200, 200, 3]) - self.assertAllEqual(distorted_labels_, [1]) - self.assertAllClose(distorted_boxes_.flatten(), - expected_boxes.flatten()) - self.assertAllClose(distorted_keypoints_.flatten(), - expected_keypoints.flatten()) - - def testRunRandomPadToAspectRatioWithMasks(self): - image = self.createColorfulTestImage() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - masks = tf.random_uniform([2, 200, 400], dtype=tf.float32) - - tensor_dict = { - fields.InputDataFields.image: image, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - fields.InputDataFields.groundtruth_instance_masks: masks - } - - preprocessor_arg_map = preprocessor.get_default_func_arg_map( - include_instance_masks=True) - - 
preprocessing_options = [(preprocessor.random_pad_to_aspect_ratio, {})] - - distorted_tensor_dict = preprocessor.preprocess( - tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map) - distorted_image = distorted_tensor_dict[fields.InputDataFields.image] - distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - distorted_labels = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_classes] - distorted_masks = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_instance_masks] - with self.test_session() as sess: - (distorted_image_, distorted_boxes_, distorted_labels_, - distorted_masks_) = sess.run([ - distorted_image, distorted_boxes, distorted_labels, distorted_masks - ]) - - expected_boxes = np.array( - [[0.0, 0.25, 0.375, 1.0], [0.125, 0.5, 0.375, 1.0]], dtype=np.float32) - self.assertAllEqual(distorted_image_.shape, [1, 400, 400, 3]) - self.assertAllEqual(distorted_labels_, [1, 2]) - self.assertAllClose(distorted_boxes_.flatten(), - expected_boxes.flatten()) - self.assertAllEqual(distorted_masks_.shape, [2, 400, 400]) - - def testRunRandomPadToAspectRatioWithKeypoints(self): - image = self.createColorfulTestImage() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - keypoints = self.createTestKeypoints() - - tensor_dict = { - fields.InputDataFields.image: image, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - fields.InputDataFields.groundtruth_keypoints: keypoints - } - - preprocessor_arg_map = preprocessor.get_default_func_arg_map( - include_keypoints=True) - - preprocessing_options = [(preprocessor.random_pad_to_aspect_ratio, {})] - - distorted_tensor_dict = preprocessor.preprocess( - tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map) - distorted_image = distorted_tensor_dict[fields.InputDataFields.image] - distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - 
distorted_labels = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_classes] - distorted_keypoints = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_keypoints] - with self.test_session() as sess: - (distorted_image_, distorted_boxes_, distorted_labels_, - distorted_keypoints_) = sess.run([ - distorted_image, distorted_boxes, distorted_labels, - distorted_keypoints - ]) - - expected_boxes = np.array( - [[0.0, 0.25, 0.375, 1.0], [0.125, 0.5, 0.375, 1.0]], dtype=np.float32) - expected_keypoints = np.array([ - [[0.05, 0.1], [0.1, 0.2], [0.15, 0.3]], - [[0.2, 0.4], [0.25, 0.5], [0.3, 0.6]], - ], dtype=np.float32) - self.assertAllEqual(distorted_image_.shape, [1, 400, 400, 3]) - self.assertAllEqual(distorted_labels_, [1, 2]) - self.assertAllClose(distorted_boxes_.flatten(), - expected_boxes.flatten()) - self.assertAllClose(distorted_keypoints_.flatten(), - expected_keypoints.flatten()) - - def testRandomPadImage(self): - preprocessing_options = [(preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })] - - images = self.createTestImages() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - tensor_dict = { - fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - } - tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) - images = tensor_dict[fields.InputDataFields.image] - - preprocessing_options = [(preprocessor.random_pad_image, {})] - padded_tensor_dict = preprocessor.preprocess(tensor_dict, - preprocessing_options) - - padded_images = padded_tensor_dict[fields.InputDataFields.image] - padded_boxes = padded_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - boxes_shape = tf.shape(boxes) - padded_boxes_shape = tf.shape(padded_boxes) - images_shape = tf.shape(images) - padded_images_shape = tf.shape(padded_images) - - with self.test_session() 
as sess: - (boxes_shape_, padded_boxes_shape_, images_shape_, - padded_images_shape_, boxes_, padded_boxes_) = sess.run( - [boxes_shape, padded_boxes_shape, images_shape, - padded_images_shape, boxes, padded_boxes]) - self.assertAllEqual(boxes_shape_, padded_boxes_shape_) - self.assertTrue((images_shape_[1] >= padded_images_shape_[1] * 0.5).all) - self.assertTrue((images_shape_[2] >= padded_images_shape_[2] * 0.5).all) - self.assertTrue((images_shape_[1] <= padded_images_shape_[1]).all) - self.assertTrue((images_shape_[2] <= padded_images_shape_[2]).all) - self.assertTrue(np.all((boxes_[:, 2] - boxes_[:, 0]) >= ( - padded_boxes_[:, 2] - padded_boxes_[:, 0]))) - self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= ( - padded_boxes_[:, 3] - padded_boxes_[:, 1]))) - - def testRandomCropPadImageWithRandomCoefOne(self): - preprocessing_options = [(preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })] - - images = self.createTestImages() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - tensor_dict = { - fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - } - tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) - images = tensor_dict[fields.InputDataFields.image] - - preprocessing_options = [(preprocessor.random_crop_pad_image, { - 'random_coef': 1.0 - })] - padded_tensor_dict = preprocessor.preprocess(tensor_dict, - preprocessing_options) - - padded_images = padded_tensor_dict[fields.InputDataFields.image] - padded_boxes = padded_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - boxes_shape = tf.shape(boxes) - padded_boxes_shape = tf.shape(padded_boxes) - images_shape = tf.shape(images) - padded_images_shape = tf.shape(padded_images) - - with self.test_session() as sess: - (boxes_shape_, padded_boxes_shape_, images_shape_, - padded_images_shape_, 
boxes_, padded_boxes_) = sess.run( - [boxes_shape, padded_boxes_shape, images_shape, - padded_images_shape, boxes, padded_boxes]) - self.assertAllEqual(boxes_shape_, padded_boxes_shape_) - self.assertTrue((images_shape_[1] >= padded_images_shape_[1] * 0.5).all) - self.assertTrue((images_shape_[2] >= padded_images_shape_[2] * 0.5).all) - self.assertTrue((images_shape_[1] <= padded_images_shape_[1]).all) - self.assertTrue((images_shape_[2] <= padded_images_shape_[2]).all) - self.assertTrue(np.all((boxes_[:, 2] - boxes_[:, 0]) >= ( - padded_boxes_[:, 2] - padded_boxes_[:, 0]))) - self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= ( - padded_boxes_[:, 3] - padded_boxes_[:, 1]))) - - def testRandomCropToAspectRatio(self): - images = self.createTestImages() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - tensor_dict = { - fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - } - tensor_dict = preprocessor.preprocess(tensor_dict, []) - images = tensor_dict[fields.InputDataFields.image] - - preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, { - 'aspect_ratio': 2.0 - })] - cropped_tensor_dict = preprocessor.preprocess(tensor_dict, - preprocessing_options) - - cropped_images = cropped_tensor_dict[fields.InputDataFields.image] - cropped_boxes = cropped_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - boxes_shape = tf.shape(boxes) - cropped_boxes_shape = tf.shape(cropped_boxes) - images_shape = tf.shape(images) - cropped_images_shape = tf.shape(cropped_images) - - with self.test_session() as sess: - (boxes_shape_, cropped_boxes_shape_, images_shape_, - cropped_images_shape_) = sess.run([ - boxes_shape, cropped_boxes_shape, images_shape, cropped_images_shape - ]) - self.assertAllEqual(boxes_shape_, cropped_boxes_shape_) - self.assertEqual(images_shape_[1], cropped_images_shape_[1] * 2) - self.assertEqual(images_shape_[2], 
cropped_images_shape_[2]) - - def testRandomPadToAspectRatio(self): - images = self.createTestImages() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - tensor_dict = { - fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - } - tensor_dict = preprocessor.preprocess(tensor_dict, []) - images = tensor_dict[fields.InputDataFields.image] - - preprocessing_options = [(preprocessor.random_pad_to_aspect_ratio, { - 'aspect_ratio': 2.0 - })] - padded_tensor_dict = preprocessor.preprocess(tensor_dict, - preprocessing_options) - - padded_images = padded_tensor_dict[fields.InputDataFields.image] - padded_boxes = padded_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - boxes_shape = tf.shape(boxes) - padded_boxes_shape = tf.shape(padded_boxes) - images_shape = tf.shape(images) - padded_images_shape = tf.shape(padded_images) - - with self.test_session() as sess: - (boxes_shape_, padded_boxes_shape_, images_shape_, - padded_images_shape_) = sess.run([ - boxes_shape, padded_boxes_shape, images_shape, padded_images_shape - ]) - self.assertAllEqual(boxes_shape_, padded_boxes_shape_) - self.assertEqual(images_shape_[1], padded_images_shape_[1]) - self.assertEqual(2 * images_shape_[2], padded_images_shape_[2]) - - def testRandomBlackPatches(self): - preprocessing_options = [] - preprocessing_options.append((preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })) - preprocessing_options.append((preprocessor.random_black_patches, { - 'size_to_image_ratio': 0.5 - })) - images = self.createTestImages() - tensor_dict = {fields.InputDataFields.image: images} - blacked_tensor_dict = preprocessor.preprocess(tensor_dict, - preprocessing_options) - blacked_images = blacked_tensor_dict[fields.InputDataFields.image] - images_shape = tf.shape(images) - blacked_images_shape = tf.shape(blacked_images) - - 
with self.test_session() as sess: - (images_shape_, blacked_images_shape_) = sess.run( - [images_shape, blacked_images_shape]) - self.assertAllEqual(images_shape_, blacked_images_shape_) - - def testRandomResizeMethod(self): - preprocessing_options = [] - preprocessing_options.append((preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - })) - preprocessing_options.append((preprocessor.random_resize_method, { - 'target_size': (75, 150) - })) - images = self.createTestImages() - tensor_dict = {fields.InputDataFields.image: images} - resized_tensor_dict = preprocessor.preprocess(tensor_dict, - preprocessing_options) - resized_images = resized_tensor_dict[fields.InputDataFields.image] - resized_images_shape = tf.shape(resized_images) - expected_images_shape = tf.constant([1, 75, 150, 3], dtype=tf.int32) - - with self.test_session() as sess: - (expected_images_shape_, resized_images_shape_) = sess.run( - [expected_images_shape, resized_images_shape]) - self.assertAllEqual(expected_images_shape_, - resized_images_shape_) - - def testResizeImageWithMasks(self): - """Tests image resizing, checking output sizes.""" - in_image_shape_list = [[60, 40, 3], [15, 30, 3]] - in_masks_shape_list = [[15, 60, 40], [10, 15, 30]] - height = 50 - width = 100 - expected_image_shape_list = [[50, 100, 3], [50, 100, 3]] - expected_masks_shape_list = [[15, 50, 100], [10, 50, 100]] - - for (in_image_shape, expected_image_shape, in_masks_shape, - expected_mask_shape) in zip(in_image_shape_list, - expected_image_shape_list, - in_masks_shape_list, - expected_masks_shape_list): - in_image = tf.random_uniform(in_image_shape) - in_masks = tf.random_uniform(in_masks_shape) - out_image, out_masks = preprocessor.resize_image( - in_image, in_masks, new_height=height, new_width=width) - out_image_shape = tf.shape(out_image) - out_masks_shape = tf.shape(out_masks) - - with self.test_session() as sess: - out_image_shape, 
out_masks_shape = sess.run( - [out_image_shape, out_masks_shape]) - self.assertAllEqual(out_image_shape, expected_image_shape) - self.assertAllEqual(out_masks_shape, expected_mask_shape) - - def testResizeImageWithNoInstanceMask(self): - """Tests image resizing, checking output sizes.""" - in_image_shape_list = [[60, 40, 3], [15, 30, 3]] - in_masks_shape_list = [[0, 60, 40], [0, 15, 30]] - height = 50 - width = 100 - expected_image_shape_list = [[50, 100, 3], [50, 100, 3]] - expected_masks_shape_list = [[0, 50, 100], [0, 50, 100]] - - for (in_image_shape, expected_image_shape, in_masks_shape, - expected_mask_shape) in zip(in_image_shape_list, - expected_image_shape_list, - in_masks_shape_list, - expected_masks_shape_list): - in_image = tf.random_uniform(in_image_shape) - in_masks = tf.random_uniform(in_masks_shape) - out_image, out_masks = preprocessor.resize_image( - in_image, in_masks, new_height=height, new_width=width) - out_image_shape = tf.shape(out_image) - out_masks_shape = tf.shape(out_masks) - - with self.test_session() as sess: - out_image_shape, out_masks_shape = sess.run( - [out_image_shape, out_masks_shape]) - self.assertAllEqual(out_image_shape, expected_image_shape) - self.assertAllEqual(out_masks_shape, expected_mask_shape) - - def testResizeToRangePreservesStaticSpatialShape(self): - """Tests image resizing, checking output sizes.""" - in_shape_list = [[60, 40, 3], [15, 30, 3], [15, 50, 3]] - min_dim = 50 - max_dim = 100 - expected_shape_list = [[75, 50, 3], [50, 100, 3], [30, 100, 3]] - - for in_shape, expected_shape in zip(in_shape_list, expected_shape_list): - in_image = tf.random_uniform(in_shape) - out_image = preprocessor.resize_to_range( - in_image, min_dimension=min_dim, max_dimension=max_dim) - self.assertAllEqual(out_image.get_shape().as_list(), expected_shape) - - def testResizeToRangeWithDynamicSpatialShape(self): - """Tests image resizing, checking output sizes.""" - in_shape_list = [[60, 40, 3], [15, 30, 3], [15, 50, 3]] - min_dim = 
50 - max_dim = 100 - expected_shape_list = [[75, 50, 3], [50, 100, 3], [30, 100, 3]] - - for in_shape, expected_shape in zip(in_shape_list, expected_shape_list): - in_image = tf.placeholder(tf.float32, shape=(None, None, 3)) - out_image = preprocessor.resize_to_range( - in_image, min_dimension=min_dim, max_dimension=max_dim) - out_image_shape = tf.shape(out_image) - with self.test_session() as sess: - out_image_shape = sess.run(out_image_shape, - feed_dict={in_image: - np.random.randn(*in_shape)}) - self.assertAllEqual(out_image_shape, expected_shape) - - def testResizeToRangeWithMasksPreservesStaticSpatialShape(self): - """Tests image resizing, checking output sizes.""" - in_image_shape_list = [[60, 40, 3], [15, 30, 3]] - in_masks_shape_list = [[15, 60, 40], [10, 15, 30]] - min_dim = 50 - max_dim = 100 - expected_image_shape_list = [[75, 50, 3], [50, 100, 3]] - expected_masks_shape_list = [[15, 75, 50], [10, 50, 100]] - - for (in_image_shape, expected_image_shape, in_masks_shape, - expected_mask_shape) in zip(in_image_shape_list, - expected_image_shape_list, - in_masks_shape_list, - expected_masks_shape_list): - in_image = tf.random_uniform(in_image_shape) - in_masks = tf.random_uniform(in_masks_shape) - out_image, out_masks = preprocessor.resize_to_range( - in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim) - self.assertAllEqual(out_masks.get_shape().as_list(), expected_mask_shape) - self.assertAllEqual(out_image.get_shape().as_list(), expected_image_shape) - - def testResizeToRangeWithMasksAndDynamicSpatialShape(self): - """Tests image resizing, checking output sizes.""" - in_image_shape_list = [[60, 40, 3], [15, 30, 3]] - in_masks_shape_list = [[15, 60, 40], [10, 15, 30]] - min_dim = 50 - max_dim = 100 - expected_image_shape_list = [[75, 50, 3], [50, 100, 3]] - expected_masks_shape_list = [[15, 75, 50], [10, 50, 100]] - - for (in_image_shape, expected_image_shape, in_masks_shape, - expected_mask_shape) in zip(in_image_shape_list, - 
expected_image_shape_list, - in_masks_shape_list, - expected_masks_shape_list): - in_image = tf.placeholder(tf.float32, shape=(None, None, 3)) - in_masks = tf.placeholder(tf.float32, shape=(None, None, None)) - in_masks = tf.random_uniform(in_masks_shape) - out_image, out_masks = preprocessor.resize_to_range( - in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim) - out_image_shape = tf.shape(out_image) - out_masks_shape = tf.shape(out_masks) - - with self.test_session() as sess: - out_image_shape, out_masks_shape = sess.run( - [out_image_shape, out_masks_shape], - feed_dict={ - in_image: np.random.randn(*in_image_shape), - in_masks: np.random.randn(*in_masks_shape) - }) - self.assertAllEqual(out_image_shape, expected_image_shape) - self.assertAllEqual(out_masks_shape, expected_mask_shape) - - def testResizeToRangeWithInstanceMasksTensorOfSizeZero(self): - """Tests image resizing, checking output sizes.""" - in_image_shape_list = [[60, 40, 3], [15, 30, 3]] - in_masks_shape_list = [[0, 60, 40], [0, 15, 30]] - min_dim = 50 - max_dim = 100 - expected_image_shape_list = [[75, 50, 3], [50, 100, 3]] - expected_masks_shape_list = [[0, 75, 50], [0, 50, 100]] - - for (in_image_shape, expected_image_shape, in_masks_shape, - expected_mask_shape) in zip(in_image_shape_list, - expected_image_shape_list, - in_masks_shape_list, - expected_masks_shape_list): - in_image = tf.random_uniform(in_image_shape) - in_masks = tf.random_uniform(in_masks_shape) - out_image, out_masks = preprocessor.resize_to_range( - in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim) - out_image_shape = tf.shape(out_image) - out_masks_shape = tf.shape(out_masks) - - with self.test_session() as sess: - out_image_shape, out_masks_shape = sess.run( - [out_image_shape, out_masks_shape]) - self.assertAllEqual(out_image_shape, expected_image_shape) - self.assertAllEqual(out_masks_shape, expected_mask_shape) - - def testResizeToRange4DImageTensor(self): - image = tf.random_uniform([1, 
200, 300, 3]) - with self.assertRaises(ValueError): - preprocessor.resize_to_range(image, 500, 600) - - def testResizeToRangeSameMinMax(self): - """Tests image resizing, checking output sizes.""" - in_shape_list = [[312, 312, 3], [299, 299, 3]] - min_dim = 320 - max_dim = 320 - expected_shape_list = [[320, 320, 3], [320, 320, 3]] - - for in_shape, expected_shape in zip(in_shape_list, expected_shape_list): - in_image = tf.random_uniform(in_shape) - out_image = preprocessor.resize_to_range( - in_image, min_dimension=min_dim, max_dimension=max_dim) - out_image_shape = tf.shape(out_image) - - with self.test_session() as sess: - out_image_shape = sess.run(out_image_shape) - self.assertAllEqual(out_image_shape, expected_shape) - - def testResizeToMinDimensionTensorShapes(self): - in_image_shape_list = [[60, 55, 3], [15, 30, 3]] - in_masks_shape_list = [[15, 60, 55], [10, 15, 30]] - min_dim = 50 - expected_image_shape_list = [[60, 55, 3], [50, 100, 3]] - expected_masks_shape_list = [[15, 60, 55], [10, 50, 100]] - - for (in_image_shape, expected_image_shape, in_masks_shape, - expected_mask_shape) in zip(in_image_shape_list, - expected_image_shape_list, - in_masks_shape_list, - expected_masks_shape_list): - in_image = tf.placeholder(tf.float32, shape=(None, None, 3)) - in_masks = tf.placeholder(tf.float32, shape=(None, None, None)) - in_masks = tf.random_uniform(in_masks_shape) - out_image, out_masks = preprocessor.resize_to_min_dimension( - in_image, in_masks, min_dimension=min_dim) - out_image_shape = tf.shape(out_image) - out_masks_shape = tf.shape(out_masks) - - with self.test_session() as sess: - out_image_shape, out_masks_shape = sess.run( - [out_image_shape, out_masks_shape], - feed_dict={ - in_image: np.random.randn(*in_image_shape), - in_masks: np.random.randn(*in_masks_shape) - }) - self.assertAllEqual(out_image_shape, expected_image_shape) - self.assertAllEqual(out_masks_shape, expected_mask_shape) - - def 
testResizeToMinDimensionWithInstanceMasksTensorOfSizeZero(self): - """Tests image resizing, checking output sizes.""" - in_image_shape_list = [[60, 40, 3], [15, 30, 3]] - in_masks_shape_list = [[0, 60, 40], [0, 15, 30]] - min_dim = 50 - expected_image_shape_list = [[75, 50, 3], [50, 100, 3]] - expected_masks_shape_list = [[0, 75, 50], [0, 50, 100]] - - for (in_image_shape, expected_image_shape, in_masks_shape, - expected_mask_shape) in zip(in_image_shape_list, - expected_image_shape_list, - in_masks_shape_list, - expected_masks_shape_list): - in_image = tf.random_uniform(in_image_shape) - in_masks = tf.random_uniform(in_masks_shape) - out_image, out_masks = preprocessor.resize_to_min_dimension( - in_image, in_masks, min_dimension=min_dim) - out_image_shape = tf.shape(out_image) - out_masks_shape = tf.shape(out_masks) - - with self.test_session() as sess: - out_image_shape, out_masks_shape = sess.run( - [out_image_shape, out_masks_shape]) - self.assertAllEqual(out_image_shape, expected_image_shape) - self.assertAllEqual(out_masks_shape, expected_mask_shape) - - def testResizeToMinDimensionRaisesErrorOn4DImage(self): - image = tf.random_uniform([1, 200, 300, 3]) - with self.assertRaises(ValueError): - preprocessor.resize_to_min_dimension(image, 500) - - def testScaleBoxesToPixelCoordinates(self): - """Tests box scaling, checking scaled values.""" - in_shape = [60, 40, 3] - in_boxes = [[0.1, 0.2, 0.4, 0.6], - [0.5, 0.3, 0.9, 0.7]] - - expected_boxes = [[6., 8., 24., 24.], - [30., 12., 54., 28.]] - - in_image = tf.random_uniform(in_shape) - in_boxes = tf.constant(in_boxes) - _, out_boxes = preprocessor.scale_boxes_to_pixel_coordinates( - in_image, boxes=in_boxes) - with self.test_session() as sess: - out_boxes = sess.run(out_boxes) - self.assertAllClose(out_boxes, expected_boxes) - - def testScaleBoxesToPixelCoordinatesWithKeypoints(self): - """Tests box and keypoint scaling, checking scaled values.""" - in_shape = [60, 40, 3] - in_boxes = self.createTestBoxes() - 
in_keypoints = self.createTestKeypoints() - - expected_boxes = [[0., 10., 45., 40.], - [15., 20., 45., 40.]] - expected_keypoints = [ - [[6., 4.], [12., 8.], [18., 12.]], - [[24., 16.], [30., 20.], [36., 24.]], - ] - - in_image = tf.random_uniform(in_shape) - _, out_boxes, out_keypoints = preprocessor.scale_boxes_to_pixel_coordinates( - in_image, boxes=in_boxes, keypoints=in_keypoints) - with self.test_session() as sess: - out_boxes_, out_keypoints_ = sess.run([out_boxes, out_keypoints]) - self.assertAllClose(out_boxes_, expected_boxes) - self.assertAllClose(out_keypoints_, expected_keypoints) - - def testSubtractChannelMean(self): - """Tests whether channel means have been subtracted.""" - with self.test_session(): - image = tf.zeros((240, 320, 3)) - means = [1, 2, 3] - actual = preprocessor.subtract_channel_mean(image, means=means) - actual = actual.eval() - - self.assertTrue((actual[:, :, 0] == -1).all()) - self.assertTrue((actual[:, :, 1] == -2).all()) - self.assertTrue((actual[:, :, 2] == -3).all()) - - def testOneHotEncoding(self): - """Tests one hot encoding of multiclass labels.""" - with self.test_session(): - labels = tf.constant([1, 4, 2], dtype=tf.int32) - one_hot = preprocessor.one_hot_encoding(labels, num_classes=5) - one_hot = one_hot.eval() - - self.assertAllEqual([0, 1, 1, 0, 1], one_hot) - - def testSSDRandomCrop(self): - preprocessing_options = [ - (preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - }), - (preprocessor.ssd_random_crop, {})] - images = self.createTestImages() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - tensor_dict = { - fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - } - distorted_tensor_dict = preprocessor.preprocess(tensor_dict, - preprocessing_options) - distorted_images = distorted_tensor_dict[fields.InputDataFields.image] - 
distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - - images_rank = tf.rank(images) - distorted_images_rank = tf.rank(distorted_images) - boxes_rank = tf.rank(boxes) - distorted_boxes_rank = tf.rank(distorted_boxes) - - with self.test_session() as sess: - (boxes_rank_, distorted_boxes_rank_, images_rank_, - distorted_images_rank_) = sess.run( - [boxes_rank, distorted_boxes_rank, images_rank, - distorted_images_rank]) - self.assertAllEqual(boxes_rank_, distorted_boxes_rank_) - self.assertAllEqual(images_rank_, distorted_images_rank_) - - def testSSDRandomCropPad(self): - images = self.createTestImages() - boxes = self.createTestBoxes() - labels = self.createTestLabels() - preprocessing_options = [ - (preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - }), - (preprocessor.ssd_random_crop_pad, {})] - tensor_dict = { - fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels, - } - distorted_tensor_dict = preprocessor.preprocess(tensor_dict, - preprocessing_options) - distorted_images = distorted_tensor_dict[fields.InputDataFields.image] - distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - - images_rank = tf.rank(images) - distorted_images_rank = tf.rank(distorted_images) - boxes_rank = tf.rank(boxes) - distorted_boxes_rank = tf.rank(distorted_boxes) - - with self.test_session() as sess: - (boxes_rank_, distorted_boxes_rank_, images_rank_, - distorted_images_rank_) = sess.run([ - boxes_rank, distorted_boxes_rank, images_rank, distorted_images_rank - ]) - self.assertAllEqual(boxes_rank_, distorted_boxes_rank_) - self.assertAllEqual(images_rank_, distorted_images_rank_) - - def _testSSDRandomCropFixedAspectRatio(self, - include_label_scores, - include_instance_masks, - include_keypoints): - images = self.createTestImages() - boxes = 
self.createTestBoxes() - labels = self.createTestLabels() - preprocessing_options = [ - (preprocessor.normalize_image, { - 'original_minval': 0, - 'original_maxval': 255, - 'target_minval': 0, - 'target_maxval': 1 - }), - (preprocessor.ssd_random_crop_fixed_aspect_ratio, {})] - tensor_dict = { - fields.InputDataFields.image: images, - fields.InputDataFields.groundtruth_boxes: boxes, - fields.InputDataFields.groundtruth_classes: labels - } - if include_label_scores: - label_scores = self.createTestLabelScores() - tensor_dict[fields.InputDataFields.groundtruth_label_scores] = ( - label_scores) - if include_instance_masks: - masks = self.createTestMasks() - tensor_dict[fields.InputDataFields.groundtruth_instance_masks] = masks - if include_keypoints: - keypoints = self.createTestKeypoints() - tensor_dict[fields.InputDataFields.groundtruth_keypoints] = keypoints - - preprocessor_arg_map = preprocessor.get_default_func_arg_map( - include_label_scores=include_label_scores, - include_instance_masks=include_instance_masks, - include_keypoints=include_keypoints) - distorted_tensor_dict = preprocessor.preprocess( - tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map) - distorted_images = distorted_tensor_dict[fields.InputDataFields.image] - distorted_boxes = distorted_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] - images_rank = tf.rank(images) - distorted_images_rank = tf.rank(distorted_images) - boxes_rank = tf.rank(boxes) - distorted_boxes_rank = tf.rank(distorted_boxes) - - with self.test_session() as sess: - (boxes_rank_, distorted_boxes_rank_, images_rank_, - distorted_images_rank_) = sess.run( - [boxes_rank, distorted_boxes_rank, images_rank, - distorted_images_rank]) - self.assertAllEqual(boxes_rank_, distorted_boxes_rank_) - self.assertAllEqual(images_rank_, distorted_images_rank_) - - def testSSDRandomCropFixedAspectRatio(self): - self._testSSDRandomCropFixedAspectRatio(include_label_scores=False, - include_instance_masks=False, - 
include_keypoints=False) - - def testSSDRandomCropFixedAspectRatioWithMasksAndKeypoints(self): - self._testSSDRandomCropFixedAspectRatio(include_label_scores=False, - include_instance_masks=True, - include_keypoints=True) - - def testSSDRandomCropFixedAspectRatioWithLabelScoresMasksAndKeypoints(self): - self._testSSDRandomCropFixedAspectRatio(include_label_scores=True, - include_instance_masks=True, - include_keypoints=True) - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/core/region_similarity_calculator.py b/object_detection/core/region_similarity_calculator.py deleted file mode 100644 index f344006a..00000000 --- a/object_detection/core/region_similarity_calculator.py +++ /dev/null @@ -1,114 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Region Similarity Calculators for BoxLists. - -Region Similarity Calculators compare a pairwise measure of similarity -between the boxes in two BoxLists. -""" -from abc import ABCMeta -from abc import abstractmethod - -import tensorflow as tf - -from object_detection.core import box_list_ops - - -class RegionSimilarityCalculator(object): - """Abstract base class for region similarity calculator.""" - __metaclass__ = ABCMeta - - def compare(self, boxlist1, boxlist2, scope=None): - """Computes matrix of pairwise similarity between BoxLists. 
- - This op (to be overriden) computes a measure of pairwise similarity between - the boxes in the given BoxLists. Higher values indicate more similarity. - - Note that this method simply measures similarity and does not explicitly - perform a matching. - - Args: - boxlist1: BoxList holding N boxes. - boxlist2: BoxList holding M boxes. - scope: Op scope name. Defaults to 'Compare' if None. - - Returns: - a (float32) tensor of shape [N, M] with pairwise similarity score. - """ - with tf.name_scope(scope, 'Compare', [boxlist1, boxlist2]) as scope: - return self._compare(boxlist1, boxlist2) - - @abstractmethod - def _compare(self, boxlist1, boxlist2): - pass - - -class IouSimilarity(RegionSimilarityCalculator): - """Class to compute similarity based on Intersection over Union (IOU) metric. - - This class computes pairwise similarity between two BoxLists based on IOU. - """ - - def _compare(self, boxlist1, boxlist2): - """Compute pairwise IOU similarity between the two BoxLists. - - Args: - boxlist1: BoxList holding N boxes. - boxlist2: BoxList holding M boxes. - - Returns: - A tensor with shape [N, M] representing pairwise iou scores. - """ - return box_list_ops.iou(boxlist1, boxlist2) - - -class NegSqDistSimilarity(RegionSimilarityCalculator): - """Class to compute similarity based on the squared distance metric. - - This class computes pairwise similarity between two BoxLists based on the - negative squared distance metric. - """ - - def _compare(self, boxlist1, boxlist2): - """Compute matrix of (negated) sq distances. - - Args: - boxlist1: BoxList holding N boxes. - boxlist2: BoxList holding M boxes. - - Returns: - A tensor with shape [N, M] representing negated pairwise squared distance. - """ - return -1 * box_list_ops.sq_dist(boxlist1, boxlist2) - - -class IoaSimilarity(RegionSimilarityCalculator): - """Class to compute similarity based on Intersection over Area (IOA) metric. 
- - This class computes pairwise similarity between two BoxLists based on their - pairwise intersections divided by the areas of second BoxLists. - """ - - def _compare(self, boxlist1, boxlist2): - """Compute pairwise IOA similarity between the two BoxLists. - - Args: - boxlist1: BoxList holding N boxes. - boxlist2: BoxList holding M boxes. - - Returns: - A tensor with shape [N, M] representing pairwise IOA scores. - """ - return box_list_ops.ioa(boxlist1, boxlist2) diff --git a/object_detection/core/region_similarity_calculator_test.py b/object_detection/core/region_similarity_calculator_test.py deleted file mode 100644 index 162151a3..00000000 --- a/object_detection/core/region_similarity_calculator_test.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for region_similarity_calculator.""" -import tensorflow as tf - -from object_detection.core import box_list -from object_detection.core import region_similarity_calculator - - -class RegionSimilarityCalculatorTest(tf.test.TestCase): - - def test_get_correct_pairwise_similarity_based_on_iou(self): - corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) - corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], - [0.0, 0.0, 20.0, 20.0]]) - exp_output = [[2.0 / 16.0, 0, 6.0 / 400.0], [1.0 / 16.0, 0.0, 5.0 / 400.0]] - boxes1 = box_list.BoxList(corners1) - boxes2 = box_list.BoxList(corners2) - iou_similarity_calculator = region_similarity_calculator.IouSimilarity() - iou_similarity = iou_similarity_calculator.compare(boxes1, boxes2) - with self.test_session() as sess: - iou_output = sess.run(iou_similarity) - self.assertAllClose(iou_output, exp_output) - - def test_get_correct_pairwise_similarity_based_on_squared_distances(self): - corners1 = tf.constant([[0.0, 0.0, 0.0, 0.0], - [1.0, 1.0, 0.0, 2.0]]) - corners2 = tf.constant([[3.0, 4.0, 1.0, 0.0], - [-4.0, 0.0, 0.0, 3.0], - [0.0, 0.0, 0.0, 0.0]]) - exp_output = [[-26, -25, 0], [-18, -27, -6]] - boxes1 = box_list.BoxList(corners1) - boxes2 = box_list.BoxList(corners2) - dist_similarity_calc = region_similarity_calculator.NegSqDistSimilarity() - dist_similarity = dist_similarity_calc.compare(boxes1, boxes2) - with self.test_session() as sess: - dist_output = sess.run(dist_similarity) - self.assertAllClose(dist_output, exp_output) - - def test_get_correct_pairwise_similarity_based_on_ioa(self): - corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) - corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], - [0.0, 0.0, 20.0, 20.0]]) - exp_output_1 = [[2.0 / 12.0, 0, 6.0 / 400.0], - [1.0 / 12.0, 0.0, 5.0 / 400.0]] - exp_output_2 = [[2.0 / 6.0, 1.0 / 5.0], - [0, 0], - 
[6.0 / 6.0, 5.0 / 5.0]] - boxes1 = box_list.BoxList(corners1) - boxes2 = box_list.BoxList(corners2) - ioa_similarity_calculator = region_similarity_calculator.IoaSimilarity() - ioa_similarity_1 = ioa_similarity_calculator.compare(boxes1, boxes2) - ioa_similarity_2 = ioa_similarity_calculator.compare(boxes2, boxes1) - with self.test_session() as sess: - iou_output_1, iou_output_2 = sess.run( - [ioa_similarity_1, ioa_similarity_2]) - self.assertAllClose(iou_output_1, exp_output_1) - self.assertAllClose(iou_output_2, exp_output_2) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/core/standard_fields.py b/object_detection/core/standard_fields.py deleted file mode 100644 index 7cbf5ee8..00000000 --- a/object_detection/core/standard_fields.py +++ /dev/null @@ -1,205 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Contains classes specifying naming conventions used for object detection. - - -Specifies: - InputDataFields: standard fields used by reader/preprocessor/batcher. - DetectionResultFields: standard fields returned by object detector. - BoxListFields: standard field used by BoxList - TfExampleFields: standard fields for tf-example data format (go/tf-example). -""" - - -class InputDataFields(object): - """Names for the input tensors. 
- - Holds the standard data field names to use for identifying input tensors. This - should be used by the decoder to identify keys for the returned tensor_dict - containing input tensors. And it should be used by the model to identify the - tensors it needs. - - Attributes: - image: image. - original_image: image in the original input size. - key: unique key corresponding to image. - source_id: source of the original image. - filename: original filename of the dataset (without common path). - groundtruth_image_classes: image-level class labels. - groundtruth_boxes: coordinates of the ground truth boxes in the image. - groundtruth_classes: box-level class labels. - groundtruth_label_types: box-level label types (e.g. explicit negative). - groundtruth_is_crowd: [DEPRECATED, use groundtruth_group_of instead] - is the groundtruth a single object or a crowd. - groundtruth_area: area of a groundtruth segment. - groundtruth_difficult: is a `difficult` object - groundtruth_group_of: is a `group_of` objects, e.g. multiple objects of the - same class, forming a connected group, where instances are heavily - occluding each other. - proposal_boxes: coordinates of object proposal boxes. - proposal_objectness: objectness score of each proposal. - groundtruth_instance_masks: ground truth instance masks. - groundtruth_instance_boundaries: ground truth instance boundaries. - groundtruth_instance_classes: instance mask-level class labels. - groundtruth_keypoints: ground truth keypoints. - groundtruth_keypoint_visibilities: ground truth keypoint visibilities. - groundtruth_label_scores: groundtruth label scores. 
- """ - image = 'image' - original_image = 'original_image' - key = 'key' - source_id = 'source_id' - filename = 'filename' - groundtruth_image_classes = 'groundtruth_image_classes' - groundtruth_boxes = 'groundtruth_boxes' - groundtruth_classes = 'groundtruth_classes' - groundtruth_label_types = 'groundtruth_label_types' - groundtruth_is_crowd = 'groundtruth_is_crowd' - groundtruth_area = 'groundtruth_area' - groundtruth_difficult = 'groundtruth_difficult' - groundtruth_group_of = 'groundtruth_group_of' - proposal_boxes = 'proposal_boxes' - proposal_objectness = 'proposal_objectness' - groundtruth_instance_masks = 'groundtruth_instance_masks' - groundtruth_instance_boundaries = 'groundtruth_instance_boundaries' - groundtruth_instance_classes = 'groundtruth_instance_classes' - groundtruth_keypoints = 'groundtruth_keypoints' - groundtruth_keypoint_visibilities = 'groundtruth_keypoint_visibilities' - groundtruth_label_scores = 'groundtruth_label_scores' - - -class DetectionResultFields(object): - """Naming converntions for storing the output of the detector. - - Attributes: - source_id: source of the original image. - key: unique key corresponding to image. - detection_boxes: coordinates of the detection boxes in the image. - detection_scores: detection scores for the detection boxes in the image. - detection_classes: detection-level class labels. - detection_masks: contains a segmentation mask for each detection box. - detection_boundaries: contains an object boundary for each detection box. - detection_keypoints: contains detection keypoints for each detection box. - num_detections: number of detections in the batch. 
- """ - - source_id = 'source_id' - key = 'key' - detection_boxes = 'detection_boxes' - detection_scores = 'detection_scores' - detection_classes = 'detection_classes' - detection_masks = 'detection_masks' - detection_boundaries = 'detection_boundaries' - detection_keypoints = 'detection_keypoints' - num_detections = 'num_detections' - - -class BoxListFields(object): - """Naming conventions for BoxLists. - - Attributes: - boxes: bounding box coordinates. - classes: classes per bounding box. - scores: scores per bounding box. - weights: sample weights per bounding box. - objectness: objectness score per bounding box. - masks: masks per bounding box. - boundaries: boundaries per bounding box. - keypoints: keypoints per bounding box. - keypoint_heatmaps: keypoint heatmaps per bounding box. - """ - boxes = 'boxes' - classes = 'classes' - scores = 'scores' - weights = 'weights' - objectness = 'objectness' - masks = 'masks' - boundaries = 'boundaries' - keypoints = 'keypoints' - keypoint_heatmaps = 'keypoint_heatmaps' - - -class TfExampleFields(object): - """TF-example proto feature names for object detection. - - Holds the standard feature names to load from an Example proto for object - detection. - - Attributes: - image_encoded: JPEG encoded string - image_format: image format, e.g. "JPEG" - filename: filename - channels: number of channels of image - colorspace: colorspace, e.g. "RGB" - height: height of image in pixels, e.g. 462 - width: width of image in pixels, e.g. 581 - source_id: original source of the image - object_class_text: labels in text format, e.g. ["person", "cat"] - object_class_label: labels in numbers, e.g. [16, 8] - object_bbox_xmin: xmin coordinates of groundtruth box, e.g. 10, 30 - object_bbox_xmax: xmax coordinates of groundtruth box, e.g. 50, 40 - object_bbox_ymin: ymin coordinates of groundtruth box, e.g. 40, 50 - object_bbox_ymax: ymax coordinates of groundtruth box, e.g. 80, 70 - object_view: viewpoint of object, e.g. 
["frontal", "left"] - object_truncated: is object truncated, e.g. [true, false] - object_occluded: is object occluded, e.g. [true, false] - object_difficult: is object difficult, e.g. [true, false] - object_group_of: is object a single object or a group of objects - object_depiction: is object a depiction - object_is_crowd: [DEPRECATED, use object_group_of instead] - is the object a single object or a crowd - object_segment_area: the area of the segment. - instance_masks: instance segmentation masks. - instance_boundaries: instance boundaries. - instance_classes: Classes for each instance segmentation mask. - detection_class_label: class label in numbers. - detection_bbox_ymin: ymin coordinates of a detection box. - detection_bbox_xmin: xmin coordinates of a detection box. - detection_bbox_ymax: ymax coordinates of a detection box. - detection_bbox_xmax: xmax coordinates of a detection box. - detection_score: detection score for the class label and box. - """ - image_encoded = 'image/encoded' - image_format = 'image/format' # format is reserved keyword - filename = 'image/filename' - channels = 'image/channels' - colorspace = 'image/colorspace' - height = 'image/height' - width = 'image/width' - source_id = 'image/source_id' - object_class_text = 'image/object/class/text' - object_class_label = 'image/object/class/label' - object_bbox_ymin = 'image/object/bbox/ymin' - object_bbox_xmin = 'image/object/bbox/xmin' - object_bbox_ymax = 'image/object/bbox/ymax' - object_bbox_xmax = 'image/object/bbox/xmax' - object_view = 'image/object/view' - object_truncated = 'image/object/truncated' - object_occluded = 'image/object/occluded' - object_difficult = 'image/object/difficult' - object_group_of = 'image/object/group_of' - object_depiction = 'image/object/depiction' - object_is_crowd = 'image/object/is_crowd' - object_segment_area = 'image/object/segment/area' - instance_masks = 'image/segmentation/object' - instance_boundaries = 'image/boundaries/object' - 
instance_classes = 'image/segmentation/object/class' - detection_class_label = 'image/detection/label' - detection_bbox_ymin = 'image/detection/bbox/ymin' - detection_bbox_xmin = 'image/detection/bbox/xmin' - detection_bbox_ymax = 'image/detection/bbox/ymax' - detection_bbox_xmax = 'image/detection/bbox/xmax' - detection_score = 'image/detection/score' diff --git a/object_detection/core/target_assigner.py b/object_detection/core/target_assigner.py deleted file mode 100644 index d028dd59..00000000 --- a/object_detection/core/target_assigner.py +++ /dev/null @@ -1,455 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Base target assigner module. - -The job of a TargetAssigner is, for a given set of anchors (bounding boxes) and -groundtruth detections (bounding boxes), to assign classification and regression -targets to each anchor as well as weights to each anchor (specifying, e.g., -which anchors should not contribute to training loss). 
- -It assigns classification/regression targets by performing the following steps: -1) Computing pairwise similarity between anchors and groundtruth boxes using a - provided RegionSimilarity Calculator -2) Computing a matching based on the similarity matrix using a provided Matcher -3) Assigning regression targets based on the matching and a provided BoxCoder -4) Assigning classification targets based on the matching and groundtruth labels - -Note that TargetAssigners only operate on detections from a single -image at a time, so any logic for applying a TargetAssigner to multiple -images must be handled externally. -""" -import tensorflow as tf - -from object_detection.box_coders import faster_rcnn_box_coder -from object_detection.box_coders import mean_stddev_box_coder -from object_detection.core import box_coder as bcoder -from object_detection.core import box_list -from object_detection.core import box_list_ops -from object_detection.core import matcher as mat -from object_detection.core import region_similarity_calculator as sim_calc -from object_detection.matchers import argmax_matcher -from object_detection.matchers import bipartite_matcher - - -class TargetAssigner(object): - """Target assigner to compute classification and regression targets.""" - - def __init__(self, similarity_calc, matcher, box_coder, - positive_class_weight=1.0, negative_class_weight=1.0, - unmatched_cls_target=None): - """Construct Object Detection Target Assigner. - - Args: - similarity_calc: a RegionSimilarityCalculator - matcher: an object_detection.core.Matcher used to match groundtruth to - anchors. - box_coder: an object_detection.core.BoxCoder used to encode matching - groundtruth boxes with respect to anchors. 
- positive_class_weight: classification weight to be associated to positive - anchors (default: 1.0) - negative_class_weight: classification weight to be associated to negative - anchors (default: 1.0) - unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k] - which is consistent with the classification target for each - anchor (and can be empty for scalar targets). This shape must thus be - compatible with the groundtruth labels that are passed to the "assign" - function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]). - If set to None, unmatched_cls_target is set to be [0] for each anchor. - - Raises: - ValueError: if similarity_calc is not a RegionSimilarityCalculator or - if matcher is not a Matcher or if box_coder is not a BoxCoder - """ - if not isinstance(similarity_calc, sim_calc.RegionSimilarityCalculator): - raise ValueError('similarity_calc must be a RegionSimilarityCalculator') - if not isinstance(matcher, mat.Matcher): - raise ValueError('matcher must be a Matcher') - if not isinstance(box_coder, bcoder.BoxCoder): - raise ValueError('box_coder must be a BoxCoder') - self._similarity_calc = similarity_calc - self._matcher = matcher - self._box_coder = box_coder - self._positive_class_weight = positive_class_weight - self._negative_class_weight = negative_class_weight - if unmatched_cls_target is None: - self._unmatched_cls_target = tf.constant([0], tf.float32) - else: - self._unmatched_cls_target = unmatched_cls_target - - @property - def box_coder(self): - return self._box_coder - - def assign(self, anchors, groundtruth_boxes, groundtruth_labels=None, - **params): - """Assign classification and regression targets to each anchor. - - For a given set of anchors and groundtruth detections, match anchors - to groundtruth_boxes and assign classification and regression targets to - each anchor as well as weights based on the resulting match (specifying, - e.g., which anchors should not contribute to training loss). 
- - Anchors that are not matched to anything are given a classification target - of self._unmatched_cls_target which can be specified via the constructor. - - Args: - anchors: a BoxList representing N anchors - groundtruth_boxes: a BoxList representing M groundtruth boxes - groundtruth_labels: a tensor of shape [M, d_1, ... d_k] - with labels for each of the ground_truth boxes. The subshape - [d_1, ... d_k] can be empty (corresponding to scalar inputs). When set - to None, groundtruth_labels assumes a binary problem where all - ground_truth boxes get a positive label (of 1). - **params: Additional keyword arguments for specific implementations of - the Matcher. - - Returns: - cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k], - where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels - which has shape [num_gt_boxes, d_1, d_2, ... d_k]. - cls_weights: a float32 tensor with shape [num_anchors] - reg_targets: a float32 tensor with shape [num_anchors, box_code_dimension] - reg_weights: a float32 tensor with shape [num_anchors] - match: a matcher.Match object encoding the match between anchors and - groundtruth boxes, with rows corresponding to groundtruth boxes - and columns corresponding to anchors. 
- - Raises: - ValueError: if anchors or groundtruth_boxes are not of type - box_list.BoxList - """ - if not isinstance(anchors, box_list.BoxList): - raise ValueError('anchors must be an BoxList') - if not isinstance(groundtruth_boxes, box_list.BoxList): - raise ValueError('groundtruth_boxes must be an BoxList') - - if groundtruth_labels is None: - groundtruth_labels = tf.ones(tf.expand_dims(groundtruth_boxes.num_boxes(), - 0)) - groundtruth_labels = tf.expand_dims(groundtruth_labels, -1) - unmatched_shape_assert = tf.assert_equal( - tf.shape(groundtruth_labels)[1:], tf.shape(self._unmatched_cls_target), - message='Unmatched class target shape incompatible ' - 'with groundtruth labels shape!') - labels_and_box_shapes_assert = tf.assert_equal( - tf.shape(groundtruth_labels)[0], groundtruth_boxes.num_boxes(), - message='Groundtruth boxes and labels have incompatible shapes!') - - with tf.control_dependencies( - [unmatched_shape_assert, labels_and_box_shapes_assert]): - match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes, - anchors) - match = self._matcher.match(match_quality_matrix, **params) - reg_targets = self._create_regression_targets(anchors, - groundtruth_boxes, - match) - cls_targets = self._create_classification_targets(groundtruth_labels, - match) - reg_weights = self._create_regression_weights(match) - cls_weights = self._create_classification_weights( - match, self._positive_class_weight, self._negative_class_weight) - - num_anchors = anchors.num_boxes_static() - if num_anchors is not None: - reg_targets = self._reset_target_shape(reg_targets, num_anchors) - cls_targets = self._reset_target_shape(cls_targets, num_anchors) - reg_weights = self._reset_target_shape(reg_weights, num_anchors) - cls_weights = self._reset_target_shape(cls_weights, num_anchors) - - return cls_targets, cls_weights, reg_targets, reg_weights, match - - def _reset_target_shape(self, target, num_anchors): - """Sets the static shape of the target. 
- - Args: - target: the target tensor. Its first dimension will be overwritten. - num_anchors: the number of anchors, which is used to override the target's - first dimension. - - Returns: - A tensor with the shape info filled in. - """ - target_shape = target.get_shape().as_list() - target_shape[0] = num_anchors - target.set_shape(target_shape) - return target - - def _create_regression_targets(self, anchors, groundtruth_boxes, match): - """Returns a regression target for each anchor. - - Args: - anchors: a BoxList representing N anchors - groundtruth_boxes: a BoxList representing M groundtruth_boxes - match: a matcher.Match object - - Returns: - reg_targets: a float32 tensor with shape [N, box_code_dimension] - """ - matched_anchor_indices = match.matched_column_indices() - unmatched_ignored_anchor_indices = (match. - unmatched_or_ignored_column_indices()) - matched_gt_indices = match.matched_row_indices() - matched_anchors = box_list_ops.gather(anchors, - matched_anchor_indices) - matched_gt_boxes = box_list_ops.gather(groundtruth_boxes, - matched_gt_indices) - matched_reg_targets = self._box_coder.encode(matched_gt_boxes, - matched_anchors) - unmatched_ignored_reg_targets = tf.tile( - self._default_regression_target(), - tf.stack([tf.size(unmatched_ignored_anchor_indices), 1])) - reg_targets = tf.dynamic_stitch( - [matched_anchor_indices, unmatched_ignored_anchor_indices], - [matched_reg_targets, unmatched_ignored_reg_targets]) - # TODO: summarize the number of matches on average. - return reg_targets - - def _default_regression_target(self): - """Returns the default target for anchors to regress to. - - Default regression targets are set to zero (though in - this implementation what these targets are set to should - not matter as the regression weight of any box set to - regress to the default target is zero). 
- - Returns: - default_target: a float32 tensor with shape [1, box_code_dimension] - """ - return tf.constant([self._box_coder.code_size*[0]], tf.float32) - - def _create_classification_targets(self, groundtruth_labels, match): - """Create classification targets for each anchor. - - Assign a classification target of for each anchor to the matching - groundtruth label that is provided by match. Anchors that are not matched - to anything are given the target self._unmatched_cls_target - - Args: - groundtruth_labels: a tensor of shape [num_gt_boxes, d_1, ... d_k] - with labels for each of the ground_truth boxes. The subshape - [d_1, ... d_k] can be empty (corresponding to scalar labels). - match: a matcher.Match object that provides a matching between anchors - and groundtruth boxes. - - Returns: - cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k], - where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels - which has shape [num_gt_boxes, d_1, d_2, ... d_k]. - """ - matched_anchor_indices = match.matched_column_indices() - unmatched_ignored_anchor_indices = (match. - unmatched_or_ignored_column_indices()) - matched_gt_indices = match.matched_row_indices() - matched_cls_targets = tf.gather(groundtruth_labels, matched_gt_indices) - - ones = self._unmatched_cls_target.shape.ndims * [1] - unmatched_ignored_cls_targets = tf.tile( - tf.expand_dims(self._unmatched_cls_target, 0), - tf.stack([tf.size(unmatched_ignored_anchor_indices)] + ones)) - - cls_targets = tf.dynamic_stitch( - [matched_anchor_indices, unmatched_ignored_anchor_indices], - [matched_cls_targets, unmatched_ignored_cls_targets]) - return cls_targets - - def _create_regression_weights(self, match): - """Set regression weight for each anchor. - - Only positive anchors are set to contribute to the regression loss, so this - method returns a weight of 1 for every positive anchor and 0 for every - negative anchor. 
- - Args: - match: a matcher.Match object that provides a matching between anchors - and groundtruth boxes. - - Returns: - reg_weights: a float32 tensor with shape [num_anchors] representing - regression weights - """ - reg_weights = tf.cast(match.matched_column_indicator(), tf.float32) - return reg_weights - - def _create_classification_weights(self, - match, - positive_class_weight=1.0, - negative_class_weight=1.0): - """Create classification weights for each anchor. - - Positive (matched) anchors are associated with a weight of - positive_class_weight and negative (unmatched) anchors are associated with - a weight of negative_class_weight. When anchors are ignored, weights are set - to zero. By default, both positive/negative weights are set to 1.0, - but they can be adjusted to handle class imbalance (which is almost always - the case in object detection). - - Args: - match: a matcher.Match object that provides a matching between anchors - and groundtruth boxes. - positive_class_weight: weight to be associated to positive anchors - negative_class_weight: weight to be associated to negative anchors - - Returns: - cls_weights: a float32 tensor with shape [num_anchors] representing - classification weights. - """ - matched_indicator = tf.cast(match.matched_column_indicator(), tf.float32) - ignore_indicator = tf.cast(match.ignored_column_indicator(), tf.float32) - unmatched_indicator = 1.0 - matched_indicator - ignore_indicator - cls_weights = (positive_class_weight * matched_indicator - + negative_class_weight * unmatched_indicator) - return cls_weights - - def get_box_coder(self): - """Get BoxCoder of this TargetAssigner. - - Returns: - BoxCoder: BoxCoder object. - """ - return self._box_coder - - -# TODO: This method pulls in all the implementation dependencies into -# core. Therefore its best to have this factory method outside of core. 
-def create_target_assigner(reference, stage=None, - positive_class_weight=1.0, - negative_class_weight=1.0, - unmatched_cls_target=None): - """Factory function for creating standard target assigners. - - Args: - reference: string referencing the type of TargetAssigner. - stage: string denoting stage: {proposal, detection}. - positive_class_weight: classification weight to be associated to positive - anchors (default: 1.0) - negative_class_weight: classification weight to be associated to negative - anchors (default: 1.0) - unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k] - which is consistent with the classification target for each - anchor (and can be empty for scalar targets). This shape must thus be - compatible with the groundtruth labels that are passed to the Assign - function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]). - If set to None, unmatched_cls_target is set to be 0 for each anchor. - - Returns: - TargetAssigner: desired target assigner. - - Raises: - ValueError: if combination reference+stage is invalid. - """ - if reference == 'Multibox' and stage == 'proposal': - similarity_calc = sim_calc.NegSqDistSimilarity() - matcher = bipartite_matcher.GreedyBipartiteMatcher() - box_coder = mean_stddev_box_coder.MeanStddevBoxCoder() - - elif reference == 'FasterRCNN' and stage == 'proposal': - similarity_calc = sim_calc.IouSimilarity() - matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.7, - unmatched_threshold=0.3, - force_match_for_each_row=True) - box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder( - scale_factors=[10.0, 10.0, 5.0, 5.0]) - - elif reference == 'FasterRCNN' and stage == 'detection': - similarity_calc = sim_calc.IouSimilarity() - # Uses all proposals with IOU < 0.5 as candidate negatives. 
- matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5, - negatives_lower_than_unmatched=True) - box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder( - scale_factors=[10.0, 10.0, 5.0, 5.0]) - - elif reference == 'FastRCNN': - similarity_calc = sim_calc.IouSimilarity() - matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5, - unmatched_threshold=0.1, - force_match_for_each_row=False, - negatives_lower_than_unmatched=False) - box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder() - - else: - raise ValueError('No valid combination of reference and stage.') - - return TargetAssigner(similarity_calc, matcher, box_coder, - positive_class_weight=positive_class_weight, - negative_class_weight=negative_class_weight, - unmatched_cls_target=unmatched_cls_target) - - -def batch_assign_targets(target_assigner, - anchors_batch, - gt_box_batch, - gt_class_targets_batch): - """Batched assignment of classification and regression targets. - - Args: - target_assigner: a target assigner. - anchors_batch: BoxList representing N box anchors or list of BoxList objects - with length batch_size representing anchor sets. - gt_box_batch: a list of BoxList objects with length batch_size - representing groundtruth boxes for each image in the batch - gt_class_targets_batch: a list of tensors with length batch_size, where - each tensor has shape [num_gt_boxes_i, classification_target_size] and - num_gt_boxes_i is the number of boxes in the ith boxlist of - gt_box_batch. 
- - Returns: - batch_cls_targets: a tensor with shape [batch_size, num_anchors, - num_classes], - batch_cls_weights: a tensor with shape [batch_size, num_anchors], - batch_reg_targets: a tensor with shape [batch_size, num_anchors, - box_code_dimension] - batch_reg_weights: a tensor with shape [batch_size, num_anchors], - match_list: a list of matcher.Match objects encoding the match between - anchors and groundtruth boxes for each image of the batch, - with rows of the Match objects corresponding to groundtruth boxes - and columns corresponding to anchors. - Raises: - ValueError: if input list lengths are inconsistent, i.e., - batch_size == len(gt_box_batch) == len(gt_class_targets_batch) - and batch_size == len(anchors_batch) unless anchors_batch is a single - BoxList. - """ - if not isinstance(anchors_batch, list): - anchors_batch = len(gt_box_batch) * [anchors_batch] - if not all( - isinstance(anchors, box_list.BoxList) for anchors in anchors_batch): - raise ValueError('anchors_batch must be a BoxList or list of BoxLists.') - if not (len(anchors_batch) - == len(gt_box_batch) - == len(gt_class_targets_batch)): - raise ValueError('batch size incompatible with lengths of anchors_batch, ' - 'gt_box_batch and gt_class_targets_batch.') - cls_targets_list = [] - cls_weights_list = [] - reg_targets_list = [] - reg_weights_list = [] - match_list = [] - for anchors, gt_boxes, gt_class_targets in zip( - anchors_batch, gt_box_batch, gt_class_targets_batch): - (cls_targets, cls_weights, reg_targets, - reg_weights, match) = target_assigner.assign( - anchors, gt_boxes, gt_class_targets) - cls_targets_list.append(cls_targets) - cls_weights_list.append(cls_weights) - reg_targets_list.append(reg_targets) - reg_weights_list.append(reg_weights) - match_list.append(match) - batch_cls_targets = tf.stack(cls_targets_list) - batch_cls_weights = tf.stack(cls_weights_list) - batch_reg_targets = tf.stack(reg_targets_list) - batch_reg_weights = tf.stack(reg_weights_list) - return 
(batch_cls_targets, batch_cls_weights, batch_reg_targets, - batch_reg_weights, match_list) diff --git a/object_detection/core/target_assigner_test.py b/object_detection/core/target_assigner_test.py deleted file mode 100644 index 5055e170..00000000 --- a/object_detection/core/target_assigner_test.py +++ /dev/null @@ -1,717 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.core.target_assigner.""" -import numpy as np -import tensorflow as tf - -from object_detection.box_coders import mean_stddev_box_coder -from object_detection.core import box_list -from object_detection.core import region_similarity_calculator -from object_detection.core import target_assigner as targetassigner -from object_detection.matchers import argmax_matcher -from object_detection.matchers import bipartite_matcher - - -class TargetAssignerTest(tf.test.TestCase): - - def test_assign_agnostic(self): - similarity_calc = region_similarity_calculator.NegSqDistSimilarity() - matcher = bipartite_matcher.GreedyBipartiteMatcher() - box_coder = mean_stddev_box_coder.MeanStddevBoxCoder() - target_assigner = targetassigner.TargetAssigner( - similarity_calc, matcher, box_coder, unmatched_cls_target=None) - - prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 1.0, 0.8], - [0, 0.5, .5, 1.0]]) - prior_stddevs = 
tf.constant(3 * [4 * [.1]]) - priors = box_list.BoxList(prior_means) - priors.add_field('stddev', prior_stddevs) - - box_corners = [[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 0.9, 0.9]] - boxes = box_list.BoxList(tf.constant(box_corners)) - exp_cls_targets = [[1], [1], [0]] - exp_cls_weights = [1, 1, 1] - exp_reg_targets = [[0, 0, 0, 0], - [0, 0, -1, 1], - [0, 0, 0, 0]] - exp_reg_weights = [1, 1, 0] - exp_matching_anchors = [0, 1] - - result = target_assigner.assign(priors, boxes, num_valid_rows=2) - (cls_targets, cls_weights, reg_targets, reg_weights, match) = result - - with self.test_session() as sess: - (cls_targets_out, cls_weights_out, - reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run( - [cls_targets, cls_weights, reg_targets, reg_weights, - match.matched_column_indices()]) - - self.assertAllClose(cls_targets_out, exp_cls_targets) - self.assertAllClose(cls_weights_out, exp_cls_weights) - self.assertAllClose(reg_targets_out, exp_reg_targets) - self.assertAllClose(reg_weights_out, exp_reg_weights) - self.assertAllClose(matching_anchors_out, exp_matching_anchors) - self.assertEquals(cls_targets_out.dtype, np.float32) - self.assertEquals(cls_weights_out.dtype, np.float32) - self.assertEquals(reg_targets_out.dtype, np.float32) - self.assertEquals(reg_weights_out.dtype, np.float32) - self.assertEquals(matching_anchors_out.dtype, np.int32) - - def test_assign_with_ignored_matches(self): - # Note: test is very similar to above. The third box matched with an IOU - # of 0.35, which is between the matched and unmatched threshold. This means - # That like above the expected classification targets are [1, 1, 0]. - # Unlike above, the third target is ignored and therefore expected - # classification weights are [1, 1, 0]. 
- similarity_calc = region_similarity_calculator.IouSimilarity() - matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5, - unmatched_threshold=0.3) - box_coder = mean_stddev_box_coder.MeanStddevBoxCoder() - target_assigner = targetassigner.TargetAssigner( - similarity_calc, matcher, box_coder) - - prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 1.0, 0.8], - [0.0, 0.5, .9, 1.0]]) - prior_stddevs = tf.constant(3 * [4 * [.1]]) - priors = box_list.BoxList(prior_means) - priors.add_field('stddev', prior_stddevs) - - box_corners = [[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 0.9, 0.9]] - boxes = box_list.BoxList(tf.constant(box_corners)) - exp_cls_targets = [[1], [1], [0]] - exp_cls_weights = [1, 1, 0] - exp_reg_targets = [[0, 0, 0, 0], - [0, 0, -1, 1], - [0, 0, 0, 0]] - exp_reg_weights = [1, 1, 0] - exp_matching_anchors = [0, 1] - - result = target_assigner.assign(priors, boxes) - (cls_targets, cls_weights, reg_targets, reg_weights, match) = result - with self.test_session() as sess: - (cls_targets_out, cls_weights_out, - reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run( - [cls_targets, cls_weights, reg_targets, reg_weights, - match.matched_column_indices()]) - - self.assertAllClose(cls_targets_out, exp_cls_targets) - self.assertAllClose(cls_weights_out, exp_cls_weights) - self.assertAllClose(reg_targets_out, exp_reg_targets) - self.assertAllClose(reg_weights_out, exp_reg_weights) - self.assertAllClose(matching_anchors_out, exp_matching_anchors) - self.assertEquals(cls_targets_out.dtype, np.float32) - self.assertEquals(cls_weights_out.dtype, np.float32) - self.assertEquals(reg_targets_out.dtype, np.float32) - self.assertEquals(reg_weights_out.dtype, np.float32) - self.assertEquals(matching_anchors_out.dtype, np.int32) - - def test_assign_multiclass(self): - similarity_calc = region_similarity_calculator.NegSqDistSimilarity() - matcher = bipartite_matcher.GreedyBipartiteMatcher() - box_coder = mean_stddev_box_coder.MeanStddevBoxCoder() - 
unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32) - target_assigner = targetassigner.TargetAssigner( - similarity_calc, matcher, box_coder, - unmatched_cls_target=unmatched_cls_target) - - prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 1.0, 0.8], - [0, 0.5, .5, 1.0], - [.75, 0, 1.0, .25]]) - prior_stddevs = tf.constant(4 * [4 * [.1]]) - priors = box_list.BoxList(prior_means) - priors.add_field('stddev', prior_stddevs) - - box_corners = [[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 0.9, 0.9], - [.75, 0, .95, .27]] - boxes = box_list.BoxList(tf.constant(box_corners)) - - groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 1, 0], - [0, 0, 0, 1, 0, 0, 0]], tf.float32) - - exp_cls_targets = [[0, 1, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 1, 0], - [1, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 1, 0, 0, 0]] - exp_cls_weights = [1, 1, 1, 1] - exp_reg_targets = [[0, 0, 0, 0], - [0, 0, -1, 1], - [0, 0, 0, 0], - [0, 0, -.5, .2]] - exp_reg_weights = [1, 1, 0, 1] - exp_matching_anchors = [0, 1, 3] - - result = target_assigner.assign(priors, boxes, groundtruth_labels, - num_valid_rows=3) - (cls_targets, cls_weights, reg_targets, reg_weights, match) = result - with self.test_session() as sess: - (cls_targets_out, cls_weights_out, - reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run( - [cls_targets, cls_weights, reg_targets, reg_weights, - match.matched_column_indices()]) - - self.assertAllClose(cls_targets_out, exp_cls_targets) - self.assertAllClose(cls_weights_out, exp_cls_weights) - self.assertAllClose(reg_targets_out, exp_reg_targets) - self.assertAllClose(reg_weights_out, exp_reg_weights) - self.assertAllClose(matching_anchors_out, exp_matching_anchors) - self.assertEquals(cls_targets_out.dtype, np.float32) - self.assertEquals(cls_weights_out.dtype, np.float32) - self.assertEquals(reg_targets_out.dtype, np.float32) - self.assertEquals(reg_weights_out.dtype, np.float32) - self.assertEquals(matching_anchors_out.dtype, np.int32) - - 
def test_assign_multiclass_unequal_class_weights(self): - similarity_calc = region_similarity_calculator.NegSqDistSimilarity() - matcher = bipartite_matcher.GreedyBipartiteMatcher() - box_coder = mean_stddev_box_coder.MeanStddevBoxCoder() - unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32) - target_assigner = targetassigner.TargetAssigner( - similarity_calc, matcher, box_coder, - positive_class_weight=1.0, negative_class_weight=0.5, - unmatched_cls_target=unmatched_cls_target) - - prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 1.0, 0.8], - [0, 0.5, .5, 1.0], - [.75, 0, 1.0, .25]]) - prior_stddevs = tf.constant(4 * [4 * [.1]]) - priors = box_list.BoxList(prior_means) - priors.add_field('stddev', prior_stddevs) - - box_corners = [[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 0.9, 0.9], - [.75, 0, .95, .27]] - boxes = box_list.BoxList(tf.constant(box_corners)) - - groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 1, 0], - [0, 0, 0, 1, 0, 0, 0]], tf.float32) - - exp_cls_weights = [1, 1, .5, 1] - result = target_assigner.assign(priors, boxes, groundtruth_labels, - num_valid_rows=3) - (_, cls_weights, _, _, _) = result - with self.test_session() as sess: - cls_weights_out = sess.run(cls_weights) - self.assertAllClose(cls_weights_out, exp_cls_weights) - - def test_assign_multidimensional_class_targets(self): - similarity_calc = region_similarity_calculator.NegSqDistSimilarity() - matcher = bipartite_matcher.GreedyBipartiteMatcher() - box_coder = mean_stddev_box_coder.MeanStddevBoxCoder() - unmatched_cls_target = tf.constant([[0, 0], [0, 0]], tf.float32) - target_assigner = targetassigner.TargetAssigner( - similarity_calc, matcher, box_coder, - unmatched_cls_target=unmatched_cls_target) - - prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 1.0, 0.8], - [0, 0.5, .5, 1.0], - [.75, 0, 1.0, .25]]) - prior_stddevs = tf.constant(4 * [4 * [.1]]) - priors = box_list.BoxList(prior_means) - priors.add_field('stddev', 
prior_stddevs) - - box_corners = [[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 0.9, 0.9], - [.75, 0, .95, .27]] - boxes = box_list.BoxList(tf.constant(box_corners)) - - groundtruth_labels = tf.constant([[[0, 1], [1, 0]], - [[1, 0], [0, 1]], - [[0, 1], [1, .5]]], tf.float32) - - exp_cls_targets = [[[0, 1], [1, 0]], - [[1, 0], [0, 1]], - [[0, 0], [0, 0]], - [[0, 1], [1, .5]]] - exp_cls_weights = [1, 1, 1, 1] - exp_reg_targets = [[0, 0, 0, 0], - [0, 0, -1, 1], - [0, 0, 0, 0], - [0, 0, -.5, .2]] - exp_reg_weights = [1, 1, 0, 1] - exp_matching_anchors = [0, 1, 3] - - result = target_assigner.assign(priors, boxes, groundtruth_labels, - num_valid_rows=3) - (cls_targets, cls_weights, reg_targets, reg_weights, match) = result - with self.test_session() as sess: - (cls_targets_out, cls_weights_out, - reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run( - [cls_targets, cls_weights, reg_targets, reg_weights, - match.matched_column_indices()]) - - self.assertAllClose(cls_targets_out, exp_cls_targets) - self.assertAllClose(cls_weights_out, exp_cls_weights) - self.assertAllClose(reg_targets_out, exp_reg_targets) - self.assertAllClose(reg_weights_out, exp_reg_weights) - self.assertAllClose(matching_anchors_out, exp_matching_anchors) - self.assertEquals(cls_targets_out.dtype, np.float32) - self.assertEquals(cls_weights_out.dtype, np.float32) - self.assertEquals(reg_targets_out.dtype, np.float32) - self.assertEquals(reg_weights_out.dtype, np.float32) - self.assertEquals(matching_anchors_out.dtype, np.int32) - - def test_assign_empty_groundtruth(self): - similarity_calc = region_similarity_calculator.NegSqDistSimilarity() - matcher = bipartite_matcher.GreedyBipartiteMatcher() - box_coder = mean_stddev_box_coder.MeanStddevBoxCoder() - unmatched_cls_target = tf.constant([0, 0, 0], tf.float32) - target_assigner = targetassigner.TargetAssigner( - similarity_calc, matcher, box_coder, - unmatched_cls_target=unmatched_cls_target) - - prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], - 
[0.5, 0.5, 1.0, 0.8], - [0, 0.5, .5, 1.0], - [.75, 0, 1.0, .25]]) - prior_stddevs = tf.constant(4 * [4 * [.1]]) - priors = box_list.BoxList(prior_means) - priors.add_field('stddev', prior_stddevs) - - box_corners_expanded = tf.constant([[0.0, 0.0, 0.0, 0.0]]) - box_corners = tf.slice(box_corners_expanded, [0, 0], [0, 4]) - boxes = box_list.BoxList(box_corners) - - groundtruth_labels_expanded = tf.constant([[0, 0, 0]], tf.float32) - groundtruth_labels = tf.slice(groundtruth_labels_expanded, [0, 0], [0, 3]) - - exp_cls_targets = [[0, 0, 0], - [0, 0, 0], - [0, 0, 0], - [0, 0, 0]] - exp_cls_weights = [1, 1, 1, 1] - exp_reg_targets = [[0, 0, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 0]] - exp_reg_weights = [0, 0, 0, 0] - exp_matching_anchors = [] - - result = target_assigner.assign(priors, boxes, groundtruth_labels) - (cls_targets, cls_weights, reg_targets, reg_weights, match) = result - with self.test_session() as sess: - (cls_targets_out, cls_weights_out, - reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run( - [cls_targets, cls_weights, reg_targets, reg_weights, - match.matched_column_indices()]) - - self.assertAllClose(cls_targets_out, exp_cls_targets) - self.assertAllClose(cls_weights_out, exp_cls_weights) - self.assertAllClose(reg_targets_out, exp_reg_targets) - self.assertAllClose(reg_weights_out, exp_reg_weights) - self.assertAllClose(matching_anchors_out, exp_matching_anchors) - self.assertEquals(cls_targets_out.dtype, np.float32) - self.assertEquals(cls_weights_out.dtype, np.float32) - self.assertEquals(reg_targets_out.dtype, np.float32) - self.assertEquals(reg_weights_out.dtype, np.float32) - self.assertEquals(matching_anchors_out.dtype, np.int32) - - def test_raises_error_on_incompatible_groundtruth_boxes_and_labels(self): - similarity_calc = region_similarity_calculator.NegSqDistSimilarity() - matcher = bipartite_matcher.GreedyBipartiteMatcher() - box_coder = mean_stddev_box_coder.MeanStddevBoxCoder() - unmatched_cls_target = 
tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32) - target_assigner = targetassigner.TargetAssigner( - similarity_calc, matcher, box_coder, - unmatched_cls_target=unmatched_cls_target) - - prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 1.0, 0.8], - [0, 0.5, .5, 1.0], - [.75, 0, 1.0, .25]]) - prior_stddevs = tf.constant(4 * [4 * [.1]]) - priors = box_list.BoxList(prior_means) - priors.add_field('stddev', prior_stddevs) - - box_corners = [[0.0, 0.0, 0.5, 0.5], - [0.0, 0.0, 0.5, 0.8], - [0.5, 0.5, 0.9, 0.9], - [.75, 0, .95, .27]] - boxes = box_list.BoxList(tf.constant(box_corners)) - - groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 1, 0], - [0, 0, 0, 1, 0, 0, 0]], tf.float32) - result = target_assigner.assign(priors, boxes, groundtruth_labels, - num_valid_rows=3) - (cls_targets, cls_weights, reg_targets, reg_weights, _) = result - with self.test_session() as sess: - with self.assertRaisesWithPredicateMatch( - tf.errors.InvalidArgumentError, - 'Groundtruth boxes and labels have incompatible shapes!'): - sess.run([cls_targets, cls_weights, reg_targets, reg_weights]) - - def test_raises_error_on_invalid_groundtruth_labels(self): - similarity_calc = region_similarity_calculator.NegSqDistSimilarity() - matcher = bipartite_matcher.GreedyBipartiteMatcher() - box_coder = mean_stddev_box_coder.MeanStddevBoxCoder() - unmatched_cls_target = tf.constant([[0, 0], [0, 0], [0, 0]], tf.float32) - target_assigner = targetassigner.TargetAssigner( - similarity_calc, matcher, box_coder, - unmatched_cls_target=unmatched_cls_target) - - prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5]]) - prior_stddevs = tf.constant([[1.0, 1.0, 1.0, 1.0]]) - priors = box_list.BoxList(prior_means) - priors.add_field('stddev', prior_stddevs) - - box_corners = [[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 0.9, 0.9], - [.75, 0, .95, .27]] - boxes = box_list.BoxList(tf.constant(box_corners)) - - groundtruth_labels = tf.constant([[[0, 1], [1, 0]]], tf.float32) - - with 
self.assertRaises(ValueError): - target_assigner.assign(priors, boxes, groundtruth_labels, - num_valid_rows=3) - - -class BatchTargetAssignerTest(tf.test.TestCase): - - def _get_agnostic_target_assigner(self): - similarity_calc = region_similarity_calculator.NegSqDistSimilarity() - matcher = bipartite_matcher.GreedyBipartiteMatcher() - box_coder = mean_stddev_box_coder.MeanStddevBoxCoder() - return targetassigner.TargetAssigner( - similarity_calc, matcher, box_coder, - positive_class_weight=1.0, - negative_class_weight=1.0, - unmatched_cls_target=None) - - def _get_multi_class_target_assigner(self, num_classes): - similarity_calc = region_similarity_calculator.NegSqDistSimilarity() - matcher = bipartite_matcher.GreedyBipartiteMatcher() - box_coder = mean_stddev_box_coder.MeanStddevBoxCoder() - unmatched_cls_target = tf.constant([1] + num_classes * [0], tf.float32) - return targetassigner.TargetAssigner( - similarity_calc, matcher, box_coder, - positive_class_weight=1.0, - negative_class_weight=1.0, - unmatched_cls_target=unmatched_cls_target) - - def _get_multi_dimensional_target_assigner(self, target_dimensions): - similarity_calc = region_similarity_calculator.NegSqDistSimilarity() - matcher = bipartite_matcher.GreedyBipartiteMatcher() - box_coder = mean_stddev_box_coder.MeanStddevBoxCoder() - unmatched_cls_target = tf.constant(np.zeros(target_dimensions), - tf.float32) - return targetassigner.TargetAssigner( - similarity_calc, matcher, box_coder, - positive_class_weight=1.0, - negative_class_weight=1.0, - unmatched_cls_target=unmatched_cls_target) - - def test_batch_assign_targets(self): - box_list1 = box_list.BoxList(tf.constant([[0., 0., 0.2, 0.2]])) - box_list2 = box_list.BoxList(tf.constant( - [[0, 0.25123152, 1, 1], - [0.015789, 0.0985, 0.55789, 0.3842]] - )) - - gt_box_batch = [box_list1, box_list2] - gt_class_targets = [None, None] - - prior_means = tf.constant([[0, 0, .25, .25], - [0, .25, 1, 1], - [0, .1, .5, .5], - [.75, .75, 1, 1]]) - prior_stddevs = 
tf.constant([[.1, .1, .1, .1], - [.1, .1, .1, .1], - [.1, .1, .1, .1], - [.1, .1, .1, .1]]) - priors = box_list.BoxList(prior_means) - priors.add_field('stddev', prior_stddevs) - - exp_reg_targets = [[[0, 0, -0.5, -0.5], - [0, 0, 0, 0], - [0, 0, 0, 0,], - [0, 0, 0, 0,],], - [[0, 0, 0, 0,], - [0, 0.01231521, 0, 0], - [0.15789001, -0.01500003, 0.57889998, -1.15799987], - [0, 0, 0, 0]]] - exp_cls_weights = [[1, 1, 1, 1], - [1, 1, 1, 1]] - exp_cls_targets = [[[1], [0], [0], [0]], - [[0], [1], [1], [0]]] - exp_reg_weights = [[1, 0, 0, 0], - [0, 1, 1, 0]] - exp_match_0 = [0] - exp_match_1 = [1, 2] - - agnostic_target_assigner = self._get_agnostic_target_assigner() - (cls_targets, cls_weights, reg_targets, reg_weights, - match_list) = targetassigner.batch_assign_targets( - agnostic_target_assigner, priors, gt_box_batch, gt_class_targets) - self.assertTrue(isinstance(match_list, list) and len(match_list) == 2) - with self.test_session() as sess: - (cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out, - match_out_0, match_out_1) = sess.run([ - cls_targets, cls_weights, reg_targets, reg_weights] + [ - match.matched_column_indices() for match in match_list]) - self.assertAllClose(cls_targets_out, exp_cls_targets) - self.assertAllClose(cls_weights_out, exp_cls_weights) - self.assertAllClose(reg_targets_out, exp_reg_targets) - self.assertAllClose(reg_weights_out, exp_reg_weights) - self.assertAllClose(match_out_0, exp_match_0) - self.assertAllClose(match_out_1, exp_match_1) - - def test_batch_assign_multiclass_targets(self): - box_list1 = box_list.BoxList(tf.constant([[0., 0., 0.2, 0.2]])) - - box_list2 = box_list.BoxList(tf.constant( - [[0, 0.25123152, 1, 1], - [0.015789, 0.0985, 0.55789, 0.3842]] - )) - - gt_box_batch = [box_list1, box_list2] - - class_targets1 = tf.constant([[0, 1, 0, 0]], tf.float32) - class_targets2 = tf.constant([[0, 0, 0, 1], - [0, 0, 1, 0]], tf.float32) - - gt_class_targets = [class_targets1, class_targets2] - - prior_means = 
tf.constant([[0, 0, .25, .25], - [0, .25, 1, 1], - [0, .1, .5, .5], - [.75, .75, 1, 1]]) - prior_stddevs = tf.constant([[.1, .1, .1, .1], - [.1, .1, .1, .1], - [.1, .1, .1, .1], - [.1, .1, .1, .1]]) - priors = box_list.BoxList(prior_means) - priors.add_field('stddev', prior_stddevs) - - exp_reg_targets = [[[0, 0, -0.5, -0.5], - [0, 0, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 0]], - [[0, 0, 0, 0], - [0, 0.01231521, 0, 0], - [0.15789001, -0.01500003, 0.57889998, -1.15799987], - [0, 0, 0, 0]]] - exp_cls_weights = [[1, 1, 1, 1], - [1, 1, 1, 1]] - exp_cls_targets = [[[0, 1, 0, 0], - [1, 0, 0, 0], - [1, 0, 0, 0], - [1, 0, 0, 0]], - [[1, 0, 0, 0], - [0, 0, 0, 1], - [0, 0, 1, 0], - [1, 0, 0, 0]]] - exp_reg_weights = [[1, 0, 0, 0], - [0, 1, 1, 0]] - exp_match_0 = [0] - exp_match_1 = [1, 2] - - multiclass_target_assigner = self._get_multi_class_target_assigner( - num_classes=3) - - (cls_targets, cls_weights, reg_targets, reg_weights, - match_list) = targetassigner.batch_assign_targets( - multiclass_target_assigner, priors, gt_box_batch, gt_class_targets) - self.assertTrue(isinstance(match_list, list) and len(match_list) == 2) - with self.test_session() as sess: - (cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out, - match_out_0, match_out_1) = sess.run([ - cls_targets, cls_weights, reg_targets, reg_weights] + [ - match.matched_column_indices() for match in match_list]) - self.assertAllClose(cls_targets_out, exp_cls_targets) - self.assertAllClose(cls_weights_out, exp_cls_weights) - self.assertAllClose(reg_targets_out, exp_reg_targets) - self.assertAllClose(reg_weights_out, exp_reg_weights) - self.assertAllClose(match_out_0, exp_match_0) - self.assertAllClose(match_out_1, exp_match_1) - - def test_batch_assign_multidimensional_targets(self): - box_list1 = box_list.BoxList(tf.constant([[0., 0., 0.2, 0.2]])) - - box_list2 = box_list.BoxList(tf.constant( - [[0, 0.25123152, 1, 1], - [0.015789, 0.0985, 0.55789, 0.3842]] - )) - - gt_box_batch = [box_list1, box_list2] - 
class_targets1 = tf.constant([[[0, 1, 1], - [1, 1, 0]]], tf.float32) - class_targets2 = tf.constant([[[0, 1, 1], - [1, 1, 0]], - [[0, 0, 1], - [0, 0, 1]]], tf.float32) - - gt_class_targets = [class_targets1, class_targets2] - - prior_means = tf.constant([[0, 0, .25, .25], - [0, .25, 1, 1], - [0, .1, .5, .5], - [.75, .75, 1, 1]]) - prior_stddevs = tf.constant([[.1, .1, .1, .1], - [.1, .1, .1, .1], - [.1, .1, .1, .1], - [.1, .1, .1, .1]]) - priors = box_list.BoxList(prior_means) - priors.add_field('stddev', prior_stddevs) - - exp_reg_targets = [[[0, 0, -0.5, -0.5], - [0, 0, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 0]], - [[0, 0, 0, 0], - [0, 0.01231521, 0, 0], - [0.15789001, -0.01500003, 0.57889998, -1.15799987], - [0, 0, 0, 0]]] - exp_cls_weights = [[1, 1, 1, 1], - [1, 1, 1, 1]] - - exp_cls_targets = [[[[0., 1., 1.], - [1., 1., 0.]], - [[0., 0., 0.], - [0., 0., 0.]], - [[0., 0., 0.], - [0., 0., 0.]], - [[0., 0., 0.], - [0., 0., 0.]]], - [[[0., 0., 0.], - [0., 0., 0.]], - [[0., 1., 1.], - [1., 1., 0.]], - [[0., 0., 1.], - [0., 0., 1.]], - [[0., 0., 0.], - [0., 0., 0.]]]] - exp_reg_weights = [[1, 0, 0, 0], - [0, 1, 1, 0]] - exp_match_0 = [0] - exp_match_1 = [1, 2] - - multiclass_target_assigner = self._get_multi_dimensional_target_assigner( - target_dimensions=(2, 3)) - - (cls_targets, cls_weights, reg_targets, reg_weights, - match_list) = targetassigner.batch_assign_targets( - multiclass_target_assigner, priors, gt_box_batch, gt_class_targets) - self.assertTrue(isinstance(match_list, list) and len(match_list) == 2) - with self.test_session() as sess: - (cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out, - match_out_0, match_out_1) = sess.run([ - cls_targets, cls_weights, reg_targets, reg_weights] + [ - match.matched_column_indices() for match in match_list]) - self.assertAllClose(cls_targets_out, exp_cls_targets) - self.assertAllClose(cls_weights_out, exp_cls_weights) - self.assertAllClose(reg_targets_out, exp_reg_targets) - 
self.assertAllClose(reg_weights_out, exp_reg_weights) - self.assertAllClose(match_out_0, exp_match_0) - self.assertAllClose(match_out_1, exp_match_1) - - def test_batch_assign_empty_groundtruth(self): - box_coords_expanded = tf.zeros((1, 4), tf.float32) - box_coords = tf.slice(box_coords_expanded, [0, 0], [0, 4]) - box_list1 = box_list.BoxList(box_coords) - gt_box_batch = [box_list1] - - prior_means = tf.constant([[0, 0, .25, .25], - [0, .25, 1, 1]]) - prior_stddevs = tf.constant([[.1, .1, .1, .1], - [.1, .1, .1, .1]]) - priors = box_list.BoxList(prior_means) - priors.add_field('stddev', prior_stddevs) - - exp_reg_targets = [[[0, 0, 0, 0], - [0, 0, 0, 0]]] - exp_cls_weights = [[1, 1]] - exp_cls_targets = [[[1, 0, 0, 0], - [1, 0, 0, 0]]] - exp_reg_weights = [[0, 0]] - exp_match_0 = [] - - num_classes = 3 - pad = 1 - gt_class_targets = tf.zeros((0, num_classes + pad)) - gt_class_targets_batch = [gt_class_targets] - - multiclass_target_assigner = self._get_multi_class_target_assigner( - num_classes=3) - - (cls_targets, cls_weights, reg_targets, reg_weights, - match_list) = targetassigner.batch_assign_targets( - multiclass_target_assigner, priors, - gt_box_batch, gt_class_targets_batch) - self.assertTrue(isinstance(match_list, list) and len(match_list) == 1) - with self.test_session() as sess: - (cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out, - match_out_0) = sess.run([ - cls_targets, cls_weights, reg_targets, reg_weights] + [ - match.matched_column_indices() for match in match_list]) - self.assertAllClose(cls_targets_out, exp_cls_targets) - self.assertAllClose(cls_weights_out, exp_cls_weights) - self.assertAllClose(reg_targets_out, exp_reg_targets) - self.assertAllClose(reg_weights_out, exp_reg_weights) - self.assertAllClose(match_out_0, exp_match_0) - - -class CreateTargetAssignerTest(tf.test.TestCase): - - def test_create_target_assigner(self): - """Tests that named constructor gives working target assigners. 
- - TODO: Make this test more general. - """ - corners = [[0.0, 0.0, 1.0, 1.0]] - groundtruth = box_list.BoxList(tf.constant(corners)) - - priors = box_list.BoxList(tf.constant(corners)) - prior_stddevs = tf.constant([[1.0, 1.0, 1.0, 1.0]]) - priors.add_field('stddev', prior_stddevs) - multibox_ta = (targetassigner - .create_target_assigner('Multibox', stage='proposal')) - multibox_ta.assign(priors, groundtruth) - # No tests on output, as that may vary arbitrarily as new target assigners - # are added. As long as it is constructed correctly and runs without errors, - # tests on the individual assigners cover correctness of the assignments. - - anchors = box_list.BoxList(tf.constant(corners)) - faster_rcnn_proposals_ta = (targetassigner - .create_target_assigner('FasterRCNN', - stage='proposal')) - faster_rcnn_proposals_ta.assign(anchors, groundtruth) - - fast_rcnn_ta = (targetassigner - .create_target_assigner('FastRCNN')) - fast_rcnn_ta.assign(anchors, groundtruth) - - faster_rcnn_detection_ta = (targetassigner - .create_target_assigner('FasterRCNN', - stage='detection')) - faster_rcnn_detection_ta.assign(anchors, groundtruth) - - with self.assertRaises(ValueError): - targetassigner.create_target_assigner('InvalidDetector', - stage='invalid_stage') - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/data/kitti_label_map.pbtxt b/object_detection/data/kitti_label_map.pbtxt deleted file mode 100644 index 0afcc693..00000000 --- a/object_detection/data/kitti_label_map.pbtxt +++ /dev/null @@ -1,9 +0,0 @@ -item { - id: 1 - name: 'car' -} - -item { - id: 2 - name: 'pedestrian' -} diff --git a/object_detection/data/mscoco_label_map.pbtxt b/object_detection/data/mscoco_label_map.pbtxt deleted file mode 100644 index 1f4872bd..00000000 --- a/object_detection/data/mscoco_label_map.pbtxt +++ /dev/null @@ -1,400 +0,0 @@ -item { - name: "/m/01g317" - id: 1 - display_name: "person" -} -item { - name: "/m/0199g" - id: 2 - display_name: "bicycle" -} -item 
{ - name: "/m/0k4j" - id: 3 - display_name: "car" -} -item { - name: "/m/04_sv" - id: 4 - display_name: "motorcycle" -} -item { - name: "/m/05czz6l" - id: 5 - display_name: "airplane" -} -item { - name: "/m/01bjv" - id: 6 - display_name: "bus" -} -item { - name: "/m/07jdr" - id: 7 - display_name: "train" -} -item { - name: "/m/07r04" - id: 8 - display_name: "truck" -} -item { - name: "/m/019jd" - id: 9 - display_name: "boat" -} -item { - name: "/m/015qff" - id: 10 - display_name: "traffic light" -} -item { - name: "/m/01pns0" - id: 11 - display_name: "fire hydrant" -} -item { - name: "/m/02pv19" - id: 13 - display_name: "stop sign" -} -item { - name: "/m/015qbp" - id: 14 - display_name: "parking meter" -} -item { - name: "/m/0cvnqh" - id: 15 - display_name: "bench" -} -item { - name: "/m/015p6" - id: 16 - display_name: "bird" -} -item { - name: "/m/01yrx" - id: 17 - display_name: "cat" -} -item { - name: "/m/0bt9lr" - id: 18 - display_name: "dog" -} -item { - name: "/m/03k3r" - id: 19 - display_name: "horse" -} -item { - name: "/m/07bgp" - id: 20 - display_name: "sheep" -} -item { - name: "/m/01xq0k1" - id: 21 - display_name: "cow" -} -item { - name: "/m/0bwd_0j" - id: 22 - display_name: "elephant" -} -item { - name: "/m/01dws" - id: 23 - display_name: "bear" -} -item { - name: "/m/0898b" - id: 24 - display_name: "zebra" -} -item { - name: "/m/03bk1" - id: 25 - display_name: "giraffe" -} -item { - name: "/m/01940j" - id: 27 - display_name: "backpack" -} -item { - name: "/m/0hnnb" - id: 28 - display_name: "umbrella" -} -item { - name: "/m/080hkjn" - id: 31 - display_name: "handbag" -} -item { - name: "/m/01rkbr" - id: 32 - display_name: "tie" -} -item { - name: "/m/01s55n" - id: 33 - display_name: "suitcase" -} -item { - name: "/m/02wmf" - id: 34 - display_name: "frisbee" -} -item { - name: "/m/071p9" - id: 35 - display_name: "skis" -} -item { - name: "/m/06__v" - id: 36 - display_name: "snowboard" -} -item { - name: "/m/018xm" - id: 37 - display_name: "sports ball" 
-} -item { - name: "/m/02zt3" - id: 38 - display_name: "kite" -} -item { - name: "/m/03g8mr" - id: 39 - display_name: "baseball bat" -} -item { - name: "/m/03grzl" - id: 40 - display_name: "baseball glove" -} -item { - name: "/m/06_fw" - id: 41 - display_name: "skateboard" -} -item { - name: "/m/019w40" - id: 42 - display_name: "surfboard" -} -item { - name: "/m/0dv9c" - id: 43 - display_name: "tennis racket" -} -item { - name: "/m/04dr76w" - id: 44 - display_name: "bottle" -} -item { - name: "/m/09tvcd" - id: 46 - display_name: "wine glass" -} -item { - name: "/m/08gqpm" - id: 47 - display_name: "cup" -} -item { - name: "/m/0dt3t" - id: 48 - display_name: "fork" -} -item { - name: "/m/04ctx" - id: 49 - display_name: "knife" -} -item { - name: "/m/0cmx8" - id: 50 - display_name: "spoon" -} -item { - name: "/m/04kkgm" - id: 51 - display_name: "bowl" -} -item { - name: "/m/09qck" - id: 52 - display_name: "banana" -} -item { - name: "/m/014j1m" - id: 53 - display_name: "apple" -} -item { - name: "/m/0l515" - id: 54 - display_name: "sandwich" -} -item { - name: "/m/0cyhj_" - id: 55 - display_name: "orange" -} -item { - name: "/m/0hkxq" - id: 56 - display_name: "broccoli" -} -item { - name: "/m/0fj52s" - id: 57 - display_name: "carrot" -} -item { - name: "/m/01b9xk" - id: 58 - display_name: "hot dog" -} -item { - name: "/m/0663v" - id: 59 - display_name: "pizza" -} -item { - name: "/m/0jy4k" - id: 60 - display_name: "donut" -} -item { - name: "/m/0fszt" - id: 61 - display_name: "cake" -} -item { - name: "/m/01mzpv" - id: 62 - display_name: "chair" -} -item { - name: "/m/02crq1" - id: 63 - display_name: "couch" -} -item { - name: "/m/03fp41" - id: 64 - display_name: "potted plant" -} -item { - name: "/m/03ssj5" - id: 65 - display_name: "bed" -} -item { - name: "/m/04bcr3" - id: 67 - display_name: "dining table" -} -item { - name: "/m/09g1w" - id: 70 - display_name: "toilet" -} -item { - name: "/m/07c52" - id: 72 - display_name: "tv" -} -item { - name: "/m/01c648" - id: 
73 - display_name: "laptop" -} -item { - name: "/m/020lf" - id: 74 - display_name: "mouse" -} -item { - name: "/m/0qjjc" - id: 75 - display_name: "remote" -} -item { - name: "/m/01m2v" - id: 76 - display_name: "keyboard" -} -item { - name: "/m/050k8" - id: 77 - display_name: "cell phone" -} -item { - name: "/m/0fx9l" - id: 78 - display_name: "microwave" -} -item { - name: "/m/029bxz" - id: 79 - display_name: "oven" -} -item { - name: "/m/01k6s3" - id: 80 - display_name: "toaster" -} -item { - name: "/m/0130jx" - id: 81 - display_name: "sink" -} -item { - name: "/m/040b_t" - id: 82 - display_name: "refrigerator" -} -item { - name: "/m/0bt_c3" - id: 84 - display_name: "book" -} -item { - name: "/m/01x3z" - id: 85 - display_name: "clock" -} -item { - name: "/m/02s195" - id: 86 - display_name: "vase" -} -item { - name: "/m/01lsmm" - id: 87 - display_name: "scissors" -} -item { - name: "/m/0kmg4" - id: 88 - display_name: "teddy bear" -} -item { - name: "/m/03wvsk" - id: 89 - display_name: "hair drier" -} -item { - name: "/m/012xff" - id: 90 - display_name: "toothbrush" -} diff --git a/object_detection/data/oid_bbox_trainable_label_map.pbtxt b/object_detection/data/oid_bbox_trainable_label_map.pbtxt deleted file mode 100644 index 863e4f31..00000000 --- a/object_detection/data/oid_bbox_trainable_label_map.pbtxt +++ /dev/null @@ -1,2725 +0,0 @@ -item { - name: "/m/01g317" - id: 1 - display_name: "Person" -} -item { - name: "/m/09j2d" - id: 2 - display_name: "Clothing" -} -item { - name: "/m/04yx4" - id: 3 - display_name: "Man" -} -item { - name: "/m/0dzct" - id: 4 - display_name: "Face" -} -item { - name: "/m/07j7r" - id: 5 - display_name: "Tree" -} -item { - name: "/m/05s2s" - id: 6 - display_name: "Plant" -} -item { - name: "/m/03bt1vf" - id: 7 - display_name: "Woman" -} -item { - name: "/m/07yv9" - id: 8 - display_name: "Vehicle" -} -item { - name: "/m/0cgh4" - id: 9 - display_name: "Building" -} -item { - name: "/m/01prls" - id: 10 - display_name: "Land vehicle" -} 
-item { - name: "/m/09j5n" - id: 11 - display_name: "Footwear" -} -item { - name: "/m/05r655" - id: 12 - display_name: "Girl" -} -item { - name: "/m/0jbk" - id: 13 - display_name: "Animal" -} -item { - name: "/m/0k4j" - id: 14 - display_name: "Car" -} -item { - name: "/m/02wbm" - id: 15 - display_name: "Food" -} -item { - name: "/m/083wq" - id: 16 - display_name: "Wheel" -} -item { - name: "/m/0c9ph5" - id: 17 - display_name: "Flower" -} -item { - name: "/m/0c_jw" - id: 18 - display_name: "Furniture" -} -item { - name: "/m/0d4v4" - id: 19 - display_name: "Window" -} -item { - name: "/m/03jm5" - id: 20 - display_name: "House" -} -item { - name: "/m/01bl7v" - id: 21 - display_name: "Boy" -} -item { - name: "/m/0463sg" - id: 22 - display_name: "Fashion accessory" -} -item { - name: "/m/04bcr3" - id: 23 - display_name: "Table" -} -item { - name: "/m/0jyfg" - id: 24 - display_name: "Glasses" -} -item { - name: "/m/01xyhv" - id: 25 - display_name: "Suit" -} -item { - name: "/m/08dz3q" - id: 26 - display_name: "Auto part" -} -item { - name: "/m/015p6" - id: 27 - display_name: "Bird" -} -item { - name: "/m/05y5lj" - id: 28 - display_name: "Sports equipment" -} -item { - name: "/m/01d40f" - id: 29 - display_name: "Dress" -} -item { - name: "/m/0bt9lr" - id: 30 - display_name: "Dog" -} -item { - name: "/m/01lrl" - id: 31 - display_name: "Carnivore" -} -item { - name: "/m/02p0tk3" - id: 32 - display_name: "Human body" -} -item { - name: "/m/0fly7" - id: 33 - display_name: "Jeans" -} -item { - name: "/m/04szw" - id: 34 - display_name: "Musical instrument" -} -item { - name: "/m/0271t" - id: 35 - display_name: "Drink" -} -item { - name: "/m/019jd" - id: 36 - display_name: "Boat" -} -item { - name: "/m/03q69" - id: 37 - display_name: "Hair" -} -item { - name: "/m/0h9mv" - id: 38 - display_name: "Tire" -} -item { - name: "/m/04hgtk" - id: 39 - display_name: "Head" -} -item { - name: "/m/01yrx" - id: 40 - display_name: "Cat" -} -item { - name: "/m/01rzcn" - id: 41 - display_name: 
"Watercraft" -} -item { - name: "/m/01mzpv" - id: 42 - display_name: "Chair" -} -item { - name: "/m/0199g" - id: 43 - display_name: "Bike" -} -item { - name: "/m/01fdzj" - id: 44 - display_name: "Tower" -} -item { - name: "/m/04rky" - id: 45 - display_name: "Mammal" -} -item { - name: "/m/079cl" - id: 46 - display_name: "Skyscraper" -} -item { - name: "/m/0dzf4" - id: 47 - display_name: "Arm" -} -item { - name: "/m/0138tl" - id: 48 - display_name: "Toy" -} -item { - name: "/m/06msq" - id: 49 - display_name: "Sculpture" -} -item { - name: "/m/03xxp" - id: 50 - display_name: "Invertebrate" -} -item { - name: "/m/0hg7b" - id: 51 - display_name: "Microphone" -} -item { - name: "/m/01n5jq" - id: 52 - display_name: "Poster" -} -item { - name: "/m/03vt0" - id: 53 - display_name: "Insect" -} -item { - name: "/m/0342h" - id: 54 - display_name: "Guitar" -} -item { - name: "/m/0k0pj" - id: 55 - display_name: "Nose" -} -item { - name: "/m/02dl1y" - id: 56 - display_name: "Hat" -} -item { - name: "/m/04brg2" - id: 57 - display_name: "Tableware" -} -item { - name: "/m/02dgv" - id: 58 - display_name: "Door" -} -item { - name: "/m/01bqk0" - id: 59 - display_name: "Bicycle wheel" -} -item { - name: "/m/017ftj" - id: 60 - display_name: "Sunglasses" -} -item { - name: "/m/052lwg6" - id: 61 - display_name: "Baked goods" -} -item { - name: "/m/014sv8" - id: 62 - display_name: "Eye" -} -item { - name: "/m/0270h" - id: 63 - display_name: "Dessert" -} -item { - name: "/m/0283dt1" - id: 64 - display_name: "Mouth" -} -item { - name: "/m/0k5j" - id: 65 - display_name: "Aircraft" -} -item { - name: "/m/0cmf2" - id: 66 - display_name: "Airplane" -} -item { - name: "/m/07jdr" - id: 67 - display_name: "Train" -} -item { - name: "/m/032b3c" - id: 68 - display_name: "Jacket" -} -item { - name: "/m/033rq4" - id: 69 - display_name: "Street light" -} -item { - name: "/m/0k65p" - id: 70 - display_name: "Hand" -} -item { - name: "/m/01ww8y" - id: 71 - display_name: "Snack" -} -item { - name: "/m/0zvk5" 
- id: 72 - display_name: "Helmet" -} -item { - name: "/m/07mhn" - id: 73 - display_name: "Trousers" -} -item { - name: "/m/04dr76w" - id: 74 - display_name: "Bottle" -} -item { - name: "/m/03fp41" - id: 75 - display_name: "Houseplant" -} -item { - name: "/m/03k3r" - id: 76 - display_name: "Horse" -} -item { - name: "/m/01y9k5" - id: 77 - display_name: "Desk" -} -item { - name: "/m/0cdl1" - id: 78 - display_name: "Palm tree" -} -item { - name: "/m/0f4s2w" - id: 79 - display_name: "Vegetable" -} -item { - name: "/m/02xwb" - id: 80 - display_name: "Fruit" -} -item { - name: "/m/035r7c" - id: 81 - display_name: "Leg" -} -item { - name: "/m/0bt_c3" - id: 82 - display_name: "Book" -} -item { - name: "/m/01_bhs" - id: 83 - display_name: "Fast food" -} -item { - name: "/m/01599" - id: 84 - display_name: "Beer" -} -item { - name: "/m/03120" - id: 85 - display_name: "Flag" -} -item { - name: "/m/026t6" - id: 86 - display_name: "Drum" -} -item { - name: "/m/01bjv" - id: 87 - display_name: "Bus" -} -item { - name: "/m/07r04" - id: 88 - display_name: "Truck" -} -item { - name: "/m/018xm" - id: 89 - display_name: "Ball" -} -item { - name: "/m/01rkbr" - id: 90 - display_name: "Tie" -} -item { - name: "/m/0fm3zh" - id: 91 - display_name: "Flowerpot" -} -item { - name: "/m/02_n6y" - id: 92 - display_name: "Goggles" -} -item { - name: "/m/04_sv" - id: 93 - display_name: "Motorcycle" -} -item { - name: "/m/06z37_" - id: 94 - display_name: "Picture frame" -} -item { - name: "/m/01bfm9" - id: 95 - display_name: "Shorts" -} -item { - name: "/m/0h8mhzd" - id: 96 - display_name: "Sports uniform" -} -item { - name: "/m/0d_2m" - id: 97 - display_name: "Moths and butterflies" -} -item { - name: "/m/0gjbg72" - id: 98 - display_name: "Shelf" -} -item { - name: "/m/01n4qj" - id: 99 - display_name: "Shirt" -} -item { - name: "/m/0ch_cf" - id: 100 - display_name: "Fish" -} -item { - name: "/m/06m11" - id: 101 - display_name: "Rose" -} -item { - name: "/m/01jfm_" - id: 102 - display_name: "Licence 
plate" -} -item { - name: "/m/02crq1" - id: 103 - display_name: "Couch" -} -item { - name: "/m/083kb" - id: 104 - display_name: "Weapon" -} -item { - name: "/m/01c648" - id: 105 - display_name: "Laptop" -} -item { - name: "/m/09tvcd" - id: 106 - display_name: "Wine glass" -} -item { - name: "/m/0h2r6" - id: 107 - display_name: "Van" -} -item { - name: "/m/081qc" - id: 108 - display_name: "Wine" -} -item { - name: "/m/09ddx" - id: 109 - display_name: "Duck" -} -item { - name: "/m/03p3bw" - id: 110 - display_name: "Bicycle helmet" -} -item { - name: "/m/0cyf8" - id: 111 - display_name: "Butterfly" -} -item { - name: "/m/0b_rs" - id: 112 - display_name: "Swimming pool" -} -item { - name: "/m/039xj_" - id: 113 - display_name: "Ear" -} -item { - name: "/m/021sj1" - id: 114 - display_name: "Office" -} -item { - name: "/m/0dv5r" - id: 115 - display_name: "Camera" -} -item { - name: "/m/01lynh" - id: 116 - display_name: "Stairs" -} -item { - name: "/m/06bt6" - id: 117 - display_name: "Reptile" -} -item { - name: "/m/01226z" - id: 118 - display_name: "Football" -} -item { - name: "/m/0fszt" - id: 119 - display_name: "Cake" -} -item { - name: "/m/050k8" - id: 120 - display_name: "Mobile phone" -} -item { - name: "/m/02wbtzl" - id: 121 - display_name: "Sun hat" -} -item { - name: "/m/02p5f1q" - id: 122 - display_name: "Coffee cup" -} -item { - name: "/m/025nd" - id: 123 - display_name: "Christmas tree" -} -item { - name: "/m/02522" - id: 124 - display_name: "Computer monitor" -} -item { - name: "/m/09ct_" - id: 125 - display_name: "Helicopter" -} -item { - name: "/m/0cvnqh" - id: 126 - display_name: "Bench" -} -item { - name: "/m/0d5gx" - id: 127 - display_name: "Castle" -} -item { - name: "/m/01xygc" - id: 128 - display_name: "Coat" -} -item { - name: "/m/04m6gz" - id: 129 - display_name: "Porch" -} -item { - name: "/m/01gkx_" - id: 130 - display_name: "Swimwear" -} -item { - name: "/m/01s105" - id: 131 - display_name: "Cabinetry" -} -item { - name: "/m/01j61q" - id: 132 - 
display_name: "Tent" -} -item { - name: "/m/0hnnb" - id: 133 - display_name: "Umbrella" -} -item { - name: "/m/01j51" - id: 134 - display_name: "Balloon" -} -item { - name: "/m/01knjb" - id: 135 - display_name: "Billboard" -} -item { - name: "/m/03__z0" - id: 136 - display_name: "Bookcase" -} -item { - name: "/m/01m2v" - id: 137 - display_name: "Computer keyboard" -} -item { - name: "/m/0167gd" - id: 138 - display_name: "Doll" -} -item { - name: "/m/0284d" - id: 139 - display_name: "Dairy" -} -item { - name: "/m/03ssj5" - id: 140 - display_name: "Bed" -} -item { - name: "/m/02fq_6" - id: 141 - display_name: "Fedora" -} -item { - name: "/m/06nwz" - id: 142 - display_name: "Seafood" -} -item { - name: "/m/0220r2" - id: 143 - display_name: "Fountain" -} -item { - name: "/m/01mqdt" - id: 144 - display_name: "Traffic sign" -} -item { - name: "/m/0268lbt" - id: 145 - display_name: "Hiking equipment" -} -item { - name: "/m/07c52" - id: 146 - display_name: "Television" -} -item { - name: "/m/0grw1" - id: 147 - display_name: "Salad" -} -item { - name: "/m/01h3n" - id: 148 - display_name: "Bee" -} -item { - name: "/m/078n6m" - id: 149 - display_name: "Coffee table" -} -item { - name: "/m/01xq0k1" - id: 150 - display_name: "Cattle" -} -item { - name: "/m/0gd2v" - id: 151 - display_name: "Marine mammal" -} -item { - name: "/m/0dbvp" - id: 152 - display_name: "Goose" -} -item { - name: "/m/03rszm" - id: 153 - display_name: "Curtain" -} -item { - name: "/m/0h8n5zk" - id: 154 - display_name: "Kitchen & dining room table" -} -item { - name: "/m/019dx1" - id: 155 - display_name: "Home appliance" -} -item { - name: "/m/03hl4l9" - id: 156 - display_name: "Marine invertebrates" -} -item { - name: "/m/0b3fp9" - id: 157 - display_name: "Countertop" -} -item { - name: "/m/02rdsp" - id: 158 - display_name: "Office supplies" -} -item { - name: "/m/0hf58v5" - id: 159 - display_name: "Luggage and bags" -} -item { - name: "/m/04h7h" - id: 160 - display_name: "Lighthouse" -} -item { - name: 
"/m/024g6" - id: 161 - display_name: "Cocktail" -} -item { - name: "/m/0cffdh" - id: 162 - display_name: "Maple" -} -item { - name: "/m/03q5c7" - id: 163 - display_name: "Saucer" -} -item { - name: "/m/014y4n" - id: 164 - display_name: "Paddle" -} -item { - name: "/m/01yx86" - id: 165 - display_name: "Bronze sculpture" -} -item { - name: "/m/020jm" - id: 166 - display_name: "Beetle" -} -item { - name: "/m/025dyy" - id: 167 - display_name: "Box" -} -item { - name: "/m/01llwg" - id: 168 - display_name: "Necklace" -} -item { - name: "/m/08pbxl" - id: 169 - display_name: "Monkey" -} -item { - name: "/m/02d9qx" - id: 170 - display_name: "Whiteboard" -} -item { - name: "/m/02pkr5" - id: 171 - display_name: "Plumbing fixture" -} -item { - name: "/m/0h99cwc" - id: 172 - display_name: "Kitchen appliance" -} -item { - name: "/m/050gv4" - id: 173 - display_name: "Plate" -} -item { - name: "/m/02vqfm" - id: 174 - display_name: "Coffee" -} -item { - name: "/m/09kx5" - id: 175 - display_name: "Deer" -} -item { - name: "/m/019w40" - id: 176 - display_name: "Surfboard" -} -item { - name: "/m/09dzg" - id: 177 - display_name: "Turtle" -} -item { - name: "/m/07k1x" - id: 178 - display_name: "Tool" -} -item { - name: "/m/080hkjn" - id: 179 - display_name: "Handbag" -} -item { - name: "/m/07qxg_" - id: 180 - display_name: "Football helmet" -} -item { - name: "/m/0ph39" - id: 181 - display_name: "Canoe" -} -item { - name: "/m/018p4k" - id: 182 - display_name: "Cart" -} -item { - name: "/m/02h19r" - id: 183 - display_name: "Scarf" -} -item { - name: "/m/015h_t" - id: 184 - display_name: "Beard" -} -item { - name: "/m/0fqfqc" - id: 185 - display_name: "Drawer" -} -item { - name: "/m/025rp__" - id: 186 - display_name: "Cowboy hat" -} -item { - name: "/m/01x3z" - id: 187 - display_name: "Clock" -} -item { - name: "/m/0crjs" - id: 188 - display_name: "Convenience store" -} -item { - name: "/m/0l515" - id: 189 - display_name: "Sandwich" -} -item { - name: "/m/015qff" - id: 190 - display_name: 
"Traffic light" -} -item { - name: "/m/09kmb" - id: 191 - display_name: "Spider" -} -item { - name: "/m/09728" - id: 192 - display_name: "Bread" -} -item { - name: "/m/071qp" - id: 193 - display_name: "Squirrel" -} -item { - name: "/m/02s195" - id: 194 - display_name: "Vase" -} -item { - name: "/m/06c54" - id: 195 - display_name: "Rifle" -} -item { - name: "/m/01xqw" - id: 196 - display_name: "Cello" -} -item { - name: "/m/05zsy" - id: 197 - display_name: "Pumpkin" -} -item { - name: "/m/0bwd_0j" - id: 198 - display_name: "Elephant" -} -item { - name: "/m/04m9y" - id: 199 - display_name: "Lizard" -} -item { - name: "/m/052sf" - id: 200 - display_name: "Mushroom" -} -item { - name: "/m/03grzl" - id: 201 - display_name: "Baseball glove" -} -item { - name: "/m/01z1kdw" - id: 202 - display_name: "Juice" -} -item { - name: "/m/02wv6h6" - id: 203 - display_name: "Skirt" -} -item { - name: "/m/016m2d" - id: 204 - display_name: "Skull" -} -item { - name: "/m/0dtln" - id: 205 - display_name: "Lamp" -} -item { - name: "/m/057cc" - id: 206 - display_name: "Musical keyboard" -} -item { - name: "/m/06k2mb" - id: 207 - display_name: "High heels" -} -item { - name: "/m/0f6wt" - id: 208 - display_name: "Falcon" -} -item { - name: "/m/0cxn2" - id: 209 - display_name: "Ice cream" -} -item { - name: "/m/02jvh9" - id: 210 - display_name: "Mug" -} -item { - name: "/m/0gjkl" - id: 211 - display_name: "Watch" -} -item { - name: "/m/01b638" - id: 212 - display_name: "Boot" -} -item { - name: "/m/071p9" - id: 213 - display_name: "Ski" -} -item { - name: "/m/0pg52" - id: 214 - display_name: "Taxi" -} -item { - name: "/m/0ftb8" - id: 215 - display_name: "Sunflower" -} -item { - name: "/m/0hnyx" - id: 216 - display_name: "Pastry" -} -item { - name: "/m/02jz0l" - id: 217 - display_name: "Tap" -} -item { - name: "/m/04kkgm" - id: 218 - display_name: "Bowl" -} -item { - name: "/m/0174n1" - id: 219 - display_name: "Glove" -} -item { - name: "/m/0gv1x" - id: 220 - display_name: "Parrot" -} -item { 
- name: "/m/09csl" - id: 221 - display_name: "Eagle" -} -item { - name: "/m/02jnhm" - id: 222 - display_name: "Tin can" -} -item { - name: "/m/099ssp" - id: 223 - display_name: "Platter" -} -item { - name: "/m/03nfch" - id: 224 - display_name: "Sandal" -} -item { - name: "/m/07y_7" - id: 225 - display_name: "Violin" -} -item { - name: "/m/05z6w" - id: 226 - display_name: "Penguin" -} -item { - name: "/m/03m3pdh" - id: 227 - display_name: "Sofa bed" -} -item { - name: "/m/09ld4" - id: 228 - display_name: "Frog" -} -item { - name: "/m/09b5t" - id: 229 - display_name: "Chicken" -} -item { - name: "/m/054xkw" - id: 230 - display_name: "Lifejacket" -} -item { - name: "/m/0130jx" - id: 231 - display_name: "Sink" -} -item { - name: "/m/07fbm7" - id: 232 - display_name: "Strawberry" -} -item { - name: "/m/01dws" - id: 233 - display_name: "Bear" -} -item { - name: "/m/01tcjp" - id: 234 - display_name: "Muffin" -} -item { - name: "/m/0dftk" - id: 235 - display_name: "Swan" -} -item { - name: "/m/0c06p" - id: 236 - display_name: "Candle" -} -item { - name: "/m/034c16" - id: 237 - display_name: "Pillow" -} -item { - name: "/m/09d5_" - id: 238 - display_name: "Owl" -} -item { - name: "/m/03hlz0c" - id: 239 - display_name: "Kitchen utensil" -} -item { - name: "/m/0ft9s" - id: 240 - display_name: "Dragonfly" -} -item { - name: "/m/011k07" - id: 241 - display_name: "Tortoise" -} -item { - name: "/m/054_l" - id: 242 - display_name: "Mirror" -} -item { - name: "/m/0jqgx" - id: 243 - display_name: "Lily" -} -item { - name: "/m/0663v" - id: 244 - display_name: "Pizza" -} -item { - name: "/m/0242l" - id: 245 - display_name: "Coin" -} -item { - name: "/m/014trl" - id: 246 - display_name: "Cosmetics" -} -item { - name: "/m/05r5c" - id: 247 - display_name: "Piano" -} -item { - name: "/m/07j87" - id: 248 - display_name: "Tomato" -} -item { - name: "/m/05kyg_" - id: 249 - display_name: "Chest of drawers" -} -item { - name: "/m/0kmg4" - id: 250 - display_name: "Teddy bear" -} -item { - name: 
"/m/07cmd" - id: 251 - display_name: "Tank" -} -item { - name: "/m/0dv77" - id: 252 - display_name: "Squash" -} -item { - name: "/m/096mb" - id: 253 - display_name: "Lion" -} -item { - name: "/m/01gmv2" - id: 254 - display_name: "Brassiere" -} -item { - name: "/m/07bgp" - id: 255 - display_name: "Sheep" -} -item { - name: "/m/0cmx8" - id: 256 - display_name: "Spoon" -} -item { - name: "/m/029tx" - id: 257 - display_name: "Dinosaur" -} -item { - name: "/m/073bxn" - id: 258 - display_name: "Tripod" -} -item { - name: "/m/0bh9flk" - id: 259 - display_name: "Tablet computer" -} -item { - name: "/m/06mf6" - id: 260 - display_name: "Rabbit" -} -item { - name: "/m/06_fw" - id: 261 - display_name: "Skateboard" -} -item { - name: "/m/078jl" - id: 262 - display_name: "Snake" -} -item { - name: "/m/0fbdv" - id: 263 - display_name: "Shellfish" -} -item { - name: "/m/0h23m" - id: 264 - display_name: "Sparrow" -} -item { - name: "/m/014j1m" - id: 265 - display_name: "Apple" -} -item { - name: "/m/03fwl" - id: 266 - display_name: "Goat" -} -item { - name: "/m/02y6n" - id: 267 - display_name: "French fries" -} -item { - name: "/m/06c7f7" - id: 268 - display_name: "Lipstick" -} -item { - name: "/m/026qbn5" - id: 269 - display_name: "studio couch" -} -item { - name: "/m/0cdn1" - id: 270 - display_name: "Hamburger" -} -item { - name: "/m/07clx" - id: 271 - display_name: "Tea" -} -item { - name: "/m/07cx4" - id: 272 - display_name: "Telephone" -} -item { - name: "/m/03g8mr" - id: 273 - display_name: "Baseball bat" -} -item { - name: "/m/0cnyhnx" - id: 274 - display_name: "Bull" -} -item { - name: "/m/01b7fy" - id: 275 - display_name: "Headphones" -} -item { - name: "/m/04gth" - id: 276 - display_name: "Lavender" -} -item { - name: "/m/0cyfs" - id: 277 - display_name: "Parachute" -} -item { - name: "/m/021mn" - id: 278 - display_name: "Cookie" -} -item { - name: "/m/07dm6" - id: 279 - display_name: "Tiger" -} -item { - name: "/m/0k1tl" - id: 280 - display_name: "Pen" -} -item { - name: 
"/m/0dv9c" - id: 281 - display_name: "Racket" -} -item { - name: "/m/0dt3t" - id: 282 - display_name: "Fork" -} -item { - name: "/m/04yqq2" - id: 283 - display_name: "Bust" -} -item { - name: "/m/01cmb2" - id: 284 - display_name: "Miniskirt" -} -item { - name: "/m/0gd36" - id: 285 - display_name: "Sea lion" -} -item { - name: "/m/033cnk" - id: 286 - display_name: "Egg" -} -item { - name: "/m/06ncr" - id: 287 - display_name: "Saxophone" -} -item { - name: "/m/03bk1" - id: 288 - display_name: "Giraffe" -} -item { - name: "/m/0bjyj5" - id: 289 - display_name: "Waste container" -} -item { - name: "/m/06__v" - id: 290 - display_name: "Snowboard" -} -item { - name: "/m/0qmmr" - id: 291 - display_name: "Wheelchair" -} -item { - name: "/m/01xgg_" - id: 292 - display_name: "Medical equipment" -} -item { - name: "/m/0czz2" - id: 293 - display_name: "Antelope" -} -item { - name: "/m/02l8p9" - id: 294 - display_name: "Harbor seal" -} -item { - name: "/m/09g1w" - id: 295 - display_name: "Toilet" -} -item { - name: "/m/0ll1f78" - id: 296 - display_name: "Shrimp" -} -item { - name: "/m/0cyhj_" - id: 297 - display_name: "Orange" -} -item { - name: "/m/0642b4" - id: 298 - display_name: "Cupboard" -} -item { - name: "/m/0h8mzrc" - id: 299 - display_name: "Wall clock" -} -item { - name: "/m/068zj" - id: 300 - display_name: "Pig" -} -item { - name: "/m/02z51p" - id: 301 - display_name: "Nightstand" -} -item { - name: "/m/0h8nr_l" - id: 302 - display_name: "Bathroom accessory" -} -item { - name: "/m/0388q" - id: 303 - display_name: "Grape" -} -item { - name: "/m/02hj4" - id: 304 - display_name: "Dolphin" -} -item { - name: "/m/01jfsr" - id: 305 - display_name: "Lantern" -} -item { - name: "/m/07gql" - id: 306 - display_name: "Trumpet" -} -item { - name: "/m/0h8my_4" - id: 307 - display_name: "Tennis racket" -} -item { - name: "/m/0n28_" - id: 308 - display_name: "Crab" -} -item { - name: "/m/0120dh" - id: 309 - display_name: "Sea turtle" -} -item { - name: "/m/020kz" - id: 310 - 
display_name: "Cannon" -} -item { - name: "/m/0mkg" - id: 311 - display_name: "Accordion" -} -item { - name: "/m/03c7gz" - id: 312 - display_name: "Door handle" -} -item { - name: "/m/09k_b" - id: 313 - display_name: "Lemon" -} -item { - name: "/m/031n1" - id: 314 - display_name: "Foot" -} -item { - name: "/m/04rmv" - id: 315 - display_name: "Mouse" -} -item { - name: "/m/084rd" - id: 316 - display_name: "Wok" -} -item { - name: "/m/02rgn06" - id: 317 - display_name: "Volleyball" -} -item { - name: "/m/05z55" - id: 318 - display_name: "Pasta" -} -item { - name: "/m/01r546" - id: 319 - display_name: "Earrings" -} -item { - name: "/m/09qck" - id: 320 - display_name: "Banana" -} -item { - name: "/m/012w5l" - id: 321 - display_name: "Ladder" -} -item { - name: "/m/01940j" - id: 322 - display_name: "Backpack" -} -item { - name: "/m/09f_2" - id: 323 - display_name: "Crocodile" -} -item { - name: "/m/02p3w7d" - id: 324 - display_name: "Roller skates" -} -item { - name: "/m/057p5t" - id: 325 - display_name: "Scoreboard" -} -item { - name: "/m/0d8zb" - id: 326 - display_name: "Jellyfish" -} -item { - name: "/m/01nq26" - id: 327 - display_name: "Sock" -} -item { - name: "/m/01x_v" - id: 328 - display_name: "Camel" -} -item { - name: "/m/05gqfk" - id: 329 - display_name: "Plastic bag" -} -item { - name: "/m/0cydv" - id: 330 - display_name: "Caterpillar" -} -item { - name: "/m/07030" - id: 331 - display_name: "Sushi" -} -item { - name: "/m/084zz" - id: 332 - display_name: "Whale" -} -item { - name: "/m/0c29q" - id: 333 - display_name: "Leopard" -} -item { - name: "/m/02zn6n" - id: 334 - display_name: "Barrel" -} -item { - name: "/m/03tw93" - id: 335 - display_name: "Fireplace" -} -item { - name: "/m/0fqt361" - id: 336 - display_name: "Stool" -} -item { - name: "/m/0f9_l" - id: 337 - display_name: "Snail" -} -item { - name: "/m/0gm28" - id: 338 - display_name: "Candy" -} -item { - name: "/m/09rvcxw" - id: 339 - display_name: "Rocket" -} -item { - name: "/m/01nkt" - id: 340 - 
display_name: "Cheese" -} -item { - name: "/m/04p0qw" - id: 341 - display_name: "Billiard table" -} -item { - name: "/m/03hj559" - id: 342 - display_name: "Mixing bowl" -} -item { - name: "/m/07pj7bq" - id: 343 - display_name: "Bowling equipment" -} -item { - name: "/m/04ctx" - id: 344 - display_name: "Knife" -} -item { - name: "/m/0703r8" - id: 345 - display_name: "Loveseat" -} -item { - name: "/m/03qrc" - id: 346 - display_name: "Hamster" -} -item { - name: "/m/020lf" - id: 347 - display_name: "Mouse" -} -item { - name: "/m/0by6g" - id: 348 - display_name: "Shark" -} -item { - name: "/m/01fh4r" - id: 349 - display_name: "Teapot" -} -item { - name: "/m/07c6l" - id: 350 - display_name: "Trombone" -} -item { - name: "/m/03bj1" - id: 351 - display_name: "Panda" -} -item { - name: "/m/0898b" - id: 352 - display_name: "Zebra" -} -item { - name: "/m/02x984l" - id: 353 - display_name: "Mechanical fan" -} -item { - name: "/m/0fj52s" - id: 354 - display_name: "Carrot" -} -item { - name: "/m/0cd4d" - id: 355 - display_name: "Cheetah" -} -item { - name: "/m/02068x" - id: 356 - display_name: "Gondola" -} -item { - name: "/m/01vbnl" - id: 357 - display_name: "Bidet" -} -item { - name: "/m/0449p" - id: 358 - display_name: "Jaguar" -} -item { - name: "/m/0gj37" - id: 359 - display_name: "Ladybug" -} -item { - name: "/m/0nl46" - id: 360 - display_name: "Crown" -} -item { - name: "/m/0152hh" - id: 361 - display_name: "Snowman" -} -item { - name: "/m/03dnzn" - id: 362 - display_name: "Bathtub" -} -item { - name: "/m/05_5p_0" - id: 363 - display_name: "Table tennis racket" -} -item { - name: "/m/02jfl0" - id: 364 - display_name: "Sombrero" -} -item { - name: "/m/01dxs" - id: 365 - display_name: "Brown bear" -} -item { - name: "/m/0cjq5" - id: 366 - display_name: "Lobster" -} -item { - name: "/m/040b_t" - id: 367 - display_name: "Refrigerator" -} -item { - name: "/m/0_cp5" - id: 368 - display_name: "Oyster" -} -item { - name: "/m/0gxl3" - id: 369 - display_name: "Handgun" -} -item { 
- name: "/m/029bxz" - id: 370 - display_name: "Oven" -} -item { - name: "/m/02zt3" - id: 371 - display_name: "Kite" -} -item { - name: "/m/03d443" - id: 372 - display_name: "Rhinoceros" -} -item { - name: "/m/0306r" - id: 373 - display_name: "Fox" -} -item { - name: "/m/0h8l4fh" - id: 374 - display_name: "Light bulb" -} -item { - name: "/m/0633h" - id: 375 - display_name: "Polar bear" -} -item { - name: "/m/01s55n" - id: 376 - display_name: "Suitcase" -} -item { - name: "/m/0hkxq" - id: 377 - display_name: "Broccoli" -} -item { - name: "/m/0cn6p" - id: 378 - display_name: "Otter" -} -item { - name: "/m/0dbzx" - id: 379 - display_name: "Mule" -} -item { - name: "/m/01dy8n" - id: 380 - display_name: "Woodpecker" -} -item { - name: "/m/01h8tj" - id: 381 - display_name: "Starfish" -} -item { - name: "/m/03s_tn" - id: 382 - display_name: "Kettle" -} -item { - name: "/m/01xs3r" - id: 383 - display_name: "Jet ski" -} -item { - name: "/m/031b6r" - id: 384 - display_name: "Window blind" -} -item { - name: "/m/06j2d" - id: 385 - display_name: "Raven" -} -item { - name: "/m/0hqkz" - id: 386 - display_name: "Grapefruit" -} -item { - name: "/m/01_5g" - id: 387 - display_name: "Chopsticks" -} -item { - name: "/m/02zvsm" - id: 388 - display_name: "Tart" -} -item { - name: "/m/0kpqd" - id: 389 - display_name: "Watermelon" -} -item { - name: "/m/015x4r" - id: 390 - display_name: "Cucumber" -} -item { - name: "/m/061hd_" - id: 391 - display_name: "Infant bed" -} -item { - name: "/m/04ylt" - id: 392 - display_name: "Missile" -} -item { - name: "/m/02wv84t" - id: 393 - display_name: "Gas stove" -} -item { - name: "/m/04y4h8h" - id: 394 - display_name: "Bathroom cabinet" -} -item { - name: "/m/01gllr" - id: 395 - display_name: "Beehive" -} -item { - name: "/m/0pcr" - id: 396 - display_name: "Alpaca" -} -item { - name: "/m/0jy4k" - id: 397 - display_name: "Doughnut" -} -item { - name: "/m/09f20" - id: 398 - display_name: "Hippopotamus" -} -item { - name: "/m/0mcx2" - id: 399 - 
display_name: "Ipod" -} -item { - name: "/m/04c0y" - id: 400 - display_name: "Kangaroo" -} -item { - name: "/m/0_k2" - id: 401 - display_name: "Ant" -} -item { - name: "/m/0jg57" - id: 402 - display_name: "Bell pepper" -} -item { - name: "/m/03fj2" - id: 403 - display_name: "Goldfish" -} -item { - name: "/m/03ldnb" - id: 404 - display_name: "Ceiling fan" -} -item { - name: "/m/06nrc" - id: 405 - display_name: "Shotgun" -} -item { - name: "/m/01btn" - id: 406 - display_name: "Barge" -} -item { - name: "/m/05vtc" - id: 407 - display_name: "Potato" -} -item { - name: "/m/08hvt4" - id: 408 - display_name: "Jug" -} -item { - name: "/m/0fx9l" - id: 409 - display_name: "Microwave oven" -} -item { - name: "/m/01h44" - id: 410 - display_name: "Bat" -} -item { - name: "/m/05n4y" - id: 411 - display_name: "Ostrich" -} -item { - name: "/m/0jly1" - id: 412 - display_name: "Turkey" -} -item { - name: "/m/06y5r" - id: 413 - display_name: "Sword" -} -item { - name: "/m/05ctyq" - id: 414 - display_name: "Tennis ball" -} -item { - name: "/m/0fp6w" - id: 415 - display_name: "Pineapple" -} -item { - name: "/m/0d4w1" - id: 416 - display_name: "Closet" -} -item { - name: "/m/02pv19" - id: 417 - display_name: "Stop sign" -} -item { - name: "/m/07crc" - id: 418 - display_name: "Taco" -} -item { - name: "/m/01dwwc" - id: 419 - display_name: "Pancake" -} -item { - name: "/m/01b9xk" - id: 420 - display_name: "Hot dog" -} -item { - name: "/m/013y1f" - id: 421 - display_name: "Organ" -} -item { - name: "/m/0m53l" - id: 422 - display_name: "Rays and skates" -} -item { - name: "/m/0174k2" - id: 423 - display_name: "Washing machine" -} -item { - name: "/m/01dwsz" - id: 424 - display_name: "Waffle" -} -item { - name: "/m/04vv5k" - id: 425 - display_name: "Snowplow" -} -item { - name: "/m/04cp_" - id: 426 - display_name: "Koala" -} -item { - name: "/m/0fz0h" - id: 427 - display_name: "Honeycomb" -} -item { - name: "/m/0llzx" - id: 428 - display_name: "Sewing machine" -} -item { - name: "/m/0319l" - 
id: 429 - display_name: "Horn" -} -item { - name: "/m/04v6l4" - id: 430 - display_name: "Frying pan" -} -item { - name: "/m/0dkzw" - id: 431 - display_name: "Seat belt" -} -item { - name: "/m/027pcv" - id: 432 - display_name: "Zucchini" -} -item { - name: "/m/0323sq" - id: 433 - display_name: "Golf cart" -} -item { - name: "/m/054fyh" - id: 434 - display_name: "Pitcher" -} -item { - name: "/m/01pns0" - id: 435 - display_name: "Fire hydrant" -} -item { - name: "/m/012n7d" - id: 436 - display_name: "Ambulance" -} -item { - name: "/m/044r5d" - id: 437 - display_name: "Golf ball" -} -item { - name: "/m/01krhy" - id: 438 - display_name: "Tiara" -} -item { - name: "/m/0dq75" - id: 439 - display_name: "Raccoon" -} -item { - name: "/m/0176mf" - id: 440 - display_name: "Belt" -} -item { - name: "/m/0h8lkj8" - id: 441 - display_name: "Corded phone" -} -item { - name: "/m/04tn4x" - id: 442 - display_name: "Swim cap" -} -item { - name: "/m/06l9r" - id: 443 - display_name: "Red panda" -} -item { - name: "/m/0cjs7" - id: 444 - display_name: "Asparagus" -} -item { - name: "/m/01lsmm" - id: 445 - display_name: "Scissors" -} -item { - name: "/m/01lcw4" - id: 446 - display_name: "Limousine" -} -item { - name: "/m/047j0r" - id: 447 - display_name: "Filing cabinet" -} -item { - name: "/m/01fb_0" - id: 448 - display_name: "Bagel" -} -item { - name: "/m/04169hn" - id: 449 - display_name: "Wood-burning stove" -} -item { - name: "/m/076bq" - id: 450 - display_name: "Segway" -} -item { - name: "/m/0hdln" - id: 451 - display_name: "Ruler" -} -item { - name: "/m/01g3x7" - id: 452 - display_name: "Bow and arrow" -} -item { - name: "/m/0l3ms" - id: 453 - display_name: "Balance beam" -} -item { - name: "/m/058qzx" - id: 454 - display_name: "Kitchen knife" -} -item { - name: "/m/0h8n6ft" - id: 455 - display_name: "Cake stand" -} -item { - name: "/m/018j2" - id: 456 - display_name: "Banjo" -} -item { - name: "/m/0l14j_" - id: 457 - display_name: "Flute" -} -item { - name: "/m/0wdt60w" - id: 458 - 
display_name: "Rugby ball" -} -item { - name: "/m/02gzp" - id: 459 - display_name: "Dagger" -} -item { - name: "/m/0h8n6f9" - id: 460 - display_name: "Dog bed" -} -item { - name: "/m/0fbw6" - id: 461 - display_name: "Cabbage" -} -item { - name: "/m/07kng9" - id: 462 - display_name: "Picnic basket" -} -item { - name: "/m/0dj6p" - id: 463 - display_name: "Peach" -} -item { - name: "/m/06pcq" - id: 464 - display_name: "Submarine sandwich" -} -item { - name: "/m/061_f" - id: 465 - display_name: "Pear" -} -item { - name: "/m/04g2r" - id: 466 - display_name: "Lynx" -} -item { - name: "/m/0jwn_" - id: 467 - display_name: "Pomegranate" -} -item { - name: "/m/02f9f_" - id: 468 - display_name: "Shower" -} -item { - name: "/m/01f8m5" - id: 469 - display_name: "Blue jay" -} -item { - name: "/m/01m4t" - id: 470 - display_name: "Printer" -} -item { - name: "/m/0cl4p" - id: 471 - display_name: "Hedgehog" -} -item { - name: "/m/07xyvk" - id: 472 - display_name: "Coffeemaker" -} -item { - name: "/m/084hf" - id: 473 - display_name: "Worm" -} -item { - name: "/m/03v5tg" - id: 474 - display_name: "Drinking straw" -} -item { - name: "/m/0qjjc" - id: 475 - display_name: "Remote control" -} -item { - name: "/m/015x5n" - id: 476 - display_name: "Radish" -} -item { - name: "/m/0ccs93" - id: 477 - display_name: "Canary" -} -item { - name: "/m/0nybt" - id: 478 - display_name: "Seahorse" -} -item { - name: "/m/02vkqh8" - id: 479 - display_name: "Wardrobe" -} -item { - name: "/m/09gtd" - id: 480 - display_name: "Toilet paper" -} -item { - name: "/m/019h78" - id: 481 - display_name: "Centipede" -} -item { - name: "/m/015wgc" - id: 482 - display_name: "Croissant" -} -item { - name: "/m/01x3jk" - id: 483 - display_name: "Snowmobile" -} -item { - name: "/m/01j3zr" - id: 484 - display_name: "Burrito" -} -item { - name: "/m/0c568" - id: 485 - display_name: "Porcupine" -} -item { - name: "/m/02pdsw" - id: 486 - display_name: "Cutting board" -} -item { - name: "/m/029b3" - id: 487 - display_name: 
"Dice" -} -item { - name: "/m/03q5t" - id: 488 - display_name: "Harpsichord" -} -item { - name: "/m/0p833" - id: 489 - display_name: "Perfume" -} -item { - name: "/m/01d380" - id: 490 - display_name: "Drill" -} -item { - name: "/m/024d2" - id: 491 - display_name: "Calculator" -} -item { - name: "/m/0mw_6" - id: 492 - display_name: "Willow" -} -item { - name: "/m/01f91_" - id: 493 - display_name: "Pretzel" -} -item { - name: "/m/02g30s" - id: 494 - display_name: "Guacamole" -} -item { - name: "/m/01hrv5" - id: 495 - display_name: "Popcorn" -} -item { - name: "/m/03m5k" - id: 496 - display_name: "Harp" -} -item { - name: "/m/0162_1" - id: 497 - display_name: "Towel" -} -item { - name: "/m/063rgb" - id: 498 - display_name: "Mixer" -} -item { - name: "/m/06_72j" - id: 499 - display_name: "Digital clock" -} -item { - name: "/m/046dlr" - id: 500 - display_name: "Alarm clock" -} -item { - name: "/m/047v4b" - id: 501 - display_name: "Artichoke" -} -item { - name: "/m/04zpv" - id: 502 - display_name: "Milk" -} -item { - name: "/m/043nyj" - id: 503 - display_name: "Common fig" -} -item { - name: "/m/03bbps" - id: 504 - display_name: "Power plugs and sockets" -} -item { - name: "/m/02w3r3" - id: 505 - display_name: "Paper towel" -} -item { - name: "/m/02pjr4" - id: 506 - display_name: "Blender" -} -item { - name: "/m/0755b" - id: 507 - display_name: "Scorpion" -} -item { - name: "/m/02lbcq" - id: 508 - display_name: "Stretcher" -} -item { - name: "/m/0fldg" - id: 509 - display_name: "Mango" -} -item { - name: "/m/012074" - id: 510 - display_name: "Magpie" -} -item { - name: "/m/035vxb" - id: 511 - display_name: "Isopod" -} -item { - name: "/m/02w3_ws" - id: 512 - display_name: "Personal care" -} -item { - name: "/m/0f6nr" - id: 513 - display_name: "Unicycle" -} -item { - name: "/m/0420v5" - id: 514 - display_name: "Punching bag" -} -item { - name: "/m/0frqm" - id: 515 - display_name: "Envelope" -} -item { - name: "/m/03txqz" - id: 516 - display_name: "Scale" -} -item { - 
name: "/m/0271qf7" - id: 517 - display_name: "Wine rack" -} -item { - name: "/m/074d1" - id: 518 - display_name: "Submarine" -} -item { - name: "/m/08p92x" - id: 519 - display_name: "Cream" -} -item { - name: "/m/01j4z9" - id: 520 - display_name: "Chainsaw" -} -item { - name: "/m/0kpt_" - id: 521 - display_name: "Cantaloupe" -} -item { - name: "/m/0h8n27j" - id: 522 - display_name: "Serving tray" -} -item { - name: "/m/03y6mg" - id: 523 - display_name: "Food processor" -} -item { - name: "/m/04h8sr" - id: 524 - display_name: "Dumbbell" -} -item { - name: "/m/065h6l" - id: 525 - display_name: "Jacuzzi" -} -item { - name: "/m/02tsc9" - id: 526 - display_name: "Slow cooker" -} -item { - name: "/m/012ysf" - id: 527 - display_name: "Syringe" -} -item { - name: "/m/0ky7b" - id: 528 - display_name: "Dishwasher" -} -item { - name: "/m/02wg_p" - id: 529 - display_name: "Tree house" -} -item { - name: "/m/0584n8" - id: 530 - display_name: "Briefcase" -} -item { - name: "/m/03kt2w" - id: 531 - display_name: "Stationary bicycle" -} -item { - name: "/m/05kms" - id: 532 - display_name: "Oboe" -} -item { - name: "/m/030610" - id: 533 - display_name: "Treadmill" -} -item { - name: "/m/0lt4_" - id: 534 - display_name: "Binoculars" -} -item { - name: "/m/076lb9" - id: 535 - display_name: "Bench" -} -item { - name: "/m/02ctlc" - id: 536 - display_name: "Cricket ball" -} -item { - name: "/m/02x8cch" - id: 537 - display_name: "Salt and pepper shakers" -} -item { - name: "/m/09gys" - id: 538 - display_name: "Squid" -} -item { - name: "/m/03jbxj" - id: 539 - display_name: "Light switch" -} -item { - name: "/m/012xff" - id: 540 - display_name: "Toothbrush" -} -item { - name: "/m/0h8kx63" - id: 541 - display_name: "Spice rack" -} -item { - name: "/m/073g6" - id: 542 - display_name: "Stethoscope" -} -item { - name: "/m/02cvgx" - id: 543 - display_name: "Winter melon" -} -item { - name: "/m/027rl48" - id: 544 - display_name: "Ladle" -} -item { - name: "/m/01kb5b" - id: 545 - display_name: 
"Flashlight" -} diff --git a/object_detection/data/pascal_label_map.pbtxt b/object_detection/data/pascal_label_map.pbtxt deleted file mode 100644 index c9e9e2af..00000000 --- a/object_detection/data/pascal_label_map.pbtxt +++ /dev/null @@ -1,99 +0,0 @@ -item { - id: 1 - name: 'aeroplane' -} - -item { - id: 2 - name: 'bicycle' -} - -item { - id: 3 - name: 'bird' -} - -item { - id: 4 - name: 'boat' -} - -item { - id: 5 - name: 'bottle' -} - -item { - id: 6 - name: 'bus' -} - -item { - id: 7 - name: 'car' -} - -item { - id: 8 - name: 'cat' -} - -item { - id: 9 - name: 'chair' -} - -item { - id: 10 - name: 'cow' -} - -item { - id: 11 - name: 'diningtable' -} - -item { - id: 12 - name: 'dog' -} - -item { - id: 13 - name: 'horse' -} - -item { - id: 14 - name: 'motorbike' -} - -item { - id: 15 - name: 'person' -} - -item { - id: 16 - name: 'pottedplant' -} - -item { - id: 17 - name: 'sheep' -} - -item { - id: 18 - name: 'sofa' -} - -item { - id: 19 - name: 'train' -} - -item { - id: 20 - name: 'tvmonitor' -} diff --git a/object_detection/data/pet_label_map.pbtxt b/object_detection/data/pet_label_map.pbtxt deleted file mode 100644 index 54d7d351..00000000 --- a/object_detection/data/pet_label_map.pbtxt +++ /dev/null @@ -1,184 +0,0 @@ -item { - id: 1 - name: 'Abyssinian' -} - -item { - id: 2 - name: 'american_bulldog' -} - -item { - id: 3 - name: 'american_pit_bull_terrier' -} - -item { - id: 4 - name: 'basset_hound' -} - -item { - id: 5 - name: 'beagle' -} - -item { - id: 6 - name: 'Bengal' -} - -item { - id: 7 - name: 'Birman' -} - -item { - id: 8 - name: 'Bombay' -} - -item { - id: 9 - name: 'boxer' -} - -item { - id: 10 - name: 'British_Shorthair' -} - -item { - id: 11 - name: 'chihuahua' -} - -item { - id: 12 - name: 'Egyptian_Mau' -} - -item { - id: 13 - name: 'english_cocker_spaniel' -} - -item { - id: 14 - name: 'english_setter' -} - -item { - id: 15 - name: 'german_shorthaired' -} - -item { - id: 16 - name: 'great_pyrenees' -} - -item { - id: 17 - name: 'havanese' 
-} - -item { - id: 18 - name: 'japanese_chin' -} - -item { - id: 19 - name: 'keeshond' -} - -item { - id: 20 - name: 'leonberger' -} - -item { - id: 21 - name: 'Maine_Coon' -} - -item { - id: 22 - name: 'miniature_pinscher' -} - -item { - id: 23 - name: 'newfoundland' -} - -item { - id: 24 - name: 'Persian' -} - -item { - id: 25 - name: 'pomeranian' -} - -item { - id: 26 - name: 'pug' -} - -item { - id: 27 - name: 'Ragdoll' -} - -item { - id: 28 - name: 'Russian_Blue' -} - -item { - id: 29 - name: 'saint_bernard' -} - -item { - id: 30 - name: 'samoyed' -} - -item { - id: 31 - name: 'scottish_terrier' -} - -item { - id: 32 - name: 'shiba_inu' -} - -item { - id: 33 - name: 'Siamese' -} - -item { - id: 34 - name: 'Sphynx' -} - -item { - id: 35 - name: 'staffordshire_bull_terrier' -} - -item { - id: 36 - name: 'wheaten_terrier' -} - -item { - id: 37 - name: 'yorkshire_terrier' -} diff --git a/object_detection/data_decoders/BUILD b/object_detection/data_decoders/BUILD deleted file mode 100644 index d6b48ac0..00000000 --- a/object_detection/data_decoders/BUILD +++ /dev/null @@ -1,29 +0,0 @@ -# Tensorflow Object Detection API: data decoders. 
- -package( - default_visibility = ["//visibility:public"], -) - -licenses(["notice"]) -# Apache 2.0 - -py_library( - name = "tf_example_decoder", - srcs = ["tf_example_decoder.py"], - deps = [ - "//tensorflow", - "//tensorflow_models/object_detection/core:data_decoder", - "//tensorflow_models/object_detection/core:standard_fields", - "//tensorflow_models/object_detection/utils:label_map_util", - ], -) - -py_test( - name = "tf_example_decoder_test", - srcs = ["tf_example_decoder_test.py"], - deps = [ - ":tf_example_decoder", - "//tensorflow", - "//tensorflow_models/object_detection/core:standard_fields", - ], -) diff --git a/object_detection/data_decoders/__init__.py b/object_detection/data_decoders/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/object_detection/data_decoders/__pycache__/__init__.cpython-35.pyc b/object_detection/data_decoders/__pycache__/__init__.cpython-35.pyc deleted file mode 100644 index dd75c900..00000000 Binary files a/object_detection/data_decoders/__pycache__/__init__.cpython-35.pyc and /dev/null differ diff --git a/object_detection/data_decoders/__pycache__/tf_example_decoder.cpython-35.pyc b/object_detection/data_decoders/__pycache__/tf_example_decoder.cpython-35.pyc deleted file mode 100644 index 9e5bf93a..00000000 Binary files a/object_detection/data_decoders/__pycache__/tf_example_decoder.cpython-35.pyc and /dev/null differ diff --git a/object_detection/data_decoders/tf_example_decoder.py b/object_detection/data_decoders/tf_example_decoder.py deleted file mode 100644 index 4dc3dc5c..00000000 --- a/object_detection/data_decoders/tf_example_decoder.py +++ /dev/null @@ -1,186 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tensorflow Example proto decoder for object detection. - -A decoder to decode string tensors containing serialized tensorflow.Example -protos for object detection. -""" -import tensorflow as tf - -from object_detection.core import data_decoder -from object_detection.core import standard_fields as fields -from object_detection.utils import label_map_util - -slim_example_decoder = tf.contrib.slim.tfexample_decoder - - -class TfExampleDecoder(data_decoder.DataDecoder): - """Tensorflow Example proto decoder.""" - - def __init__(self, - load_instance_masks=False, - label_map_proto_file=None, - use_display_name=False): - """Constructor sets keys_to_features and items_to_handlers. - - Args: - load_instance_masks: whether or not to load and handle instance masks. - label_map_proto_file: a file path to a - object_detection.protos.StringIntLabelMap proto. If provided, then the - mapped IDs of 'image/object/class/text' will take precedence over the - existing 'image/object/class/label' ID. Also, if provided, it is - assumed that 'image/object/class/text' will be in the data. - use_display_name: whether or not to use the `display_name` for label - mapping (instead of `name`). Only used if label_map_proto_file is - provided. 
- """ - self.keys_to_features = { - 'image/encoded': - tf.FixedLenFeature((), tf.string, default_value=''), - 'image/format': - tf.FixedLenFeature((), tf.string, default_value='jpeg'), - 'image/filename': - tf.FixedLenFeature((), tf.string, default_value=''), - 'image/key/sha256': - tf.FixedLenFeature((), tf.string, default_value=''), - 'image/source_id': - tf.FixedLenFeature((), tf.string, default_value=''), - 'image/height': - tf.FixedLenFeature((), tf.int64, 1), - 'image/width': - tf.FixedLenFeature((), tf.int64, 1), - # Object boxes and classes. - 'image/object/bbox/xmin': - tf.VarLenFeature(tf.float32), - 'image/object/bbox/xmax': - tf.VarLenFeature(tf.float32), - 'image/object/bbox/ymin': - tf.VarLenFeature(tf.float32), - 'image/object/bbox/ymax': - tf.VarLenFeature(tf.float32), - 'image/object/class/label': - tf.VarLenFeature(tf.int64), - 'image/object/class/text': - tf.VarLenFeature(tf.string), - 'image/object/area': - tf.VarLenFeature(tf.float32), - 'image/object/is_crowd': - tf.VarLenFeature(tf.int64), - 'image/object/difficult': - tf.VarLenFeature(tf.int64), - 'image/object/group_of': - tf.VarLenFeature(tf.int64), - } - self.items_to_handlers = { - fields.InputDataFields.image: slim_example_decoder.Image( - image_key='image/encoded', format_key='image/format', channels=3), - fields.InputDataFields.source_id: ( - slim_example_decoder.Tensor('image/source_id')), - fields.InputDataFields.key: ( - slim_example_decoder.Tensor('image/key/sha256')), - fields.InputDataFields.filename: ( - slim_example_decoder.Tensor('image/filename')), - # Object boxes and classes. 
- fields.InputDataFields.groundtruth_boxes: ( - slim_example_decoder.BoundingBox( - ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/')), - fields.InputDataFields.groundtruth_area: slim_example_decoder.Tensor( - 'image/object/area'), - fields.InputDataFields.groundtruth_is_crowd: ( - slim_example_decoder.Tensor('image/object/is_crowd')), - fields.InputDataFields.groundtruth_difficult: ( - slim_example_decoder.Tensor('image/object/difficult')), - fields.InputDataFields.groundtruth_group_of: ( - slim_example_decoder.Tensor('image/object/group_of')) - } - if load_instance_masks: - self.keys_to_features['image/object/mask'] = tf.VarLenFeature(tf.float32) - self.items_to_handlers[ - fields.InputDataFields.groundtruth_instance_masks] = ( - slim_example_decoder.ItemHandlerCallback( - ['image/object/mask', 'image/height', 'image/width'], - self._reshape_instance_masks)) - # TODO: Add label_handler that decodes from 'image/object/class/text' - # primarily after the recent tf.contrib.slim changes make into a release - # supported by cloudml. - label_handler = slim_example_decoder.Tensor('image/object/class/label') - self.items_to_handlers[ - fields.InputDataFields.groundtruth_classes] = label_handler - - def decode(self, tf_example_string_tensor): - """Decodes serialized tensorflow example and returns a tensor dictionary. - - Args: - tf_example_string_tensor: a string tensor holding a serialized tensorflow - example proto. - - Returns: - A dictionary of the following tensors. - fields.InputDataFields.image - 3D uint8 tensor of shape [None, None, 3] - containing image. - fields.InputDataFields.source_id - string tensor containing original - image id. - fields.InputDataFields.key - string tensor with unique sha256 hash key. - fields.InputDataFields.filename - string tensor with original dataset - filename. - fields.InputDataFields.groundtruth_boxes - 2D float32 tensor of shape - [None, 4] containing box corners. 
- fields.InputDataFields.groundtruth_classes - 1D int64 tensor of shape - [None] containing classes for the boxes. - fields.InputDataFields.groundtruth_area - 1D float32 tensor of shape - [None] containing containing object mask area in pixel squared. - fields.InputDataFields.groundtruth_is_crowd - 1D bool tensor of shape - [None] indicating if the boxes enclose a crowd. - Optional: - fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape - [None] indicating if the boxes represent `difficult` instances. - fields.InputDataFields.groundtruth_group_of - 1D bool tensor of shape - [None] indicating if the boxes represent `group_of` instances. - fields.InputDataFields.groundtruth_instance_masks - 3D int64 tensor of - shape [None, None, None] containing instance masks. - """ - serialized_example = tf.reshape(tf_example_string_tensor, shape=[]) - decoder = slim_example_decoder.TFExampleDecoder(self.keys_to_features, - self.items_to_handlers) - keys = decoder.list_items() - tensors = decoder.decode(serialized_example, items=keys) - tensor_dict = dict(zip(keys, tensors)) - is_crowd = fields.InputDataFields.groundtruth_is_crowd - tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool) - tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3]) - return tensor_dict - - def _reshape_instance_masks(self, keys_to_tensors): - """Reshape instance segmentation masks. - - The instance segmentation masks are reshaped to [num_instances, height, - width] and cast to boolean type to save memory. - - Args: - keys_to_tensors: a dictionary from keys to tensors. - - Returns: - A 3-D float tensor of shape [num_instances, height, width] with values - in {0, 1}. 
- """ - height = keys_to_tensors['image/height'] - width = keys_to_tensors['image/width'] - to_shape = tf.cast(tf.stack([-1, height, width]), tf.int32) - masks = keys_to_tensors['image/object/mask'] - if isinstance(masks, tf.SparseTensor): - masks = tf.sparse_tensor_to_dense(masks) - masks = tf.reshape(tf.to_float(tf.greater(masks, 0.0)), to_shape) - return tf.cast(masks, tf.float32) diff --git a/object_detection/data_decoders/tf_example_decoder_test.py b/object_detection/data_decoders/tf_example_decoder_test.py deleted file mode 100644 index 04d00531..00000000 --- a/object_detection/data_decoders/tf_example_decoder_test.py +++ /dev/null @@ -1,350 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for object_detection.data_decoders.tf_example_decoder.""" - -import os -import numpy as np -import tensorflow as tf - -from object_detection.core import standard_fields as fields -from object_detection.data_decoders import tf_example_decoder - - -class TfExampleDecoderTest(tf.test.TestCase): - - def _EncodeImage(self, image_tensor, encoding_type='jpeg'): - with self.test_session(): - if encoding_type == 'jpeg': - image_encoded = tf.image.encode_jpeg(tf.constant(image_tensor)).eval() - elif encoding_type == 'png': - image_encoded = tf.image.encode_png(tf.constant(image_tensor)).eval() - else: - raise ValueError('Invalid encoding type.') - return image_encoded - - def _DecodeImage(self, image_encoded, encoding_type='jpeg'): - with self.test_session(): - if encoding_type == 'jpeg': - image_decoded = tf.image.decode_jpeg(tf.constant(image_encoded)).eval() - elif encoding_type == 'png': - image_decoded = tf.image.decode_png(tf.constant(image_encoded)).eval() - else: - raise ValueError('Invalid encoding type.') - return image_decoded - - def _Int64Feature(self, value): - return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) - - def _FloatFeature(self, value): - return tf.train.Feature(float_list=tf.train.FloatList(value=value)) - - def _BytesFeature(self, value): - if isinstance(value, list): - return tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) - return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) - - def testDecodeJpegImage(self): - image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) - encoded_jpeg = self._EncodeImage(image_tensor) - decoded_jpeg = self._DecodeImage(encoded_jpeg) - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': self._BytesFeature(encoded_jpeg), - 'image/format': self._BytesFeature('jpeg'), - 'image/source_id': self._BytesFeature('image_id'), - 
})).SerializeToString() - - example_decoder = tf_example_decoder.TfExampleDecoder() - tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) - - self.assertAllEqual((tensor_dict[fields.InputDataFields.image]. - get_shape().as_list()), [None, None, 3]) - with self.test_session() as sess: - tensor_dict = sess.run(tensor_dict) - - self.assertAllEqual(decoded_jpeg, tensor_dict[fields.InputDataFields.image]) - self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id]) - - def testDecodeImageKeyAndFilename(self): - image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) - encoded_jpeg = self._EncodeImage(image_tensor) - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': self._BytesFeature(encoded_jpeg), - 'image/key/sha256': self._BytesFeature('abc'), - 'image/filename': self._BytesFeature('filename') - })).SerializeToString() - - example_decoder = tf_example_decoder.TfExampleDecoder() - tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) - - with self.test_session() as sess: - tensor_dict = sess.run(tensor_dict) - - self.assertEqual('abc', tensor_dict[fields.InputDataFields.key]) - self.assertEqual('filename', tensor_dict[fields.InputDataFields.filename]) - - def testDecodePngImage(self): - image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) - encoded_png = self._EncodeImage(image_tensor, encoding_type='png') - decoded_png = self._DecodeImage(encoded_png, encoding_type='png') - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': self._BytesFeature(encoded_png), - 'image/format': self._BytesFeature('png'), - 'image/source_id': self._BytesFeature('image_id') - })).SerializeToString() - - example_decoder = tf_example_decoder.TfExampleDecoder() - tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) - - self.assertAllEqual((tensor_dict[fields.InputDataFields.image]. 
- get_shape().as_list()), [None, None, 3]) - with self.test_session() as sess: - tensor_dict = sess.run(tensor_dict) - - self.assertAllEqual(decoded_png, tensor_dict[fields.InputDataFields.image]) - self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id]) - - def testDecodeBoundingBox(self): - image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) - encoded_jpeg = self._EncodeImage(image_tensor) - bbox_ymins = [0.0, 4.0] - bbox_xmins = [1.0, 5.0] - bbox_ymaxs = [2.0, 6.0] - bbox_xmaxs = [3.0, 7.0] - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': self._BytesFeature(encoded_jpeg), - 'image/format': self._BytesFeature('jpeg'), - 'image/object/bbox/ymin': self._FloatFeature(bbox_ymins), - 'image/object/bbox/xmin': self._FloatFeature(bbox_xmins), - 'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs), - 'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs), - })).SerializeToString() - - example_decoder = tf_example_decoder.TfExampleDecoder() - tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) - - self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes]. 
- get_shape().as_list()), [None, 4]) - with self.test_session() as sess: - tensor_dict = sess.run(tensor_dict) - - expected_boxes = np.vstack([bbox_ymins, bbox_xmins, - bbox_ymaxs, bbox_xmaxs]).transpose() - self.assertAllEqual(expected_boxes, - tensor_dict[fields.InputDataFields.groundtruth_boxes]) - - def testDecodeObjectLabel(self): - image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) - encoded_jpeg = self._EncodeImage(image_tensor) - bbox_classes = [0, 1] - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': self._BytesFeature(encoded_jpeg), - 'image/format': self._BytesFeature('jpeg'), - 'image/object/class/label': self._Int64Feature(bbox_classes), - })).SerializeToString() - - example_decoder = tf_example_decoder.TfExampleDecoder() - tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) - - self.assertAllEqual((tensor_dict[ - fields.InputDataFields.groundtruth_classes].get_shape().as_list()), - [None]) - - with self.test_session() as sess: - tensor_dict = sess.run(tensor_dict) - - self.assertAllEqual(bbox_classes, - tensor_dict[fields.InputDataFields.groundtruth_classes]) - - def testDecodeObjectArea(self): - image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) - encoded_jpeg = self._EncodeImage(image_tensor) - object_area = [100., 174.] - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': self._BytesFeature(encoded_jpeg), - 'image/format': self._BytesFeature('jpeg'), - 'image/object/area': self._FloatFeature(object_area), - })).SerializeToString() - - example_decoder = tf_example_decoder.TfExampleDecoder() - tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) - - self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_area]. 
- get_shape().as_list()), [None]) - with self.test_session() as sess: - tensor_dict = sess.run(tensor_dict) - - self.assertAllEqual(object_area, - tensor_dict[fields.InputDataFields.groundtruth_area]) - - def testDecodeObjectIsCrowd(self): - image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) - encoded_jpeg = self._EncodeImage(image_tensor) - object_is_crowd = [0, 1] - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': self._BytesFeature(encoded_jpeg), - 'image/format': self._BytesFeature('jpeg'), - 'image/object/is_crowd': self._Int64Feature(object_is_crowd), - })).SerializeToString() - - example_decoder = tf_example_decoder.TfExampleDecoder() - tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) - - self.assertAllEqual((tensor_dict[ - fields.InputDataFields.groundtruth_is_crowd].get_shape().as_list()), - [None]) - with self.test_session() as sess: - tensor_dict = sess.run(tensor_dict) - - self.assertAllEqual([bool(item) for item in object_is_crowd], - tensor_dict[ - fields.InputDataFields.groundtruth_is_crowd]) - - def testDecodeObjectDifficult(self): - image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) - encoded_jpeg = self._EncodeImage(image_tensor) - object_difficult = [0, 1] - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': self._BytesFeature(encoded_jpeg), - 'image/format': self._BytesFeature('jpeg'), - 'image/object/difficult': self._Int64Feature(object_difficult), - })).SerializeToString() - - example_decoder = tf_example_decoder.TfExampleDecoder() - tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) - - self.assertAllEqual((tensor_dict[ - fields.InputDataFields.groundtruth_difficult].get_shape().as_list()), - [None]) - with self.test_session() as sess: - tensor_dict = sess.run(tensor_dict) - - self.assertAllEqual([bool(item) for item in object_difficult], - tensor_dict[ - fields.InputDataFields.groundtruth_difficult]) 
- - def testDecodeObjectGroupOf(self): - image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) - encoded_jpeg = self._EncodeImage(image_tensor) - object_group_of = [0, 1] - example = tf.train.Example(features=tf.train.Features( - feature={ - 'image/encoded': self._BytesFeature(encoded_jpeg), - 'image/format': self._BytesFeature('jpeg'), - 'image/object/group_of': self._Int64Feature(object_group_of), - })).SerializeToString() - - example_decoder = tf_example_decoder.TfExampleDecoder() - tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) - - self.assertAllEqual((tensor_dict[ - fields.InputDataFields.groundtruth_group_of].get_shape().as_list()), - [None]) - with self.test_session() as sess: - tensor_dict = sess.run(tensor_dict) - - self.assertAllEqual( - [bool(item) for item in object_group_of], - tensor_dict[fields.InputDataFields.groundtruth_group_of]) - - def testDecodeInstanceSegmentation(self): - num_instances = 4 - image_height = 5 - image_width = 3 - - # Randomly generate image. - image_tensor = np.random.randint(255, size=(image_height, - image_width, - 3)).astype(np.uint8) - encoded_jpeg = self._EncodeImage(image_tensor) - - # Randomly generate instance segmentation masks. - instance_masks = ( - np.random.randint(2, size=(num_instances, - image_height, - image_width)).astype(np.float32)) - instance_masks_flattened = np.reshape(instance_masks, [-1]) - - # Randomly generate class labels for each instance. 
- object_classes = np.random.randint( - 100, size=(num_instances)).astype(np.int64) - - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': self._BytesFeature(encoded_jpeg), - 'image/format': self._BytesFeature('jpeg'), - 'image/height': self._Int64Feature([image_height]), - 'image/width': self._Int64Feature([image_width]), - 'image/object/mask': self._FloatFeature(instance_masks_flattened), - 'image/object/class/label': self._Int64Feature( - object_classes)})).SerializeToString() - example_decoder = tf_example_decoder.TfExampleDecoder( - load_instance_masks=True) - tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) - - self.assertAllEqual(( - tensor_dict[fields.InputDataFields.groundtruth_instance_masks]. - get_shape().as_list()), [None, None, None]) - - self.assertAllEqual(( - tensor_dict[fields.InputDataFields.groundtruth_classes]. - get_shape().as_list()), [None]) - - with self.test_session() as sess: - tensor_dict = sess.run(tensor_dict) - - self.assertAllEqual( - instance_masks.astype(np.float32), - tensor_dict[fields.InputDataFields.groundtruth_instance_masks]) - self.assertAllEqual( - object_classes, - tensor_dict[fields.InputDataFields.groundtruth_classes]) - - def testInstancesNotAvailableByDefault(self): - num_instances = 4 - image_height = 5 - image_width = 3 - # Randomly generate image. - image_tensor = np.random.randint(255, size=(image_height, - image_width, - 3)).astype(np.uint8) - encoded_jpeg = self._EncodeImage(image_tensor) - - # Randomly generate instance segmentation masks. - instance_masks = ( - np.random.randint(2, size=(num_instances, - image_height, - image_width)).astype(np.float32)) - instance_masks_flattened = np.reshape(instance_masks, [-1]) - - # Randomly generate class labels for each instance. 
- object_classes = np.random.randint( - 100, size=(num_instances)).astype(np.int64) - - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': self._BytesFeature(encoded_jpeg), - 'image/format': self._BytesFeature('jpeg'), - 'image/height': self._Int64Feature([image_height]), - 'image/width': self._Int64Feature([image_width]), - 'image/object/mask': self._FloatFeature(instance_masks_flattened), - 'image/object/class/label': self._Int64Feature( - object_classes)})).SerializeToString() - example_decoder = tf_example_decoder.TfExampleDecoder() - tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) - self.assertTrue(fields.InputDataFields.groundtruth_instance_masks - not in tensor_dict) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/dataset_tools/BUILD b/object_detection/dataset_tools/BUILD deleted file mode 100644 index bb5ce2e5..00000000 --- a/object_detection/dataset_tools/BUILD +++ /dev/null @@ -1,107 +0,0 @@ -# Tensorflow Object Detection API: main runnables. 
- -package( - default_visibility = ["//visibility:public"], -) - -licenses(["notice"]) - -# Apache 2.0 - -py_binary( - name = "create_kitti_tf_record", - srcs = [ - "create_kitti_tf_record.py", - ], - deps = [ - "//third_party/py/PIL:pil", - "//third_party/py/lxml", - "//tensorflow", - "//tensorflow_models/object_detection/utils:dataset_util", - "//tensorflow_models/object_detection/utils:label_map_util", - "//tensorflow_models/object_detection/utils:np_box_ops", - ], -) - -py_test( - name = "create_kitti_tf_record_test", - srcs = [ - "create_kitti_tf_record_test.py", - ], - deps = [ - ":create_kitti_tf_record", - "//tensorflow", - ], -) - -py_binary( - name = "create_pascal_tf_record", - srcs = [ - "create_pascal_tf_record.py", - ], - deps = [ - "//third_party/py/PIL:pil", - "//third_party/py/lxml", - "//tensorflow", - "//tensorflow_models/object_detection/utils:dataset_util", - "//tensorflow_models/object_detection/utils:label_map_util", - ], -) - -py_test( - name = "create_pascal_tf_record_test", - srcs = [ - "create_pascal_tf_record_test.py", - ], - deps = [ - ":create_pascal_tf_record", - "//tensorflow", - ], -) - -py_binary( - name = "create_pet_tf_record", - srcs = [ - "create_pet_tf_record.py", - ], - deps = [ - "//third_party/py/PIL:pil", - "//third_party/py/lxml", - "//tensorflow", - "//tensorflow_models/object_detection/utils:dataset_util", - "//tensorflow_models/object_detection/utils:label_map_util", - ], -) - -py_library( - name = "oid_tfrecord_creation", - srcs = ["oid_tfrecord_creation.py"], - deps = [ - "//tensorflow", - "//tensorflow_models/object_detection/core:standard_fields", - "//tensorflow_models/object_detection/utils:dataset_util", - ], -) - -py_test( - name = "oid_tfrecord_creation_test", - srcs = ["oid_tfrecord_creation_test.py"], - deps = [ - ":oid_tfrecord_creation", - "//third_party/py/contextlib2", - "//third_party/py/pandas", - "//third_party/py/tensorflow", - ], -) - -py_binary( - name = "create_oid_tf_record", - srcs = 
["create_oid_tf_record.py"], - deps = [ - ":oid_tfrecord_creation", - "//third_party/py/contextlib2", - "//third_party/py/pandas", - "//tensorflow", - "//tensorflow_models/object_detection/utils:label_map_util", - ], -) diff --git a/object_detection/dataset_tools/create_kitti_tf_record.py b/object_detection/dataset_tools/create_kitti_tf_record.py deleted file mode 100644 index 2bf2ff34..00000000 --- a/object_detection/dataset_tools/create_kitti_tf_record.py +++ /dev/null @@ -1,310 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -r"""Convert raw KITTI detection dataset to TFRecord for object_detection. - -Converts KITTI detection dataset to TFRecords with a standard format allowing - to use this dataset to train object detectors. The raw dataset can be - downloaded from: - http://kitti.is.tue.mpg.de/kitti/data_object_image_2.zip. - http://kitti.is.tue.mpg.de/kitti/data_object_label_2.zip - Permission can be requested at the main website. - - KITTI detection dataset contains 7481 training images. Using this code with - the default settings will set aside the first 500 images as a validation set. - This can be altered using the flags, see details below. 
- -Example usage: - python object_detection/dataset_tools/create_kitti_tf_record.py \ - --data_dir=/home/user/kitti \ - --output_path=/home/user/kitti.record -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -import hashlib -import io -import os - -import numpy as np -import PIL.Image as pil -import tensorflow as tf - -from object_detection.utils import dataset_util -from object_detection.utils import label_map_util -from object_detection.utils.np_box_ops import iou - -tf.app.flags.DEFINE_string('data_dir', '', 'Location of root directory for the ' - 'data. Folder structure is assumed to be:' - '/training/label_2 (annotations) and' - '/data_object_image_2/training/image_2' - '(images).') -tf.app.flags.DEFINE_string('output_path', '', 'Path to which TFRecord files' - 'will be written. The TFRecord with the training set' - 'will be located at: _train.tfrecord.' - 'And the TFRecord with the validation set will be' - 'located at: _val.tfrecord') -tf.app.flags.DEFINE_list('classes_to_use', ['car', 'pedestrian', 'dontcare'], - 'Which classes of bounding boxes to use. Adding the' - 'dontcare class will remove all bboxs in the dontcare' - 'regions.') -tf.app.flags.DEFINE_string('label_map_path', 'data/kitti_label_map.pbtxt', - 'Path to label map proto.') -tf.app.flags.DEFINE_integer('validation_set_size', '500', 'Number of images to' - 'be used as a validation set.') -FLAGS = tf.app.flags.FLAGS - - -def convert_kitti_to_tfrecords(data_dir, output_path, classes_to_use, - label_map_path, validation_set_size): - """Convert the KITTI detection dataset to TFRecords. - - Args: - data_dir: The full path to the unzipped folder containing the unzipped data - from data_object_image_2 and data_object_label_2.zip. - Folder structure is assumed to be: data_dir/training/label_2 (annotations) - and data_dir/data_object_image_2/training/image_2 (images). - output_path: The path to which TFRecord files will be written. 
The TFRecord - with the training set will be located at: _train.tfrecord - And the TFRecord with the validation set will be located at: - _val.tfrecord - classes_to_use: List of strings naming the classes for which data should be - converted. Use the same names as presented in the KIITI README file. - Adding dontcare class will remove all other bounding boxes that overlap - with areas marked as dontcare regions. - label_map_path: Path to label map proto - validation_set_size: How many images should be left as the validation set. - (Ffirst `validation_set_size` examples are selected to be in the - validation set). - """ - label_map_dict = label_map_util.get_label_map_dict(label_map_path) - train_count = 0 - val_count = 0 - - annotation_dir = os.path.join(data_dir, - 'training', - 'label_2') - - image_dir = os.path.join(data_dir, - 'data_object_image_2', - 'training', - 'image_2') - - train_writer = tf.python_io.TFRecordWriter('%s_train.tfrecord'% - output_path) - val_writer = tf.python_io.TFRecordWriter('%s_val.tfrecord'% - output_path) - - images = sorted(tf.gfile.ListDirectory(image_dir)) - for img_name in images: - img_num = int(img_name.split('.')[0]) - is_validation_img = img_num < validation_set_size - img_anno = read_annotation_file(os.path.join(annotation_dir, - str(img_num).zfill(6)+'.txt')) - - image_path = os.path.join(image_dir, img_name) - - # Filter all bounding boxes of this frame that are of a legal class, and - # don't overlap with a dontcare region. - # TODO(talremez) filter out targets that are truncated or heavily occluded. 
- annotation_for_image = filter_annotations(img_anno, classes_to_use) - - example = prepare_example(image_path, annotation_for_image, label_map_dict) - if is_validation_img: - val_writer.write(example.SerializeToString()) - val_count += 1 - else: - train_writer.write(example.SerializeToString()) - train_count += 1 - - train_writer.close() - val_writer.close() - - -def prepare_example(image_path, annotations, label_map_dict): - """Converts a dictionary with annotations for an image to tf.Example proto. - - Args: - image_path: The complete path to image. - annotations: A dictionary representing the annotation of a single object - that appears in the image. - label_map_dict: A map from string label names to integer ids. - - Returns: - example: The converted tf.Example. - """ - with tf.gfile.GFile(image_path, 'rb') as fid: - encoded_png = fid.read() - encoded_png_io = io.BytesIO(encoded_png) - image = pil.open(encoded_png_io) - image = np.asarray(image) - - key = hashlib.sha256(encoded_png).hexdigest() - - width = int(image.shape[1]) - height = int(image.shape[0]) - - xmin_norm = annotations['2d_bbox_left'] / float(width) - ymin_norm = annotations['2d_bbox_top'] / float(height) - xmax_norm = annotations['2d_bbox_right'] / float(width) - ymax_norm = annotations['2d_bbox_bottom'] / float(height) - - difficult_obj = [0]*len(xmin_norm) - - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/height': dataset_util.int64_feature(height), - 'image/width': dataset_util.int64_feature(width), - 'image/filename': dataset_util.bytes_feature(image_path.encode('utf8')), - 'image/source_id': dataset_util.bytes_feature(image_path.encode('utf8')), - 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), - 'image/encoded': dataset_util.bytes_feature(encoded_png), - 'image/format': dataset_util.bytes_feature('png'.encode('utf8')), - 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin_norm), - 'image/object/bbox/xmax': 
dataset_util.float_list_feature(xmax_norm), - 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin_norm), - 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax_norm), - 'image/object/class/text': dataset_util.bytes_list_feature( - [x.encode('utf8') for x in annotations['type']]), - 'image/object/class/label': dataset_util.int64_list_feature( - [label_map_dict[x] for x in annotations['type']]), - 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj), - 'image/object/truncated': dataset_util.float_list_feature( - annotations['truncated']), - 'image/object/alpha': dataset_util.float_list_feature( - annotations['alpha']), - 'image/object/3d_bbox/height': dataset_util.float_list_feature( - annotations['3d_bbox_height']), - 'image/object/3d_bbox/width': dataset_util.float_list_feature( - annotations['3d_bbox_width']), - 'image/object/3d_bbox/length': dataset_util.float_list_feature( - annotations['3d_bbox_length']), - 'image/object/3d_bbox/x': dataset_util.float_list_feature( - annotations['3d_bbox_x']), - 'image/object/3d_bbox/y': dataset_util.float_list_feature( - annotations['3d_bbox_y']), - 'image/object/3d_bbox/z': dataset_util.float_list_feature( - annotations['3d_bbox_z']), - 'image/object/3d_bbox/rot_y': dataset_util.float_list_feature( - annotations['3d_bbox_rot_y']), - })) - - return example - - -def filter_annotations(img_all_annotations, used_classes): - """Filters out annotations from the unused classes and dontcare regions. - - Filters out the annotations that belong to classes we do now wish to use and - (optionally) also removes all boxes that overlap with dontcare regions. - - Args: - img_all_annotations: A list of annotation dictionaries. See documentation of - read_annotation_file for more details about the format of the annotations. 
- used_classes: A list of strings listing the classes we want to keep, if the - list contains "dontcare", all bounding boxes with overlapping with dont - care regions will also be filtered out. - - Returns: - img_filtered_annotations: A list of annotation dictionaries that have passed - the filtering. - """ - - img_filtered_annotations = {} - - # Filter the type of the objects. - relevant_annotation_indices = [ - i for i, x in enumerate(img_all_annotations['type']) if x in used_classes - ] - - for key in img_all_annotations.keys(): - img_filtered_annotations[key] = ( - img_all_annotations[key][relevant_annotation_indices]) - - if 'dontcare' in used_classes: - dont_care_indices = [i for i, - x in enumerate(img_filtered_annotations['type']) - if x == 'dontcare'] - - # bounding box format [y_min, x_min, y_max, x_max] - all_boxes = np.stack([img_filtered_annotations['2d_bbox_top'], - img_filtered_annotations['2d_bbox_left'], - img_filtered_annotations['2d_bbox_bottom'], - img_filtered_annotations['2d_bbox_right']], - axis=1) - - ious = iou(boxes1=all_boxes, - boxes2=all_boxes[dont_care_indices]) - - # Remove all bounding boxes that overlap with a dontcare region. - if ious.size > 0: - boxes_to_remove = np.amax(ious, axis=1) > 0.0 - for key in img_all_annotations.keys(): - img_filtered_annotations[key] = ( - img_filtered_annotations[key][np.logical_not(boxes_to_remove)]) - - return img_filtered_annotations - - -def read_annotation_file(filename): - """Reads a KITTI annotation file. - - Converts a KITTI annotation file into a dictionary containing all the - relevant information. - - Args: - filename: the path to the annotataion text file. - - Returns: - anno: A dictionary with the converted annotation information. See annotation - README file for details on the different fields. 
- """ - with open(filename) as f: - content = f.readlines() - content = [x.strip().split(' ') for x in content] - - anno = {} - anno['type'] = np.array([x[0].lower() for x in content]) - anno['truncated'] = np.array([float(x[1]) for x in content]) - anno['occluded'] = np.array([int(x[2]) for x in content]) - anno['alpha'] = np.array([float(x[3]) for x in content]) - - anno['2d_bbox_left'] = np.array([float(x[4]) for x in content]) - anno['2d_bbox_top'] = np.array([float(x[5]) for x in content]) - anno['2d_bbox_right'] = np.array([float(x[6]) for x in content]) - anno['2d_bbox_bottom'] = np.array([float(x[7]) for x in content]) - - anno['3d_bbox_height'] = np.array([float(x[8]) for x in content]) - anno['3d_bbox_width'] = np.array([float(x[9]) for x in content]) - anno['3d_bbox_length'] = np.array([float(x[10]) for x in content]) - anno['3d_bbox_x'] = np.array([float(x[11]) for x in content]) - anno['3d_bbox_y'] = np.array([float(x[12]) for x in content]) - anno['3d_bbox_z'] = np.array([float(x[13]) for x in content]) - anno['3d_bbox_rot_y'] = np.array([float(x[14]) for x in content]) - - return anno - - -def main(_): - convert_kitti_to_tfrecords( - data_dir=FLAGS.data_dir, - output_path=FLAGS.output_path, - classes_to_use=FLAGS.classes_to_use, - label_map_path=FLAGS.label_map_path, - validation_set_size=FLAGS.validation_set_size) - -if __name__ == '__main__': - tf.app.run() diff --git a/object_detection/dataset_tools/create_kitti_tf_record_test.py b/object_detection/dataset_tools/create_kitti_tf_record_test.py deleted file mode 100644 index 22f27f1a..00000000 --- a/object_detection/dataset_tools/create_kitti_tf_record_test.py +++ /dev/null @@ -1,130 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Test for create_kitti_tf_record.py.""" - -import os - -import numpy as np -import PIL.Image -import tensorflow as tf - -from object_detection.dataset_tools import create_kitti_tf_record - - -class DictToTFExampleTest(tf.test.TestCase): - - def _assertProtoEqual(self, proto_field, expectation): - """Helper function to assert if a proto field equals some value. - - Args: - proto_field: The protobuf field to compare. - expectation: The expected value of the protobuf field. - """ - proto_list = [p for p in proto_field] - self.assertListEqual(proto_list, expectation) - - def test_dict_to_tf_example(self): - image_file_name = 'tmp_image.jpg' - image_data = np.random.rand(256, 256, 3) - save_path = os.path.join(self.get_temp_dir(), image_file_name) - image = PIL.Image.fromarray(image_data, 'RGB') - image.save(save_path) - - annotations = {} - annotations['2d_bbox_left'] = np.array([64]) - annotations['2d_bbox_top'] = np.array([64]) - annotations['2d_bbox_right'] = np.array([192]) - annotations['2d_bbox_bottom'] = np.array([192]) - annotations['type'] = ['car'] - annotations['truncated'] = np.array([1]) - annotations['alpha'] = np.array([2]) - annotations['3d_bbox_height'] = np.array([10]) - annotations['3d_bbox_width'] = np.array([11]) - annotations['3d_bbox_length'] = np.array([12]) - annotations['3d_bbox_x'] = np.array([13]) - annotations['3d_bbox_y'] = np.array([14]) - annotations['3d_bbox_z'] = np.array([15]) - annotations['3d_bbox_rot_y'] = np.array([4]) - - label_map_dict 
= { - 'background': 0, - 'car': 1, - } - - example = create_kitti_tf_record.prepare_example( - save_path, - annotations, - label_map_dict) - - self._assertProtoEqual( - example.features.feature['image/height'].int64_list.value, [256]) - self._assertProtoEqual( - example.features.feature['image/width'].int64_list.value, [256]) - self._assertProtoEqual( - example.features.feature['image/filename'].bytes_list.value, - [save_path]) - self._assertProtoEqual( - example.features.feature['image/source_id'].bytes_list.value, - [save_path]) - self._assertProtoEqual( - example.features.feature['image/format'].bytes_list.value, ['png']) - self._assertProtoEqual( - example.features.feature['image/object/bbox/xmin'].float_list.value, - [0.25]) - self._assertProtoEqual( - example.features.feature['image/object/bbox/ymin'].float_list.value, - [0.25]) - self._assertProtoEqual( - example.features.feature['image/object/bbox/xmax'].float_list.value, - [0.75]) - self._assertProtoEqual( - example.features.feature['image/object/bbox/ymax'].float_list.value, - [0.75]) - self._assertProtoEqual( - example.features.feature['image/object/class/text'].bytes_list.value, - ['car']) - self._assertProtoEqual( - example.features.feature['image/object/class/label'].int64_list.value, - [1]) - self._assertProtoEqual( - example.features.feature['image/object/truncated'].float_list.value, - [1]) - self._assertProtoEqual( - example.features.feature['image/object/alpha'].float_list.value, - [2]) - self._assertProtoEqual(example.features.feature[ - 'image/object/3d_bbox/height'].float_list.value, [10]) - self._assertProtoEqual( - example.features.feature['image/object/3d_bbox/width'].float_list.value, - [11]) - self._assertProtoEqual(example.features.feature[ - 'image/object/3d_bbox/length'].float_list.value, [12]) - self._assertProtoEqual( - example.features.feature['image/object/3d_bbox/x'].float_list.value, - [13]) - self._assertProtoEqual( - 
example.features.feature['image/object/3d_bbox/y'].float_list.value, - [14]) - self._assertProtoEqual( - example.features.feature['image/object/3d_bbox/z'].float_list.value, - [15]) - self._assertProtoEqual( - example.features.feature['image/object/3d_bbox/rot_y'].float_list.value, - [4]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/dataset_tools/create_oid_tf_record.py b/object_detection/dataset_tools/create_oid_tf_record.py deleted file mode 100644 index f58efee2..00000000 --- a/object_detection/dataset_tools/create_oid_tf_record.py +++ /dev/null @@ -1,104 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -r"""Creates TFRecords of Open Images dataset for object detection. 
- -Example usage: - python object_detection/dataset_tools/create_oid_tf_record.py \ - --input_annotations_csv=/path/to/input/annotations-human-bbox.csv \ - --input_images_directory=/path/to/input/image_pixels_directory \ - --input_label_map=/path/to/input/labels_bbox_545.labelmap \ - --output_tf_record_path_prefix=/path/to/output/prefix.tfrecord - -CSVs with bounding box annotations and image metadata (including the image URLs) -can be downloaded from the Open Images GitHub repository: -https://github.com/openimages/dataset - -This script will include every image found in the input_images_directory in the -output TFRecord, even if the image has no corresponding bounding box annotations -in the input_annotations_csv. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -import contextlib2 -import pandas as pd -import tensorflow as tf - -from object_detection.dataset_tools import oid_tfrecord_creation -from object_detection.utils import label_map_util - -tf.flags.DEFINE_string('input_annotations_csv', None, - 'Path to CSV containing image bounding box annotations') -tf.flags.DEFINE_string('input_images_directory', None, - 'Directory containing the image pixels ' - 'downloaded from the OpenImages GitHub repository.') -tf.flags.DEFINE_string('input_label_map', None, 'Path to the label map proto') -tf.flags.DEFINE_string( - 'output_tf_record_path_prefix', None, - 'Path to the output TFRecord. 
The shard index and the number of shards ' - 'will be appended for each output shard.') -tf.flags.DEFINE_integer('num_shards', 100, 'Number of TFRecord shards') - -FLAGS = tf.flags.FLAGS - - -def main(_): - tf.logging.set_verbosity(tf.logging.INFO) - - required_flags = [ - 'input_annotations_csv', 'input_images_directory', 'input_label_map', - 'output_tf_record_path_prefix' - ] - for flag_name in required_flags: - if not getattr(FLAGS, flag_name): - raise ValueError('Flag --{} is required'.format(flag_name)) - - label_map = label_map_util.get_label_map_dict(FLAGS.input_label_map) - all_annotations = pd.read_csv(FLAGS.input_annotations_csv) - all_images = tf.gfile.Glob( - os.path.join(FLAGS.input_images_directory, '*.jpg')) - all_image_ids = [os.path.splitext(os.path.basename(v))[0] for v in all_images] - all_image_ids = pd.DataFrame({'ImageID': all_image_ids}) - all_annotations = pd.concat([all_annotations, all_image_ids]) - - tf.logging.log(tf.logging.INFO, 'Found %d images...', len(all_image_ids)) - - with contextlib2.ExitStack() as tf_record_close_stack: - output_tfrecords = oid_tfrecord_creation.open_sharded_output_tfrecords( - tf_record_close_stack, FLAGS.output_tf_record_path_prefix, - FLAGS.num_shards) - - for counter, image_data in enumerate(all_annotations.groupby('ImageID')): - tf.logging.log_every_n(tf.logging.INFO, 'Processed %d images...', 1000, - counter) - - image_id, image_annotations = image_data - # In OID image file names are formed by appending ".jpg" to the image ID. 
- image_path = os.path.join(FLAGS.input_images_directory, image_id + '.jpg') - with tf.gfile.Open(image_path) as image_file: - encoded_image = image_file.read() - - tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame( - image_annotations, label_map, encoded_image) - if tf_example: - shard_idx = long(image_id, 16) % FLAGS.num_shards - output_tfrecords[shard_idx].write(tf_example.SerializeToString()) - - -if __name__ == '__main__': - tf.app.run() diff --git a/object_detection/dataset_tools/create_pascal_tf_record.py b/object_detection/dataset_tools/create_pascal_tf_record.py deleted file mode 100644 index 83d2b128..00000000 --- a/object_detection/dataset_tools/create_pascal_tf_record.py +++ /dev/null @@ -1,184 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -r"""Convert raw PASCAL dataset to TFRecord for object_detection. 
- -Example usage: - python object_detection/dataset_tools/create_pascal_tf_record.py \ - --data_dir=/home/user/VOCdevkit \ - --year=VOC2012 \ - --output_path=/home/user/pascal.record -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import hashlib -import io -import logging -import os - -from lxml import etree -import PIL.Image -import tensorflow as tf - -from object_detection.utils import dataset_util -from object_detection.utils import label_map_util - - -flags = tf.app.flags -flags.DEFINE_string('data_dir', '', 'Root directory to raw PASCAL VOC dataset.') -flags.DEFINE_string('set', 'train', 'Convert training set, validation set or ' - 'merged set.') -flags.DEFINE_string('annotations_dir', 'Annotations', - '(Relative) path to annotations directory.') -flags.DEFINE_string('year', 'VOC2007', 'Desired challenge year.') -flags.DEFINE_string('output_path', '', 'Path to output TFRecord') -flags.DEFINE_string('label_map_path', 'data/pascal_label_map.pbtxt', - 'Path to label map proto') -flags.DEFINE_boolean('ignore_difficult_instances', False, 'Whether to ignore ' - 'difficult instances') -FLAGS = flags.FLAGS - -SETS = ['train', 'val', 'trainval', 'test'] -YEARS = ['VOC2007', 'VOC2012', 'merged'] - - -def dict_to_tf_example(data, - dataset_directory, - label_map_dict, - ignore_difficult_instances=False, - image_subdirectory='JPEGImages'): - """Convert XML derived dict to tf.Example proto. - - Notice that this function normalizes the bounding box coordinates provided - by the raw data. - - Args: - data: dict holding PASCAL XML fields for a single image (obtained by - running dataset_util.recursive_parse_xml_to_dict) - dataset_directory: Path to root directory holding PASCAL dataset - label_map_dict: A map from string label names to integers ids. - ignore_difficult_instances: Whether to skip difficult instances in the - dataset (default: False). 
- image_subdirectory: String specifying subdirectory within the - PASCAL dataset directory holding the actual image data. - - Returns: - example: The converted tf.Example. - - Raises: - ValueError: if the image pointed to by data['filename'] is not a valid JPEG - """ - img_path = os.path.join(data['folder'], image_subdirectory, data['filename']) - full_path = os.path.join(dataset_directory, img_path) - with tf.gfile.GFile(full_path, 'rb') as fid: - encoded_jpg = fid.read() - encoded_jpg_io = io.BytesIO(encoded_jpg) - image = PIL.Image.open(encoded_jpg_io) - if image.format != 'JPEG': - raise ValueError('Image format not JPEG') - key = hashlib.sha256(encoded_jpg).hexdigest() - - width = int(data['size']['width']) - height = int(data['size']['height']) - - xmin = [] - ymin = [] - xmax = [] - ymax = [] - classes = [] - classes_text = [] - truncated = [] - poses = [] - difficult_obj = [] - for obj in data['object']: - difficult = bool(int(obj['difficult'])) - if ignore_difficult_instances and difficult: - continue - - difficult_obj.append(int(difficult)) - - xmin.append(float(obj['bndbox']['xmin']) / width) - ymin.append(float(obj['bndbox']['ymin']) / height) - xmax.append(float(obj['bndbox']['xmax']) / width) - ymax.append(float(obj['bndbox']['ymax']) / height) - classes_text.append(obj['name'].encode('utf8')) - classes.append(label_map_dict[obj['name']]) - truncated.append(int(obj['truncated'])) - poses.append(obj['pose'].encode('utf8')) - - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/height': dataset_util.int64_feature(height), - 'image/width': dataset_util.int64_feature(width), - 'image/filename': dataset_util.bytes_feature( - data['filename'].encode('utf8')), - 'image/source_id': dataset_util.bytes_feature( - data['filename'].encode('utf8')), - 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), - 'image/encoded': dataset_util.bytes_feature(encoded_jpg), - 'image/format': 
dataset_util.bytes_feature('jpeg'.encode('utf8')), - 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), - 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), - 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), - 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), - 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), - 'image/object/class/label': dataset_util.int64_list_feature(classes), - 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj), - 'image/object/truncated': dataset_util.int64_list_feature(truncated), - 'image/object/view': dataset_util.bytes_list_feature(poses), - })) - return example - - -def main(_): - if FLAGS.set not in SETS: - raise ValueError('set must be in : {}'.format(SETS)) - if FLAGS.year not in YEARS: - raise ValueError('year must be in : {}'.format(YEARS)) - - data_dir = FLAGS.data_dir - years = ['VOC2007', 'VOC2012'] - if FLAGS.year != 'merged': - years = [FLAGS.year] - - writer = tf.python_io.TFRecordWriter(FLAGS.output_path) - - label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path) - - for year in years: - logging.info('Reading from PASCAL %s dataset.', year) - examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main', - 'aeroplane_' + FLAGS.set + '.txt') - annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir) - examples_list = dataset_util.read_examples_list(examples_path) - for idx, example in enumerate(examples_list): - if idx % 100 == 0: - logging.info('On image %d of %d', idx, len(examples_list)) - path = os.path.join(annotations_dir, example + '.xml') - with tf.gfile.GFile(path, 'r') as fid: - xml_str = fid.read() - xml = etree.fromstring(xml_str) - data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation'] - - tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict, - FLAGS.ignore_difficult_instances) - writer.write(tf_example.SerializeToString()) - - writer.close() - - -if 
__name__ == '__main__': - tf.app.run() diff --git a/object_detection/dataset_tools/create_pascal_tf_record_test.py b/object_detection/dataset_tools/create_pascal_tf_record_test.py deleted file mode 100644 index a1c31fac..00000000 --- a/object_detection/dataset_tools/create_pascal_tf_record_test.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Test for create_pascal_tf_record.py.""" - -import os - -import numpy as np -import PIL.Image -import tensorflow as tf - -from object_detection.dataset_tools import create_pascal_tf_record - - -class DictToTFExampleTest(tf.test.TestCase): - - def _assertProtoEqual(self, proto_field, expectation): - """Helper function to assert if a proto field equals some value. - - Args: - proto_field: The protobuf field to compare. - expectation: The expected value of the protobuf field. 
- """ - proto_list = [p for p in proto_field] - self.assertListEqual(proto_list, expectation) - - def test_dict_to_tf_example(self): - image_file_name = 'tmp_image.jpg' - image_data = np.random.rand(256, 256, 3) - save_path = os.path.join(self.get_temp_dir(), image_file_name) - image = PIL.Image.fromarray(image_data, 'RGB') - image.save(save_path) - - data = { - 'folder': '', - 'filename': image_file_name, - 'size': { - 'height': 256, - 'width': 256, - }, - 'object': [ - { - 'difficult': 1, - 'bndbox': { - 'xmin': 64, - 'ymin': 64, - 'xmax': 192, - 'ymax': 192, - }, - 'name': 'person', - 'truncated': 0, - 'pose': '', - }, - ], - } - - label_map_dict = { - 'background': 0, - 'person': 1, - 'notperson': 2, - } - - example = create_pascal_tf_record.dict_to_tf_example( - data, self.get_temp_dir(), label_map_dict, image_subdirectory='') - self._assertProtoEqual( - example.features.feature['image/height'].int64_list.value, [256]) - self._assertProtoEqual( - example.features.feature['image/width'].int64_list.value, [256]) - self._assertProtoEqual( - example.features.feature['image/filename'].bytes_list.value, - [image_file_name]) - self._assertProtoEqual( - example.features.feature['image/source_id'].bytes_list.value, - [image_file_name]) - self._assertProtoEqual( - example.features.feature['image/format'].bytes_list.value, ['jpeg']) - self._assertProtoEqual( - example.features.feature['image/object/bbox/xmin'].float_list.value, - [0.25]) - self._assertProtoEqual( - example.features.feature['image/object/bbox/ymin'].float_list.value, - [0.25]) - self._assertProtoEqual( - example.features.feature['image/object/bbox/xmax'].float_list.value, - [0.75]) - self._assertProtoEqual( - example.features.feature['image/object/bbox/ymax'].float_list.value, - [0.75]) - self._assertProtoEqual( - example.features.feature['image/object/class/text'].bytes_list.value, - ['person']) - self._assertProtoEqual( - example.features.feature['image/object/class/label'].int64_list.value, - [1]) - 
self._assertProtoEqual( - example.features.feature['image/object/difficult'].int64_list.value, - [1]) - self._assertProtoEqual( - example.features.feature['image/object/truncated'].int64_list.value, - [0]) - self._assertProtoEqual( - example.features.feature['image/object/view'].bytes_list.value, ['']) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/dataset_tools/create_pet_tf_record.py b/object_detection/dataset_tools/create_pet_tf_record.py deleted file mode 100644 index a8663297..00000000 --- a/object_detection/dataset_tools/create_pet_tf_record.py +++ /dev/null @@ -1,272 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -r"""Convert the Oxford pet dataset to TFRecord for object_detection. - -See: O. M. Parkhi, A. Vedaldi, A. Zisserman, C. V. 
Jawahar - Cats and Dogs - IEEE Conference on Computer Vision and Pattern Recognition, 2012 - http://www.robots.ox.ac.uk/~vgg/data/pets/ - -Example usage: - python object_detection/dataset_tools/create_pet_tf_record.py \ - --data_dir=/home/user/pet \ - --output_dir=/home/user/pet/output -""" - -import hashlib -import io -import logging -import os -import random -import re - -from lxml import etree -import numpy as np -import PIL.Image -import tensorflow as tf - -from object_detection.utils import dataset_util -from object_detection.utils import label_map_util - -flags = tf.app.flags -flags.DEFINE_string('data_dir', '', 'Root directory to raw pet dataset.') -flags.DEFINE_string('output_dir', '', 'Path to directory to output TFRecords.') -flags.DEFINE_string('label_map_path', 'data/pet_label_map.pbtxt', - 'Path to label map proto') -flags.DEFINE_boolean('faces_only', True, 'If True, generates bounding boxes ' - 'for pet faces. Otherwise generates bounding boxes (as ' - 'well as segmentations for full pet bodies). Note that ' - 'in the latter case, the resulting files are much larger.') -FLAGS = flags.FLAGS - - -def get_class_name_from_filename(file_name): - """Gets the class name from a file. - - Args: - file_name: The file name to get the class name from. - ie. "american_pit_bull_terrier_105.jpg" - - Returns: - A string of the class name. - """ - match = re.match(r'([A-Za-z_]+)(_[0-9]+\.jpg)', file_name, re.I) - return match.groups()[0] - - -def dict_to_tf_example(data, - mask_path, - label_map_dict, - image_subdirectory, - ignore_difficult_instances=False, - faces_only=True): - """Convert XML derived dict to tf.Example proto. - - Notice that this function normalizes the bounding box coordinates provided - by the raw data. - - Args: - data: dict holding PASCAL XML fields for a single image (obtained by - running dataset_util.recursive_parse_xml_to_dict) - mask_path: String path to PNG encoded mask. - label_map_dict: A map from string label names to integers ids. 
- image_subdirectory: String specifying subdirectory within the - Pascal dataset directory holding the actual image data. - ignore_difficult_instances: Whether to skip difficult instances in the - dataset (default: False). - faces_only: If True, generates bounding boxes for pet faces. Otherwise - generates bounding boxes (as well as segmentations for full pet bodies). - - Returns: - example: The converted tf.Example. - - Raises: - ValueError: if the image pointed to by data['filename'] is not a valid JPEG - """ - img_path = os.path.join(image_subdirectory, data['filename']) - with tf.gfile.GFile(img_path, 'rb') as fid: - encoded_jpg = fid.read() - encoded_jpg_io = io.BytesIO(encoded_jpg) - image = PIL.Image.open(encoded_jpg_io) - if image.format != 'JPEG': - raise ValueError('Image format not JPEG') - key = hashlib.sha256(encoded_jpg).hexdigest() - - with tf.gfile.GFile(mask_path, 'rb') as fid: - encoded_mask_png = fid.read() - encoded_png_io = io.BytesIO(encoded_mask_png) - mask = PIL.Image.open(encoded_png_io) - if mask.format != 'PNG': - raise ValueError('Mask format not PNG') - - mask_np = np.asarray(mask) - nonbackground_indices_x = np.any(mask_np != 2, axis=0) - nonbackground_indices_y = np.any(mask_np != 2, axis=1) - nonzero_x_indices = np.where(nonbackground_indices_x) - nonzero_y_indices = np.where(nonbackground_indices_y) - - width = int(data['size']['width']) - height = int(data['size']['height']) - - xmins = [] - ymins = [] - xmaxs = [] - ymaxs = [] - classes = [] - classes_text = [] - truncated = [] - poses = [] - difficult_obj = [] - masks = [] - for obj in data['object']: - difficult = bool(int(obj['difficult'])) - if ignore_difficult_instances and difficult: - continue - difficult_obj.append(int(difficult)) - - if faces_only: - xmin = float(obj['bndbox']['xmin']) - xmax = float(obj['bndbox']['xmax']) - ymin = float(obj['bndbox']['ymin']) - ymax = float(obj['bndbox']['ymax']) - else: - xmin = float(np.min(nonzero_x_indices)) - xmax = 
float(np.max(nonzero_x_indices)) - ymin = float(np.min(nonzero_y_indices)) - ymax = float(np.max(nonzero_y_indices)) - - xmins.append(xmin / width) - ymins.append(ymin / height) - xmaxs.append(xmax / width) - ymaxs.append(ymax / height) - class_name = get_class_name_from_filename(data['filename']) - classes_text.append(class_name.encode('utf8')) - classes.append(label_map_dict[class_name]) - truncated.append(int(obj['truncated'])) - poses.append(obj['pose'].encode('utf8')) - if not faces_only: - mask_remapped = mask_np != 2 - masks.append(mask_remapped) - - feature_dict = { - 'image/height': dataset_util.int64_feature(height), - 'image/width': dataset_util.int64_feature(width), - 'image/filename': dataset_util.bytes_feature( - data['filename'].encode('utf8')), - 'image/source_id': dataset_util.bytes_feature( - data['filename'].encode('utf8')), - 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), - 'image/encoded': dataset_util.bytes_feature(encoded_jpg), - 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), - 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), - 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), - 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), - 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), - 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), - 'image/object/class/label': dataset_util.int64_list_feature(classes), - 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj), - 'image/object/truncated': dataset_util.int64_list_feature(truncated), - 'image/object/view': dataset_util.bytes_list_feature(poses), - } - if not faces_only: - mask_stack = np.stack(masks).astype(np.float32) - masks_flattened = np.reshape(mask_stack, [-1]) - feature_dict['image/object/mask'] = ( - dataset_util.float_list_feature(masks_flattened.tolist())) - - example = tf.train.Example(features=tf.train.Features(feature=feature_dict)) - 
return example - - -def create_tf_record(output_filename, - label_map_dict, - annotations_dir, - image_dir, - examples, - faces_only=True): - """Creates a TFRecord file from examples. - - Args: - output_filename: Path to where output file is saved. - label_map_dict: The label map dictionary. - annotations_dir: Directory where annotation files are stored. - image_dir: Directory where image files are stored. - examples: Examples to parse and save to tf record. - faces_only: If True, generates bounding boxes for pet faces. Otherwise - generates bounding boxes (as well as segmentations for full pet bodies). - """ - writer = tf.python_io.TFRecordWriter(output_filename) - for idx, example in enumerate(examples): - if idx % 100 == 0: - logging.info('On image %d of %d', idx, len(examples)) - xml_path = os.path.join(annotations_dir, 'xmls', example + '.xml') - mask_path = os.path.join(annotations_dir, 'trimaps', example + '.png') - - if not os.path.exists(xml_path): - logging.warning('Could not find %s, ignoring example.', xml_path) - continue - with tf.gfile.GFile(xml_path, 'r') as fid: - xml_str = fid.read() - xml = etree.fromstring(xml_str) - data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation'] - - try: - tf_example = dict_to_tf_example( - data, mask_path, label_map_dict, image_dir, faces_only=faces_only) - writer.write(tf_example.SerializeToString()) - except ValueError: - logging.warning('Invalid example: %s, ignoring.', xml_path) - - writer.close() - - -# TODO(derekjchow): Add test for pet/PASCAL main files. 
-def main(_): - data_dir = FLAGS.data_dir - label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path) - - logging.info('Reading from Pet dataset.') - image_dir = os.path.join(data_dir, 'images') - annotations_dir = os.path.join(data_dir, 'annotations') - examples_path = os.path.join(annotations_dir, 'trainval.txt') - examples_list = dataset_util.read_examples_list(examples_path) - - # Test images are not included in the downloaded data set, so we shall perform - # our own split. - random.seed(42) - random.shuffle(examples_list) - num_examples = len(examples_list) - num_train = int(0.7 * num_examples) - train_examples = examples_list[:num_train] - val_examples = examples_list[num_train:] - logging.info('%d training and %d validation examples.', - len(train_examples), len(val_examples)) - - train_output_path = os.path.join(FLAGS.output_dir, 'pet_train.record') - val_output_path = os.path.join(FLAGS.output_dir, 'pet_val.record') - if FLAGS.faces_only: - train_output_path = os.path.join(FLAGS.output_dir, - 'pet_train_with_masks.record') - val_output_path = os.path.join(FLAGS.output_dir, - 'pet_val_with_masks.record') - create_tf_record(train_output_path, label_map_dict, annotations_dir, - image_dir, train_examples, faces_only=FLAGS.faces_only) - create_tf_record(val_output_path, label_map_dict, annotations_dir, - image_dir, val_examples, faces_only=FLAGS.faces_only) - - -if __name__ == '__main__': - tf.app.run() diff --git a/object_detection/dataset_tools/oid_tfrecord_creation.py b/object_detection/dataset_tools/oid_tfrecord_creation.py deleted file mode 100644 index 1bc41c0b..00000000 --- a/object_detection/dataset_tools/oid_tfrecord_creation.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -r"""Utilities for creating TFRecords of TF examples for the Open Images dataset. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from object_detection.core import standard_fields -from object_detection.utils import dataset_util - - -def tf_example_from_annotations_data_frame(annotations_data_frame, label_map, - encoded_image): - """Populates a TF Example message with image annotations from a data frame. - - Args: - annotations_data_frame: Data frame containing the annotations for a single - image. - label_map: String to integer label map. - encoded_image: The encoded image string - - Returns: - The populated TF Example, if the label of at least one object is present in - label_map. Otherwise, returns None. 
- """ - - filtered_data_frame = annotations_data_frame[ - annotations_data_frame.LabelName.isin(label_map)] - - image_id = annotations_data_frame.ImageID.iloc[0] - - feature_map = { - standard_fields.TfExampleFields.object_bbox_ymin: - dataset_util.float_list_feature(filtered_data_frame.YMin.as_matrix()), - standard_fields.TfExampleFields.object_bbox_xmin: - dataset_util.float_list_feature(filtered_data_frame.XMin.as_matrix()), - standard_fields.TfExampleFields.object_bbox_ymax: - dataset_util.float_list_feature(filtered_data_frame.YMax.as_matrix()), - standard_fields.TfExampleFields.object_bbox_xmax: - dataset_util.float_list_feature(filtered_data_frame.XMax.as_matrix()), - standard_fields.TfExampleFields.object_class_text: - dataset_util.bytes_list_feature( - filtered_data_frame.LabelName.as_matrix()), - standard_fields.TfExampleFields.object_class_label: - dataset_util.int64_list_feature( - filtered_data_frame.LabelName.map(lambda x: label_map[x]) - .as_matrix()), - standard_fields.TfExampleFields.filename: - dataset_util.bytes_feature('{}.jpg'.format(image_id)), - standard_fields.TfExampleFields.source_id: - dataset_util.bytes_feature(image_id), - standard_fields.TfExampleFields.image_encoded: - dataset_util.bytes_feature(encoded_image), - } - - if 'IsGroupOf' in filtered_data_frame.columns: - feature_map[standard_fields.TfExampleFields. - object_group_of] = dataset_util.int64_list_feature( - filtered_data_frame.IsGroupOf.as_matrix().astype(int)) - if 'IsOccluded' in filtered_data_frame.columns: - feature_map[standard_fields.TfExampleFields. - object_occluded] = dataset_util.int64_list_feature( - filtered_data_frame.IsOccluded.as_matrix().astype(int)) - if 'IsTruncated' in filtered_data_frame.columns: - feature_map[standard_fields.TfExampleFields. - object_truncated] = dataset_util.int64_list_feature( - filtered_data_frame.IsTruncated.as_matrix().astype(int)) - if 'IsDepiction' in filtered_data_frame.columns: - feature_map[standard_fields.TfExampleFields. 
- object_depiction] = dataset_util.int64_list_feature( - filtered_data_frame.IsDepiction.as_matrix().astype(int)) - - return tf.train.Example(features=tf.train.Features(feature=feature_map)) - - -def open_sharded_output_tfrecords(exit_stack, base_path, num_shards): - """Opens all TFRecord shards for writing and adds them to an exit stack. - - Args: - exit_stack: A context2.ExitStack used to automatically closed the TFRecords - opened in this function. - base_path: The base path for all shards - num_shards: The number of shards - - Returns: - The list of opened TFRecords. Position k in the list corresponds to shard k. - """ - tf_record_output_filenames = [ - '{}-{:05d}-of-{:05d}'.format(base_path, idx, num_shards) - for idx in xrange(num_shards) - ] - - tfrecords = [ - exit_stack.enter_context(tf.python_io.TFRecordWriter(file_name)) - for file_name in tf_record_output_filenames - ] - - return tfrecords diff --git a/object_detection/dataset_tools/oid_tfrecord_creation_test.py b/object_detection/dataset_tools/oid_tfrecord_creation_test.py deleted file mode 100644 index 383af8a8..00000000 --- a/object_detection/dataset_tools/oid_tfrecord_creation_test.py +++ /dev/null @@ -1,203 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for oid_tfrecord_creation.py.""" - -import os -import contextlib2 -import pandas as pd -import tensorflow as tf - -from object_detection.dataset_tools import oid_tfrecord_creation - - -def create_test_data(): - data = { - 'ImageID': ['i1', 'i1', 'i1', 'i1', 'i2', 'i2'], - 'LabelName': ['a', 'a', 'b', 'b', 'b', 'c'], - 'YMin': [0.3, 0.6, 0.8, 0.1, 0.0, 0.0], - 'XMin': [0.1, 0.3, 0.7, 0.0, 0.1, 0.1], - 'XMax': [0.2, 0.3, 0.8, 0.5, 0.9, 0.9], - 'YMax': [0.3, 0.6, 1, 0.8, 0.8, 0.8], - 'IsOccluded': [0, 1, 1, 0, 0, 0], - 'IsTruncated': [0, 0, 0, 1, 0, 0], - 'IsGroupOf': [0, 0, 0, 0, 0, 1], - 'IsDepiction': [1, 0, 0, 0, 0, 0], - } - df = pd.DataFrame(data=data) - label_map = {'a': 0, 'b': 1, 'c': 2} - return label_map, df - - -class TfExampleFromAnnotationsDataFrameTests(tf.test.TestCase): - - def test_simple(self): - label_map, df = create_test_data() - - tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame( - df[df.ImageID == 'i1'], label_map, 'encoded_image_test') - self.assertProtoEquals(""" - features { - feature { - key: "image/encoded" - value { bytes_list { value: "encoded_image_test" } } } - feature { - key: "image/filename" - value { bytes_list { value: "i1.jpg" } } } - feature { - key: "image/object/bbox/ymin" - value { float_list { value: [0.3, 0.6, 0.8, 0.1] } } } - feature { - key: "image/object/bbox/xmin" - value { float_list { value: [0.1, 0.3, 0.7, 0.0] } } } - feature { - key: "image/object/bbox/ymax" - value { float_list { value: [0.3, 0.6, 1.0, 0.8] } } } - feature { - key: "image/object/bbox/xmax" - value { float_list { value: [0.2, 0.3, 0.8, 0.5] } } } - feature { - key: "image/object/class/label" - value { int64_list { value: [0, 0, 1, 1] } } } - feature { - key: "image/object/class/text" - value { bytes_list { value: ["a", "a", "b", "b"] } } } - feature { - key: "image/source_id" - value { bytes_list { value: "i1" } } } - feature { - 
key: "image/object/depiction" - value { int64_list { value: [1, 0, 0, 0] } } } - feature { - key: "image/object/group_of" - value { int64_list { value: [0, 0, 0, 0] } } } - feature { - key: "image/object/occluded" - value { int64_list { value: [0, 1, 1, 0] } } } - feature { - key: "image/object/truncated" - value { int64_list { value: [0, 0, 0, 1] } } } } - """, tf_example) - - def test_no_attributes(self): - label_map, df = create_test_data() - - del df['IsDepiction'] - del df['IsGroupOf'] - del df['IsOccluded'] - del df['IsTruncated'] - - tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame( - df[df.ImageID == 'i2'], label_map, 'encoded_image_test') - self.assertProtoEquals(""" - features { - feature { - key: "image/encoded" - value { bytes_list { value: "encoded_image_test" } } } - feature { - key: "image/filename" - value { bytes_list { value: "i2.jpg" } } } - feature { - key: "image/object/bbox/ymin" - value { float_list { value: [0.0, 0.0] } } } - feature { - key: "image/object/bbox/xmin" - value { float_list { value: [0.1, 0.1] } } } - feature { - key: "image/object/bbox/ymax" - value { float_list { value: [0.8, 0.8] } } } - feature { - key: "image/object/bbox/xmax" - value { float_list { value: [0.9, 0.9] } } } - feature { - key: "image/object/class/label" - value { int64_list { value: [1, 2] } } } - feature { - key: "image/object/class/text" - value { bytes_list { value: ["b", "c"] } } } - feature { - key: "image/source_id" - value { bytes_list { value: "i2" } } } } - """, tf_example) - - def test_label_filtering(self): - label_map, df = create_test_data() - - label_map = {'a': 0} - - tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame( - df[df.ImageID == 'i1'], label_map, 'encoded_image_test') - self.assertProtoEquals(""" - features { - feature { - key: "image/encoded" - value { bytes_list { value: "encoded_image_test" } } } - feature { - key: "image/filename" - value { bytes_list { value: "i1.jpg" } } } - 
feature { - key: "image/object/bbox/ymin" - value { float_list { value: [0.3, 0.6] } } } - feature { - key: "image/object/bbox/xmin" - value { float_list { value: [0.1, 0.3] } } } - feature { - key: "image/object/bbox/ymax" - value { float_list { value: [0.3, 0.6] } } } - feature { - key: "image/object/bbox/xmax" - value { float_list { value: [0.2, 0.3] } } } - feature { - key: "image/object/class/label" - value { int64_list { value: [0, 0] } } } - feature { - key: "image/object/class/text" - value { bytes_list { value: ["a", "a"] } } } - feature { - key: "image/source_id" - value { bytes_list { value: "i1" } } } - feature { - key: "image/object/depiction" - value { int64_list { value: [1, 0] } } } - feature { - key: "image/object/group_of" - value { int64_list { value: [0, 0] } } } - feature { - key: "image/object/occluded" - value { int64_list { value: [0, 1] } } } - feature { - key: "image/object/truncated" - value { int64_list { value: [0, 0] } } } } - """, tf_example) - - -class OpenOutputTfrecordsTests(tf.test.TestCase): - - def test_sharded_tfrecord_writes(self): - with contextlib2.ExitStack() as tf_record_close_stack: - output_tfrecords = oid_tfrecord_creation.open_sharded_output_tfrecords( - tf_record_close_stack, - os.path.join(tf.test.get_temp_dir(), 'test.tfrec'), 10) - for idx in range(10): - output_tfrecords[idx].write('test_{}'.format(idx)) - - for idx in range(10): - tf_record_path = '{}-{:05d}-of-00010'.format( - os.path.join(tf.test.get_temp_dir(), 'test.tfrec'), idx) - records = list(tf.python_io.tf_record_iterator(tf_record_path)) - self.assertAllEqual(records, ['test_{}'.format(idx)]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/eval.py b/object_detection/eval.py deleted file mode 100644 index 175ac1ee..00000000 --- a/object_detection/eval.py +++ /dev/null @@ -1,133 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -r"""Evaluation executable for detection models. - -This executable is used to evaluate DetectionModels. There are two ways of -configuring the eval job. - -1) A single pipeline_pb2.TrainEvalPipelineConfig file maybe specified instead. -In this mode, the --eval_training_data flag may be given to force the pipeline -to evaluate on training data instead. - -Example usage: - ./eval \ - --logtostderr \ - --checkpoint_dir=path/to/checkpoint_dir \ - --eval_dir=path/to/eval_dir \ - --pipeline_config_path=pipeline_config.pbtxt - -2) Three configuration files may be provided: a model_pb2.DetectionModel -configuration file to define what type of DetectionModel is being evaluated, an -input_reader_pb2.InputReader file to specify what data the model is evaluating -and an eval_pb2.EvalConfig file to configure evaluation parameters. 
- -Example usage: - ./eval \ - --logtostderr \ - --checkpoint_dir=path/to/checkpoint_dir \ - --eval_dir=path/to/eval_dir \ - --eval_config_path=eval_config.pbtxt \ - --model_config_path=model_config.pbtxt \ - --input_config_path=eval_input_config.pbtxt -""" -import functools -import os -import tensorflow as tf - -from object_detection import evaluator -from object_detection.builders import input_reader_builder -from object_detection.builders import model_builder -from object_detection.utils import config_util -from object_detection.utils import label_map_util - - -tf.logging.set_verbosity(tf.logging.INFO) - -flags = tf.app.flags -flags.DEFINE_boolean('eval_training_data', False, - 'If training data should be evaluated for this job.') -flags.DEFINE_string('checkpoint_dir', '', - 'Directory containing checkpoints to evaluate, typically ' - 'set to `train_dir` used in the training job.') -flags.DEFINE_string('eval_dir', '', - 'Directory to write eval summaries to.') -flags.DEFINE_string('pipeline_config_path', '', - 'Path to a pipeline_pb2.TrainEvalPipelineConfig config ' - 'file. If provided, other configs are ignored') -flags.DEFINE_string('eval_config_path', '', - 'Path to an eval_pb2.EvalConfig config file.') -flags.DEFINE_string('input_config_path', '', - 'Path to an input_reader_pb2.InputReader config file.') -flags.DEFINE_string('model_config_path', '', - 'Path to a model_pb2.DetectionModel config file.') -flags.DEFINE_boolean('run_once', False, 'Option to only run a single pass of ' - 'evaluation. Overrides the `max_evals` parameter in the ' - 'provided config.') -FLAGS = flags.FLAGS - - -def main(unused_argv): - assert FLAGS.checkpoint_dir, '`checkpoint_dir` is missing.' - assert FLAGS.eval_dir, '`eval_dir` is missing.' 
- tf.gfile.MakeDirs(FLAGS.eval_dir) - if FLAGS.pipeline_config_path: - configs = config_util.get_configs_from_pipeline_file( - FLAGS.pipeline_config_path) - tf.gfile.Copy(FLAGS.pipeline_config_path, - os.path.join(FLAGS.eval_dir, 'pipeline.config'), - overwrite=True) - else: - configs = config_util.get_configs_from_multiple_files( - model_config_path=FLAGS.model_config_path, - eval_config_path=FLAGS.eval_config_path, - eval_input_config_path=FLAGS.input_config_path) - for name, config in [('model.config', FLAGS.model_config_path), - ('eval.config', FLAGS.eval_config_path), - ('input.config', FLAGS.input_config_path)]: - tf.gfile.Copy(config, - os.path.join(FLAGS.eval_dir, name), - overwrite=True) - - model_config = configs['model'] - eval_config = configs['eval_config'] - if FLAGS.eval_training_data: - input_config = configs['train_input_config'] - else: - input_config = configs['eval_input_config'] - - model_fn = functools.partial( - model_builder.build, - model_config=model_config, - is_training=False) - - create_input_dict_fn = functools.partial( - input_reader_builder.build, - input_config) - - label_map = label_map_util.load_labelmap(input_config.label_map_path) - max_num_classes = max([item.id for item in label_map.item]) - categories = label_map_util.convert_label_map_to_categories( - label_map, max_num_classes) - - if FLAGS.run_once: - eval_config.max_evals = 1 - - evaluator.evaluate(create_input_dict_fn, model_fn, eval_config, categories, - FLAGS.checkpoint_dir, FLAGS.eval_dir) - - -if __name__ == '__main__': - tf.app.run() diff --git a/object_detection/eval_util.py b/object_detection/eval_util.py deleted file mode 100644 index 6a37be76..00000000 --- a/object_detection/eval_util.py +++ /dev/null @@ -1,516 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Common functions for repeatedly evaluating a checkpoint.""" -import logging -import os -import time - -import numpy as np -import tensorflow as tf - -from object_detection.core import box_list -from object_detection.core import box_list_ops -from object_detection.core import keypoint_ops -from object_detection.core import standard_fields as fields -from object_detection.utils import label_map_util -from object_detection.utils import ops -from object_detection.utils import visualization_utils as vis_utils - -slim = tf.contrib.slim - - -def write_metrics(metrics, global_step, summary_dir): - """Write metrics to a summary directory. - - Args: - metrics: A dictionary containing metric names and values. - global_step: Global step at which the metrics are computed. - summary_dir: Directory to write tensorflow summaries to. - """ - logging.info('Writing metrics to tf summary.') - summary_writer = tf.summary.FileWriter(summary_dir) - for key in sorted(metrics): - summary = tf.Summary(value=[ - tf.Summary.Value(tag=key, simple_value=metrics[key]), - ]) - summary_writer.add_summary(summary, global_step) - logging.info('%s: %f', key, metrics[key]) - summary_writer.close() - logging.info('Metrics written to tf summary.') - - -# TODO: Add tests. 
-def visualize_detection_results(result_dict, - tag, - global_step, - categories, - summary_dir='', - export_dir='', - agnostic_mode=False, - show_groundtruth=False, - min_score_thresh=.5, - max_num_predictions=20): - """Visualizes detection results and writes visualizations to image summaries. - - This function visualizes an image with its detected bounding boxes and writes - to image summaries which can be viewed on tensorboard. It optionally also - writes images to a directory. In the case of missing entry in the label map, - unknown class name in the visualization is shown as "N/A". - - Args: - result_dict: a dictionary holding groundtruth and detection - data corresponding to each image being evaluated. The following keys - are required: - 'original_image': a numpy array representing the image with shape - [1, height, width, 3] - 'detection_boxes': a numpy array of shape [N, 4] - 'detection_scores': a numpy array of shape [N] - 'detection_classes': a numpy array of shape [N] - The following keys are optional: - 'groundtruth_boxes': a numpy array of shape [N, 4] - 'groundtruth_keypoints': a numpy array of shape [N, num_keypoints, 2] - Detections are assumed to be provided in decreasing order of score and for - display, and we assume that scores are probabilities between 0 and 1. - tag: tensorboard tag (string) to associate with image. - global_step: global step at which the visualization are generated. - categories: a list of dictionaries representing all possible categories. - Each dict in this list has the following keys: - 'id': (required) an integer id uniquely identifying this category - 'name': (required) string representing category name - e.g., 'cat', 'dog', 'pizza' - 'supercategory': (optional) string representing the supercategory - e.g., 'animal', 'vehicle', 'food', etc - summary_dir: the output directory to which the image summaries are written. - export_dir: the output directory to which images are written. 
If this is - empty (default), then images are not exported. - agnostic_mode: boolean (default: False) controlling whether to evaluate in - class-agnostic mode or not. - show_groundtruth: boolean (default: False) controlling whether to show - groundtruth boxes in addition to detected boxes - min_score_thresh: minimum score threshold for a box to be visualized - max_num_predictions: maximum number of detections to visualize - Raises: - ValueError: if result_dict does not contain the expected keys (i.e., - 'original_image', 'detection_boxes', 'detection_scores', - 'detection_classes') - """ - if not set([ - 'original_image', 'detection_boxes', 'detection_scores', - 'detection_classes' - ]).issubset(set(result_dict.keys())): - raise ValueError('result_dict does not contain all expected keys.') - if show_groundtruth and 'groundtruth_boxes' not in result_dict: - raise ValueError('If show_groundtruth is enabled, result_dict must contain ' - 'groundtruth_boxes.') - logging.info('Creating detection visualizations.') - category_index = label_map_util.create_category_index(categories) - - image = np.squeeze(result_dict['original_image'], axis=0) - detection_boxes = result_dict['detection_boxes'] - detection_scores = result_dict['detection_scores'] - detection_classes = np.int32((result_dict['detection_classes'])) - detection_keypoints = result_dict.get('detection_keypoints', None) - detection_masks = result_dict.get('detection_masks', None) - - # Plot groundtruth underneath detections - if show_groundtruth: - groundtruth_boxes = result_dict['groundtruth_boxes'] - groundtruth_keypoints = result_dict.get('groundtruth_keypoints', None) - vis_utils.visualize_boxes_and_labels_on_image_array( - image, - groundtruth_boxes, - None, - None, - category_index, - keypoints=groundtruth_keypoints, - use_normalized_coordinates=False, - max_boxes_to_draw=None) - vis_utils.visualize_boxes_and_labels_on_image_array( - image, - detection_boxes, - detection_classes, - detection_scores, - 
category_index, - instance_masks=detection_masks, - keypoints=detection_keypoints, - use_normalized_coordinates=False, - max_boxes_to_draw=max_num_predictions, - min_score_thresh=min_score_thresh, - agnostic_mode=agnostic_mode) - - if export_dir: - export_path = os.path.join(export_dir, 'export-{}.png'.format(tag)) - vis_utils.save_image_array_as_png(image, export_path) - - summary = tf.Summary(value=[ - tf.Summary.Value( - tag=tag, - image=tf.Summary.Image( - encoded_image_string=vis_utils.encode_image_array_as_png_str( - image))) - ]) - summary_writer = tf.summary.FileWriter(summary_dir) - summary_writer.add_summary(summary, global_step) - summary_writer.close() - - logging.info('Detection visualizations written to summary with tag %s.', tag) - - -def _run_checkpoint_once(tensor_dict, - evaluators=None, - batch_processor=None, - checkpoint_dirs=None, - variables_to_restore=None, - restore_fn=None, - num_batches=1, - master='', - save_graph=False, - save_graph_dir=''): - """Evaluates metrics defined in evaluators. - - This function loads the latest checkpoint in checkpoint_dirs and evaluates - all metrics defined in evaluators. The metrics are processed in batch by the - batch_processor. - - Args: - tensor_dict: a dictionary holding tensors representing a batch of detections - and corresponding groundtruth annotations. - evaluators: a list of object of type DetectionEvaluator to be used for - evaluation. Note that the metric names produced by different evaluators - must be unique. - batch_processor: a function taking four arguments: - 1. tensor_dict: the same tensor_dict that is passed in as the first - argument to this function. - 2. sess: a tensorflow session - 3. batch_index: an integer representing the index of the batch amongst - all batches - By default, batch_processor is None, which defaults to running: - return sess.run(tensor_dict) - To skip an image, it suffices to return an empty dictionary in place of - result_dict. 
- checkpoint_dirs: list of directories to load into an EnsembleModel. If it - has only one directory, EnsembleModel will not be used -- - a DetectionModel - will be instantiated directly. Not used if restore_fn is set. - variables_to_restore: None, or a dictionary mapping variable names found in - a checkpoint to model variables. The dictionary would normally be - generated by creating a tf.train.ExponentialMovingAverage object and - calling its variables_to_restore() method. Not used if restore_fn is set. - restore_fn: None, or a function that takes a tf.Session object and correctly - restores all necessary variables from the correct checkpoint file. If - None, attempts to restore from the first directory in checkpoint_dirs. - num_batches: the number of batches to use for evaluation. - master: the location of the Tensorflow session. - save_graph: whether or not the Tensorflow graph is stored as a pbtxt file. - save_graph_dir: where to store the Tensorflow graph on disk. If save_graph - is True this must be non-empty. - - Returns: - global_step: the count of global steps. - all_evaluator_metrics: A dictionary containing metric names and values. - - Raises: - ValueError: if restore_fn is None and checkpoint_dirs doesn't have at least - one element. - ValueError: if save_graph is True and save_graph_dir is not defined. 
- """ - if save_graph and not save_graph_dir: - raise ValueError('`save_graph_dir` must be defined.') - sess = tf.Session(master, graph=tf.get_default_graph()) - sess.run(tf.global_variables_initializer()) - sess.run(tf.local_variables_initializer()) - sess.run(tf.tables_initializer()) - if restore_fn: - restore_fn(sess) - else: - if not checkpoint_dirs: - raise ValueError('`checkpoint_dirs` must have at least one entry.') - checkpoint_file = tf.train.latest_checkpoint(checkpoint_dirs[0]) - saver = tf.train.Saver(variables_to_restore) - saver.restore(sess, checkpoint_file) - - if save_graph: - tf.train.write_graph(sess.graph_def, save_graph_dir, 'eval.pbtxt') - - counters = {'skipped': 0, 'success': 0} - with tf.contrib.slim.queues.QueueRunners(sess): - try: - for batch in range(int(num_batches)): - if (batch + 1) % 100 == 0: - logging.info('Running eval ops batch %d/%d', batch + 1, num_batches) - if not batch_processor: - try: - result_dict = sess.run(tensor_dict) - counters['success'] += 1 - except tf.errors.InvalidArgumentError: - logging.info('Skipping image') - counters['skipped'] += 1 - result_dict = {} - else: - result_dict = batch_processor(tensor_dict, sess, batch, counters) - for evaluator in evaluators: - # TODO: Use image_id tensor once we fix the input data - # decoders to return correct image_id. - # TODO: result_dict contains batches of images, while - # add_single_ground_truth_image_info expects a single image. Fix - evaluator.add_single_ground_truth_image_info( - image_id=batch, groundtruth_dict=result_dict) - evaluator.add_single_detected_image_info( - image_id=batch, detections_dict=result_dict) - logging.info('Running eval batches done.') - except tf.errors.OutOfRangeError: - logging.info('Done evaluating -- epoch limit reached') - finally: - # When done, ask the threads to stop. 
- logging.info('# success: %d', counters['success']) - logging.info('# skipped: %d', counters['skipped']) - all_evaluator_metrics = {} - for evaluator in evaluators: - metrics = evaluator.evaluate() - evaluator.clear() - if any(key in all_evaluator_metrics for key in metrics): - raise ValueError('Metric names between evaluators must not collide.') - all_evaluator_metrics.update(metrics) - global_step = tf.train.global_step(sess, tf.train.get_global_step()) - sess.close() - return (global_step, all_evaluator_metrics) - - -# TODO: Add tests. -def repeated_checkpoint_run(tensor_dict, - summary_dir, - evaluators, - batch_processor=None, - checkpoint_dirs=None, - variables_to_restore=None, - restore_fn=None, - num_batches=1, - eval_interval_secs=120, - max_number_of_evaluations=None, - master='', - save_graph=False, - save_graph_dir=''): - """Periodically evaluates desired tensors using checkpoint_dirs or restore_fn. - - This function repeatedly loads a checkpoint and evaluates a desired - set of tensors (provided by tensor_dict) and hands the resulting numpy - arrays to a function result_processor which can be used to further - process/save/visualize the results. - - Args: - tensor_dict: a dictionary holding tensors representing a batch of detections - and corresponding groundtruth annotations. - summary_dir: a directory to write metrics summaries. - evaluators: a list of object of type DetectionEvaluator to be used for - evaluation. Note that the metric names produced by different evaluators - must be unique. - batch_processor: a function taking three arguments: - 1. tensor_dict: the same tensor_dict that is passed in as the first - argument to this function. - 2. sess: a tensorflow session - 3. 
batch_index: an integer representing the index of the batch amongst - all batches - By default, batch_processor is None, which defaults to running: - return sess.run(tensor_dict) - checkpoint_dirs: list of directories to load into a DetectionModel or an - EnsembleModel if restore_fn isn't set. Also used to determine when to run - next evaluation. Must have at least one element. - variables_to_restore: None, or a dictionary mapping variable names found in - a checkpoint to model variables. The dictionary would normally be - generated by creating a tf.train.ExponentialMovingAverage object and - calling its variables_to_restore() method. Not used if restore_fn is set. - restore_fn: a function that takes a tf.Session object and correctly restores - all necessary variables from the correct checkpoint file. - num_batches: the number of batches to use for evaluation. - eval_interval_secs: the number of seconds between each evaluation run. - max_number_of_evaluations: the max number of iterations of the evaluation. - If the value is left as None the evaluation continues indefinitely. - master: the location of the Tensorflow session. - save_graph: whether or not the Tensorflow graph is saved as a pbtxt file. - save_graph_dir: where to save on disk the Tensorflow graph. If store_graph - is True this must be non-empty. - - Returns: - metrics: A dictionary containing metric names and values in the latest - evaluation. - - Raises: - ValueError: if max_num_of_evaluations is not None or a positive number. - ValueError: if checkpoint_dirs doesn't have at least one element. 
- """ - if max_number_of_evaluations and max_number_of_evaluations <= 0: - raise ValueError( - '`number_of_steps` must be either None or a positive number.') - - if not checkpoint_dirs: - raise ValueError('`checkpoint_dirs` must have at least one entry.') - - last_evaluated_model_path = None - number_of_evaluations = 0 - while True: - start = time.time() - logging.info('Starting evaluation at ' + time.strftime( - '%Y-%m-%d-%H:%M:%S', time.gmtime())) - model_path = tf.train.latest_checkpoint(checkpoint_dirs[0]) - if not model_path: - logging.info('No model found in %s. Will try again in %d seconds', - checkpoint_dirs[0], eval_interval_secs) - elif model_path == last_evaluated_model_path: - logging.info('Found already evaluated checkpoint. Will try again in %d ' - 'seconds', eval_interval_secs) - else: - last_evaluated_model_path = model_path - global_step, metrics = _run_checkpoint_once(tensor_dict, evaluators, - batch_processor, - checkpoint_dirs, - variables_to_restore, - restore_fn, num_batches, - master, save_graph, - save_graph_dir) - write_metrics(metrics, global_step, summary_dir) - number_of_evaluations += 1 - - if (max_number_of_evaluations and - number_of_evaluations >= max_number_of_evaluations): - logging.info('Finished evaluation!') - break - time_to_next_eval = start + eval_interval_secs - time.time() - if time_to_next_eval > 0: - time.sleep(time_to_next_eval) - - return metrics - - -def result_dict_for_single_example(image, - key, - detections, - groundtruth=None, - class_agnostic=False, - scale_to_absolute=False): - """Merges all detection and groundtruth information for a single example. - - Note that evaluation tools require classes that are 1-indexed, and so this - function performs the offset. If `class_agnostic` is True, all output classes - have label 1. - - Args: - image: A single 4D image tensor of shape [1, H, W, C]. - key: A single string tensor identifying the image. 
- detections: A dictionary of detections, returned from - DetectionModel.postprocess(). - groundtruth: (Optional) Dictionary of groundtruth items, with fields: - 'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in - normalized coordinates. - 'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes. - 'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional) - 'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional) - 'groundtruth_difficult': [num_boxes] int64 tensor. (Optional) - 'groundtruth_group_of': [num_boxes] int64 tensor. (Optional) - 'groundtruth_instance_masks': 3D int64 tensor of instance masks - (Optional). - class_agnostic: Boolean indicating whether the detections are class-agnostic - (i.e. binary). Default False. - scale_to_absolute: Boolean indicating whether boxes, masks, keypoints should - be scaled to absolute coordinates. Note that for IoU based evaluations, - it does not matter whether boxes are expressed in absolute or relative - coordinates. Default False. - - Returns: - A dictionary with: - 'original_image': A [1, H, W, C] uint8 image tensor. - 'key': A string tensor with image identifier. - 'detection_boxes': [max_detections, 4] float32 tensor of boxes, in - normalized or absolute coordinates, depending on the value of - `scale_to_absolute`. - 'detection_scores': [max_detections] float32 tensor of scores. - 'detection_classes': [max_detections] int64 tensor of 1-indexed classes. - 'detection_masks': [max_detections, None, None] float32 tensor of binarized - masks. (Only present if available in `detections`) - 'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in - normalized or absolute coordinates, depending on the value of - `scale_to_absolute`. (Optional) - 'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes. - (Optional) - 'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional) - 'groundtruth_is_crowd': [num_boxes] int64 tensor. 
(Optional) - 'groundtruth_difficult': [num_boxes] int64 tensor. (Optional) - 'groundtruth_group_of': [num_boxes] int64 tensor. (Optional) - 'groundtruth_instance_masks': 3D int64 tensor of instance masks - (Optional). - - """ - label_id_offset = 1 # Applying label id offset (b/63711816) - - input_data_fields = fields.InputDataFields() - output_dict = { - input_data_fields.original_image: image, - input_data_fields.key: key, - } - - detection_fields = fields.DetectionResultFields - detection_boxes = detections[detection_fields.detection_boxes][0] - output_dict[detection_fields.detection_boxes] = detection_boxes - image_shape = tf.shape(image) - if scale_to_absolute: - absolute_detection_boxlist = box_list_ops.to_absolute_coordinates( - box_list.BoxList(detection_boxes), image_shape[1], image_shape[2]) - output_dict[detection_fields.detection_boxes] = ( - absolute_detection_boxlist.get()) - detection_scores = detections[detection_fields.detection_scores][0] - output_dict[detection_fields.detection_scores] = detection_scores - - if class_agnostic: - detection_classes = tf.ones_like(detection_scores, dtype=tf.int64) - else: - detection_classes = ( - tf.to_int64(detections[detection_fields.detection_classes][0]) + - label_id_offset) - output_dict[detection_fields.detection_classes] = detection_classes - - if detection_fields.detection_masks in detections: - detection_masks = detections[detection_fields.detection_masks][0] - output_dict[detection_fields.detection_masks] = detection_masks - if scale_to_absolute: - # TODO: This should be done in model's postprocess - # function ideally. 
- detection_masks_reframed = ops.reframe_box_masks_to_image_masks( - detection_masks, detection_boxes, image_shape[1], image_shape[2]) - detection_masks_reframed = tf.to_float( - tf.greater(detection_masks_reframed, 0.5)) - output_dict[detection_fields.detection_masks] = detection_masks_reframed - if detection_fields.detection_keypoints in detections: - detection_keypoints = detections[detection_fields.detection_keypoints][0] - output_dict[detection_fields.detection_keypoints] = detection_keypoints - if scale_to_absolute: - absolute_detection_keypoints = keypoint_ops.scale( - detection_keypoints, image_shape[1], image_shape[2]) - output_dict[detection_fields.detection_keypoints] = ( - absolute_detection_keypoints) - - if groundtruth: - output_dict.update(groundtruth) - if scale_to_absolute: - groundtruth_boxes = groundtruth[input_data_fields.groundtruth_boxes] - absolute_gt_boxlist = box_list_ops.to_absolute_coordinates( - box_list.BoxList(groundtruth_boxes), image_shape[1], image_shape[2]) - output_dict[input_data_fields.groundtruth_boxes] = ( - absolute_gt_boxlist.get()) - # For class-agnostic models, groundtruth classes all become 1. - if class_agnostic: - groundtruth_classes = groundtruth[input_data_fields.groundtruth_classes] - groundtruth_classes = tf.ones_like(groundtruth_classes, dtype=tf.int64) - output_dict[input_data_fields.groundtruth_classes] = groundtruth_classes - - return output_dict diff --git a/object_detection/evaluator.py b/object_detection/evaluator.py deleted file mode 100644 index 74722d00..00000000 --- a/object_detection/evaluator.py +++ /dev/null @@ -1,212 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Detection model evaluator. - -This file provides a generic evaluation method that can be used to evaluate a -DetectionModel. -""" - -import logging -import tensorflow as tf - -from object_detection import eval_util -from object_detection.core import prefetcher -from object_detection.core import standard_fields as fields -from object_detection.utils import object_detection_evaluation - -# A dictionary of metric names to classes that implement the metric. The classes -# in the dictionary must implement -# utils.object_detection_evaluation.DetectionEvaluator interface. -EVAL_METRICS_CLASS_DICT = { - 'pascal_voc_metrics': - object_detection_evaluation.PascalDetectionEvaluator, - 'weighted_pascal_voc_metrics': - object_detection_evaluation.WeightedPascalDetectionEvaluator, - 'open_images_metrics': - object_detection_evaluation.OpenImagesDetectionEvaluator -} - - -def _extract_prediction_tensors(model, - create_input_dict_fn, - ignore_groundtruth=False): - """Restores the model in a tensorflow session. - - Args: - model: model to perform predictions with. - create_input_dict_fn: function to create input tensor dictionaries. - ignore_groundtruth: whether groundtruth should be ignored. - - Returns: - tensor_dict: A tensor dictionary with evaluations. 
- """ - input_dict = create_input_dict_fn() - prefetch_queue = prefetcher.prefetch(input_dict, capacity=500) - input_dict = prefetch_queue.dequeue() - original_image = tf.expand_dims(input_dict[fields.InputDataFields.image], 0) - preprocessed_image = model.preprocess(tf.to_float(original_image)) - prediction_dict = model.predict(preprocessed_image) - detections = model.postprocess(prediction_dict) - - groundtruth = None - if not ignore_groundtruth: - groundtruth = { - fields.InputDataFields.groundtruth_boxes: - input_dict[fields.InputDataFields.groundtruth_boxes], - fields.InputDataFields.groundtruth_classes: - input_dict[fields.InputDataFields.groundtruth_classes], - fields.InputDataFields.groundtruth_area: - input_dict[fields.InputDataFields.groundtruth_area], - fields.InputDataFields.groundtruth_is_crowd: - input_dict[fields.InputDataFields.groundtruth_is_crowd], - fields.InputDataFields.groundtruth_difficult: - input_dict[fields.InputDataFields.groundtruth_difficult] - } - if fields.InputDataFields.groundtruth_group_of in input_dict: - groundtruth[fields.InputDataFields.groundtruth_group_of] = ( - input_dict[fields.InputDataFields.groundtruth_group_of]) - if fields.DetectionResultFields.detection_masks in detections: - groundtruth[fields.InputDataFields.groundtruth_instance_masks] = ( - input_dict[fields.InputDataFields.groundtruth_instance_masks]) - - return eval_util.result_dict_for_single_example( - original_image, - input_dict[fields.InputDataFields.source_id], - detections, - groundtruth, - class_agnostic=( - fields.DetectionResultFields.detection_classes not in detections), - scale_to_absolute=True) - - -def get_evaluators(eval_config, categories): - """Returns the evaluator class according to eval_config, valid for categories. - - Args: - eval_config: evaluation configurations. - categories: a list of categories to evaluate. - Returns: - An list of instances of DetectionEvaluator. - - Raises: - ValueError: if metric is not in the metric class dictionary. 
- """ - eval_metric_fn_key = eval_config.metrics_set - if eval_metric_fn_key not in EVAL_METRICS_CLASS_DICT: - raise ValueError('Metric not found: {}'.format(eval_metric_fn_key)) - return [ - EVAL_METRICS_CLASS_DICT[eval_metric_fn_key]( - categories=categories) - ] - - -def evaluate(create_input_dict_fn, create_model_fn, eval_config, categories, - checkpoint_dir, eval_dir): - """Evaluation function for detection models. - - Args: - create_input_dict_fn: a function to create a tensor input dictionary. - create_model_fn: a function that creates a DetectionModel. - eval_config: a eval_pb2.EvalConfig protobuf. - categories: a list of category dictionaries. Each dict in the list should - have an integer 'id' field and string 'name' field. - checkpoint_dir: directory to load the checkpoints to evaluate from. - eval_dir: directory to write evaluation metrics summary to. - - Returns: - metrics: A dictionary containing metric names and values from the latest - run. - """ - - model = create_model_fn() - - if eval_config.ignore_groundtruth and not eval_config.export_path: - logging.fatal('If ignore_groundtruth=True then an export_path is ' - 'required. Aborting!!!') - - tensor_dict = _extract_prediction_tensors( - model=model, - create_input_dict_fn=create_input_dict_fn, - ignore_groundtruth=eval_config.ignore_groundtruth) - - def _process_batch(tensor_dict, sess, batch_index, counters): - """Evaluates tensors in tensor_dict, visualizing the first K examples. - - This function calls sess.run on tensor_dict, evaluating the original_image - tensor only on the first K examples and visualizing detections overlaid - on this original_image. - - Args: - tensor_dict: a dictionary of tensors - sess: tensorflow session - batch_index: the index of the batch amongst all batches in the run. - counters: a dictionary holding 'success' and 'skipped' fields which can - be updated to keep track of number of successful and failed runs, - respectively. 
If these fields are not updated, then the success/skipped - counter values shown at the end of evaluation will be incorrect. - - Returns: - result_dict: a dictionary of numpy arrays - """ - try: - result_dict = sess.run(tensor_dict) - counters['success'] += 1 - except tf.errors.InvalidArgumentError: - logging.info('Skipping image') - counters['skipped'] += 1 - return {} - global_step = tf.train.global_step(sess, tf.train.get_global_step()) - if batch_index < eval_config.num_visualizations: - tag = 'image-{}'.format(batch_index) - eval_util.visualize_detection_results( - result_dict, - tag, - global_step, - categories=categories, - summary_dir=eval_dir, - export_dir=eval_config.visualization_export_dir, - show_groundtruth=eval_config.visualization_export_dir) - return result_dict - - variables_to_restore = tf.global_variables() - global_step = tf.train.get_or_create_global_step() - variables_to_restore.append(global_step) - if eval_config.use_moving_averages: - variable_averages = tf.train.ExponentialMovingAverage(0.0) - variables_to_restore = variable_averages.variables_to_restore() - saver = tf.train.Saver(variables_to_restore) - - def _restore_latest_checkpoint(sess): - latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir) - saver.restore(sess, latest_checkpoint) - - metrics = eval_util.repeated_checkpoint_run( - tensor_dict=tensor_dict, - summary_dir=eval_dir, - evaluators=get_evaluators(eval_config, categories), - batch_processor=_process_batch, - checkpoint_dirs=[checkpoint_dir], - variables_to_restore=None, - restore_fn=_restore_latest_checkpoint, - num_batches=eval_config.num_examples, - eval_interval_secs=eval_config.eval_interval_secs, - max_number_of_evaluations=(1 if eval_config.ignore_groundtruth else - eval_config.max_evals - if eval_config.max_evals else None), - master=eval_config.eval_master, - save_graph=eval_config.save_graph, - save_graph_dir=(eval_dir if eval_config.save_graph else '')) - - return metrics diff --git 
a/object_detection/export_inference_graph.py b/object_detection/export_inference_graph.py deleted file mode 100644 index 279d1d16..00000000 --- a/object_detection/export_inference_graph.py +++ /dev/null @@ -1,119 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -r"""Tool to export an object detection model for inference. - -Prepares an object detection tensorflow graph for inference using model -configuration and an optional trained checkpoint. Outputs inference -graph, associated checkpoint files, a frozen inference graph and a -SavedModel (https://tensorflow.github.io/serving/serving_basic.html). - -The inference graph contains one of three input nodes depending on the user -specified option. - * `image_tensor`: Accepts a uint8 4-D tensor of shape [None, None, None, 3] - * `encoded_image_string_tensor`: Accepts a 1-D string tensor of shape [None] - containing encoded PNG or JPEG images. Image resolutions are expected to be - the same if more than 1 image is provided. - * `tf_example`: Accepts a 1-D string tensor of shape [None] containing - serialized TFExample protos. Image resolutions are expected to be the same - if more than 1 image is provided. 
- -and the following output nodes returned by the model.postprocess(..): - * `num_detections`: Outputs float32 tensors of the form [batch] - that specifies the number of valid boxes per image in the batch. - * `detection_boxes`: Outputs float32 tensors of the form - [batch, num_boxes, 4] containing detected boxes. - * `detection_scores`: Outputs float32 tensors of the form - [batch, num_boxes] containing class scores for the detections. - * `detection_classes`: Outputs float32 tensors of the form - [batch, num_boxes] containing classes for the detections. - * `detection_masks`: Outputs float32 tensors of the form - [batch, num_boxes, mask_height, mask_width] containing predicted instance - masks for each box if its present in the dictionary of postprocessed - tensors returned by the model. - -Notes: - * This tool uses `use_moving_averages` from eval_config to decide which - weights to freeze. - -Example Usage: --------------- -python export_inference_graph \ - --input_type image_tensor \ - --pipeline_config_path path/to/ssd_inception_v2.config \ - --trained_checkpoint_prefix path/to/model.ckpt \ - --output_directory path/to/exported_model_directory - -The expected output would be in the directory -path/to/exported_model_directory (which is created if it does not exist) -with contents: - - graph.pbtxt - - model.ckpt.data-00000-of-00001 - - model.ckpt.info - - model.ckpt.meta - - frozen_inference_graph.pb - + saved_model (a directory) -""" -import tensorflow as tf -from google.protobuf import text_format -from object_detection import exporter -from object_detection.protos import pipeline_pb2 - -slim = tf.contrib.slim -flags = tf.app.flags - -flags.DEFINE_string('input_type', 'image_tensor', 'Type of input node. Can be ' - 'one of [`image_tensor`, `encoded_image_string_tensor`, ' - '`tf_example`]') -flags.DEFINE_string('input_shape', None, - 'If input_type is `image_tensor`, this can explicitly set ' - 'the shape of this input tensor to a fixed size. 
The ' - 'dimensions are to be provided as a comma-separated list ' - 'of integers. A value of -1 can be used for unknown ' - 'dimensions. If not specified, for an `image_tensor, the ' - 'default shape will be partially specified as ' - '`[None, None, None, 3]`.') -flags.DEFINE_string('pipeline_config_path', None, - 'Path to a pipeline_pb2.TrainEvalPipelineConfig config ' - 'file.') -flags.DEFINE_string('trained_checkpoint_prefix', None, - 'Path to trained checkpoint, typically of the form ' - 'path/to/model.ckpt') -flags.DEFINE_string('output_directory', None, 'Path to write outputs.') - -tf.app.flags.mark_flag_as_required('pipeline_config_path') -tf.app.flags.mark_flag_as_required('trained_checkpoint_prefix') -tf.app.flags.mark_flag_as_required('output_directory') -FLAGS = flags.FLAGS - - -def main(_): - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - with tf.gfile.GFile(FLAGS.pipeline_config_path, 'r') as f: - text_format.Merge(f.read(), pipeline_config) - if FLAGS.input_shape: - input_shape = [ - int(dim) if dim != '-1' else None - for dim in FLAGS.input_shape.split(',') - ] - else: - input_shape = None - exporter.export_inference_graph(FLAGS.input_type, pipeline_config, - FLAGS.trained_checkpoint_prefix, - FLAGS.output_directory, input_shape) - - -if __name__ == '__main__': - tf.app.run() diff --git a/object_detection/exporter.py b/object_detection/exporter.py deleted file mode 100644 index 95469e68..00000000 --- a/object_detection/exporter.py +++ /dev/null @@ -1,426 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Functions to export object detection inference graph.""" -import logging -import os -import tempfile -import tensorflow as tf -from tensorflow.core.protobuf import rewriter_config_pb2 -from tensorflow.python import pywrap_tensorflow -from tensorflow.python.client import session -from tensorflow.python.framework import graph_util -from tensorflow.python.platform import gfile -from tensorflow.python.saved_model import signature_constants -from tensorflow.python.training import saver as saver_lib -from object_detection.builders import model_builder -from object_detection.core import standard_fields as fields -from object_detection.data_decoders import tf_example_decoder - -slim = tf.contrib.slim - - -# TODO: Replace with freeze_graph.freeze_graph_with_def_protos when -# newer version of Tensorflow becomes more common. -def freeze_graph_with_def_protos( - input_graph_def, - input_saver_def, - input_checkpoint, - output_node_names, - restore_op_name, - filename_tensor_name, - clear_devices, - initializer_nodes, - optimize_graph=True, - variable_names_blacklist=''): - """Converts all variables in a graph and checkpoint into constants.""" - del restore_op_name, filename_tensor_name # Unused by updated loading code. 
- - # 'input_checkpoint' may be a prefix if we're using Saver V2 format - if not saver_lib.checkpoint_exists(input_checkpoint): - raise ValueError( - 'Input checkpoint "' + input_checkpoint + '" does not exist!') - - if not output_node_names: - raise ValueError( - 'You must supply the name of a node to --output_node_names.') - - # Remove all the explicit device specifications for this node. This helps to - # make the graph more portable. - if clear_devices: - for node in input_graph_def.node: - node.device = '' - - with tf.Graph().as_default(): - tf.import_graph_def(input_graph_def, name='') - - if optimize_graph: - logging.info('Graph Rewriter optimizations enabled') - rewrite_options = rewriter_config_pb2.RewriterConfig() - rewrite_options.optimizers.append('pruning') - rewrite_options.optimizers.append('constfold') - rewrite_options.optimizers.append('layout') - graph_options = tf.GraphOptions( - rewrite_options=rewrite_options, infer_shapes=True) - else: - logging.info('Graph Rewriter optimizations disabled') - graph_options = tf.GraphOptions() - config = tf.ConfigProto(graph_options=graph_options) - with session.Session(config=config) as sess: - if input_saver_def: - saver = saver_lib.Saver(saver_def=input_saver_def) - saver.restore(sess, input_checkpoint) - else: - var_list = {} - reader = pywrap_tensorflow.NewCheckpointReader(input_checkpoint) - var_to_shape_map = reader.get_variable_to_shape_map() - for key in var_to_shape_map: - try: - tensor = sess.graph.get_tensor_by_name(key + ':0') - except KeyError: - # This tensor doesn't exist in the graph (for example it's - # 'global_step' or a similar housekeeping element) so skip it. 
- continue - var_list[key] = tensor - saver = saver_lib.Saver(var_list=var_list) - saver.restore(sess, input_checkpoint) - if initializer_nodes: - sess.run(initializer_nodes) - - variable_names_blacklist = (variable_names_blacklist.split(',') if - variable_names_blacklist else None) - output_graph_def = graph_util.convert_variables_to_constants( - sess, - input_graph_def, - output_node_names.split(','), - variable_names_blacklist=variable_names_blacklist) - - return output_graph_def - - -def replace_variable_values_with_moving_averages(graph, - current_checkpoint_file, - new_checkpoint_file): - """Replaces variable values in the checkpoint with their moving averages. - - If the current checkpoint has shadow variables maintaining moving averages of - the variables defined in the graph, this function generates a new checkpoint - where the variables contain the values of their moving averages. - - Args: - graph: a tf.Graph object. - current_checkpoint_file: a checkpoint containing both original variables and - their moving averages. - new_checkpoint_file: file path to write a new checkpoint. - """ - with graph.as_default(): - variable_averages = tf.train.ExponentialMovingAverage(0.0) - ema_variables_to_restore = variable_averages.variables_to_restore() - with tf.Session() as sess: - read_saver = tf.train.Saver(ema_variables_to_restore) - read_saver.restore(sess, current_checkpoint_file) - write_saver = tf.train.Saver() - write_saver.save(sess, new_checkpoint_file) - - -def _image_tensor_input_placeholder(input_shape=None): - """Returns input placeholder and a 4-D uint8 image tensor.""" - if input_shape is None: - input_shape = (None, None, None, 3) - input_tensor = tf.placeholder( - dtype=tf.uint8, shape=input_shape, name='image_tensor') - return input_tensor, input_tensor - - -def _tf_example_input_placeholder(): - """Returns input that accepts a batch of strings with tf examples. - - Returns: - a tuple of input placeholder and the output decoded images. 
- """ - batch_tf_example_placeholder = tf.placeholder( - tf.string, shape=[None], name='tf_example') - def decode(tf_example_string_tensor): - tensor_dict = tf_example_decoder.TfExampleDecoder().decode( - tf_example_string_tensor) - image_tensor = tensor_dict[fields.InputDataFields.image] - return image_tensor - return (batch_tf_example_placeholder, - tf.map_fn(decode, - elems=batch_tf_example_placeholder, - dtype=tf.uint8, - parallel_iterations=32, - back_prop=False)) - - -def _encoded_image_string_tensor_input_placeholder(): - """Returns input that accepts a batch of PNG or JPEG strings. - - Returns: - a tuple of input placeholder and the output decoded images. - """ - batch_image_str_placeholder = tf.placeholder( - dtype=tf.string, - shape=[None], - name='encoded_image_string_tensor') - def decode(encoded_image_string_tensor): - image_tensor = tf.image.decode_image(encoded_image_string_tensor, - channels=3) - image_tensor.set_shape((None, None, 3)) - return image_tensor - return (batch_image_str_placeholder, - tf.map_fn( - decode, - elems=batch_image_str_placeholder, - dtype=tf.uint8, - parallel_iterations=32, - back_prop=False)) - - -input_placeholder_fn_map = { - 'image_tensor': _image_tensor_input_placeholder, - 'encoded_image_string_tensor': - _encoded_image_string_tensor_input_placeholder, - 'tf_example': _tf_example_input_placeholder, -} - - -def _add_output_tensor_nodes(postprocessed_tensors, - output_collection_name='inference_op'): - """Adds output nodes for detection boxes and scores. - - Adds the following nodes for output tensors - - * num_detections: float32 tensor of shape [batch_size]. - * detection_boxes: float32 tensor of shape [batch_size, num_boxes, 4] - containing detected boxes. - * detection_scores: float32 tensor of shape [batch_size, num_boxes] - containing scores for the detected boxes. - * detection_classes: float32 tensor of shape [batch_size, num_boxes] - containing class predictions for the detected boxes. 
- * detection_masks: (Optional) float32 tensor of shape - [batch_size, num_boxes, mask_height, mask_width] containing masks for each - detection box. - - Args: - postprocessed_tensors: a dictionary containing the following fields - 'detection_boxes': [batch, max_detections, 4] - 'detection_scores': [batch, max_detections] - 'detection_classes': [batch, max_detections] - 'detection_masks': [batch, max_detections, mask_height, mask_width] - (optional). - 'num_detections': [batch] - output_collection_name: Name of collection to add output tensors to. - - Returns: - A tensor dict containing the added output tensor nodes. - """ - label_id_offset = 1 - boxes = postprocessed_tensors.get('detection_boxes') - scores = postprocessed_tensors.get('detection_scores') - classes = postprocessed_tensors.get('detection_classes') + label_id_offset - masks = postprocessed_tensors.get('detection_masks') - num_detections = postprocessed_tensors.get('num_detections') - outputs = {} - outputs['detection_boxes'] = tf.identity(boxes, name='detection_boxes') - outputs['detection_scores'] = tf.identity(scores, name='detection_scores') - outputs['detection_classes'] = tf.identity(classes, name='detection_classes') - outputs['num_detections'] = tf.identity(num_detections, name='num_detections') - if masks is not None: - outputs['detection_masks'] = tf.identity(masks, name='detection_masks') - for output_key in outputs: - tf.add_to_collection(output_collection_name, outputs[output_key]) - if masks is not None: - tf.add_to_collection(output_collection_name, outputs['detection_masks']) - return outputs - - -def _write_frozen_graph(frozen_graph_path, frozen_graph_def): - """Writes frozen graph to disk. - - Args: - frozen_graph_path: Path to write inference graph. - frozen_graph_def: tf.GraphDef holding frozen graph. 
- """ - with gfile.GFile(frozen_graph_path, 'wb') as f: - f.write(frozen_graph_def.SerializeToString()) - logging.info('%d ops in the final graph.', len(frozen_graph_def.node)) - - -def _write_saved_model(saved_model_path, - frozen_graph_def, - inputs, - outputs): - """Writes SavedModel to disk. - - If checkpoint_path is not None bakes the weights into the graph thereby - eliminating the need of checkpoint files during inference. If the model - was trained with moving averages, setting use_moving_averages to true - restores the moving averages, otherwise the original set of variables - is restored. - - Args: - saved_model_path: Path to write SavedModel. - frozen_graph_def: tf.GraphDef holding frozen graph. - inputs: The input image tensor to use for detection. - outputs: A tensor dictionary containing the outputs of a DetectionModel. - """ - with tf.Graph().as_default(): - with session.Session() as sess: - - tf.import_graph_def(frozen_graph_def, name='') - - builder = tf.saved_model.builder.SavedModelBuilder(saved_model_path) - - tensor_info_inputs = { - 'inputs': tf.saved_model.utils.build_tensor_info(inputs)} - tensor_info_outputs = {} - for k, v in outputs.items(): - tensor_info_outputs[k] = tf.saved_model.utils.build_tensor_info(v) - - detection_signature = ( - tf.saved_model.signature_def_utils.build_signature_def( - inputs=tensor_info_inputs, - outputs=tensor_info_outputs, - method_name=signature_constants.PREDICT_METHOD_NAME)) - - builder.add_meta_graph_and_variables( - sess, [tf.saved_model.tag_constants.SERVING], - signature_def_map={ - signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: - detection_signature, - }, - ) - builder.save() - - -def _write_graph_and_checkpoint(inference_graph_def, - model_path, - input_saver_def, - trained_checkpoint_prefix): - for node in inference_graph_def.node: - node.device = '' - with tf.Graph().as_default(): - tf.import_graph_def(inference_graph_def, name='') - with session.Session() as sess: - saver = 
saver_lib.Saver(saver_def=input_saver_def, - save_relative_paths=True) - saver.restore(sess, trained_checkpoint_prefix) - saver.save(sess, model_path) - - -def _export_inference_graph(input_type, - detection_model, - use_moving_averages, - trained_checkpoint_prefix, - output_directory, - additional_output_tensor_names=None, - input_shape=None, - optimize_graph=True, - output_collection_name='inference_op'): - """Export helper.""" - tf.gfile.MakeDirs(output_directory) - frozen_graph_path = os.path.join(output_directory, - 'frozen_inference_graph.pb') - saved_model_path = os.path.join(output_directory, 'saved_model') - model_path = os.path.join(output_directory, 'model.ckpt') - - if input_type not in input_placeholder_fn_map: - raise ValueError('Unknown input type: {}'.format(input_type)) - placeholder_args = {} - if input_shape is not None: - if input_type != 'image_tensor': - raise ValueError('Can only specify input shape for `image_tensor` ' - 'inputs.') - placeholder_args['input_shape'] = input_shape - placeholder_tensor, input_tensors = input_placeholder_fn_map[input_type]( - **placeholder_args) - inputs = tf.to_float(input_tensors) - preprocessed_inputs = detection_model.preprocess(inputs) - output_tensors = detection_model.predict(preprocessed_inputs) - postprocessed_tensors = detection_model.postprocess(output_tensors) - outputs = _add_output_tensor_nodes(postprocessed_tensors, - output_collection_name) - # Add global step to the graph. 
- slim.get_or_create_global_step() - - if use_moving_averages: - temp_checkpoint_file = tempfile.NamedTemporaryFile() - replace_variable_values_with_moving_averages( - tf.get_default_graph(), trained_checkpoint_prefix, - temp_checkpoint_file.name) - checkpoint_to_use = temp_checkpoint_file.name - else: - checkpoint_to_use = trained_checkpoint_prefix - - saver = tf.train.Saver() - input_saver_def = saver.as_saver_def() - - _write_graph_and_checkpoint( - inference_graph_def=tf.get_default_graph().as_graph_def(), - model_path=model_path, - input_saver_def=input_saver_def, - trained_checkpoint_prefix=checkpoint_to_use) - - if additional_output_tensor_names is not None: - output_node_names = ','.join(outputs.keys()+additional_output_tensor_names) - else: - output_node_names = ','.join(outputs.keys()) - - frozen_graph_def = freeze_graph_with_def_protos( - input_graph_def=tf.get_default_graph().as_graph_def(), - input_saver_def=input_saver_def, - input_checkpoint=checkpoint_to_use, - output_node_names=output_node_names, - restore_op_name='save/restore_all', - filename_tensor_name='save/Const:0', - clear_devices=True, - optimize_graph=optimize_graph, - initializer_nodes='') - _write_frozen_graph(frozen_graph_path, frozen_graph_def) - _write_saved_model(saved_model_path, frozen_graph_def, - placeholder_tensor, outputs) - - -def export_inference_graph(input_type, - pipeline_config, - trained_checkpoint_prefix, - output_directory, - input_shape=None, - optimize_graph=True, - output_collection_name='inference_op', - additional_output_tensor_names=None): - """Exports inference graph for the model specified in the pipeline config. - - Args: - input_type: Type of input for the graph. Can be one of [`image_tensor`, - `tf_example`]. - pipeline_config: pipeline_pb2.TrainAndEvalPipelineConfig proto. - trained_checkpoint_prefix: Path to the trained checkpoint file. - output_directory: Path to write outputs. - input_shape: Sets a fixed shape for an `image_tensor` input. 
If not - specified, will default to [None, None, None, 3]. - optimize_graph: Whether to optimize graph using Grappler. - output_collection_name: Name of collection to add output tensors to. - If None, does not add output tensors to a collection. - additional_output_tensor_names: list of additional output - tensors to include in the frozen graph. - """ - detection_model = model_builder.build(pipeline_config.model, - is_training=False) - _export_inference_graph(input_type, detection_model, - pipeline_config.eval_config.use_moving_averages, - trained_checkpoint_prefix, - output_directory, additional_output_tensor_names, - input_shape, optimize_graph, output_collection_name) diff --git a/object_detection/exporter_test.py b/object_detection/exporter_test.py deleted file mode 100644 index 0a999005..00000000 --- a/object_detection/exporter_test.py +++ /dev/null @@ -1,604 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for object_detection.export_inference_graph.""" -import os -import numpy as np -import six -import tensorflow as tf -from object_detection import exporter -from object_detection.builders import model_builder -from object_detection.core import model -from object_detection.protos import pipeline_pb2 - -if six.PY2: - import mock # pylint: disable=g-import-not-at-top -else: - from unittest import mock # pylint: disable=g-import-not-at-top - -slim = tf.contrib.slim - - -class FakeModel(model.DetectionModel): - - def __init__(self, add_detection_masks=False): - self._add_detection_masks = add_detection_masks - - def preprocess(self, inputs): - return tf.identity(inputs) - - def predict(self, preprocessed_inputs): - return {'image': tf.layers.conv2d(preprocessed_inputs, 3, 1)} - - def postprocess(self, prediction_dict): - with tf.control_dependencies(prediction_dict.values()): - postprocessed_tensors = { - 'detection_boxes': tf.constant([[[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 0.8, 0.8]], - [[0.5, 0.5, 1.0, 1.0], - [0.0, 0.0, 0.0, 0.0]]], tf.float32), - 'detection_scores': tf.constant([[0.7, 0.6], - [0.9, 0.0]], tf.float32), - 'detection_classes': tf.constant([[0, 1], - [1, 0]], tf.float32), - 'num_detections': tf.constant([2, 1], tf.float32) - } - if self._add_detection_masks: - postprocessed_tensors['detection_masks'] = tf.constant( - np.arange(64).reshape([2, 2, 4, 4]), tf.float32) - return postprocessed_tensors - - def restore_map(self, checkpoint_path, from_detection_checkpoint): - pass - - def loss(self, prediction_dict): - pass - - -class ExportInferenceGraphTest(tf.test.TestCase): - - def _save_checkpoint_from_mock_model(self, checkpoint_path, - use_moving_averages): - g = tf.Graph() - with g.as_default(): - mock_model = FakeModel() - preprocessed_inputs = mock_model.preprocess( - tf.placeholder(tf.float32, shape=[None, None, None, 3])) - predictions = 
mock_model.predict(preprocessed_inputs) - mock_model.postprocess(predictions) - if use_moving_averages: - tf.train.ExponentialMovingAverage(0.0).apply() - slim.get_or_create_global_step() - saver = tf.train.Saver() - init = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init) - saver.save(sess, checkpoint_path) - - def _load_inference_graph(self, inference_graph_path): - od_graph = tf.Graph() - with od_graph.as_default(): - od_graph_def = tf.GraphDef() - with tf.gfile.GFile(inference_graph_path) as fid: - serialized_graph = fid.read() - od_graph_def.ParseFromString(serialized_graph) - tf.import_graph_def(od_graph_def, name='') - return od_graph - - def _create_tf_example(self, image_array): - with self.test_session(): - encoded_image = tf.image.encode_jpeg(tf.constant(image_array)).eval() - def _bytes_feature(value): - return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': _bytes_feature(encoded_image), - 'image/format': _bytes_feature('jpg'), - 'image/source_id': _bytes_feature('image_id') - })).SerializeToString() - return example - - def test_export_graph_with_image_tensor_input(self): - tmp_dir = self.get_temp_dir() - trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') - self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, - use_moving_averages=False) - with mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - mock_builder.return_value = FakeModel() - output_directory = os.path.join(tmp_dir, 'output') - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - pipeline_config.eval_config.use_moving_averages = False - exporter.export_inference_graph( - input_type='image_tensor', - pipeline_config=pipeline_config, - trained_checkpoint_prefix=trained_checkpoint_prefix, - output_directory=output_directory) - self.assertTrue(os.path.exists(os.path.join( - output_directory, 
'saved_model', 'saved_model.pb'))) - - def test_export_graph_with_fixed_size_image_tensor_input(self): - input_shape = [1, 320, 320, 3] - - tmp_dir = self.get_temp_dir() - trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') - self._save_checkpoint_from_mock_model( - trained_checkpoint_prefix, use_moving_averages=False) - with mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - mock_builder.return_value = FakeModel() - output_directory = os.path.join(tmp_dir, 'output') - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - pipeline_config.eval_config.use_moving_averages = False - exporter.export_inference_graph( - input_type='image_tensor', - pipeline_config=pipeline_config, - trained_checkpoint_prefix=trained_checkpoint_prefix, - output_directory=output_directory, - input_shape=input_shape) - saved_model_path = os.path.join(output_directory, 'saved_model') - self.assertTrue( - os.path.exists(os.path.join(saved_model_path, 'saved_model.pb'))) - - with tf.Graph().as_default() as od_graph: - with self.test_session(graph=od_graph) as sess: - meta_graph = tf.saved_model.loader.load( - sess, [tf.saved_model.tag_constants.SERVING], saved_model_path) - signature = meta_graph.signature_def['serving_default'] - input_tensor_name = signature.inputs['inputs'].name - image_tensor = od_graph.get_tensor_by_name(input_tensor_name) - self.assertSequenceEqual(image_tensor.get_shape().as_list(), - input_shape) - - def test_export_graph_with_tf_example_input(self): - tmp_dir = self.get_temp_dir() - trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') - self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, - use_moving_averages=False) - with mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - mock_builder.return_value = FakeModel() - output_directory = os.path.join(tmp_dir, 'output') - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - pipeline_config.eval_config.use_moving_averages = 
False - exporter.export_inference_graph( - input_type='tf_example', - pipeline_config=pipeline_config, - trained_checkpoint_prefix=trained_checkpoint_prefix, - output_directory=output_directory) - self.assertTrue(os.path.exists(os.path.join( - output_directory, 'saved_model', 'saved_model.pb'))) - - def test_export_graph_with_encoded_image_string_input(self): - tmp_dir = self.get_temp_dir() - trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') - self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, - use_moving_averages=False) - with mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - mock_builder.return_value = FakeModel() - output_directory = os.path.join(tmp_dir, 'output') - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - pipeline_config.eval_config.use_moving_averages = False - exporter.export_inference_graph( - input_type='encoded_image_string_tensor', - pipeline_config=pipeline_config, - trained_checkpoint_prefix=trained_checkpoint_prefix, - output_directory=output_directory) - self.assertTrue(os.path.exists(os.path.join( - output_directory, 'saved_model', 'saved_model.pb'))) - - def _get_variables_in_checkpoint(self, checkpoint_file): - return set([ - var_name - for var_name, _ in tf.train.list_variables(checkpoint_file)]) - - def test_replace_variable_values_with_moving_averages(self): - tmp_dir = self.get_temp_dir() - trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') - new_checkpoint_prefix = os.path.join(tmp_dir, 'new.ckpt') - self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, - use_moving_averages=True) - graph = tf.Graph() - with graph.as_default(): - fake_model = FakeModel() - preprocessed_inputs = fake_model.preprocess( - tf.placeholder(dtype=tf.float32, shape=[None, None, None, 3])) - predictions = fake_model.predict(preprocessed_inputs) - fake_model.postprocess(predictions) - exporter.replace_variable_values_with_moving_averages( - graph, 
trained_checkpoint_prefix, new_checkpoint_prefix) - - expected_variables = set(['conv2d/bias', 'conv2d/kernel']) - variables_in_old_ckpt = self._get_variables_in_checkpoint( - trained_checkpoint_prefix) - self.assertIn('conv2d/bias/ExponentialMovingAverage', - variables_in_old_ckpt) - self.assertIn('conv2d/kernel/ExponentialMovingAverage', - variables_in_old_ckpt) - variables_in_new_ckpt = self._get_variables_in_checkpoint( - new_checkpoint_prefix) - self.assertTrue(expected_variables.issubset(variables_in_new_ckpt)) - self.assertNotIn('conv2d/bias/ExponentialMovingAverage', - variables_in_new_ckpt) - self.assertNotIn('conv2d/kernel/ExponentialMovingAverage', - variables_in_new_ckpt) - - def test_export_graph_with_moving_averages(self): - tmp_dir = self.get_temp_dir() - trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') - self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, - use_moving_averages=True) - output_directory = os.path.join(tmp_dir, 'output') - with mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - mock_builder.return_value = FakeModel() - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - pipeline_config.eval_config.use_moving_averages = True - exporter.export_inference_graph( - input_type='image_tensor', - pipeline_config=pipeline_config, - trained_checkpoint_prefix=trained_checkpoint_prefix, - output_directory=output_directory) - self.assertTrue(os.path.exists(os.path.join( - output_directory, 'saved_model', 'saved_model.pb'))) - expected_variables = set(['conv2d/bias', 'conv2d/kernel', 'global_step']) - actual_variables = set( - [var_name for var_name, _ in tf.train.list_variables(output_directory)]) - self.assertTrue(expected_variables.issubset(actual_variables)) - - def test_export_model_with_all_output_nodes(self): - tmp_dir = self.get_temp_dir() - trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') - self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, - 
use_moving_averages=True) - output_directory = os.path.join(tmp_dir, 'output') - inference_graph_path = os.path.join(output_directory, - 'frozen_inference_graph.pb') - with mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - mock_builder.return_value = FakeModel(add_detection_masks=True) - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - exporter.export_inference_graph( - input_type='image_tensor', - pipeline_config=pipeline_config, - trained_checkpoint_prefix=trained_checkpoint_prefix, - output_directory=output_directory) - inference_graph = self._load_inference_graph(inference_graph_path) - with self.test_session(graph=inference_graph): - inference_graph.get_tensor_by_name('image_tensor:0') - inference_graph.get_tensor_by_name('detection_boxes:0') - inference_graph.get_tensor_by_name('detection_scores:0') - inference_graph.get_tensor_by_name('detection_classes:0') - inference_graph.get_tensor_by_name('detection_masks:0') - inference_graph.get_tensor_by_name('num_detections:0') - - def test_export_model_with_detection_only_nodes(self): - tmp_dir = self.get_temp_dir() - trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') - self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, - use_moving_averages=True) - output_directory = os.path.join(tmp_dir, 'output') - inference_graph_path = os.path.join(output_directory, - 'frozen_inference_graph.pb') - with mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - mock_builder.return_value = FakeModel(add_detection_masks=False) - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - exporter.export_inference_graph( - input_type='image_tensor', - pipeline_config=pipeline_config, - trained_checkpoint_prefix=trained_checkpoint_prefix, - output_directory=output_directory) - inference_graph = self._load_inference_graph(inference_graph_path) - with self.test_session(graph=inference_graph): - inference_graph.get_tensor_by_name('image_tensor:0') - 
inference_graph.get_tensor_by_name('detection_boxes:0') - inference_graph.get_tensor_by_name('detection_scores:0') - inference_graph.get_tensor_by_name('detection_classes:0') - inference_graph.get_tensor_by_name('num_detections:0') - with self.assertRaises(KeyError): - inference_graph.get_tensor_by_name('detection_masks:0') - - def test_export_and_run_inference_with_image_tensor(self): - tmp_dir = self.get_temp_dir() - trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') - self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, - use_moving_averages=True) - output_directory = os.path.join(tmp_dir, 'output') - inference_graph_path = os.path.join(output_directory, - 'frozen_inference_graph.pb') - with mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - mock_builder.return_value = FakeModel(add_detection_masks=True) - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - pipeline_config.eval_config.use_moving_averages = False - exporter.export_inference_graph( - input_type='image_tensor', - pipeline_config=pipeline_config, - trained_checkpoint_prefix=trained_checkpoint_prefix, - output_directory=output_directory) - - inference_graph = self._load_inference_graph(inference_graph_path) - with self.test_session(graph=inference_graph) as sess: - image_tensor = inference_graph.get_tensor_by_name('image_tensor:0') - boxes = inference_graph.get_tensor_by_name('detection_boxes:0') - scores = inference_graph.get_tensor_by_name('detection_scores:0') - classes = inference_graph.get_tensor_by_name('detection_classes:0') - masks = inference_graph.get_tensor_by_name('detection_masks:0') - num_detections = inference_graph.get_tensor_by_name('num_detections:0') - (boxes_np, scores_np, classes_np, masks_np, num_detections_np) = sess.run( - [boxes, scores, classes, masks, num_detections], - feed_dict={image_tensor: np.ones((2, 4, 4, 3)).astype(np.uint8)}) - self.assertAllClose(boxes_np, [[[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 0.8, 0.8]], 
- [[0.5, 0.5, 1.0, 1.0], - [0.0, 0.0, 0.0, 0.0]]]) - self.assertAllClose(scores_np, [[0.7, 0.6], - [0.9, 0.0]]) - self.assertAllClose(classes_np, [[1, 2], - [2, 1]]) - self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4])) - self.assertAllClose(num_detections_np, [2, 1]) - - def _create_encoded_image_string(self, image_array_np, encoding_format): - od_graph = tf.Graph() - with od_graph.as_default(): - if encoding_format == 'jpg': - encoded_string = tf.image.encode_jpeg(image_array_np) - elif encoding_format == 'png': - encoded_string = tf.image.encode_png(image_array_np) - else: - raise ValueError('Supports only the following formats: `jpg`, `png`') - with self.test_session(graph=od_graph): - return encoded_string.eval() - - def test_export_and_run_inference_with_encoded_image_string_tensor(self): - tmp_dir = self.get_temp_dir() - trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') - self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, - use_moving_averages=True) - output_directory = os.path.join(tmp_dir, 'output') - inference_graph_path = os.path.join(output_directory, - 'frozen_inference_graph.pb') - with mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - mock_builder.return_value = FakeModel(add_detection_masks=True) - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - pipeline_config.eval_config.use_moving_averages = False - exporter.export_inference_graph( - input_type='encoded_image_string_tensor', - pipeline_config=pipeline_config, - trained_checkpoint_prefix=trained_checkpoint_prefix, - output_directory=output_directory) - - inference_graph = self._load_inference_graph(inference_graph_path) - jpg_image_str = self._create_encoded_image_string( - np.ones((4, 4, 3)).astype(np.uint8), 'jpg') - png_image_str = self._create_encoded_image_string( - np.ones((4, 4, 3)).astype(np.uint8), 'png') - with self.test_session(graph=inference_graph) as sess: - image_str_tensor = 
inference_graph.get_tensor_by_name( - 'encoded_image_string_tensor:0') - boxes = inference_graph.get_tensor_by_name('detection_boxes:0') - scores = inference_graph.get_tensor_by_name('detection_scores:0') - classes = inference_graph.get_tensor_by_name('detection_classes:0') - masks = inference_graph.get_tensor_by_name('detection_masks:0') - num_detections = inference_graph.get_tensor_by_name('num_detections:0') - for image_str in [jpg_image_str, png_image_str]: - image_str_batch_np = np.hstack([image_str]* 2) - (boxes_np, scores_np, classes_np, masks_np, - num_detections_np) = sess.run( - [boxes, scores, classes, masks, num_detections], - feed_dict={image_str_tensor: image_str_batch_np}) - self.assertAllClose(boxes_np, [[[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 0.8, 0.8]], - [[0.5, 0.5, 1.0, 1.0], - [0.0, 0.0, 0.0, 0.0]]]) - self.assertAllClose(scores_np, [[0.7, 0.6], - [0.9, 0.0]]) - self.assertAllClose(classes_np, [[1, 2], - [2, 1]]) - self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4])) - self.assertAllClose(num_detections_np, [2, 1]) - - def test_raise_runtime_error_on_images_with_different_sizes(self): - tmp_dir = self.get_temp_dir() - trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') - self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, - use_moving_averages=True) - output_directory = os.path.join(tmp_dir, 'output') - inference_graph_path = os.path.join(output_directory, - 'frozen_inference_graph.pb') - with mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - mock_builder.return_value = FakeModel(add_detection_masks=True) - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - pipeline_config.eval_config.use_moving_averages = False - exporter.export_inference_graph( - input_type='encoded_image_string_tensor', - pipeline_config=pipeline_config, - trained_checkpoint_prefix=trained_checkpoint_prefix, - output_directory=output_directory) - - inference_graph = 
self._load_inference_graph(inference_graph_path) - large_image = self._create_encoded_image_string( - np.ones((4, 4, 3)).astype(np.uint8), 'jpg') - small_image = self._create_encoded_image_string( - np.ones((2, 2, 3)).astype(np.uint8), 'jpg') - - image_str_batch_np = np.hstack([large_image, small_image]) - with self.test_session(graph=inference_graph) as sess: - image_str_tensor = inference_graph.get_tensor_by_name( - 'encoded_image_string_tensor:0') - boxes = inference_graph.get_tensor_by_name('detection_boxes:0') - scores = inference_graph.get_tensor_by_name('detection_scores:0') - classes = inference_graph.get_tensor_by_name('detection_classes:0') - masks = inference_graph.get_tensor_by_name('detection_masks:0') - num_detections = inference_graph.get_tensor_by_name('num_detections:0') - with self.assertRaisesRegexp(tf.errors.InvalidArgumentError, - '^TensorArray has inconsistent shapes.'): - sess.run([boxes, scores, classes, masks, num_detections], - feed_dict={image_str_tensor: image_str_batch_np}) - - def test_export_and_run_inference_with_tf_example(self): - tmp_dir = self.get_temp_dir() - trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') - self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, - use_moving_averages=True) - output_directory = os.path.join(tmp_dir, 'output') - inference_graph_path = os.path.join(output_directory, - 'frozen_inference_graph.pb') - with mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - mock_builder.return_value = FakeModel(add_detection_masks=True) - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - pipeline_config.eval_config.use_moving_averages = False - exporter.export_inference_graph( - input_type='tf_example', - pipeline_config=pipeline_config, - trained_checkpoint_prefix=trained_checkpoint_prefix, - output_directory=output_directory) - - inference_graph = self._load_inference_graph(inference_graph_path) - tf_example_np = np.expand_dims(self._create_tf_example( - 
np.ones((4, 4, 3)).astype(np.uint8)), axis=0) - with self.test_session(graph=inference_graph) as sess: - tf_example = inference_graph.get_tensor_by_name('tf_example:0') - boxes = inference_graph.get_tensor_by_name('detection_boxes:0') - scores = inference_graph.get_tensor_by_name('detection_scores:0') - classes = inference_graph.get_tensor_by_name('detection_classes:0') - masks = inference_graph.get_tensor_by_name('detection_masks:0') - num_detections = inference_graph.get_tensor_by_name('num_detections:0') - (boxes_np, scores_np, classes_np, masks_np, num_detections_np) = sess.run( - [boxes, scores, classes, masks, num_detections], - feed_dict={tf_example: tf_example_np}) - self.assertAllClose(boxes_np, [[[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 0.8, 0.8]], - [[0.5, 0.5, 1.0, 1.0], - [0.0, 0.0, 0.0, 0.0]]]) - self.assertAllClose(scores_np, [[0.7, 0.6], - [0.9, 0.0]]) - self.assertAllClose(classes_np, [[1, 2], - [2, 1]]) - self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4])) - self.assertAllClose(num_detections_np, [2, 1]) - - def test_export_saved_model_and_run_inference(self): - tmp_dir = self.get_temp_dir() - trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') - self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, - use_moving_averages=False) - output_directory = os.path.join(tmp_dir, 'output') - saved_model_path = os.path.join(output_directory, 'saved_model') - - with mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - mock_builder.return_value = FakeModel(add_detection_masks=True) - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - pipeline_config.eval_config.use_moving_averages = False - exporter.export_inference_graph( - input_type='tf_example', - pipeline_config=pipeline_config, - trained_checkpoint_prefix=trained_checkpoint_prefix, - output_directory=output_directory) - - tf_example_np = np.hstack([self._create_tf_example( - np.ones((4, 4, 3)).astype(np.uint8))] * 2) - with 
tf.Graph().as_default() as od_graph: - with self.test_session(graph=od_graph) as sess: - meta_graph = tf.saved_model.loader.load( - sess, [tf.saved_model.tag_constants.SERVING], saved_model_path) - - signature = meta_graph.signature_def['serving_default'] - input_tensor_name = signature.inputs['inputs'].name - tf_example = od_graph.get_tensor_by_name(input_tensor_name) - - boxes = od_graph.get_tensor_by_name( - signature.outputs['detection_boxes'].name) - scores = od_graph.get_tensor_by_name( - signature.outputs['detection_scores'].name) - classes = od_graph.get_tensor_by_name( - signature.outputs['detection_classes'].name) - masks = od_graph.get_tensor_by_name( - signature.outputs['detection_masks'].name) - num_detections = od_graph.get_tensor_by_name( - signature.outputs['num_detections'].name) - - (boxes_np, scores_np, classes_np, masks_np, - num_detections_np) = sess.run( - [boxes, scores, classes, masks, num_detections], - feed_dict={tf_example: tf_example_np}) - self.assertAllClose(boxes_np, [[[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 0.8, 0.8]], - [[0.5, 0.5, 1.0, 1.0], - [0.0, 0.0, 0.0, 0.0]]]) - self.assertAllClose(scores_np, [[0.7, 0.6], - [0.9, 0.0]]) - self.assertAllClose(classes_np, [[1, 2], - [2, 1]]) - self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4])) - self.assertAllClose(num_detections_np, [2, 1]) - - def test_export_checkpoint_and_run_inference(self): - tmp_dir = self.get_temp_dir() - trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt') - self._save_checkpoint_from_mock_model(trained_checkpoint_prefix, - use_moving_averages=False) - output_directory = os.path.join(tmp_dir, 'output') - model_path = os.path.join(output_directory, 'model.ckpt') - meta_graph_path = model_path + '.meta' - - with mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - mock_builder.return_value = FakeModel(add_detection_masks=True) - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - 
pipeline_config.eval_config.use_moving_averages = False - exporter.export_inference_graph( - input_type='tf_example', - pipeline_config=pipeline_config, - trained_checkpoint_prefix=trained_checkpoint_prefix, - output_directory=output_directory) - - tf_example_np = np.hstack([self._create_tf_example( - np.ones((4, 4, 3)).astype(np.uint8))] * 2) - with tf.Graph().as_default() as od_graph: - with self.test_session(graph=od_graph) as sess: - new_saver = tf.train.import_meta_graph(meta_graph_path) - new_saver.restore(sess, model_path) - - tf_example = od_graph.get_tensor_by_name('tf_example:0') - boxes = od_graph.get_tensor_by_name('detection_boxes:0') - scores = od_graph.get_tensor_by_name('detection_scores:0') - classes = od_graph.get_tensor_by_name('detection_classes:0') - masks = od_graph.get_tensor_by_name('detection_masks:0') - num_detections = od_graph.get_tensor_by_name('num_detections:0') - (boxes_np, scores_np, classes_np, masks_np, - num_detections_np) = sess.run( - [boxes, scores, classes, masks, num_detections], - feed_dict={tf_example: tf_example_np}) - self.assertAllClose(boxes_np, [[[0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 0.8, 0.8]], - [[0.5, 0.5, 1.0, 1.0], - [0.0, 0.0, 0.0, 0.0]]]) - self.assertAllClose(scores_np, [[0.7, 0.6], - [0.9, 0.0]]) - self.assertAllClose(classes_np, [[1, 2], - [2, 1]]) - self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4])) - self.assertAllClose(num_detections_np, [2, 1]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/faster_rcnn_inception_resnet_v2_atrous_oid.config b/object_detection/faster_rcnn_inception_resnet_v2_atrous_oid.config deleted file mode 100644 index 6f9a275e..00000000 --- a/object_detection/faster_rcnn_inception_resnet_v2_atrous_oid.config +++ /dev/null @@ -1,146 +0,0 @@ -# Faster R-CNN with Inception Resnet v2, Atrous version; -# Configured for Open Images Dataset. 
-# Users should configure the fine_tune_checkpoint field in the train config as -# well as the label_map_path and input_path fields in the train_input_reader and -# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that -# should be configured. - -model { - faster_rcnn { - num_classes: 546 - image_resizer { - keep_aspect_ratio_resizer { - min_dimension: 600 - max_dimension: 1024 - } - } - feature_extractor { - type: 'faster_rcnn_inception_resnet_v2' - first_stage_features_stride: 8 - } - first_stage_anchor_generator { - grid_anchor_generator { - scales: [0.25, 0.5, 1.0, 2.0] - aspect_ratios: [0.5, 1.0, 2.0] - height_stride: 8 - width_stride: 8 - } - } - first_stage_atrous_rate: 2 - first_stage_box_predictor_conv_hyperparams { - op: CONV - regularizer { - l2_regularizer { - weight: 0.0 - } - } - initializer { - truncated_normal_initializer { - stddev: 0.01 - } - } - } - first_stage_nms_score_threshold: 0.0 - first_stage_nms_iou_threshold: 0.7 - first_stage_max_proposals: 300 - first_stage_localization_loss_weight: 2.0 - first_stage_objectness_loss_weight: 1.0 - initial_crop_size: 17 - maxpool_kernel_size: 1 - maxpool_stride: 1 - second_stage_box_predictor { - mask_rcnn_box_predictor { - use_dropout: false - dropout_keep_probability: 1.0 - fc_hyperparams { - op: FC - regularizer { - l2_regularizer { - weight: 0.0 - } - } - initializer { - variance_scaling_initializer { - factor: 1.0 - uniform: true - mode: FAN_AVG - } - } - } - } - } - second_stage_post_processing { - batch_non_max_suppression { - score_threshold: 0.0 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 100 - } - score_converter: SOFTMAX - } - second_stage_localization_loss_weight: 2.0 - second_stage_classification_loss_weight: 1.0 - } -} - -train_config: { - batch_size: 1 - optimizer { - momentum_optimizer: { - learning_rate: { - manual_step_learning_rate { - initial_learning_rate: 0.00006 - schedule { - step: 0 - learning_rate: .00006 - } - schedule { 
- step: 6000000 - learning_rate: .000006 - } - schedule { - step: 7000000 - learning_rate: .0000006 - } - } - } - momentum_optimizer_value: 0.9 - } - use_moving_average: false - } - gradient_clipping_by_norm: 10.0 - fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt" - # Note: The below line limits the training process to 800K steps, which we - # empirically found to be sufficient enough to train the Open Images dataset. - # This effectively bypasses the learning rate schedule (the learning rate will - # never decay). Remove the below line to train indefinitely. - num_steps: 8000000 - data_augmentation_options { - random_horizontal_flip { - } - } -} - -train_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/oid_bbox_trainable_train.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/oid_bbox_trainable_label_map.pbtxt" -} - -eval_config: { - metrics_set: "open_images_metrics" - num_examples: 8000 - # Note: The below line limits the evaluation process to 10 evaluations. - # Remove the below line to evaluate indefinitely. - max_evals: 10 -} - -eval_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/oid_bbox_trainable_val.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/oid_bbox_trainable_label_map.pbtxt" - shuffle: false - num_readers: 1 -} diff --git a/object_detection/g3doc/configuring_jobs.md b/object_detection/g3doc/configuring_jobs.md deleted file mode 100644 index 78f77bc4..00000000 --- a/object_detection/g3doc/configuring_jobs.md +++ /dev/null @@ -1,162 +0,0 @@ -# Configuring the Object Detection Training Pipeline - -## Overview - -The Tensorflow Object Detection API uses protobuf files to configure the -training and evaluation process. The schema for the training pipeline can be -found in object_detection/protos/pipeline.proto. At a high level, the config -file is split into 5 parts: - -1. The `model` configuration. This defines what type of model will be trained -(ie. 
meta-architecture, feature extractor). -2. The `train_config`, which decides what parameters should be used to train -model parameters (ie. SGD parameters, input preprocessing and feature extractor -initialization values). -3. The `eval_config`, which determines what set of metrics will be reported for -evaluation (currently we only support the PASCAL VOC metrics). -4. The `train_input_config`, which defines what dataset the model should be -trained on. -5. The `eval_input_config`, which defines what dataset the model will be -evaluated on. Typically this should be different than the training input -dataset. - -A skeleton configuration file is shown below: - -``` -model { -(... Add model config here...) -} - -train_config : { -(... Add train_config here...) -} - -train_input_reader: { -(... Add train_input configuration here...) -} - -eval_config: { -} - -eval_input_reader: { -(... Add eval_input configuration here...) -} -``` - -## Picking Model Parameters - -There are a large number of model parameters to configure. The best settings -will depend on your given application. Faster R-CNN models are better suited to -cases where high accuracy is desired and latency is of lower priority. -Conversely, if processing time is the most important factor, SSD models are -recommended. Read [our paper](https://arxiv.org/abs/1611.10012) for a more -detailed discussion on the speed vs accuracy tradeoff. - -To help new users get started, sample model configurations have been provided -in the object_detection/samples/model_configs folder. The contents of these -configuration files can be pasted into `model` field of the skeleton -configuration. Users should note that the `num_classes` field should be changed -to a value suited for the dataset the user is training on. - -## Defining Inputs - -The Tensorflow Object Detection API accepts inputs in the TFRecord file format. -Users must specify the locations of both the training and evaluation files. 
-Additionally, users should also specify a label map, which define the mapping -between a class id and class name. The label map should be identical between -training and evaluation datasets. - -An example input configuration looks as follows: - -``` -tf_record_input_reader { - input_path: "/usr/home/username/data/train.record" -} -label_map_path: "/usr/home/username/data/label_map.pbtxt" -``` - -Users should substitute the `input_path` and `label_map_path` arguments and -insert the input configuration into the `train_input_reader` and -`eval_input_reader` fields in the skeleton configuration. Note that the paths -can also point to Google Cloud Storage buckets (ie. -"gs://project_bucket/train.record") for use on Google Cloud. - -## Configuring the Trainer - -The `train_config` defines parts of the training process: - -1. Model parameter initialization. -2. Input preprocessing. -3. SGD parameters. - -A sample `train_config` is below: - -``` -batch_size: 1 -optimizer { - momentum_optimizer: { - learning_rate: { - manual_step_learning_rate { - initial_learning_rate: 0.0002 - schedule { - step: 0 - learning_rate: .0002 - } - schedule { - step: 900000 - learning_rate: .00002 - } - schedule { - step: 1200000 - learning_rate: .000002 - } - } - } - momentum_optimizer_value: 0.9 - } - use_moving_average: false -} -fine_tune_checkpoint: "/usr/home/username/tmp/model.ckpt-#####" -from_detection_checkpoint: true -gradient_clipping_by_norm: 10.0 -data_augmentation_options { - random_horizontal_flip { - } -} -``` - -### Model Parameter Initialization - -While optional, it is highly recommended that users utilize other object -detection checkpoints. Training an object detector from scratch can take days. -To speed up the training process, it is recommended that users re-use the -feature extractor parameters from a pre-existing object classification or -detection checkpoint. 
`train_config` provides two fields to specify -pre-existing checkpoints: `fine_tune_checkpoint` and -`from_detection_checkpoint`. `fine_tune_checkpoint` should provide a path to -the pre-existing checkpoint -(ie:"/usr/home/username/checkpoint/model.ckpt-#####"). -`from_detection_checkpoint` is a boolean value. If false, it assumes the -checkpoint was from an object classification checkpoint. Note that starting -from a detection checkpoint will usually result in a faster training job than -a classification checkpoint. - -The list of provided checkpoints can be found [here](detection_model_zoo.md). - -### Input Preprocessing - -The `data_augmentation_options` in `train_config` can be used to specify -how training data can be modified. This field is optional. - -### SGD Parameters - -The remainings parameters in `train_config` are hyperparameters for gradient -descent. Please note that the optimal learning rates provided in these -configuration files may depend on the specifics of the training setup (e.g. -number of workers, gpu type). - -## Configuring the Evaluator - -Currently evaluation is fixed to generating metrics as defined by the PASCAL VOC -challenge. The parameters for `eval_config` are set to reasonable defaults and -typically do not need to be configured. diff --git a/object_detection/g3doc/defining_your_own_model.md b/object_detection/g3doc/defining_your_own_model.md deleted file mode 100644 index 865f6af1..00000000 --- a/object_detection/g3doc/defining_your_own_model.md +++ /dev/null @@ -1,137 +0,0 @@ -# So you want to create a new model! - -In this section, we discuss some of the abstractions that we use -for defining detection models. If you would like to define a new model -architecture for detection and use it in the Tensorflow Detection API, -then this section should also serve as a high level guide to the files that you -will need to edit to get your new model working. 
- -## DetectionModels (`object_detection/core/model.py`) - -In order to be trained, evaluated, and exported for serving using our -provided binaries, all models under the Tensorflow Object Detection API must -implement the `DetectionModel` interface (see the full definition in `object_detection/core/model.py`). In particular, -each of these models are responsible for implementing 5 functions: - -* `preprocess`: Run any preprocessing (e.g., scaling/shifting/reshaping) of - input values that is necessary prior to running the detector on an input - image. -* `predict`: Produce “raw” prediction tensors that can be passed to loss or - postprocess functions. -* `postprocess`: Convert predicted output tensors to final detections. -* `loss`: Compute scalar loss tensors with respect to provided groundtruth. -* `restore`: Load a checkpoint into the Tensorflow graph. - -Given a `DetectionModel` at training time, we pass each image batch through -the following sequence of functions to compute a loss which can be optimized via -SGD: - -``` -inputs (images tensor) -> preprocess -> predict -> loss -> outputs (loss tensor) -``` - -And at eval time, we pass each image batch through the following sequence of -functions to produce a set of detections: - -``` -inputs (images tensor) -> preprocess -> predict -> postprocess -> - outputs (boxes tensor, scores tensor, classes tensor, num_detections tensor) -``` - -Some conventions to be aware of: - -* `DetectionModel`s should make no assumptions about the input size or aspect - ratio --- they are responsible for doing any resize/reshaping necessary - (see docstring for the `preprocess` function). -* Output classes are always integers in the range `[0, num_classes)`. - Any mapping of these integers to semantic labels is to be handled outside - of this class. 
We never explicitly emit a “background class” --- thus 0 is - the first non-background class and any logic of predicting and removing - implicit background classes must be handled internally by the implementation. -* Detected boxes are to be interpreted as being in - `[y_min, x_min, y_max, x_max]` format and normalized relative to the - image window. -* We do not specifically assume any kind of probabilistic interpretation of the - scores --- the only important thing is their relative ordering. Thus - implementations of the postprocess function are free to output logits, - probabilities, calibrated probabilities, or anything else. - -## Defining a new Faster R-CNN or SSD Feature Extractor - -In most cases, you probably will not implement a `DetectionModel` from scratch ---- instead you might create a new feature extractor to be used by one of the -SSD or Faster R-CNN meta-architectures. (We think of meta-architectures as -classes that define entire families of models using the `DetectionModel` -abstraction). - -Note: For the following discussion to make sense, we recommend first becoming -familiar with the [Faster R-CNN](https://arxiv.org/abs/1506.01497) paper. - -Let’s now imagine that you have invented a brand new network architecture -(say, “InceptionV100”) for classification and want to see how InceptionV100 -would behave as a feature extractor for detection (say, with Faster R-CNN). -A similar procedure would hold for SSD models, but we’ll discuss Faster R-CNN. - -To use InceptionV100, we will have to define a new -`FasterRCNNFeatureExtractor` and pass it to our `FasterRCNNMetaArch` -constructor as input. See -`object_detection/meta_architectures/faster_rcnn_meta_arch.py` for definitions -of `FasterRCNNFeatureExtractor` and `FasterRCNNMetaArch`, respectively. -A `FasterRCNNFeatureExtractor` must define a few -functions: - -* `preprocess`: Run any preprocessing of input values that is necessary prior - to running the detector on an input image. 
-* `_extract_proposal_features`: Extract first stage Region Proposal Network - (RPN) features. -* `_extract_box_classifier_features`: Extract second stage Box Classifier - features. -* `restore_from_classification_checkpoint_fn`: Load a checkpoint into the - Tensorflow graph. - -See the `object_detection/models/faster_rcnn_resnet_v1_feature_extractor.py` -definition as one example. Some remarks: - -* We typically initialize the weights of this feature extractor - using those from the - [Slim Resnet-101 classification checkpoint](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models), - and we know - that images were preprocessed when training this checkpoint - by subtracting a channel mean from each input - image. Thus, we implement the preprocess function to replicate the same - channel mean subtraction behavior. -* The “full” resnet classification network defined in slim is cut into two - parts --- all but the last “resnet block” is put into the - `_extract_proposal_features` function and the final block is separately - defined in the `_extract_box_classifier_features function`. In general, - some experimentation may be required to decide on an optimal layer at - which to “cut” your feature extractor into these two pieces for Faster R-CNN. - -## Register your model for configuration - -Assuming that your new feature extractor does not require nonstandard -configuration, you will want to ideally be able to simply change the -“feature_extractor.type” fields in your configuration protos to point to a -new feature extractor. In order for our API to know how to understand this -new type though, you will first have to register your new feature -extractor with the model builder (`object_detection/builders/model_builder.py`), -whose job is to create models from config protos.. 
- -Registration is simple --- just add a pointer to the new Feature Extractor -class that you have defined in one of the SSD or Faster R-CNN Feature -Extractor Class maps at the top of the -`object_detection/builders/model_builder.py` file. -We recommend adding a test in `object_detection/builders/model_builder_test.py` -to make sure that parsing your proto will work as expected. - -## Taking your new model for a spin - -After registration you are ready to go with your model! Some final tips: - -* To save time debugging, try running your configuration file locally first - (both training and evaluation). -* Do a sweep of learning rates to figure out which learning rate is best - for your model. -* A small but often important detail: you may find it necessary to disable - batchnorm training (that is, load the batch norm parameters from the - classification checkpoint, but do not update them during gradient descent). diff --git a/object_detection/g3doc/detection_model_zoo.md b/object_detection/g3doc/detection_model_zoo.md deleted file mode 100644 index d6d31e0d..00000000 --- a/object_detection/g3doc/detection_model_zoo.md +++ /dev/null @@ -1,101 +0,0 @@ -# Tensorflow detection model zoo - -We provide a collection of detection models pre-trained on the [COCO -dataset](http://mscoco.org), the [Kitti dataset](http://www.cvlibs.net/datasets/kitti/), and the -[Open Images dataset](https://github.com/openimages/dataset). These models can -be useful for -out-of-the-box inference if you are interested in categories already in COCO -(e.g., humans, cars, etc) or in Open Images (e.g., -surfboard, jacuzzi, etc). They are also useful for initializing your models when -training on novel datasets. 
- -In the table below, we list each such pre-trained model including: - -* a model name that corresponds to a config file that was used to train this - model in the `samples/configs` directory, -* a download link to a tar.gz file containing the pre-trained model, -* model speed --- we report running time in ms per 600x600 image (including all - pre and post-processing), but please be - aware that these timings depend highly on one's specific hardware - configuration (these timings were performed using an Nvidia - GeForce GTX TITAN X card) and should be treated more as relative timings in - many cases. -* detector performance on subset of the COCO validation set or Open Images test split as measured by the dataset-specific mAP measure. - Here, higher is better, and we only report bounding box mAP rounded to the - nearest integer. -* Output types (currently only `Boxes`) - -You can un-tar each tar.gz file via, e.g.,: - -``` -tar -xzvf ssd_mobilenet_v1_coco.tar.gz -``` - -Inside the un-tar'ed directory, you will find: - -* a graph proto (`graph.pbtxt`) -* a checkpoint - (`model.ckpt.data-00000-of-00001`, `model.ckpt.index`, `model.ckpt.meta`) -* a frozen graph proto with weights baked into the graph as constants - (`frozen_inference_graph.pb`) to be used for out of the box inference - (try this out in the Jupyter notebook!) -* a config file (`pipeline.config`) which was used to generate the graph. These - directly correspond to a config file in the - [samples/configs](https://github.com/tensorflow/models/tree/master/research/object_detection/samples/configs)) directory but often with a modified score threshold. In the case - of the heavier Faster R-CNN models, we also provide a version of the model - that uses a highly reduced number of proposals for speed. - -Some remarks on frozen inference graphs: - -* If you try to evaluate the frozen graph, you may find performance numbers for - some of the models to be slightly lower than what we report in the below - tables. 
This is because we discard detections with scores below a - threshold (typically 0.3) when creating the frozen graph. This corresponds - effectively to picking a point on the precision recall curve of - a detector (and discarding the part past that point), which negatively impacts - standard mAP metrics. -* Our frozen inference graphs are generated using the - [v1.4.0](https://github.com/tensorflow/tensorflow/tree/v1.4.0) - release version of Tensorflow and we do not guarantee that these will work - with other versions; this being said, each frozen inference graph can be - regenerated using your current version of Tensorflow by re-running the - [exporter](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/exporting_models.md), - pointing it at the model directory as well as the config file inside of it. - - -## COCO-trained models {#coco-models} - -| Model name | Speed (ms) | COCO mAP[^1] | Outputs | -| ------------ | :--------------: | :--------------: | :-------------: | -| [ssd_mobilenet_v1_coco](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2017_11_17.tar.gz) | 30 | 21 | Boxes | -| [ssd_inception_v2_coco](http://download.tensorflow.org/models/object_detection/ssd_inception_v2_coco_2017_11_17.tar.gz) | 42 | 24 | Boxes | -| [faster_rcnn_inception_v2_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_v2_coco_2017_11_08.tar.gz) | 58 | 28 | Boxes | -| [faster_rcnn_resnet50_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_coco_2017_11_08.tar.gz) | 89 | 30 | Boxes | -| [faster_rcnn_resnet50_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_lowproposals_coco_2017_11_08.tar.gz) | 64 | | Boxes | -| [rfcn_resnet101_coco](http://download.tensorflow.org/models/object_detection/rfcn_resnet101_coco_2017_11_08.tar.gz) | 92 | 30 | Boxes | -| 
[faster_rcnn_resnet101_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_coco_2017_11_08.tar.gz) | 106 | 32 | Boxes | -| [faster_rcnn_resnet101_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_lowproposals_coco_2017_11_08.tar.gz) | 82 | | Boxes | -| [faster_rcnn_inception_resnet_v2_atrous_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_coco_2017_11_08.tar.gz) | 620 | 37 | Boxes | -| [faster_rcnn_inception_resnet_v2_atrous_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_lowproposals_coco_2017_11_08.tar.gz) | 241 | | Boxes | -| [faster_rcnn_nas](http://download.tensorflow.org/models/object_detection/faster_rcnn_nas_coco_2017_11_08.tar.gz) | 1833 | 43 | Boxes | -| [faster_rcnn_nas_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_nas_lowproposals_coco_2017_11_08.tar.gz) | 540 | | Boxes | - - - -## Kitti-trained models {#kitti-models} - -Model name | Speed (ms) | Pascal mAP@0.5 (ms) | Outputs ------------------------------------------------------------------------------------------------------------------------------------------------------------------ | :---: | :-------------: | :-----: -[faster_rcnn_resnet101_kitti](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_kitti_2017_11_08.tar.gz) | 79 | 87 | Boxes - -## Open Images-trained models {#open-images-models} - -Model name | Speed (ms) | Open Images mAP@0.5[^2] | Outputs ------------------------------------------------------------------------------------------------------------------------------------------------------------------ | :---: | :-------------: | :-----: -[faster_rcnn_inception_resnet_v2_atrous_oid](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_oid_2017_11_08.tar.gz) | 727 | 37 | Boxes 
-[faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid_2017_11_08.tar.gz) | 347 | | Boxes - - -[^1]: See [MSCOCO evaluation protocol](http://cocodataset.org/#detections-eval). -[^2]: This is PASCAL mAP with a slightly different way of true positives computation: see [Open Images evaluation protocol](evaluation_protocols.md#open-images). - diff --git a/object_detection/g3doc/evaluation_protocols.md b/object_detection/g3doc/evaluation_protocols.md deleted file mode 100644 index 033a1adf..00000000 --- a/object_detection/g3doc/evaluation_protocols.md +++ /dev/null @@ -1,94 +0,0 @@ -# Supported object detection evaluation protocols - -The Tensorflow Object Detection API currently supports three evaluation protocols, -that can be configured in `EvalConfig` by setting `metrics_set` to the -corresponding value. - -## PASCAL VOC 2007 metric - -`EvalConfig.metrics_set='pascal_voc_metrics'` - -The commonly used mAP metric for evaluating the quality of object detectors, computed according to the protocol of the PASCAL VOC Challenge 2007. -The protocol is available [here](http://host.robots.ox.ac.uk/pascal/VOC/voc2007/devkit_doc_07-Jun-2007.pdf). - - -## Weighted PASCAL VOC metric - -`EvalConfig.metrics_set='weighted_pascal_voc_metrics'` - -The weighted PASCAL metric computes the mean average precision as the average -precision when treating all classes as a single class. In comparison, -PASCAL metrics computes the mean average precision as the mean of the -per-class average precisions. - -For example, the test set consists of two classes, "cat" and "dog", and there are ten times more boxes of "cat" than those of "dog". -According to PASCAL VOC 2007 metric, performance on each of the two classes would contribute equally towards the final mAP value, -while for the Weighted PASCAL VOC metric the final mAP value will be influenced by frequency of each class. 
- -## Open Images metric {#open-images} - -`EvalConfig.metrics_set='open_images_metrics'` - -This metric is defined originally for evaluating detector performance on [Open Images V2 dataset](https://github.com/openimages/dataset) -and is fairly similar to the PASCAL VOC 2007 metric mentioned above. -It computes interpolated average precision (AP) for each class and averages it among all classes (mAP). - -The difference to the PASCAL VOC 2007 metric is the following: Open Images -annotations contain `group-of` ground-truth boxes (see [Open Images data -description](https://github.com/openimages/dataset#annotations-human-bboxcsv)), -that are treated differently for the purpose of deciding whether detections are -"true positives", "ignored", "false positives". Here we define these three -cases: - -A detection is a "true positive" if there is a non-group-of ground-truth box, -such that: - -* The detection box and the ground-truth box are of the same class, and - intersection-over-union (IoU) between the detection box and the ground-truth - box is greater than the IoU threshold (default value 0.5). \ - Illustration of handling non-group-of boxes: \ - ![alt - groupof_case_eval](img/nongroupof_case_eval.png "illustration of handling non-group-of boxes: yellow box - ground truth bounding box; green box - true positive; red box - false positives."){width="500" height="270"} - - * yellow box - ground-truth box; - * green box - true positive; - * red boxes - false positives. - -* This is the highest scoring detection for this ground truth box that - satisfies the criteria above. - -A detection is "ignored" if it is not a true positive, and there is a `group-of` -ground-truth box such that: - -* The detection box and the ground-truth box are of the same class, and the - area of intersection between the detection box and the ground-truth box - divided by the area of the detection is greater than 0.5. 
This is intended - to measure whether the detection box is approximately inside the group-of - ground-truth box. \ - Illustration of handling `group-of` boxes: \ - ![alt - groupof_case_eval](img/groupof_case_eval.png "illustration of handling group-of boxes: yellow box - ground truth bounding box; grey boxes - two detections of cars, that are ignored; red box - false positive."){width="500" height="270"} - - * yellow box - ground-truth box; - * grey boxes - two detections on cars, that are ignored; - * red box - false positive. - -A detection is a "false positive" if it is neither a "true positive" nor -"ignored". - -Precision and recall are defined as: - -* Precision = number-of-true-positives/(number-of-true-positives + number-of-false-positives) -* Recall = number-of-true-positives/number-of-non-group-of-boxes - -Note that detections ignored as firing on a `group-of` ground-truth box do not -contribute to the number of true positives. - -The labels in Open Images are organized in a -[hierarchy](https://storage.googleapis.com/openimages/2017_07/bbox_labels_vis/bbox_labels_vis.html). -Ground-truth bounding-boxes are annotated with the most specific class available -in the hierarchy. For example, "car" has two children "limousine" and "van". Any -other kind of car is annotated as "car" (for example, a sedan). Given this -convention, the evaluation software treats all classes independently, ignoring -the hierarchy. To achieve high performance values, object detectors should -output bounding-boxes labelled in the same manner. diff --git a/object_detection/g3doc/exporting_models.md b/object_detection/g3doc/exporting_models.md deleted file mode 100644 index 2da97908..00000000 --- a/object_detection/g3doc/exporting_models.md +++ /dev/null @@ -1,22 +0,0 @@ -# Exporting a trained model for inference - -After your model has been trained, you should export it to a Tensorflow -graph proto. 
A checkpoint will typically consist of three files: - -* model.ckpt-${CHECKPOINT_NUMBER}.data-00000-of-00001, -* model.ckpt-${CHECKPOINT_NUMBER}.index -* model.ckpt-${CHECKPOINT_NUMBER}.meta - -After you've identified a candidate checkpoint to export, run the following -command from tensorflow/models/research/: - -``` bash -# From tensorflow/models/research/ -python object_detection/export_inference_graph.py \ - --input_type image_tensor \ - --pipeline_config_path ${PIPELINE_CONFIG_PATH} \ - --trained_checkpoint_prefix ${TRAIN_PATH} \ - --output_directory output_inference_graph.pb -``` - -Afterwards, you should see a graph named output_inference_graph.pb. diff --git a/object_detection/g3doc/img/dogs_detections_output.jpg b/object_detection/g3doc/img/dogs_detections_output.jpg deleted file mode 100644 index 9e88a701..00000000 Binary files a/object_detection/g3doc/img/dogs_detections_output.jpg and /dev/null differ diff --git a/object_detection/g3doc/img/example_cat.jpg b/object_detection/g3doc/img/example_cat.jpg deleted file mode 100644 index 74c7ef4b..00000000 Binary files a/object_detection/g3doc/img/example_cat.jpg and /dev/null differ diff --git a/object_detection/g3doc/img/groupof_case_eval.png b/object_detection/g3doc/img/groupof_case_eval.png deleted file mode 100644 index 5abc9b69..00000000 Binary files a/object_detection/g3doc/img/groupof_case_eval.png and /dev/null differ diff --git a/object_detection/g3doc/img/kites_detections_output.jpg b/object_detection/g3doc/img/kites_detections_output.jpg deleted file mode 100644 index 7c0f3364..00000000 Binary files a/object_detection/g3doc/img/kites_detections_output.jpg and /dev/null differ diff --git a/object_detection/g3doc/img/nongroupof_case_eval.png b/object_detection/g3doc/img/nongroupof_case_eval.png deleted file mode 100644 index cbb76f49..00000000 Binary files a/object_detection/g3doc/img/nongroupof_case_eval.png and /dev/null differ diff --git a/object_detection/g3doc/img/oid_bus_72e19c28aac34ed8.jpg 
b/object_detection/g3doc/img/oid_bus_72e19c28aac34ed8.jpg deleted file mode 100644 index 1e9412ad..00000000 Binary files a/object_detection/g3doc/img/oid_bus_72e19c28aac34ed8.jpg and /dev/null differ diff --git a/object_detection/g3doc/img/oid_monkey_3b4168c89cecbc5b.jpg b/object_detection/g3doc/img/oid_monkey_3b4168c89cecbc5b.jpg deleted file mode 100644 index 46b1fb28..00000000 Binary files a/object_detection/g3doc/img/oid_monkey_3b4168c89cecbc5b.jpg and /dev/null differ diff --git a/object_detection/g3doc/img/oxford_pet.png b/object_detection/g3doc/img/oxford_pet.png deleted file mode 100644 index ddac415f..00000000 Binary files a/object_detection/g3doc/img/oxford_pet.png and /dev/null differ diff --git a/object_detection/g3doc/img/tensorboard.png b/object_detection/g3doc/img/tensorboard.png deleted file mode 100644 index fbcdbeb3..00000000 Binary files a/object_detection/g3doc/img/tensorboard.png and /dev/null differ diff --git a/object_detection/g3doc/img/tensorboard2.png b/object_detection/g3doc/img/tensorboard2.png deleted file mode 100644 index 97ad22da..00000000 Binary files a/object_detection/g3doc/img/tensorboard2.png and /dev/null differ diff --git a/object_detection/g3doc/img/tf-od-api-logo.png b/object_detection/g3doc/img/tf-od-api-logo.png deleted file mode 100644 index 9fa9cc9d..00000000 Binary files a/object_detection/g3doc/img/tf-od-api-logo.png and /dev/null differ diff --git a/object_detection/g3doc/installation.md b/object_detection/g3doc/installation.md deleted file mode 100644 index 81b7503b..00000000 --- a/object_detection/g3doc/installation.md +++ /dev/null @@ -1,79 +0,0 @@ -# Installation - -## Dependencies - -Tensorflow Object Detection API depends on the following libraries: - -* Protobuf 2.6 -* Pillow 1.0 -* lxml -* tf Slim (which is included in the "tensorflow/models/research/" checkout) -* Jupyter notebook -* Matplotlib -* Tensorflow - -For detailed steps to install Tensorflow, follow the [Tensorflow installation 
-instructions](https://www.tensorflow.org/install/). A typical user can install -Tensorflow using one of the following commands: - -``` bash -# For CPU -pip install tensorflow -# For GPU -pip install tensorflow-gpu -``` - -The remaining libraries can be installed on Ubuntu 16.04 via apt-get: - -``` bash -sudo apt-get install protobuf-compiler python-pil python-lxml -sudo pip install jupyter -sudo pip install matplotlib -``` - -Alternatively, users can install dependencies using pip: - -``` bash -sudo pip install pillow -sudo pip install lxml -sudo pip install jupyter -sudo pip install matplotlib -``` - -## Protobuf Compilation - -The Tensorflow Object Detection API uses Protobufs to configure model and -training parameters. Before the framework can be used, the Protobuf libraries -must be compiled. This should be done by running the following command from -the tensorflow/models/research/ directory: - - -``` bash -# From tensorflow/models/research/ -protoc object_detection/protos/*.proto --python_out=. -``` - -## Add Libraries to PYTHONPATH - -When running locally, the tensorflow/models/research/ and slim directories -should be appended to PYTHONPATH. This can be done by running the following from -tensorflow/models/research/: - - -``` bash -# From tensorflow/models/research/ -export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim -``` - -Note: This command needs to be run from every new terminal you start. If you wish -to avoid running this manually, you can add it as a new line to the end of your -~/.bashrc file.
- -# Testing the Installation - -You can test that you have correctly installed the Tensorflow Object Detection\ -API by running the following command: - -```bash -python object_detection/builders/model_builder_test.py -``` diff --git a/object_detection/g3doc/oid_inference_and_evaluation.md b/object_detection/g3doc/oid_inference_and_evaluation.md deleted file mode 100644 index 164fdc2c..00000000 --- a/object_detection/g3doc/oid_inference_and_evaluation.md +++ /dev/null @@ -1,255 +0,0 @@ -# Inference and evaluation on the Open Images dataset - -This page presents a tutorial for running object detector inference and -evaluation measure computations on the [Open Images -dataset](https://github.com/openimages/dataset), using tools from the -[TensorFlow Object Detection -API](https://github.com/tensorflow/models/tree/master/research/object_detection). -It shows how to download the images and annotations for the validation and test -sets of Open Images; how to package the downloaded data in a format understood -by the Object Detection API; where to find a trained object detector model for -Open Images; how to run inference; and how to compute evaluation measures on the -inferred detections. - -Inferred detections will look like the following: - -![](img/oid_bus_72e19c28aac34ed8.jpg){height="300"} -![](img/oid_monkey_3b4168c89cecbc5b.jpg){height="300"} - -On the validation set of Open Images, this tutorial requires 27GB of free disk -space and the inference step takes approximately 9 hours on a single NVIDIA -Tesla P100 GPU. On the test set -- 75GB and 27 hours respectively. All other -steps require less than two hours in total on both sets. - -## Installing TensorFlow, the Object Detection API, and Google Cloud SDK - -Please run through the [installation instructions](installation.md) to install -TensorFlow and all its dependencies. Ensure the Protobuf libraries are compiled -and the library directories are added to `PYTHONPATH`. 
You will also need to -`pip` install `pandas` and `contextlib2`. - -Some of the data used in this tutorial lives in Google Cloud buckets. To access -it, you will have to [install the Google Cloud -SDK](https://cloud.google.com/sdk/downloads) on your workstation or laptop. - -## Preparing the Open Images validation and test sets - -In order to run inference and subsequent evaluation measure computations, we -require a dataset of images and ground truth boxes, packaged as TFRecords of -TFExamples. To create such a dataset for Open Images, you will need to first -download ground truth boxes from the [Open Images -website](https://github.com/openimages/dataset): - -```bash -# From tensorflow/models/research -mkdir oid -cd oid -wget https://storage.googleapis.com/openimages/2017_07/annotations_human_bbox_2017_07.tar.gz -tar -xvf annotations_human_bbox_2017_07.tar.gz -``` - -Next, download the images. In this tutorial, we will use lower resolution images -provided by [CVDF](http://www.cvdfoundation.org). Please follow the instructions -on [CVDF's Open Images repository -page](https://github.com/cvdfoundation/open-images-dataset) in order to gain -access to the cloud bucket with the images. Then run: - -```bash -# From tensorflow/models/research/oid -SPLIT=validation # Set SPLIT to "test" to download the images in the test set -mkdir raw_images_${SPLIT} -gsutil -m rsync -r gs://open-images-dataset/$SPLIT raw_images_${SPLIT} -``` - -Another option for downloading the images is to follow the URLs contained in the -[image URLs and metadata CSV -files](https://storage.googleapis.com/openimages/2017_07/images_2017_07.tar.gz) -on the Open Images website. 
- -At this point, your `tensorflow/models/research/oid` directory should appear as -follows: - -```lang-none -|-- 2017_07 -| |-- test -| | `-- annotations-human-bbox.csv -| |-- train -| | `-- annotations-human-bbox.csv -| `-- validation -| `-- annotations-human-bbox.csv -|-- raw_images_validation (if you downloaded the validation split) -| `-- ... (41,620 files matching regex "[0-9a-f]{16}.jpg") -|-- raw_images_test (if you downloaded the test split) -| `-- ... (125,436 files matching regex "[0-9a-f]{16}.jpg") -`-- annotations_human_bbox_2017_07.tar.gz -``` - -Next, package the data into TFRecords of TFExamples by running: - -```bash -# From tensorflow/models/research/oid -SPLIT=validation # Set SPLIT to "test" to create TFRecords for the test split -mkdir ${SPLIT}_tfrecords - -PYTHONPATH=$PYTHONPATH:$(readlink -f ..) \ -python -m object_detection/dataset_tools/create_oid_tf_record \ - --input_annotations_csv 2017_07/$SPLIT/annotations-human-bbox.csv \ - --input_images_directory raw_images_${SPLIT} \ - --input_label_map ../object_detection/data/oid_bbox_trainable_label_map.pbtxt \ - --output_tf_record_path_prefix ${SPLIT}_tfrecords/$SPLIT.tfrecord \ - --num_shards=100 -``` - -This results in 100 TFRecord files (shards), written to -`oid/${SPLIT}_tfrecords`, with filenames matching -`${SPLIT}.tfrecord-000[0-9][0-9]-of-00100`. Each shard contains approximately -the same number of images and is de facto a representative random sample of the -input data. [This enables](#accelerating_inference) a straightforward work -division scheme for distributing inference and also approximate measure -computations on subsets of the validation and test sets. - -## Inferring detections - -Inference requires a trained object detection model. In this tutorial we will -use a model from the [detections model zoo](detection_model_zoo.md), which can -be downloaded and unpacked by running the commands below.
More information about -the model, such as its architecture and how it was trained, is available in the -[model zoo page](detection_model_zoo.md). - -```bash -# From tensorflow/models/research/oid -wget http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_oid_14_10_2017.tar.gz -tar -zxvf faster_rcnn_inception_resnet_v2_atrous_oid_14_10_2017.tar.gz -``` - -At this point, data is packed into TFRecords and we have an object detector -model. We can run inference using: - -```bash -# From tensorflow/models/research/oid -SPLIT=validation # or test -TF_RECORD_FILES=$(ls -1 ${SPLIT}_tfrecords/* | tr '\n' ',') - -PYTHONPATH=$PYTHONPATH:$(readlink -f ..) \ -python -m object_detection/inference/infer_detections \ - --input_tfrecord_paths=$TF_RECORD_FILES \ - --output_tfrecord_path=${SPLIT}_detections.tfrecord-00000-of-00001 \ - --inference_graph=faster_rcnn_inception_resnet_v2_atrous_oid/frozen_inference_graph.pb \ - --discard_image_pixels -``` - -Inference preserves all fields of the input TFExamples, and adds new fields to -store the inferred detections. This allows [computing evaluation -measures](#compute_evaluation_measures) on the output TFRecord alone, as ground -truth boxes are preserved as well. Since measure computations don't require -access to the images, `infer_detections` can optionally discard them with the -`--discard_image_pixels` flag. Discarding the images drastically reduces the -size of the output TFRecord. - -### Accelerating inference {#accelerating_inference} - -Running inference on the whole validation or test set can take a long time to -complete due to the large number of images present in these sets (41,620 and -125,436 respectively). For quick but approximate evaluation, inference and the -subsequent measure computations can be run on a small number of shards. 
To run -for example on 2% of all the data, it is enough to set `TF_RECORD_FILES` as -shown below before running `infer_detections`: - -```bash -TF_RECORD_FILES=$(ls ${SPLIT}_tfrecords/${SPLIT}.tfrecord-0000[0-1]-of-00100 | tr '\n' ',') -``` - -Please note that computing evaluation measures on a small subset of the data -introduces variance and bias, since some classes of objects won't be seen during -evaluation. In the example above, this leads to 13.2% higher mAP on the first -two shards of the validation set compared to the mAP for the full set ([see mAP -results](#expected-maps)). - -Another way to accelerate inference is to run it in parallel on multiple -TensorFlow devices on possibly multiple machines. The script below uses -[tmux](https://github.com/tmux/tmux/wiki) to run a separate `infer_detections` -process for each GPU on different partition of the input data. - -```bash -# From tensorflow/models/research/oid -SPLIT=validation # or test -NUM_GPUS=4 -NUM_SHARDS=100 - -tmux new-session -d -s "inference" -function tmux_start { tmux new-window -d -n "inference:GPU$1" "${*:2}; exec bash"; } -for gpu_index in $(seq 0 $(($NUM_GPUS-1))); do - start_shard=$(( $gpu_index * $NUM_SHARDS / $NUM_GPUS )) - end_shard=$(( ($gpu_index + 1) * $NUM_SHARDS / $NUM_GPUS - 1)) - TF_RECORD_FILES=$(seq -s, -f "${SPLIT}_tfrecords/${SPLIT}.tfrecord-%05.0f-of-$(printf '%05d' $NUM_SHARDS)" $start_shard $end_shard) - tmux_start ${gpu_index} \ - PYTHONPATH=$PYTHONPATH:$(readlink -f ..) 
CUDA_VISIBLE_DEVICES=$gpu_index \ - python -m object_detection/inference/infer_detections \ - --input_tfrecord_paths=$TF_RECORD_FILES \ - --output_tfrecord_path=${SPLIT}_detections.tfrecord-$(printf "%05d" $gpu_index)-of-$(printf "%05d" $NUM_GPUS) \ - --inference_graph=faster_rcnn_inception_resnet_v2_atrous_oid/frozen_inference_graph.pb \ - --discard_image_pixels -done -``` - -After all `infer_detections` processes finish, `tensorflow/models/research/oid` -will contain one output TFRecord from each process, with name matching -`validation_detections.tfrecord-0000[0-3]-of-00004`. - -## Computing evaluation measures {#compute_evaluation_measures} - -To compute evaluation measures on the inferred detections you first need to -create the appropriate configuration files: - -```bash -# From tensorflow/models/research/oid -SPLIT=validation # or test -NUM_SHARDS=1 # Set to NUM_GPUS if using the parallel evaluation script above - -mkdir -p ${SPLIT}_eval_metrics - -echo " -label_map_path: '../object_detection/data/oid_bbox_trainable_label_map.pbtxt' -tf_record_input_reader: { input_path: '${SPLIT}_detections.tfrecord@${NUM_SHARDS}' } -" > ${SPLIT}_eval_metrics/${SPLIT}_input_config.pbtxt - -echo " -metrics_set: 'open_images_metrics' -" > ${SPLIT}_eval_metrics/${SPLIT}_eval_config.pbtxt -``` - -And then run: - -```bash -# From tensorflow/models/research/oid -SPLIT=validation # or test - -PYTHONPATH=$PYTHONPATH:$(readlink -f ..) \ -python -m object_detection/metrics/offline_eval_map_corloc \ - --eval_dir=${SPLIT}_eval_metrics \ - --eval_config_path=${SPLIT}_eval_metrics/${SPLIT}_eval_config.pbtxt \ - --input_config_path=${SPLIT}_eval_metrics/${SPLIT}_input_config.pbtxt -``` - -The first configuration file contains an `object_detection.protos.InputReader` -message that describes the location of the necessary input files. The second -file contains an `object_detection.protos.EvalConfig` message that describes the -evaluation metric. 
For more information about these protos see the corresponding -source files. - -### Expected mAPs {#expected-maps} - -The result of running `offline_eval_map_corloc` is a CSV file located at -`${SPLIT}_eval_metrics/metrics.csv`. With the above configuration, the file will -contain average precision at IoU≥0.5 for each of the classes present in the -dataset. It will also contain the mAP@IoU≥0.5. Both the per-class average -precisions and the mAP are computed according to the [Open Images evaluation -protocol](evaluation_protocols.md). The expected mAPs for the validation and -test sets of Open Images in this case are: - -Set | Fraction of data | Images | mAP@IoU≥0.5 ----------: | :--------------: | :-----: | ----------- -validation | everything | 41,620 | 39.2% -validation | first 2 shards | 884 | 52.4% -test | everything | 125,436 | 37.7% -test | first 2 shards | 2,476 | 50.8% diff --git a/object_detection/g3doc/preparing_inputs.md b/object_detection/g3doc/preparing_inputs.md deleted file mode 100644 index d9d290d2..00000000 --- a/object_detection/g3doc/preparing_inputs.md +++ /dev/null @@ -1,57 +0,0 @@ -# Preparing Inputs - -Tensorflow Object Detection API reads data using the TFRecord file format. Two -sample scripts (`create_pascal_tf_record.py` and `create_pet_tf_record.py`) are -provided to convert from the PASCAL VOC dataset and Oxford-IIIT Pet dataset to -TFRecords. - -## Generating the PASCAL VOC TFRecord files. - -The raw 2012 PASCAL VOC data set is located -[here](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar). 
-To download, extract and convert it to TFRecords, run the commands -below: - -```bash -# From tensorflow/models/research/ -wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar -tar -xvf VOCtrainval_11-May-2012.tar -python object_detection/dataset_tools/create_pascal_tf_record.py \ - --label_map_path=object_detection/data/pascal_label_map.pbtxt \ - --data_dir=VOCdevkit --year=VOC2012 --set=train \ - --output_path=pascal_train.record -python object_detection/dataset_tools/create_pascal_tf_record.py \ - --label_map_path=object_detection/data/pascal_label_map.pbtxt \ - --data_dir=VOCdevkit --year=VOC2012 --set=val \ - --output_path=pascal_val.record -``` - -You should end up with two TFRecord files named `pascal_train.record` and -`pascal_val.record` in the `tensorflow/models/research/` directory. - -The label map for the PASCAL VOC data set can be found at -`object_detection/data/pascal_label_map.pbtxt`. - -## Generating the Oxford-IIIT Pet TFRecord files. - -The Oxford-IIIT Pet data set is located -[here](http://www.robots.ox.ac.uk/~vgg/data/pets/). To download, extract and -convert it to TFRecords, run the commands below: - -```bash -# From tensorflow/models/research/ -wget http://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz -wget http://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz -tar -xvf annotations.tar.gz -tar -xvf images.tar.gz -python object_detection/dataset_tools/create_pet_tf_record.py \ - --label_map_path=object_detection/data/pet_label_map.pbtxt \ - --data_dir=`pwd` \ - --output_dir=`pwd` -``` - -You should end up with two TFRecord files named `pet_train.record` and -`pet_val.record` in the `tensorflow/models/research/` directory. - -The label map for the Pet dataset can be found at -`object_detection/data/pet_label_map.pbtxt`.
diff --git a/object_detection/g3doc/running_locally.md b/object_detection/g3doc/running_locally.md deleted file mode 100644 index b143a9b7..00000000 --- a/object_detection/g3doc/running_locally.md +++ /dev/null @@ -1,81 +0,0 @@ -# Running Locally - -This page walks through the steps required to train an object detection model -on a local machine. It assumes the reader has completed the -following prerequisites: - -1. The Tensorflow Object Detection API has been installed as documented in the -[installation instructions](installation.md). This includes installing library -dependencies, compiling the configuration protobufs and setting up the Python -environment. -2. A valid data set has been created. See [this page](preparing_inputs.md) for -instructions on how to generate a dataset for the PASCAL VOC challenge or the -Oxford-IIIT Pet dataset. -3. An Object Detection pipeline configuration has been written. See -[this page](configuring_jobs.md) for details on how to write a pipeline configuration. - -## Recommended Directory Structure for Training and Evaluation - -``` -+data - -label_map file - -train TFRecord file - -eval TFRecord file -+models - + model - -pipeline config file - +train - +eval -``` - -## Running the Training Job - -A local training job can be run with the following command: - -```bash -# From the tensorflow/models/research/ directory -python object_detection/train.py \ - --logtostderr \ - --pipeline_config_path=${PATH_TO_YOUR_PIPELINE_CONFIG} \ - --train_dir=${PATH_TO_TRAIN_DIR} -``` - -where `${PATH_TO_YOUR_PIPELINE_CONFIG}` points to the pipeline config and -`${PATH_TO_TRAIN_DIR}` points to the directory in which training checkpoints -and events will be written. By default, the training job will -run indefinitely until the user kills it. - -## Running the Evaluation Job - -Evaluation is run as a separate job. The eval job will periodically poll the -train directory for new checkpoints and evaluate them on a test dataset.
The -job can be run using the following command: - -```bash -# From the tensorflow/models/research/ directory -python object_detection/eval.py \ - --logtostderr \ - --pipeline_config_path=${PATH_TO_YOUR_PIPELINE_CONFIG} \ - --checkpoint_dir=${PATH_TO_TRAIN_DIR} \ - --eval_dir=${PATH_TO_EVAL_DIR} -``` - -where `${PATH_TO_YOUR_PIPELINE_CONFIG}` points to the pipeline config, -`${PATH_TO_TRAIN_DIR}` points to the directory in which training checkpoints -were saved (same as the training job) and `${PATH_TO_EVAL_DIR}` points to the -directory in which evaluation events will be saved. As with the training job, -the eval job run until terminated by default. - -## Running Tensorboard - -Progress for training and eval jobs can be inspected using Tensorboard. If -using the recommended directory structure, Tensorboard can be run using the -following command: - -```bash -tensorboard --logdir=${PATH_TO_MODEL_DIRECTORY} -``` - -where `${PATH_TO_MODEL_DIRECTORY}` points to the directory that contains the -train and eval directories. Please note it may take Tensorboard a couple minutes -to populate with data. diff --git a/object_detection/g3doc/running_notebook.md b/object_detection/g3doc/running_notebook.md deleted file mode 100644 index c2b8ad18..00000000 --- a/object_detection/g3doc/running_notebook.md +++ /dev/null @@ -1,15 +0,0 @@ -# Quick Start: Jupyter notebook for off-the-shelf inference - -If you'd like to hit the ground running and run detection on a few example -images right out of the box, we recommend trying out the Jupyter notebook demo. -To run the Jupyter notebook, run the following command from -`tensorflow/models/research/object_detection`: - -``` -# From tensorflow/models/research/object_detection -jupyter notebook -``` - -The notebook should open in your favorite web browser. Click the -[`object_detection_tutorial.ipynb`](../object_detection_tutorial.ipynb) link to -open the demo. 
diff --git a/object_detection/g3doc/running_on_cloud.md b/object_detection/g3doc/running_on_cloud.md deleted file mode 100644 index 3cb2885e..00000000 --- a/object_detection/g3doc/running_on_cloud.md +++ /dev/null @@ -1,128 +0,0 @@ -# Running on Google Cloud Platform - -The Tensorflow Object Detection API supports distributed training on Google -Cloud ML Engine. This section documents instructions on how to train and -evaluate your model using Cloud ML. The reader should complete the following -prerequistes: - -1. The reader has created and configured a project on Google Cloud Platform. -See [the Cloud ML quick start guide](https://cloud.google.com/ml-engine/docs/quickstarts/command-line). -2. The reader has installed the Tensorflow Object Detection API as documented -in the [installation instructions](installation.md). -3. The reader has a valid data set and stored it in a Google Cloud Storage -bucket. See [this page](preparing_inputs.md) for instructions on how to generate -a dataset for the PASCAL VOC challenge or the Oxford-IIIT Pet dataset. -4. The reader has configured a valid Object Detection pipeline, and stored it -in a Google Cloud Storage bucket. See [this page](configuring_jobs.md) for -details on how to write a pipeline configuration. - -Additionally, it is recommended users test their job by running training and -evaluation jobs for a few iterations -[locally on their own machines](running_locally.md). - -## Packaging - -In order to run the Tensorflow Object Detection API on Cloud ML, it must be -packaged (along with it's TF-Slim dependency). The required packages can be -created with the following command - -``` bash -# From tensorflow/models/research/ -python setup.py sdist -(cd slim && python setup.py sdist) -``` - -This will create python packages in dist/object_detection-0.1.tar.gz and -slim/dist/slim-0.1.tar.gz. 
- -## Running a Multiworker Training Job - -Google Cloud ML requires a YAML configuration file for a multiworker training -job using GPUs. A sample YAML file is given below: - -``` -trainingInput: - runtimeVersion: "1.0" - scaleTier: CUSTOM - masterType: standard_gpu - workerCount: 9 - workerType: standard_gpu - parameterServerCount: 3 - parameterServerType: standard - - -``` - -Please keep the following guidelines in mind when writing the YAML -configuration: - -* A job with n workers will have n + 1 training machines (n workers + 1 master). -* The number of parameters servers used should be an odd number to prevent - a parameter server from storing only weight variables or only bias variables - (due to round robin parameter scheduling). -* The learning rate in the training config should be decreased when using a - larger number of workers. Some experimentation is required to find the - optimal learning rate. - -The YAML file should be saved on the local machine (not on GCP). Once it has -been written, a user can start a training job on Cloud ML Engine using the -following command: - -``` bash -# From tensorflow/models/research/ -gcloud ml-engine jobs submit training object_detection_`date +%s` \ - --job-dir=gs://${TRAIN_DIR} \ - --packages dist/object_detection-0.1.tar.gz,slim/dist/slim-0.1.tar.gz \ - --module-name object_detection.train \ - --region us-central1 \ - --config ${PATH_TO_LOCAL_YAML_FILE} \ - -- \ - --train_dir=gs://${TRAIN_DIR} \ - --pipeline_config_path=gs://${PIPELINE_CONFIG_PATH} -``` - -Where `${PATH_TO_LOCAL_YAML_FILE}` is the local path to the YAML configuration, -`gs://${TRAIN_DIR}` specifies the directory on Google Cloud Storage where the -training checkpoints and events will be written to and -`gs://${PIPELINE_CONFIG_PATH}` points to the pipeline configuration stored on -Google Cloud Storage. - -Users can monitor the progress of their training job on the [ML Engine -Dashboard](https://console.cloud.google.com/mlengine/jobs). 
- -## Running an Evaluation Job on Cloud - -Evaluation jobs run on a single machine, so it is not necessary to write a YAML -configuration for evaluation. Run the following command to start the evaluation -job: - -``` bash -gcloud ml-engine jobs submit training object_detection_eval_`date +%s` \ - --job-dir=gs://${TRAIN_DIR} \ - --packages dist/object_detection-0.1.tar.gz,slim/dist/slim-0.1.tar.gz \ - --module-name object_detection.eval \ - --region us-central1 \ - --scale-tier BASIC_GPU \ - -- \ - --checkpoint_dir=gs://${TRAIN_DIR} \ - --eval_dir=gs://${EVAL_DIR} \ - --pipeline_config_path=gs://${PIPELINE_CONFIG_PATH} -``` - -Where `gs://${TRAIN_DIR}` points to the directory on Google Cloud Storage where -training checkpoints are saved (same as the training job), `gs://${EVAL_DIR}` -points to where evaluation events will be saved on Google Cloud Storage and -`gs://${PIPELINE_CONFIG_PATH}` points to where the pipeline configuration is -stored on Google Cloud Storage. - -## Running Tensorboard - -You can run Tensorboard locally on your own machine to view progress of your -training and eval jobs on Google Cloud ML. Run the following command to start -Tensorboard: - -``` bash -tensorboard --logdir=gs://${YOUR_CLOUD_BUCKET} -``` - -Note it may Tensorboard a few minutes to populate with results. diff --git a/object_detection/g3doc/running_pets.md b/object_detection/g3doc/running_pets.md deleted file mode 100644 index a82bc521..00000000 --- a/object_detection/g3doc/running_pets.md +++ /dev/null @@ -1,314 +0,0 @@ -# Quick Start: Distributed Training on the Oxford-IIIT Pets Dataset on Google Cloud - -This page is a walkthrough for training an object detector using the Tensorflow -Object Detection API. In this tutorial, we'll be training on the Oxford-IIIT Pets -dataset to build a system to detect various breeds of cats and dogs. 
The output -of the detector will look like the following: - -![](img/oxford_pet.png) - -## Setting up a Project on Google Cloud - -To accelerate the process, we'll run training and evaluation on [Google Cloud -ML Engine](https://cloud.google.com/ml-engine/) to leverage multiple GPUs. To -begin, you will have to set up Google Cloud via the following steps (if you have -already done this, feel free to skip to the next section): - -1. [Create a GCP project](https://cloud.google.com/resource-manager/docs/creating-managing-projects). -2. [Install the Google Cloud SDK](https://cloud.google.com/sdk/downloads) on -your workstation or laptop. -This will provide the tools you need to upload files to Google Cloud Storage and -start ML training jobs. -3. [Enable the ML Engine -APIs](https://console.cloud.google.com/flows/enableapi?apiid=ml.googleapis.com,compute_component&_ga=1.73374291.1570145678.1496689256). -By default, a new GCP project does not enable APIs to start ML Engine training -jobs. Use the above link to explicitly enable them. -4. [Set up a Google Cloud Storage (GCS) -bucket](https://cloud.google.com/storage/docs/creating-buckets). ML Engine -training jobs can only access files on a Google Cloud Storage bucket. In this -tutorial, we'll be required to upload our dataset and configuration to GCS. - -Please remember the name of your GCS bucket, as we will reference it multiple -times in this document. Substitute `${YOUR_GCS_BUCKET}` with the name of -your bucket in this document. For your convenience, you should define the -environment variable below: - -``` bash -export YOUR_GCS_BUCKET=${YOUR_GCS_BUCKET} -``` - -## Installing Tensorflow and the Tensorflow Object Detection API - -Please run through the [installation instructions](installation.md) to install -Tensorflow and all it dependencies. Ensure the Protobuf libraries are -compiled and the library directories are added to `PYTHONPATH`. 
- -## Getting the Oxford-IIIT Pets Dataset and Uploading it to Google Cloud Storage - -In order to train a detector, we require a dataset of images, bounding boxes and -classifications. For this demo, we'll use the Oxford-IIIT Pets dataset. The raw -dataset for Oxford-IIIT Pets lives -[here](http://www.robots.ox.ac.uk/~vgg/data/pets/). You will need to download -both the image dataset [`images.tar.gz`](http://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz) -and the groundtruth data [`annotations.tar.gz`](http://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz) -to the `tensorflow/models/research/` directory and unzip them. This may take -some time. - -``` bash -# From tensorflow/models/research/ -wget http://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz -wget http://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz -tar -xvf images.tar.gz -tar -xvf annotations.tar.gz -``` - -After downloading the tarballs, your `tensorflow/models/research/` directory -should appear as follows: - -```lang-none -- images.tar.gz -- annotations.tar.gz -+ images/ -+ annotations/ -+ object_detection/ -... other files and directories -``` - -The Tensorflow Object Detection API expects data to be in the TFRecord format, -so we'll now run the `create_pet_tf_record` script to convert from the raw -Oxford-IIIT Pet dataset into TFRecords. Run the following commands from the -`tensorflow/models/research/` directory: - -``` bash -# From tensorflow/models/research/ -python object_detection/dataset_tools/create_pet_tf_record.py \ - --label_map_path=object_detection/data/pet_label_map.pbtxt \ - --data_dir=`pwd` \ - --output_dir=`pwd` -``` - -Note: It is normal to see some warnings when running this script. You may ignore -them. - -Two TFRecord files named `pet_train.record` and `pet_val.record` should be -generated in the `tensorflow/models/research/` directory. 
- -Now that the data has been generated, we'll need to upload it to Google Cloud -Storage so the data can be accessed by ML Engine. Run the following command to -copy the files into your GCS bucket (substituting `${YOUR_GCS_BUCKET}`): - -``` bash -# From tensorflow/models/research/ -gsutil cp pet_train.record gs://${YOUR_GCS_BUCKET}/data/pet_train.record -gsutil cp pet_val.record gs://${YOUR_GCS_BUCKET}/data/pet_val.record -gsutil cp object_detection/data/pet_label_map.pbtxt gs://${YOUR_GCS_BUCKET}/data/pet_label_map.pbtxt -``` - -Please remember the path where you upload the data to, as we will need this -information when configuring the pipeline in a following step. - -## Downloading a COCO-pretrained Model for Transfer Learning - -Training a state of the art object detector from scratch can take days, even -when using multiple GPUs! In order to speed up training, we'll take an object -detector trained on a different dataset (COCO), and reuse some of it's -parameters to initialize our new model. - -Download our [COCO-pretrained Faster R-CNN with Resnet-101 -model](http://storage.googleapis.com/download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_coco_11_06_2017.tar.gz). -Unzip the contents of the folder and copy the `model.ckpt*` files into your GCS -Bucket. - -``` bash -wget http://storage.googleapis.com/download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_coco_11_06_2017.tar.gz -tar -xvf faster_rcnn_resnet101_coco_11_06_2017.tar.gz -gsutil cp faster_rcnn_resnet101_coco_11_06_2017/model.ckpt.* gs://${YOUR_GCS_BUCKET}/data/ -``` - -Remember the path where you uploaded the model checkpoint to, as we will need it -in the following step. - -## Configuring the Object Detection Pipeline - -In the Tensorflow Object Detection API, the model parameters, training -parameters and eval parameters are all defined by a config file. More details -can be found [here](configuring_jobs.md). 
For this tutorial, we will use some -predefined templates provided with the source code. In the -`object_detection/samples/configs` folder, there are skeleton object_detection -configuration files. We will use `faster_rcnn_resnet101_pets.config` as a -starting point for configuring the pipeline. Open the file with your favourite -text editor. - -We'll need to configure some paths in order for the template to work. Search the -file for instances of `PATH_TO_BE_CONFIGURED` and replace them with the -appropriate value (typically `gs://${YOUR_GCS_BUCKET}/data/`). Afterwards -upload your edited file onto GCS, making note of the path it was uploaded to -(we'll need it when starting the training/eval jobs). - -``` bash -# From tensorflow/models/research/ - -# Edit the faster_rcnn_resnet101_pets.config template. Please note that there -# are multiple places where PATH_TO_BE_CONFIGURED needs to be set. -sed -i "s|PATH_TO_BE_CONFIGURED|"gs://${YOUR_GCS_BUCKET}"/data|g" \ - object_detection/samples/configs/faster_rcnn_resnet101_pets.config - -# Copy edited template to cloud. -gsutil cp object_detection/samples/configs/faster_rcnn_resnet101_pets.config \ - gs://${YOUR_GCS_BUCKET}/data/faster_rcnn_resnet101_pets.config -``` - -## Checking Your Google Cloud Storage Bucket - -At this point in the tutorial, you should have uploaded the training/validation -datasets (including label map), our COCO trained FasterRCNN finetune checkpoint and your job -configuration to your Google Cloud Storage Bucket. Your bucket should look like -the following: - -```lang-none -+ ${YOUR_GCS_BUCKET}/ - + data/ - - faster_rcnn_resnet101_pets.config - - model.ckpt.index - - model.ckpt.meta - - model.ckpt.data-00000-of-00001 - - pet_label_map.pbtxt - - pet_train.record - - pet_val.record -``` - -You can inspect your bucket using the [Google Cloud Storage -browser](https://console.cloud.google.com/storage/browser). 
- -## Starting Training and Evaluation Jobs on Google Cloud ML Engine - -Before we can start a job on Google Cloud ML Engine, we must: - -1. Package the Tensorflow Object Detection code. -2. Write a cluster configuration for our Google Cloud ML job. - -To package the Tensorflow Object Detection code, run the following commands from -the `tensorflow/models/research/` directory: - -``` bash -# From tensorflow/models/research/ -python setup.py sdist -(cd slim && python setup.py sdist) -``` - -You should see two tar.gz files created at `dist/object_detection-0.1.tar.gz` -and `slim/dist/slim-0.1.tar.gz`. - -For running the training Cloud ML job, we'll configure the cluster to use 10 -training jobs (1 master + 9 workers) and three parameters servers. The -configuration file can be found at `object_detection/samples/cloud/cloud.yml`. - -To start training, execute the following command from the -`tensorflow/models/research/` directory: - -``` bash -# From tensorflow/models/research/ -gcloud ml-engine jobs submit training `whoami`_object_detection_`date +%s` \ - --job-dir=gs://${YOUR_GCS_BUCKET}/train \ - --packages dist/object_detection-0.1.tar.gz,slim/dist/slim-0.1.tar.gz \ - --module-name object_detection.train \ - --region us-central1 \ - --config object_detection/samples/cloud/cloud.yml \ - -- \ - --train_dir=gs://${YOUR_GCS_BUCKET}/train \ - --pipeline_config_path=gs://${YOUR_GCS_BUCKET}/data/faster_rcnn_resnet101_pets.config -``` - -Once training has started, we can run an evaluation concurrently: - -``` bash -# From tensorflow/models/research/ -gcloud ml-engine jobs submit training `whoami`_object_detection_eval_`date +%s` \ - --job-dir=gs://${YOUR_GCS_BUCKET}/train \ - --packages dist/object_detection-0.1.tar.gz,slim/dist/slim-0.1.tar.gz \ - --module-name object_detection.eval \ - --region us-central1 \ - --scale-tier BASIC_GPU \ - -- \ - --checkpoint_dir=gs://${YOUR_GCS_BUCKET}/train \ - --eval_dir=gs://${YOUR_GCS_BUCKET}/eval \ - 
--pipeline_config_path=gs://${YOUR_GCS_BUCKET}/data/faster_rcnn_resnet101_pets.config -``` - -Note: Even though we're running an evaluation job, the `gcloud ml-engine jobs -submit training` command is correct. ML Engine does not distinguish between -training and evaluation jobs. - -Users can monitor and stop training and evaluation jobs on the [ML Engine -Dashboard](https://console.cloud.google.com/mlengine/jobs). - -## Monitoring Progress with Tensorboard - -You can monitor progress of the training and eval jobs by running Tensorboard on -your local machine: - -``` bash -# This command needs to be run once to allow your local machine to access your -# GCS bucket. -gcloud auth application-default login - -tensorboard --logdir=gs://${YOUR_GCS_BUCKET} -``` - -Once Tensorboard is running, navigate to `localhost:6006` from your favourite -web browser. You should see something similar to the following: - -![](img/tensorboard.png) - -You will also want to click on the images tab to see example detections made by -the model while it trains. After about an hour and a half of training, you can -expect to see something like this: - -![](img/tensorboard2.png) - -Note: It takes roughly 10 minutes for a job to get started on ML Engine, and -roughly an hour for the system to evaluate the validation dataset. It may take -some time to populate the dashboards. If you do not see any entries after half -an hour, check the logs from the [ML Engine -Dashboard](https://console.cloud.google.com/mlengine/jobs). Note that by default -the training jobs are configured to go for much longer than is necessary for -convergence. To save money, we recommend killing your jobs once you've seen -that they've converged. - -## Exporting the Tensorflow Graph - -After your model has been trained, you should export it to a Tensorflow -graph proto. First, you need to identify a candidate checkpoint to export. 
You -can search your bucket using the [Google Cloud Storage -Browser](https://console.cloud.google.com/storage/browser). The file should be -stored under `${YOUR_GCS_BUCKET}/train`. The checkpoint will typically consist of -three files: - -* `model.ckpt-${CHECKPOINT_NUMBER}.data-00000-of-00001` -* `model.ckpt-${CHECKPOINT_NUMBER}.index` -* `model.ckpt-${CHECKPOINT_NUMBER}.meta` - -After you've identified a candidate checkpoint to export, run the following -command from `tensorflow/models/research/`: - -``` bash -# From tensorflow/models/research/ -gsutil cp gs://${YOUR_GCS_BUCKET}/train/model.ckpt-${CHECKPOINT_NUMBER}.* . -python object_detection/export_inference_graph.py \ - --input_type image_tensor \ - --pipeline_config_path object_detection/samples/configs/faster_rcnn_resnet101_pets.config \ - --trained_checkpoint_prefix model.ckpt-${CHECKPOINT_NUMBER} \ - --output_directory exported_graphs -``` - -Afterwards, you should see a directory named `exported_graphs` containing the -SavedModel and frozen graph. - -## What's Next - -Congratulations, you have now trained an object detector for various cats and -dogs! There different things you can do now: - -1. [Test your exported model using the provided Jupyter notebook.](running_notebook.md) -2. [Experiment with different model configurations.](configuring_jobs.md) -3. Train an object detector using your own data. diff --git a/object_detection/g3doc/using_your_own_dataset.md b/object_detection/g3doc/using_your_own_dataset.md deleted file mode 100644 index c403930e..00000000 --- a/object_detection/g3doc/using_your_own_dataset.md +++ /dev/null @@ -1,157 +0,0 @@ -# Preparing Inputs - -To use your own dataset in Tensorflow Object Detection API, you must convert it -into the [TFRecord file format](https://www.tensorflow.org/api_guides/python/python_io#tfrecords_format_details). -This document outlines how to write a script to generate the TFRecord file. 
- -## Label Maps - -Each dataset is required to have a label map associated with it. This label map -defines a mapping from string class names to integer class Ids. The label map -should be a `StringIntLabelMap` text protobuf. Sample label maps can be found in -object_detection/data. Label maps should always start from id 1. - -## Dataset Requirements - -For every example in your dataset, you should have the following information: - -1. An RGB image for the dataset encoded as jpeg or png. -2. A list of bounding boxes for the image. Each bounding box should contain: - 1. A bounding box coordinates (with origin in top left corner) defined by 4 - floating point numbers [ymin, xmin, ymax, xmax]. Note that we store the - _normalized_ coordinates (x / width, y / height) in the TFRecord dataset. - 2. The class of the object in the bounding box. - -# Example Image - -Consider the following image: - -![Example Image](img/example_cat.jpg "Example Image") - -with the following label map: - -``` -item { - id: 1 - name: 'Cat' -} - - -item { - id: 2 - name: 'Dog' -} -``` - -We can generate a tf.Example proto for this image using the following code: - -```python - -def create_cat_tf_example(encoded_cat_image_data): - """Creates a tf.Example proto from sample cat image. - - Args: - encoded_cat_image_data: The jpg encoded data of the cat image. - - Returns: - example: The created tf.Example. 
- """ - - height = 1032.0 - width = 1200.0 - filename = 'example_cat.jpg' - image_format = b'jpg' - - xmins = [322.0 / 1200.0] - xmaxs = [1062.0 / 1200.0] - ymins = [174.0 / 1032.0] - ymaxs = [761.0 / 1032.0] - classes_text = ['Cat'] - classes = [1] - - tf_example = tf.train.Example(features=tf.train.Features(feature={ - 'image/height': dataset_util.int64_feature(height), - 'image/width': dataset_util.int64_feature(width), - 'image/filename': dataset_util.bytes_feature(filename), - 'image/source_id': dataset_util.bytes_feature(filename), - 'image/encoded': dataset_util.bytes_feature(encoded_image_data), - 'image/format': dataset_util.bytes_feature(image_format), - 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), - 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), - 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), - 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), - 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), - 'image/object/class/label': dataset_util.int64_list_feature(classes), - })) - return tf_example -``` - -## Conversion Script Outline - -A typical conversion script will look like the following: - -```python - -import tensorflow as tf - -from object_detection.utils import dataset_util - - -flags = tf.app.flags -flags.DEFINE_string('output_path', '', 'Path to output TFRecord') -FLAGS = flags.FLAGS - - -def create_tf_example(example): - # TODO(user): Populate the following variables from your example. - height = None # Image height - width = None # Image width - filename = None # Filename of the image. 
Empty if image is not from file - encoded_image_data = None # Encoded image bytes - image_format = None # b'jpeg' or b'png' - - xmins = [] # List of normalized left x coordinates in bounding box (1 per box) - xmaxs = [] # List of normalized right x coordinates in bounding box - # (1 per box) - ymins = [] # List of normalized top y coordinates in bounding box (1 per box) - ymaxs = [] # List of normalized bottom y coordinates in bounding box - # (1 per box) - classes_text = [] # List of string class name of bounding box (1 per box) - classes = [] # List of integer class id of bounding box (1 per box) - - tf_example = tf.train.Example(features=tf.train.Features(feature={ - 'image/height': dataset_util.int64_feature(height), - 'image/width': dataset_util.int64_feature(width), - 'image/filename': dataset_util.bytes_feature(filename), - 'image/source_id': dataset_util.bytes_feature(filename), - 'image/encoded': dataset_util.bytes_feature(encoded_image_data), - 'image/format': dataset_util.bytes_feature(image_format), - 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), - 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), - 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), - 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), - 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), - 'image/object/class/label': dataset_util.int64_list_feature(classes), - })) - return tf_example - - -def main(_): - writer = tf.python_io.TFRecordWriter(FLAGS.output_path) - - # TODO(user): Write code to read in your dataset to examples variable - - for example in examples: - tf_example = create_tf_example(example) - writer.write(tf_example.SerializeToString()) - - writer.close() - - -if __name__ == '__main__': - tf.app.run() - -``` - -Note: You may notice additional fields in some other datasets. They are -currently unused by the API and are optional. 
diff --git a/object_detection/inference.py b/object_detection/inference.py deleted file mode 100644 index 7af17191..00000000 --- a/object_detection/inference.py +++ /dev/null @@ -1,209 +0,0 @@ -import numpy as np -import os -import six.moves.urllib as urllib -import sys -import tarfile -import tensorflow as tf -import zipfile -import cv2 -import glob -import time -import argparse -from multiprocessing import Process, Queue, Event - -from collections import defaultdict -from io import StringIO -from matplotlib import pyplot as plt -from PIL import Image -from utils import label_map_util -from utils import visualization_utils as vis_util - -def load_image_into_numpy_array(image): - (im_width, im_height) = image.size - return np.array(image.getdata()).reshape((im_height, im_width, 3)).astype(np.uint8) - - -def load_details(args): - - PATH_TO_CKPT = args.frozen_graph - PATH_TO_LABELS = args.label_map - NUM_CLASSES = args.num_output_classes - PATH_TO_TEST_IMAGES_DIR = args.input_dir - PATH_TO_RESULT_IMAGES_DIR = args.output_dir - - if not os.path.exists(args.output_dir): - os.mkdir(args.output_dir) - - TEST_IMAGE_PATHS = sorted(glob.glob(os.path.join(PATH_TO_TEST_IMAGES_DIR, '*.jpg'))) - JPG_PATHS = [ os.path.basename(path) for path in TEST_IMAGE_PATHS ] - RESULT_IMAGE_PATHS = [ os.path.join(PATH_TO_RESULT_IMAGES_DIR, jpg_path) for jpg_path in JPG_PATHS ] - - label_map = label_map_util.load_labelmap(PATH_TO_LABELS) - categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True) - category_index = label_map_util.create_category_index(categories) - - return TEST_IMAGE_PATHS, RESULT_IMAGE_PATHS, category_index - - -def feed(queue, args): - - """ - Queue that reads images from disk. - All GPU worker processes poll from this queue for input. 
- """ - - TEST_IMAGE_PATHS, RESULT_IMAGE_PATHS, _ = load_details(args) - key = 0 - for image_path, result_path in zip(TEST_IMAGE_PATHS, RESULT_IMAGE_PATHS): - key+=1 - image_np = cv2.imread(image_path, 1) - image_np_expanded = np.expand_dims(image_np, axis=0) - queue.put((image_np, image_np_expanded, result_path, key)) - - -def infer(args, feed_queue, stitch_queue, completed, gpu_id): - - """ - Binds a process to a GPU and uses it for inference - """ - - config = tf.ConfigProto(allow_soft_placement = True) - config.gpu_options.allow_growth = False - config.gpu_options.per_process_gpu_memory_fraction = 0.75 / args.n_jobs - - # Scaling 0.75 down by args.n_jobs is required because total GPU memory is sum of - # memory of all available GPUs. Since, we need each GPU to use 75% of a single GPU - # memory, we have to multiple total memory by (0.75/args.n_jobs) - - detection_graph = tf.Graph() - with detection_graph.device('/gpu:' + str(gpu_id)): - with detection_graph.as_default(): - od_graph_def = tf.GraphDef() - with tf.gfile.GFile(args.frozen_graph, 'rb') as fid: - serialized_graph = fid.read() - od_graph_def.ParseFromString(serialized_graph) - tf.import_graph_def(od_graph_def, name='') - - with tf.Session(graph=detection_graph, config=config) as sess: - - # Fetching tensors from the graph - image_tensor = detection_graph.get_tensor_by_name('image_tensor:0') - detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0') - detection_scores = detection_graph.get_tensor_by_name('detection_scores:0') - detection_classes = detection_graph.get_tensor_by_name('detection_classes:0') - num_detections = detection_graph.get_tensor_by_name('num_detections:0') - - while True: - if not feed_queue.empty(): - begin = time.time() - image_np, image_np_expanded, result_path, key = feed_queue.get() - - (boxes, scores, classes, num) = sess.run( - [detection_boxes, detection_scores, detection_classes, num_detections], - feed_dict={image_tensor: image_np_expanded}) - - FPS2 = 
1/(time.time() - begin) - stitch_queue.put((boxes, scores, classes, num, image_np, result_path, FPS2, key)) - - if completed.is_set(): - break - - print('Done') - - -def stitch(queue, completed, args): - - """ - Stitches frames inorder - """ - - TEST_IMAGE_PATHS, RESULT_IMAGE_PATHS, category_index = load_details(args) - SQ = lambda x: np.squeeze(x) - total_frames = len(RESULT_IMAGE_PATHS) - first_frame = time.time() - process_buffer = {} - current_frame = 1 - - print('Processing...') - while True: - if not queue.empty(): - boxes, scores, classes, count, image_np, result_path, FPS2, key = queue.get() - process_buffer[key] = (boxes, scores, classes, count, image_np, result_path, FPS2) - - # Keeps polling for the next frame - current_objects = process_buffer.pop(current_frame, None) - - if current_objects is not None: - - begin = time.time() - (boxes, scores, classes, count, image_np, result_path, FPS2) = current_objects - boxes, classes, scores = SQ(boxes), SQ(classes).astype(np.int32), SQ(scores) - - vis_util.visualize_boxes_and_labels_on_image_array( - image_np, - boxes, - classes, - scores, - category_index, - use_normalized_coordinates=True, - line_thickness=8) - - cv2.imwrite(result_path, image_np) - - FPS = 1 / (time.time() - begin) - log = 'Images Processed: %d Count: %d Process+Stitch_FPS: %.2f Process_FPS: %.2f ' % (key, count, FPS, FPS2) - - with open(os.path.join('logs' + str(args.n_jobs) + '.txt'), 'w') as file: - file.write(log + '\n') - if key == total_frames-1: - file.write("Time Taken -> %.2f \n" % (time.time() - first_frame)) - - if key == total_frames-1: - print("Time Taken -> ", time.time() - first_frame) - - current_frame += 1 - - if current_frame == total_frames: - completed.set() - break - - -if __name__ == "__main__": - - parser = argparse.ArgumentParser() - - parser.add_argument("--input_dir", help = "Path of the input images directory") - parser.add_argument("--frozen_graph", help = "Path of the frozen graph model") - 
parser.add_argument("--label_map", help = "Path of the label map file") - parser.add_argument("--output_dir", help = "Path of the output directory") - parser.add_argument("--num_output_classes", help="Defines the number of output classes", type=int) - parser.add_argument("--n_jobs", help="Number of GPU jobs in parallel", type=int) - parser.add_argument("--delay", help="Delay for queue in seconds", type=int, default=0) - - args = parser.parse_args() - - # Initializing queues and events - stitch_queue = Queue() - feed_queue = Queue() - completed = Event() - - gpu_workers = [] - - # Creating processes for GPU inference, loading data and stitching data - for gpu_id in range(args.n_jobs): - gpu_workers.append(Process(target=infer, args=(args, feed_queue, stitch_queue, completed, gpu_id))) - stitch_cpu = Process(target=stitch, args=(stitch_queue, completed, args)) - feed_cpu = Process(target=feed, args=(feed_queue, args)) - - # Optional delay to give imread a head start - feed_cpu.start() - time.sleep(args.delay) - - stitch_cpu.start() - for gpu in gpu_workers: - gpu.start() - - feed_cpu.join() - stitch_cpu.join() - for gpu in gpu_workers: - gpu.join() diff --git a/object_detection/inference/BUILD b/object_detection/inference/BUILD deleted file mode 100644 index c36df0d0..00000000 --- a/object_detection/inference/BUILD +++ /dev/null @@ -1,40 +0,0 @@ -# Tensorflow Object Detection API: main runnables. 
- -package( - default_visibility = ["//visibility:public"], -) - -licenses(["notice"]) - -# Apache 2.0 - -py_library( - name = "detection_inference", - srcs = ["detection_inference.py"], - deps = [ - "//tensorflow", - "//tensorflow_models/object_detection/core:standard_fields", - ], -) - -py_test( - name = "detection_inference_test", - srcs = ["detection_inference_test.py"], - deps = [ - ":detection_inference", - "//third_party/py/PIL:pil", - "//third_party/py/numpy", - "//tensorflow", - "//tensorflow_models/object_detection/core:standard_fields", - "//tensorflow_models/object_detection/utils:dataset_util", - ], -) - -py_binary( - name = "infer_detections", - srcs = ["infer_detections.py"], - deps = [ - ":detection_inference", - "//tensorflow", - ], -) diff --git a/object_detection/inference/detection_inference.py b/object_detection/inference/detection_inference.py deleted file mode 100644 index dc66686f..00000000 --- a/object_detection/inference/detection_inference.py +++ /dev/null @@ -1,141 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Utility functions for detection inference.""" -from __future__ import division - -import tensorflow as tf - -from object_detection.core import standard_fields - - -def build_input(tfrecord_paths): - """Builds the graph's input. 
- - Args: - tfrecord_paths: List of paths to the input TFRecords - - Returns: - serialized_example_tensor: The next serialized example. String scalar Tensor - image_tensor: The decoded image of the example. Uint8 tensor, - shape=[1, None, None,3] - """ - filename_queue = tf.train.string_input_producer( - tfrecord_paths, shuffle=False, num_epochs=1) - - tf_record_reader = tf.TFRecordReader() - _, serialized_example_tensor = tf_record_reader.read(filename_queue) - features = tf.parse_single_example( - serialized_example_tensor, - features={ - standard_fields.TfExampleFields.image_encoded: - tf.FixedLenFeature([], tf.string), - }) - encoded_image = features[standard_fields.TfExampleFields.image_encoded] - image_tensor = tf.image.decode_image(encoded_image, channels=3) - image_tensor.set_shape([None, None, 3]) - image_tensor = tf.expand_dims(image_tensor, 0) - - return serialized_example_tensor, image_tensor - - -def build_inference_graph(image_tensor, inference_graph_path): - """Loads the inference graph and connects it to the input image. - - Args: - image_tensor: The input image. uint8 tensor, shape=[1, None, None, 3] - inference_graph_path: Path to the inference graph with embedded weights - - Returns: - detected_boxes_tensor: Detected boxes. Float tensor, - shape=[num_detections, 4] - detected_scores_tensor: Detected scores. Float tensor, - shape=[num_detections] - detected_labels_tensor: Detected labels. 
Int64 tensor, - shape=[num_detections] - """ - with tf.gfile.Open(inference_graph_path, 'r') as graph_def_file: - graph_content = graph_def_file.read() - graph_def = tf.GraphDef() - graph_def.MergeFromString(graph_content) - - tf.import_graph_def( - graph_def, name='', input_map={'image_tensor': image_tensor}) - - g = tf.get_default_graph() - - num_detections_tensor = tf.squeeze( - g.get_tensor_by_name('num_detections:0'), 0) - num_detections_tensor = tf.cast(num_detections_tensor, tf.int32) - - detected_boxes_tensor = tf.squeeze( - g.get_tensor_by_name('detection_boxes:0'), 0) - detected_boxes_tensor = detected_boxes_tensor[:num_detections_tensor] - - detected_scores_tensor = tf.squeeze( - g.get_tensor_by_name('detection_scores:0'), 0) - detected_scores_tensor = detected_scores_tensor[:num_detections_tensor] - - detected_labels_tensor = tf.squeeze( - g.get_tensor_by_name('detection_classes:0'), 0) - detected_labels_tensor = tf.cast(detected_labels_tensor, tf.int64) - detected_labels_tensor = detected_labels_tensor[:num_detections_tensor] - - return detected_boxes_tensor, detected_scores_tensor, detected_labels_tensor - - -def infer_detections_and_add_to_example( - serialized_example_tensor, detected_boxes_tensor, detected_scores_tensor, - detected_labels_tensor, discard_image_pixels): - """Runs the supplied tensors and adds the inferred detections to the example. - - Args: - serialized_example_tensor: Serialized TF example. Scalar string tensor - detected_boxes_tensor: Detected boxes. Float tensor, - shape=[num_detections, 4] - detected_scores_tensor: Detected scores. Float tensor, - shape=[num_detections] - detected_labels_tensor: Detected labels. Int64 tensor, - shape=[num_detections] - discard_image_pixels: If true, discards the image from the result - Returns: - The de-serialized TF example augmented with the inferred detections. 
- """ - tf_example = tf.train.Example() - (serialized_example, detected_boxes, detected_scores, - detected_classes) = tf.get_default_session().run([ - serialized_example_tensor, detected_boxes_tensor, detected_scores_tensor, - detected_labels_tensor - ]) - detected_boxes = detected_boxes.T - - tf_example.ParseFromString(serialized_example) - feature = tf_example.features.feature - feature[standard_fields.TfExampleFields. - detection_score].float_list.value[:] = detected_scores - feature[standard_fields.TfExampleFields. - detection_bbox_ymin].float_list.value[:] = detected_boxes[0] - feature[standard_fields.TfExampleFields. - detection_bbox_xmin].float_list.value[:] = detected_boxes[1] - feature[standard_fields.TfExampleFields. - detection_bbox_ymax].float_list.value[:] = detected_boxes[2] - feature[standard_fields.TfExampleFields. - detection_bbox_xmax].float_list.value[:] = detected_boxes[3] - feature[standard_fields.TfExampleFields. - detection_class_label].int64_list.value[:] = detected_classes - - if discard_image_pixels: - del feature[standard_fields.TfExampleFields.image_encoded] - - return tf_example diff --git a/object_detection/inference/detection_inference_test.py b/object_detection/inference/detection_inference_test.py deleted file mode 100644 index eabb6b47..00000000 --- a/object_detection/inference/detection_inference_test.py +++ /dev/null @@ -1,176 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -r"""Tests for detection_inference.py.""" - -import os -import StringIO - -import numpy as np -from PIL import Image -import tensorflow as tf - -from object_detection.core import standard_fields -from object_detection.inference import detection_inference -from object_detection.utils import dataset_util - - -def get_mock_tfrecord_path(): - return os.path.join(tf.test.get_temp_dir(), 'mock.tfrec') - - -def create_mock_tfrecord(): - pil_image = Image.fromarray(np.array([[[123, 0, 0]]], dtype=np.uint8), 'RGB') - image_output_stream = StringIO.StringIO() - pil_image.save(image_output_stream, format='png') - encoded_image = image_output_stream.getvalue() - - feature_map = { - 'test_field': - dataset_util.float_list_feature([1, 2, 3, 4]), - standard_fields.TfExampleFields.image_encoded: - dataset_util.bytes_feature(encoded_image), - } - - tf_example = tf.train.Example(features=tf.train.Features(feature=feature_map)) - with tf.python_io.TFRecordWriter(get_mock_tfrecord_path()) as writer: - writer.write(tf_example.SerializeToString()) - - -def get_mock_graph_path(): - return os.path.join(tf.test.get_temp_dir(), 'mock_graph.pb') - - -def create_mock_graph(): - g = tf.Graph() - with g.as_default(): - in_image_tensor = tf.placeholder( - tf.uint8, shape=[1, None, None, 3], name='image_tensor') - tf.constant([2.0], name='num_detections') - tf.constant( - [[[0, 0.8, 0.7, 1], [0.1, 0.2, 0.8, 0.9], [0.2, 0.3, 0.4, 0.5]]], - name='detection_boxes') - tf.constant([[0.1, 0.2, 0.3]], name='detection_scores') - tf.identity( - tf.constant([[1.0, 2.0, 3.0]]) * - tf.reduce_sum(tf.cast(in_image_tensor, dtype=tf.float32)), - name='detection_classes') - graph_def = g.as_graph_def() - - with tf.gfile.Open(get_mock_graph_path(), 'w') as fl: - fl.write(graph_def.SerializeToString()) - - -class 
InferDetectionsTests(tf.test.TestCase): - - def test_simple(self): - create_mock_graph() - create_mock_tfrecord() - - serialized_example_tensor, image_tensor = detection_inference.build_input( - [get_mock_tfrecord_path()]) - self.assertAllEqual(image_tensor.get_shape().as_list(), [1, None, None, 3]) - - (detected_boxes_tensor, detected_scores_tensor, - detected_labels_tensor) = detection_inference.build_inference_graph( - image_tensor, get_mock_graph_path()) - - with self.test_session(use_gpu=False) as sess: - sess.run(tf.global_variables_initializer()) - sess.run(tf.local_variables_initializer()) - tf.train.start_queue_runners() - - tf_example = detection_inference.infer_detections_and_add_to_example( - serialized_example_tensor, detected_boxes_tensor, - detected_scores_tensor, detected_labels_tensor, False) - - self.assertProtoEquals(r""" - features { - feature { - key: "image/detection/bbox/ymin" - value { float_list { value: [0.0, 0.1] } } } - feature { - key: "image/detection/bbox/xmin" - value { float_list { value: [0.8, 0.2] } } } - feature { - key: "image/detection/bbox/ymax" - value { float_list { value: [0.7, 0.8] } } } - feature { - key: "image/detection/bbox/xmax" - value { float_list { value: [1.0, 0.9] } } } - feature { - key: "image/detection/label" - value { int64_list { value: [123, 246] } } } - feature { - key: "image/detection/score" - value { float_list { value: [0.1, 0.2] } } } - feature { - key: "image/encoded" - value { bytes_list { value: - "\211PNG\r\n\032\n\000\000\000\rIHDR\000\000\000\001\000\000" - "\000\001\010\002\000\000\000\220wS\336\000\000\000\022IDATx" - "\234b\250f`\000\000\000\000\377\377\003\000\001u\000|gO\242" - "\213\000\000\000\000IEND\256B`\202" } } } - feature { - key: "test_field" - value { float_list { value: [1.0, 2.0, 3.0, 4.0] } } } } - """, tf_example) - - def test_discard_image(self): - create_mock_graph() - create_mock_tfrecord() - - serialized_example_tensor, image_tensor = detection_inference.build_input( - 
[get_mock_tfrecord_path()]) - (detected_boxes_tensor, detected_scores_tensor, - detected_labels_tensor) = detection_inference.build_inference_graph( - image_tensor, get_mock_graph_path()) - - with self.test_session(use_gpu=False) as sess: - sess.run(tf.global_variables_initializer()) - sess.run(tf.local_variables_initializer()) - tf.train.start_queue_runners() - - tf_example = detection_inference.infer_detections_and_add_to_example( - serialized_example_tensor, detected_boxes_tensor, - detected_scores_tensor, detected_labels_tensor, True) - - self.assertProtoEquals(r""" - features { - feature { - key: "image/detection/bbox/ymin" - value { float_list { value: [0.0, 0.1] } } } - feature { - key: "image/detection/bbox/xmin" - value { float_list { value: [0.8, 0.2] } } } - feature { - key: "image/detection/bbox/ymax" - value { float_list { value: [0.7, 0.8] } } } - feature { - key: "image/detection/bbox/xmax" - value { float_list { value: [1.0, 0.9] } } } - feature { - key: "image/detection/label" - value { int64_list { value: [123, 246] } } } - feature { - key: "image/detection/score" - value { float_list { value: [0.1, 0.2] } } } - feature { - key: "test_field" - value { float_list { value: [1.0, 2.0, 3.0, 4.0] } } } } - """, tf_example) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/inference/infer_detections.py b/object_detection/inference/infer_detections.py deleted file mode 100644 index a251009e..00000000 --- a/object_detection/inference/infer_detections.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -r"""Infers detections on a TFRecord of TFExamples given an inference graph. - -Example usage: - ./infer_detections \ - --input_tfrecord_paths=/path/to/input/tfrecord1,/path/to/input/tfrecord2 \ - --output_tfrecord_path_prefix=/path/to/output/detections.tfrecord \ - --inference_graph=/path/to/frozen_weights_inference_graph.pb - -The output is a TFRecord of TFExamples. Each TFExample from the input is first -augmented with detections from the inference graph and then copied to the -output. - -The input and output nodes of the inference graph are expected to have the same -types, shapes, and semantics, as the input and output nodes of graphs produced -by export_inference_graph.py, when run with --input_type=image_tensor. - -The script can also discard the image pixels in the output. This greatly -reduces the output size and can potentially accelerate reading data in -subsequent processing steps that don't require the images (e.g. computing -metrics). -""" - -import itertools -import tensorflow as tf -from object_detection.inference import detection_inference - -tf.flags.DEFINE_string('input_tfrecord_paths', None, - 'A comma separated list of paths to input TFRecords.') -tf.flags.DEFINE_string('output_tfrecord_path', None, - 'Path to the output TFRecord.') -tf.flags.DEFINE_string('inference_graph', None, - 'Path to the inference graph with embedded weights.') -tf.flags.DEFINE_boolean('discard_image_pixels', False, - 'Discards the images in the output TFExamples. 
This' - ' significantly reduces the output size and is useful' - ' if the subsequent tools don\'t need access to the' - ' images (e.g. when computing evaluation measures).') - -FLAGS = tf.flags.FLAGS - - -def main(_): - tf.logging.set_verbosity(tf.logging.INFO) - - required_flags = ['input_tfrecord_paths', 'output_tfrecord_path', - 'inference_graph'] - for flag_name in required_flags: - if not getattr(FLAGS, flag_name): - raise ValueError('Flag --{} is required'.format(flag_name)) - - with tf.Session() as sess: - input_tfrecord_paths = [ - v for v in FLAGS.input_tfrecord_paths.split(',') if v] - tf.logging.info('Reading input from %d files', len(input_tfrecord_paths)) - serialized_example_tensor, image_tensor = detection_inference.build_input( - input_tfrecord_paths) - tf.logging.info('Reading graph and building model...') - (detected_boxes_tensor, detected_scores_tensor, - detected_labels_tensor) = detection_inference.build_inference_graph( - image_tensor, FLAGS.inference_graph) - - tf.logging.info('Running inference and writing output to {}'.format( - FLAGS.output_tfrecord_path)) - sess.run(tf.local_variables_initializer()) - tf.train.start_queue_runners() - with tf.python_io.TFRecordWriter( - FLAGS.output_tfrecord_path) as tf_record_writer: - try: - for counter in itertools.count(): - tf.logging.log_every_n(tf.logging.INFO, 'Processed %d images...', 10, - counter) - tf_example = detection_inference.infer_detections_and_add_to_example( - serialized_example_tensor, detected_boxes_tensor, - detected_scores_tensor, detected_labels_tensor, - FLAGS.discard_image_pixels) - tf_record_writer.write(tf_example.SerializeToString()) - except tf.errors.OutOfRangeError: - tf.logging.info('Finished processing records') - - -if __name__ == '__main__': - tf.app.run() diff --git a/object_detection/matchers/BUILD b/object_detection/matchers/BUILD deleted file mode 100644 index 1bc5992f..00000000 --- a/object_detection/matchers/BUILD +++ /dev/null @@ -1,51 +0,0 @@ -# Tensorflow 
Object Detection API: Matcher implementations. - -package( - default_visibility = ["//visibility:public"], -) - -licenses(["notice"]) - -# Apache 2.0 -py_library( - name = "argmax_matcher", - srcs = [ - "argmax_matcher.py", - ], - deps = [ - "//tensorflow", - "//tensorflow_models/object_detection/core:matcher", - ], -) - -py_test( - name = "argmax_matcher_test", - srcs = ["argmax_matcher_test.py"], - deps = [ - ":argmax_matcher", - "//tensorflow", - ], -) - -py_library( - name = "bipartite_matcher", - srcs = [ - "bipartite_matcher.py", - ], - deps = [ - "//tensorflow", - "//tensorflow/contrib/image:image_py", - "//tensorflow_models/object_detection/core:matcher", - ], -) - -py_test( - name = "bipartite_matcher_test", - srcs = [ - "bipartite_matcher_test.py", - ], - deps = [ - ":bipartite_matcher", - "//tensorflow", - ], -) diff --git a/object_detection/matchers/__init__.py b/object_detection/matchers/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/object_detection/matchers/__pycache__/__init__.cpython-35.pyc b/object_detection/matchers/__pycache__/__init__.cpython-35.pyc deleted file mode 100644 index 949e5d44..00000000 Binary files a/object_detection/matchers/__pycache__/__init__.cpython-35.pyc and /dev/null differ diff --git a/object_detection/matchers/__pycache__/argmax_matcher.cpython-35.pyc b/object_detection/matchers/__pycache__/argmax_matcher.cpython-35.pyc deleted file mode 100644 index 405b0cad..00000000 Binary files a/object_detection/matchers/__pycache__/argmax_matcher.cpython-35.pyc and /dev/null differ diff --git a/object_detection/matchers/__pycache__/bipartite_matcher.cpython-35.pyc b/object_detection/matchers/__pycache__/bipartite_matcher.cpython-35.pyc deleted file mode 100644 index 7371d354..00000000 Binary files a/object_detection/matchers/__pycache__/bipartite_matcher.cpython-35.pyc and /dev/null differ diff --git a/object_detection/matchers/argmax_matcher.py b/object_detection/matchers/argmax_matcher.py deleted file 
mode 100644 index 97d85185..00000000 --- a/object_detection/matchers/argmax_matcher.py +++ /dev/null @@ -1,189 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Argmax matcher implementation. - -This class takes a similarity matrix and matches columns to rows based on the -maximum value per column. One can specify matched_thresholds and -to prevent columns from matching to rows (generally resulting in a negative -training example) and unmatched_theshold to ignore the match (generally -resulting in neither a positive or negative training example). - -This matcher is used in Fast(er)-RCNN. - -Note: matchers are used in TargetAssigners. There is a create_target_assigner -factory function for popular implementations. -""" - -import tensorflow as tf - -from object_detection.core import matcher - - -class ArgMaxMatcher(matcher.Matcher): - """Matcher based on highest value. - - This class computes matches from a similarity matrix. Each column is matched - to a single row. - - To support object detection target assignment this class enables setting both - matched_threshold (upper threshold) and unmatched_threshold (lower thresholds) - defining three categories of similarity which define whether examples are - positive, negative, or ignored: - (1) similarity >= matched_threshold: Highest similarity. Matched/Positive! 
- (2) matched_threshold > similarity >= unmatched_threshold: Medium similarity. - Depending on negatives_lower_than_unmatched, this is either - Unmatched/Negative OR Ignore. - (3) unmatched_threshold > similarity: Lowest similarity. Depending on flag - negatives_lower_than_unmatched, either Unmatched/Negative OR Ignore. - For ignored matches this class sets the values in the Match object to -2. - """ - - def __init__(self, - matched_threshold, - unmatched_threshold=None, - negatives_lower_than_unmatched=True, - force_match_for_each_row=False): - """Construct ArgMaxMatcher. - - Args: - matched_threshold: Threshold for positive matches. Positive if - sim >= matched_threshold, where sim is the maximum value of the - similarity matrix for a given column. Set to None for no threshold. - unmatched_threshold: Threshold for negative matches. Negative if - sim < unmatched_threshold. Defaults to matched_threshold - when set to None. - negatives_lower_than_unmatched: Boolean which defaults to True. If True - then negative matches are the ones below the unmatched_threshold, - whereas ignored matches are in between the matched and umatched - threshold. If False, then negative matches are in between the matched - and unmatched threshold, and everything lower than unmatched is ignored. - force_match_for_each_row: If True, ensures that each row is matched to - at least one column (which is not guaranteed otherwise if the - matched_threshold is high). Defaults to False. See - argmax_matcher_test.testMatcherForceMatch() for an example. - - Raises: - ValueError: if unmatched_threshold is set but matched_threshold is not set - or if unmatched_threshold > matched_threshold. 
- """ - if (matched_threshold is None) and (unmatched_threshold is not None): - raise ValueError('Need to also define matched_threshold when' - 'unmatched_threshold is defined') - self._matched_threshold = matched_threshold - if unmatched_threshold is None: - self._unmatched_threshold = matched_threshold - else: - if unmatched_threshold > matched_threshold: - raise ValueError('unmatched_threshold needs to be smaller or equal' - 'to matched_threshold') - self._unmatched_threshold = unmatched_threshold - if not negatives_lower_than_unmatched: - if self._unmatched_threshold == self._matched_threshold: - raise ValueError('When negatives are in between matched and ' - 'unmatched thresholds, these cannot be of equal ' - 'value. matched: %s, unmatched: %s', - self._matched_threshold, self._unmatched_threshold) - self._force_match_for_each_row = force_match_for_each_row - self._negatives_lower_than_unmatched = negatives_lower_than_unmatched - - def _match(self, similarity_matrix): - """Tries to match each column of the similarity matrix to a row. - - Args: - similarity_matrix: tensor of shape [N, M] representing any similarity - metric. - - Returns: - Match object with corresponding matches for each of M columns. - """ - - def _match_when_rows_are_empty(): - """Performs matching when the rows of similarity matrix are empty. - - When the rows are empty, all detections are false positives. So we return - a tensor of -1's to indicate that the columns do not match to any rows. - - Returns: - matches: int32 tensor indicating the row each column matches to. - """ - return -1 * tf.ones([tf.shape(similarity_matrix)[1]], dtype=tf.int32) - - def _match_when_rows_are_non_empty(): - """Performs matching when the rows of similarity matrix are non empty. - - Returns: - matches: int32 tensor indicating the row each column matches to. 
- """ - # Matches for each column - matches = tf.argmax(similarity_matrix, 0) - - # Deal with matched and unmatched threshold - if self._matched_threshold is not None: - # Get logical indices of ignored and unmatched columns as tf.int64 - matched_vals = tf.reduce_max(similarity_matrix, 0) - below_unmatched_threshold = tf.greater(self._unmatched_threshold, - matched_vals) - between_thresholds = tf.logical_and( - tf.greater_equal(matched_vals, self._unmatched_threshold), - tf.greater(self._matched_threshold, matched_vals)) - - if self._negatives_lower_than_unmatched: - matches = self._set_values_using_indicator(matches, - below_unmatched_threshold, - -1) - matches = self._set_values_using_indicator(matches, - between_thresholds, - -2) - else: - matches = self._set_values_using_indicator(matches, - below_unmatched_threshold, - -2) - matches = self._set_values_using_indicator(matches, - between_thresholds, - -1) - - if self._force_match_for_each_row: - forced_matches_ids = tf.cast(tf.argmax(similarity_matrix, 1), tf.int32) - - # Set matches[forced_matches_ids] = [0, ..., R], R is number of rows. - row_range = tf.range(tf.shape(similarity_matrix)[0]) - col_range = tf.range(tf.shape(similarity_matrix)[1]) - forced_matches_values = tf.cast(row_range, matches.dtype) - keep_matches_ids, _ = tf.setdiff1d(col_range, forced_matches_ids) - keep_matches_values = tf.gather(matches, keep_matches_ids) - matches = tf.dynamic_stitch( - [forced_matches_ids, - keep_matches_ids], [forced_matches_values, keep_matches_values]) - - return tf.cast(matches, tf.int32) - - return tf.cond( - tf.greater(tf.shape(similarity_matrix)[0], 0), - _match_when_rows_are_non_empty, _match_when_rows_are_empty) - - def _set_values_using_indicator(self, x, indicator, val): - """Set the indicated fields of x to val. - - Args: - x: tensor. - indicator: boolean with same shape as x. - val: scalar with value to set. - - Returns: - modified tensor. 
- """ - indicator = tf.cast(indicator, x.dtype) - return tf.add(tf.multiply(x, 1 - indicator), val * indicator) diff --git a/object_detection/matchers/argmax_matcher_test.py b/object_detection/matchers/argmax_matcher_test.py deleted file mode 100644 index 36740f4b..00000000 --- a/object_detection/matchers/argmax_matcher_test.py +++ /dev/null @@ -1,237 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.matchers.argmax_matcher.""" - -import numpy as np -import tensorflow as tf - -from object_detection.matchers import argmax_matcher - - -class ArgMaxMatcherTest(tf.test.TestCase): - - def test_return_correct_matches_with_default_thresholds(self): - similarity = np.array([[1., 1, 1, 3, 1], - [2, -1, 2, 0, 4], - [3, 0, -1, 0, 0]]) - - matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=None) - expected_matched_rows = np.array([2, 0, 1, 0, 1]) - - sim = tf.constant(similarity) - match = matcher.match(sim) - matched_cols = match.matched_column_indices() - matched_rows = match.matched_row_indices() - unmatched_cols = match.unmatched_column_indices() - - with self.test_session() as sess: - res_matched_cols = sess.run(matched_cols) - res_matched_rows = sess.run(matched_rows) - res_unmatched_cols = sess.run(unmatched_cols) - - self.assertAllEqual(res_matched_rows, expected_matched_rows) - 
self.assertAllEqual(res_matched_cols, np.arange(similarity.shape[1])) - self.assertEmpty(res_unmatched_cols) - - def test_return_correct_matches_with_empty_rows(self): - - matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=None) - sim = 0.2*tf.ones([0, 5]) - match = matcher.match(sim) - unmatched_cols = match.unmatched_column_indices() - - with self.test_session() as sess: - res_unmatched_cols = sess.run(unmatched_cols) - self.assertAllEqual(res_unmatched_cols, np.arange(5)) - - def test_return_correct_matches_with_matched_threshold(self): - similarity = np.array([[1, 1, 1, 3, 1], - [2, -1, 2, 0, 4], - [3, 0, -1, 0, 0]], dtype=np.int32) - - matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=3) - expected_matched_cols = np.array([0, 3, 4]) - expected_matched_rows = np.array([2, 0, 1]) - expected_unmatched_cols = np.array([1, 2]) - - sim = tf.constant(similarity) - match = matcher.match(sim) - matched_cols = match.matched_column_indices() - matched_rows = match.matched_row_indices() - unmatched_cols = match.unmatched_column_indices() - - init_op = tf.global_variables_initializer() - - with self.test_session() as sess: - sess.run(init_op) - res_matched_cols = sess.run(matched_cols) - res_matched_rows = sess.run(matched_rows) - res_unmatched_cols = sess.run(unmatched_cols) - - self.assertAllEqual(res_matched_rows, expected_matched_rows) - self.assertAllEqual(res_matched_cols, expected_matched_cols) - self.assertAllEqual(res_unmatched_cols, expected_unmatched_cols) - - def test_return_correct_matches_with_matched_and_unmatched_threshold(self): - similarity = np.array([[1, 1, 1, 3, 1], - [2, -1, 2, 0, 4], - [3, 0, -1, 0, 0]], dtype=np.int32) - - matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=3, - unmatched_threshold=2) - expected_matched_cols = np.array([0, 3, 4]) - expected_matched_rows = np.array([2, 0, 1]) - expected_unmatched_cols = np.array([1]) # col 2 has too high maximum val - - sim = tf.constant(similarity) - match = matcher.match(sim) - 
matched_cols = match.matched_column_indices() - matched_rows = match.matched_row_indices() - unmatched_cols = match.unmatched_column_indices() - - with self.test_session() as sess: - res_matched_cols = sess.run(matched_cols) - res_matched_rows = sess.run(matched_rows) - res_unmatched_cols = sess.run(unmatched_cols) - - self.assertAllEqual(res_matched_rows, expected_matched_rows) - self.assertAllEqual(res_matched_cols, expected_matched_cols) - self.assertAllEqual(res_unmatched_cols, expected_unmatched_cols) - - def test_return_correct_matches_negatives_lower_than_unmatched_false(self): - similarity = np.array([[1, 1, 1, 3, 1], - [2, -1, 2, 0, 4], - [3, 0, -1, 0, 0]], dtype=np.int32) - - matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=3, - unmatched_threshold=2, - negatives_lower_than_unmatched=False) - expected_matched_cols = np.array([0, 3, 4]) - expected_matched_rows = np.array([2, 0, 1]) - expected_unmatched_cols = np.array([2]) # col 1 has too low maximum val - - sim = tf.constant(similarity) - match = matcher.match(sim) - matched_cols = match.matched_column_indices() - matched_rows = match.matched_row_indices() - unmatched_cols = match.unmatched_column_indices() - - with self.test_session() as sess: - res_matched_cols = sess.run(matched_cols) - res_matched_rows = sess.run(matched_rows) - res_unmatched_cols = sess.run(unmatched_cols) - - self.assertAllEqual(res_matched_rows, expected_matched_rows) - self.assertAllEqual(res_matched_cols, expected_matched_cols) - self.assertAllEqual(res_unmatched_cols, expected_unmatched_cols) - - def test_return_correct_matches_unmatched_row_not_using_force_match(self): - similarity = np.array([[1, 1, 1, 3, 1], - [-1, 0, -2, -2, -1], - [3, 0, -1, 2, 0]], dtype=np.int32) - - matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=3, - unmatched_threshold=2) - expected_matched_cols = np.array([0, 3]) - expected_matched_rows = np.array([2, 0]) - expected_unmatched_cols = np.array([1, 2, 4]) - - sim = 
tf.constant(similarity) - match = matcher.match(sim) - matched_cols = match.matched_column_indices() - matched_rows = match.matched_row_indices() - unmatched_cols = match.unmatched_column_indices() - - with self.test_session() as sess: - res_matched_cols = sess.run(matched_cols) - res_matched_rows = sess.run(matched_rows) - res_unmatched_cols = sess.run(unmatched_cols) - - self.assertAllEqual(res_matched_rows, expected_matched_rows) - self.assertAllEqual(res_matched_cols, expected_matched_cols) - self.assertAllEqual(res_unmatched_cols, expected_unmatched_cols) - - def test_return_correct_matches_unmatched_row_while_using_force_match(self): - similarity = np.array([[1, 1, 1, 3, 1], - [-1, 0, -2, -2, -1], - [3, 0, -1, 2, 0]], dtype=np.int32) - - matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=3, - unmatched_threshold=2, - force_match_for_each_row=True) - expected_matched_cols = np.array([0, 1, 3]) - expected_matched_rows = np.array([2, 1, 0]) - expected_unmatched_cols = np.array([2, 4]) # col 2 has too high max val - - sim = tf.constant(similarity) - match = matcher.match(sim) - matched_cols = match.matched_column_indices() - matched_rows = match.matched_row_indices() - unmatched_cols = match.unmatched_column_indices() - - with self.test_session() as sess: - res_matched_cols = sess.run(matched_cols) - res_matched_rows = sess.run(matched_rows) - res_unmatched_cols = sess.run(unmatched_cols) - - self.assertAllEqual(res_matched_rows, expected_matched_rows) - self.assertAllEqual(res_matched_cols, expected_matched_cols) - self.assertAllEqual(res_unmatched_cols, expected_unmatched_cols) - - def test_valid_arguments_corner_case(self): - argmax_matcher.ArgMaxMatcher(matched_threshold=1, - unmatched_threshold=1) - - def test_invalid_arguments_corner_case_negatives_lower_than_thres_false(self): - with self.assertRaises(ValueError): - argmax_matcher.ArgMaxMatcher(matched_threshold=1, - unmatched_threshold=1, - negatives_lower_than_unmatched=False) - - def 
test_invalid_arguments_no_matched_threshold(self): - with self.assertRaises(ValueError): - argmax_matcher.ArgMaxMatcher(matched_threshold=None, - unmatched_threshold=4) - - def test_invalid_arguments_unmatched_thres_larger_than_matched_thres(self): - with self.assertRaises(ValueError): - argmax_matcher.ArgMaxMatcher(matched_threshold=1, - unmatched_threshold=2) - - def test_set_values_using_indicator(self): - input_a = np.array([3, 4, 5, 1, 4, 3, 2]) - expected_b = np.array([3, 0, 0, 1, 0, 3, 2]) # Set a>3 to 0 - expected_c = np.array( - [3., 4., 5., -1., 4., 3., -1.]) # Set a<3 to -1. Float32 - idxb_ = input_a > 3 - idxc_ = input_a < 3 - - matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=None) - - a = tf.constant(input_a) - idxb = tf.constant(idxb_) - idxc = tf.constant(idxc_) - b = matcher._set_values_using_indicator(a, idxb, 0) - c = matcher._set_values_using_indicator(tf.cast(a, tf.float32), idxc, -1) - with self.test_session() as sess: - res_b = sess.run(b) - res_c = sess.run(c) - self.assertAllEqual(res_b, expected_b) - self.assertAllEqual(res_c, expected_c) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/matchers/bipartite_matcher.py b/object_detection/matchers/bipartite_matcher.py deleted file mode 100644 index 3d717d12..00000000 --- a/object_detection/matchers/bipartite_matcher.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Bipartite matcher implementation.""" - -import tensorflow as tf - -from tensorflow.contrib.image.python.ops import image_ops -from object_detection.core import matcher - - -class GreedyBipartiteMatcher(matcher.Matcher): - """Wraps a Tensorflow greedy bipartite matcher.""" - - def _match(self, similarity_matrix, num_valid_rows=-1): - """Bipartite matches a collection rows and columns. A greedy bi-partite. - - TODO: Add num_valid_columns options to match only that many columns with - all the rows. - - Args: - similarity_matrix: Float tensor of shape [N, M] with pairwise similarity - where higher values mean more similar. - num_valid_rows: A scalar or a 1-D tensor with one element describing the - number of valid rows of similarity_matrix to consider for the bipartite - matching. If set to be negative, then all rows from similarity_matrix - are used. - - Returns: - match_results: int32 tensor of shape [M] with match_results[i]=-1 - meaning that column i is not matched and otherwise that it is matched to - row match_results[i]. - """ - # Convert similarity matrix to distance matrix as tf.image.bipartite tries - # to find minimum distance matches. - distance_matrix = -1 * similarity_matrix - _, match_results = image_ops.bipartite_match( - distance_matrix, num_valid_rows) - match_results = tf.reshape(match_results, [-1]) - match_results = tf.cast(match_results, tf.int32) - return match_results diff --git a/object_detection/matchers/bipartite_matcher_test.py b/object_detection/matchers/bipartite_matcher_test.py deleted file mode 100644 index 2ee45a80..00000000 --- a/object_detection/matchers/bipartite_matcher_test.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.core.bipartite_matcher.""" - -import tensorflow as tf - -from object_detection.matchers import bipartite_matcher - - -class GreedyBipartiteMatcherTest(tf.test.TestCase): - - def test_get_expected_matches_when_all_rows_are_valid(self): - similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]]) - num_valid_rows = 2 - expected_match_results = [-1, 1, 0] - - matcher = bipartite_matcher.GreedyBipartiteMatcher() - match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows) - with self.test_session() as sess: - match_results_out = sess.run(match._match_results) - self.assertAllEqual(match_results_out, expected_match_results) - - def test_get_expected_matches_with_valid_rows_set_to_minus_one(self): - similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]]) - num_valid_rows = -1 - expected_match_results = [-1, 1, 0] - - matcher = bipartite_matcher.GreedyBipartiteMatcher() - match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows) - with self.test_session() as sess: - match_results_out = sess.run(match._match_results) - self.assertAllEqual(match_results_out, expected_match_results) - - def test_get_no_matches_with_zero_valid_rows(self): - similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]]) - num_valid_rows = 0 - expected_match_results = [-1, -1, -1] - - matcher = bipartite_matcher.GreedyBipartiteMatcher() - match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows) - with 
self.test_session() as sess: - match_results_out = sess.run(match._match_results) - self.assertAllEqual(match_results_out, expected_match_results) - - def test_get_expected_matches_with_only_one_valid_row(self): - similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]]) - num_valid_rows = 1 - expected_match_results = [-1, -1, 0] - - matcher = bipartite_matcher.GreedyBipartiteMatcher() - match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows) - with self.test_session() as sess: - match_results_out = sess.run(match._match_results) - self.assertAllEqual(match_results_out, expected_match_results) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/meta_architectures/BUILD b/object_detection/meta_architectures/BUILD deleted file mode 100644 index 0172a9c0..00000000 --- a/object_detection/meta_architectures/BUILD +++ /dev/null @@ -1,109 +0,0 @@ -# Tensorflow Object Detection API: Meta-architectures. - -package( - default_visibility = ["//visibility:public"], -) - -licenses(["notice"]) - -# Apache 2.0 - -py_library( - name = "ssd_meta_arch", - srcs = ["ssd_meta_arch.py"], - deps = [ - "//tensorflow", - "//tensorflow_models/object_detection/core:box_list", - "//tensorflow_models/object_detection/core:box_predictor", - "//tensorflow_models/object_detection/core:model", - "//tensorflow_models/object_detection/core:target_assigner", - "//tensorflow_models/object_detection/utils:shape_utils", - "//tensorflow_models/object_detection/utils:visualization_utils", - ], -) - -py_test( - name = "ssd_meta_arch_test", - srcs = ["ssd_meta_arch_test.py"], - deps = [ - ":ssd_meta_arch", - "//tensorflow", - "//tensorflow/python:training", - "//tensorflow_models/object_detection/core:anchor_generator", - "//tensorflow_models/object_detection/core:box_list", - "//tensorflow_models/object_detection/core:losses", - "//tensorflow_models/object_detection/core:post_processing", - 
"//tensorflow_models/object_detection/core:region_similarity_calculator", - "//tensorflow_models/object_detection/utils:test_utils", - ], -) - -py_library( - name = "faster_rcnn_meta_arch", - srcs = [ - "faster_rcnn_meta_arch.py", - ], - deps = [ - "//tensorflow", - "//tensorflow_models/object_detection/anchor_generators:grid_anchor_generator", - "//tensorflow_models/object_detection/core:balanced_positive_negative_sampler", - "//tensorflow_models/object_detection/core:box_list", - "//tensorflow_models/object_detection/core:box_list_ops", - "//tensorflow_models/object_detection/core:box_predictor", - "//tensorflow_models/object_detection/core:losses", - "//tensorflow_models/object_detection/core:model", - "//tensorflow_models/object_detection/core:post_processing", - "//tensorflow_models/object_detection/core:standard_fields", - "//tensorflow_models/object_detection/core:target_assigner", - "//tensorflow_models/object_detection/utils:ops", - "//tensorflow_models/object_detection/utils:shape_utils", - ], -) - -py_library( - name = "faster_rcnn_meta_arch_test_lib", - srcs = [ - "faster_rcnn_meta_arch_test_lib.py", - ], - deps = [ - ":faster_rcnn_meta_arch", - "//tensorflow", - "//tensorflow_models/object_detection/anchor_generators:grid_anchor_generator", - "//tensorflow_models/object_detection/builders:box_predictor_builder", - "//tensorflow_models/object_detection/builders:hyperparams_builder", - "//tensorflow_models/object_detection/builders:post_processing_builder", - "//tensorflow_models/object_detection/core:losses", - "//tensorflow_models/object_detection/protos:box_predictor_py_pb2", - "//tensorflow_models/object_detection/protos:hyperparams_py_pb2", - "//tensorflow_models/object_detection/protos:post_processing_py_pb2", - ], -) - -py_test( - name = "faster_rcnn_meta_arch_test", - srcs = ["faster_rcnn_meta_arch_test.py"], - deps = [ - ":faster_rcnn_meta_arch_test_lib", - ], -) - -py_library( - name = "rfcn_meta_arch", - srcs = ["rfcn_meta_arch.py"], - deps = 
[ - ":faster_rcnn_meta_arch", - "//tensorflow", - "//tensorflow_models/object_detection/core:box_predictor", - "//tensorflow_models/object_detection/utils:ops", - ], -) - -py_test( - name = "rfcn_meta_arch_test", - srcs = ["rfcn_meta_arch_test.py"], - deps = [ - ":faster_rcnn_meta_arch_test_lib", - ":rfcn_meta_arch", - "//tensorflow", - ], -) diff --git a/object_detection/meta_architectures/__init__.py b/object_detection/meta_architectures/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/object_detection/meta_architectures/__pycache__/__init__.cpython-35.pyc b/object_detection/meta_architectures/__pycache__/__init__.cpython-35.pyc deleted file mode 100644 index 188b1fa1..00000000 Binary files a/object_detection/meta_architectures/__pycache__/__init__.cpython-35.pyc and /dev/null differ diff --git a/object_detection/meta_architectures/__pycache__/faster_rcnn_meta_arch.cpython-35.pyc b/object_detection/meta_architectures/__pycache__/faster_rcnn_meta_arch.cpython-35.pyc deleted file mode 100644 index 70a966dd..00000000 Binary files a/object_detection/meta_architectures/__pycache__/faster_rcnn_meta_arch.cpython-35.pyc and /dev/null differ diff --git a/object_detection/meta_architectures/__pycache__/rfcn_meta_arch.cpython-35.pyc b/object_detection/meta_architectures/__pycache__/rfcn_meta_arch.cpython-35.pyc deleted file mode 100644 index 5d67872e..00000000 Binary files a/object_detection/meta_architectures/__pycache__/rfcn_meta_arch.cpython-35.pyc and /dev/null differ diff --git a/object_detection/meta_architectures/__pycache__/ssd_meta_arch.cpython-35.pyc b/object_detection/meta_architectures/__pycache__/ssd_meta_arch.cpython-35.pyc deleted file mode 100644 index 45168a64..00000000 Binary files a/object_detection/meta_architectures/__pycache__/ssd_meta_arch.cpython-35.pyc and /dev/null differ diff --git a/object_detection/meta_architectures/faster_rcnn_meta_arch.py b/object_detection/meta_architectures/faster_rcnn_meta_arch.py deleted file 
mode 100644 index ae878b93..00000000 --- a/object_detection/meta_architectures/faster_rcnn_meta_arch.py +++ /dev/null @@ -1,1677 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Faster R-CNN meta-architecture definition. - -General tensorflow implementation of Faster R-CNN detection models. - -See Faster R-CNN: Ren, Shaoqing, et al. -"Faster R-CNN: Towards real-time object detection with region proposal -networks." Advances in neural information processing systems. 2015. - -We allow for two modes: first_stage_only=True and first_stage_only=False. In -the former setting, all of the user facing methods (e.g., predict, postprocess, -loss) can be used as if the model consisted only of the RPN, returning class -agnostic proposals (these can be thought of as approximate detections with no -associated class information). In the latter setting, proposals are computed, -then passed through a second stage "box classifier" to yield (multi-class) -detections. - -Implementations of Faster R-CNN models must define a new -FasterRCNNFeatureExtractor and override three methods: `preprocess`, -`_extract_proposal_features` (the first stage of the model), and -`_extract_box_classifier_features` (the second stage of the model). Optionally, -the `restore_fn` method can be overridden. See tests for an example. 
- -A few important notes: -+ Batching conventions: We support batched inference and training where -all images within a batch have the same resolution. Batch sizes are determined -dynamically via the shape of the input tensors (rather than being specified -directly as, e.g., a model constructor). - -A complication is that due to non-max suppression, we are not guaranteed to get -the same number of proposals from the first stage RPN (region proposal network) -for each image (though in practice, we should often get the same number of -proposals). For this reason we pad to a max number of proposals per image -within a batch. This `self.max_num_proposals` property is set to the -`first_stage_max_proposals` parameter at inference time and the -`second_stage_batch_size` at training time since we subsample the batch to -be sent through the box classifier during training. - -For the second stage of the pipeline, we arrange the proposals for all images -within the batch along a single batch dimension. For example, the input to -_extract_box_classifier_features is a tensor of shape -`[total_num_proposals, crop_height, crop_width, depth]` where -total_num_proposals is batch_size * self.max_num_proposals. (And note that per -the above comment, a subset of these entries correspond to zero paddings.) - -+ Coordinate representations: -Following the API (see model.DetectionModel definition), our outputs after -postprocessing operations are always normalized boxes however, internally, we -sometimes convert to absolute --- e.g. for loss computation. In particular, -anchors and proposal_boxes are both represented as absolute coordinates. 
-""" -from abc import abstractmethod -from functools import partial -import tensorflow as tf - -from object_detection.anchor_generators import grid_anchor_generator -from object_detection.core import balanced_positive_negative_sampler as sampler -from object_detection.core import box_list -from object_detection.core import box_list_ops -from object_detection.core import box_predictor -from object_detection.core import losses -from object_detection.core import model -from object_detection.core import post_processing -from object_detection.core import standard_fields as fields -from object_detection.core import target_assigner -from object_detection.utils import ops -from object_detection.utils import shape_utils - -slim = tf.contrib.slim - - -class FasterRCNNFeatureExtractor(object): - """Faster R-CNN Feature Extractor definition.""" - - def __init__(self, - is_training, - first_stage_features_stride, - batch_norm_trainable=False, - reuse_weights=None, - weight_decay=0.0): - """Constructor. - - Args: - is_training: A boolean indicating whether the training version of the - computation graph should be constructed. - first_stage_features_stride: Output stride of extracted RPN feature map. - batch_norm_trainable: Whether to update batch norm parameters during - training or not. When training with a relative large batch size - (e.g. 8), it could be desirable to enable batch norm update. - reuse_weights: Whether to reuse variables. Default is None. - weight_decay: float weight decay for feature extractor (default: 0.0). 
- """ - self._is_training = is_training - self._first_stage_features_stride = first_stage_features_stride - self._train_batch_norm = (batch_norm_trainable and is_training) - self._reuse_weights = reuse_weights - self._weight_decay = weight_decay - - @abstractmethod - def preprocess(self, resized_inputs): - """Feature-extractor specific preprocessing (minus image resizing).""" - pass - - def extract_proposal_features(self, preprocessed_inputs, scope): - """Extracts first stage RPN features. - - This function is responsible for extracting feature maps from preprocessed - images. These features are used by the region proposal network (RPN) to - predict proposals. - - Args: - preprocessed_inputs: A [batch, height, width, channels] float tensor - representing a batch of images. - scope: A scope name. - - Returns: - rpn_feature_map: A tensor with shape [batch, height, width, depth] - """ - with tf.variable_scope(scope, values=[preprocessed_inputs]): - return self._extract_proposal_features(preprocessed_inputs, scope) - - @abstractmethod - def _extract_proposal_features(self, preprocessed_inputs, scope): - """Extracts first stage RPN features, to be overridden.""" - pass - - def extract_box_classifier_features(self, proposal_feature_maps, scope): - """Extracts second stage box classifier features. - - Args: - proposal_feature_maps: A 4-D float tensor with shape - [batch_size * self.max_num_proposals, crop_height, crop_width, depth] - representing the feature map cropped to each proposal. - scope: A scope name. - - Returns: - proposal_classifier_features: A 4-D float tensor with shape - [batch_size * self.max_num_proposals, height, width, depth] - representing box classifier features for each proposal. 
- """ - with tf.variable_scope(scope, values=[proposal_feature_maps]): - return self._extract_box_classifier_features(proposal_feature_maps, scope) - - @abstractmethod - def _extract_box_classifier_features(self, proposal_feature_maps, scope): - """Extracts second stage box classifier features, to be overridden.""" - pass - - def restore_from_classification_checkpoint_fn( - self, - first_stage_feature_extractor_scope, - second_stage_feature_extractor_scope): - """Returns a map of variables to load from a foreign checkpoint. - - Args: - first_stage_feature_extractor_scope: A scope name for the first stage - feature extractor. - second_stage_feature_extractor_scope: A scope name for the second stage - feature extractor. - - Returns: - A dict mapping variable names (to load from a checkpoint) to variables in - the model graph. - """ - variables_to_restore = {} - for variable in tf.global_variables(): - for scope_name in [first_stage_feature_extractor_scope, - second_stage_feature_extractor_scope]: - if variable.op.name.startswith(scope_name): - var_name = variable.op.name.replace(scope_name + '/', '') - variables_to_restore[var_name] = variable - return variables_to_restore - - -class FasterRCNNMetaArch(model.DetectionModel): - """Faster R-CNN Meta-architecture definition.""" - - def __init__(self, - is_training, - num_classes, - image_resizer_fn, - feature_extractor, - first_stage_only, - first_stage_anchor_generator, - first_stage_atrous_rate, - first_stage_box_predictor_arg_scope, - first_stage_box_predictor_kernel_size, - first_stage_box_predictor_depth, - first_stage_minibatch_size, - first_stage_positive_balance_fraction, - first_stage_nms_score_threshold, - first_stage_nms_iou_threshold, - first_stage_max_proposals, - first_stage_localization_loss_weight, - first_stage_objectness_loss_weight, - initial_crop_size, - maxpool_kernel_size, - maxpool_stride, - second_stage_mask_rcnn_box_predictor, - second_stage_batch_size, - second_stage_balance_fraction, - 
second_stage_non_max_suppression_fn, - second_stage_score_conversion_fn, - second_stage_localization_loss_weight, - second_stage_classification_loss_weight, - second_stage_classification_loss, - second_stage_mask_prediction_loss_weight=1.0, - hard_example_miner=None, - parallel_iterations=16): - """FasterRCNNMetaArch Constructor. - - Args: - is_training: A boolean indicating whether the training version of the - computation graph should be constructed. - num_classes: Number of classes. Note that num_classes *does not* - include the background category, so if groundtruth labels take values - in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the - assigned classification targets can range from {0,... K}). - image_resizer_fn: A callable for image resizing. This callable - takes a rank-3 image tensor of shape [height, width, channels] - (corresponding to a single image) and returns a rank-3 image tensor, - possibly with new spatial dimensions. See - builders/image_resizer_builder.py. - feature_extractor: A FasterRCNNFeatureExtractor object. - first_stage_only: Whether to construct only the Region Proposal Network - (RPN) part of the model. - first_stage_anchor_generator: An anchor_generator.AnchorGenerator object - (note that currently we only support - grid_anchor_generator.GridAnchorGenerator objects) - first_stage_atrous_rate: A single integer indicating the atrous rate for - the single convolution op which is applied to the `rpn_features_to_crop` - tensor to obtain a tensor to be used for box prediction. Some feature - extractors optionally allow for producing feature maps computed at - denser resolutions. The atrous rate is used to compensate for the - denser feature maps by using an effectively larger receptive field. - (This should typically be set to 1). - first_stage_box_predictor_arg_scope: Slim arg_scope for conv2d, - separable_conv2d and fully_connected ops for the RPN box predictor. 
- first_stage_box_predictor_kernel_size: Kernel size to use for the - convolution op just prior to RPN box predictions. - first_stage_box_predictor_depth: Output depth for the convolution op - just prior to RPN box predictions. - first_stage_minibatch_size: The "batch size" to use for computing the - objectness and location loss of the region proposal network. This - "batch size" refers to the number of anchors selected as contributing - to the loss function for any given image within the image batch and is - only called "batch_size" due to terminology from the Faster R-CNN paper. - first_stage_positive_balance_fraction: Fraction of positive examples - per image for the RPN. The recommended value for Faster RCNN is 0.5. - first_stage_nms_score_threshold: Score threshold for non max suppression - for the Region Proposal Network (RPN). This value is expected to be in - [0, 1] as it is applied directly after a softmax transformation. The - recommended value for Faster R-CNN is 0. - first_stage_nms_iou_threshold: The Intersection Over Union (IOU) threshold - for performing Non-Max Suppression (NMS) on the boxes predicted by the - Region Proposal Network (RPN). - first_stage_max_proposals: Maximum number of boxes to retain after - performing Non-Max Suppression (NMS) on the boxes predicted by the - Region Proposal Network (RPN). - first_stage_localization_loss_weight: A float - first_stage_objectness_loss_weight: A float - initial_crop_size: A single integer indicating the output size - (width and height are set to be the same) of the initial bilinear - interpolation based cropping during ROI pooling. - maxpool_kernel_size: A single integer indicating the kernel size of the - max pool op on the cropped feature map during ROI pooling. - maxpool_stride: A single integer indicating the stride of the max pool - op on the cropped feature map during ROI pooling. - second_stage_mask_rcnn_box_predictor: Mask R-CNN box predictor to use for - the second stage. 
- second_stage_batch_size: The batch size used for computing the - classification and refined location loss of the box classifier. This - "batch size" refers to the number of proposals selected as contributing - to the loss function for any given image within the image batch and is - only called "batch_size" due to terminology from the Faster R-CNN paper. - second_stage_balance_fraction: Fraction of positive examples to use - per image for the box classifier. The recommended value for Faster RCNN - is 0.25. - second_stage_non_max_suppression_fn: batch_multiclass_non_max_suppression - callable that takes `boxes`, `scores`, optional `clip_window` and - optional (kwarg) `mask` inputs (with all other inputs already set) - and returns a dictionary containing tensors with keys: - `detection_boxes`, `detection_scores`, `detection_classes`, - `num_detections`, and (optionally) `detection_masks`. See - `post_processing.batch_multiclass_non_max_suppression` for the type and - shape of these tensors. - second_stage_score_conversion_fn: Callable elementwise nonlinearity - (that takes tensors as inputs and returns tensors). This is usually - used to convert logits to probabilities. - second_stage_localization_loss_weight: A float indicating the scale factor - for second stage localization loss. - second_stage_classification_loss_weight: A float indicating the scale - factor for second stage classification loss. - second_stage_classification_loss: Classification loss used by the second - stage classifier. Either losses.WeightedSigmoidClassificationLoss or - losses.WeightedSoftmaxClassificationLoss. - second_stage_mask_prediction_loss_weight: A float indicating the scale - factor for second stage mask prediction loss. This is applicable only if - second stage box predictor is configured to predict masks. - hard_example_miner: A losses.HardExampleMiner object (can be None). 
- parallel_iterations: (Optional) The number of iterations allowed to run - in parallel for calls to tf.map_fn. - Raises: - ValueError: If `second_stage_batch_size` > `first_stage_max_proposals` at - training time. - ValueError: If first_stage_anchor_generator is not of type - grid_anchor_generator.GridAnchorGenerator. - """ - super(FasterRCNNMetaArch, self).__init__(num_classes=num_classes) - - if is_training and second_stage_batch_size > first_stage_max_proposals: - raise ValueError('second_stage_batch_size should be no greater than ' - 'first_stage_max_proposals.') - if not isinstance(first_stage_anchor_generator, - grid_anchor_generator.GridAnchorGenerator): - raise ValueError('first_stage_anchor_generator must be of type ' - 'grid_anchor_generator.GridAnchorGenerator.') - - self._is_training = is_training - self._image_resizer_fn = image_resizer_fn - self._feature_extractor = feature_extractor - self._first_stage_only = first_stage_only - - # The first class is reserved as background. 
- unmatched_cls_target = tf.constant( - [1] + self._num_classes * [0], dtype=tf.float32) - self._proposal_target_assigner = target_assigner.create_target_assigner( - 'FasterRCNN', 'proposal') - self._detector_target_assigner = target_assigner.create_target_assigner( - 'FasterRCNN', 'detection', unmatched_cls_target=unmatched_cls_target) - # Both proposal and detector target assigners use the same box coder - self._box_coder = self._proposal_target_assigner.box_coder - - # (First stage) Region proposal network parameters - self._first_stage_anchor_generator = first_stage_anchor_generator - self._first_stage_atrous_rate = first_stage_atrous_rate - self._first_stage_box_predictor_arg_scope = ( - first_stage_box_predictor_arg_scope) - self._first_stage_box_predictor_kernel_size = ( - first_stage_box_predictor_kernel_size) - self._first_stage_box_predictor_depth = first_stage_box_predictor_depth - self._first_stage_minibatch_size = first_stage_minibatch_size - self._first_stage_sampler = sampler.BalancedPositiveNegativeSampler( - positive_fraction=first_stage_positive_balance_fraction) - self._first_stage_box_predictor = box_predictor.ConvolutionalBoxPredictor( - self._is_training, num_classes=1, - conv_hyperparams=self._first_stage_box_predictor_arg_scope, - min_depth=0, max_depth=0, num_layers_before_predictor=0, - use_dropout=False, dropout_keep_prob=1.0, kernel_size=1, - box_code_size=self._box_coder.code_size) - - self._first_stage_nms_score_threshold = first_stage_nms_score_threshold - self._first_stage_nms_iou_threshold = first_stage_nms_iou_threshold - self._first_stage_max_proposals = first_stage_max_proposals - - self._first_stage_localization_loss = ( - losses.WeightedSmoothL1LocalizationLoss(anchorwise_output=True)) - self._first_stage_objectness_loss = ( - losses.WeightedSoftmaxClassificationLoss(anchorwise_output=True)) - self._first_stage_loc_loss_weight = first_stage_localization_loss_weight - self._first_stage_obj_loss_weight = 
first_stage_objectness_loss_weight - - # Per-region cropping parameters - self._initial_crop_size = initial_crop_size - self._maxpool_kernel_size = maxpool_kernel_size - self._maxpool_stride = maxpool_stride - - self._mask_rcnn_box_predictor = second_stage_mask_rcnn_box_predictor - - self._second_stage_batch_size = second_stage_batch_size - self._second_stage_sampler = sampler.BalancedPositiveNegativeSampler( - positive_fraction=second_stage_balance_fraction) - - self._second_stage_nms_fn = second_stage_non_max_suppression_fn - self._second_stage_score_conversion_fn = second_stage_score_conversion_fn - - self._second_stage_localization_loss = ( - losses.WeightedSmoothL1LocalizationLoss(anchorwise_output=True)) - self._second_stage_classification_loss = second_stage_classification_loss - self._second_stage_mask_loss = ( - losses.WeightedSigmoidClassificationLoss(anchorwise_output=True)) - self._second_stage_loc_loss_weight = second_stage_localization_loss_weight - self._second_stage_cls_loss_weight = second_stage_classification_loss_weight - self._second_stage_mask_loss_weight = ( - second_stage_mask_prediction_loss_weight) - self._hard_example_miner = hard_example_miner - self._parallel_iterations = parallel_iterations - - @property - def first_stage_feature_extractor_scope(self): - return 'FirstStageFeatureExtractor' - - @property - def second_stage_feature_extractor_scope(self): - return 'SecondStageFeatureExtractor' - - @property - def first_stage_box_predictor_scope(self): - return 'FirstStageBoxPredictor' - - @property - def second_stage_box_predictor_scope(self): - return 'SecondStageBoxPredictor' - - @property - def max_num_proposals(self): - """Max number of proposals (to pad to) for each image in the input batch. - - At training time, this is set to be the `second_stage_batch_size` if hard - example miner is not configured, else it is set to - `first_stage_max_proposals`. At inference time, this is always set to - `first_stage_max_proposals`. 
- - Returns: - A positive integer. - """ - if self._is_training and not self._hard_example_miner: - return self._second_stage_batch_size - return self._first_stage_max_proposals - - def preprocess(self, inputs): - """Feature-extractor specific preprocessing. - - See base class. - - For Faster R-CNN, we perform image resizing in the base class --- each - class subclassing FasterRCNNMetaArch is responsible for any additional - preprocessing (e.g., scaling pixel values to be in [-1, 1]). - - Args: - inputs: a [batch, height_in, width_in, channels] float tensor representing - a batch of images with values between 0 and 255.0. - - Returns: - preprocessed_inputs: a [batch, height_out, width_out, channels] float - tensor representing a batch of images. - Raises: - ValueError: if inputs tensor does not have type tf.float32 - """ - if inputs.dtype is not tf.float32: - raise ValueError('`preprocess` expects a tf.float32 tensor') - with tf.name_scope('Preprocessor'): - resized_inputs = tf.map_fn(self._image_resizer_fn, - elems=inputs, - dtype=tf.float32, - parallel_iterations=self._parallel_iterations) - return self._feature_extractor.preprocess(resized_inputs) - - def predict(self, preprocessed_inputs): - """Predicts unpostprocessed tensors from input tensor. - - This function takes an input batch of images and runs it through the - forward pass of the network to yield "raw" un-postprocessed predictions. - If `first_stage_only` is True, this function only returns first stage - RPN predictions (un-postprocessed). Otherwise it returns both - first stage RPN predictions as well as second stage box classifier - predictions. - - Other remarks: - + Anchor pruning vs. clipping: following the recommendation of the Faster - R-CNN paper, we prune anchors that venture outside the image window at - training time and clip anchors to the image window at inference time. 
- + Proposal padding: as described at the top of the file, proposals are - padded to self._max_num_proposals and flattened so that proposals from all - images within the input batch are arranged along the same batch dimension. - - Args: - preprocessed_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - - Returns: - prediction_dict: a dictionary holding "raw" prediction tensors: - 1) rpn_box_predictor_features: A 4-D float32 tensor with shape - [batch_size, height, width, depth] to be used for predicting proposal - boxes and corresponding objectness scores. - 2) rpn_features_to_crop: A 4-D float32 tensor with shape - [batch_size, height, width, depth] representing image features to crop - using the proposal boxes predicted by the RPN. - 3) image_shape: a 1-D tensor of shape [4] representing the input - image shape. - 4) rpn_box_encodings: 3-D float tensor of shape - [batch_size, num_anchors, self._box_coder.code_size] containing - predicted boxes. - 5) rpn_objectness_predictions_with_background: 3-D float tensor of shape - [batch_size, num_anchors, 2] containing class - predictions (logits) for each of the anchors. Note that this - tensor *includes* background class predictions (at class index 0). - 6) anchors: A 2-D tensor of shape [num_anchors, 4] representing anchors - for the first stage RPN (in absolute coordinates). Note that - `num_anchors` can differ depending on whether the model is created in - training or inference mode. - - (and if first_stage_only=False): - 7) refined_box_encodings: a 3-D tensor with shape - [total_num_proposals, num_classes, 4] representing predicted - (final) refined box encodings, where - total_num_proposals=batch_size*self._max_num_proposals - 8) class_predictions_with_background: a 3-D tensor with shape - [total_num_proposals, num_classes + 1] containing class - predictions (logits) for each of the anchors, where - total_num_proposals=batch_size*self._max_num_proposals. 
- Note that this tensor *includes* background class predictions - (at class index 0). - 9) num_proposals: An int32 tensor of shape [batch_size] representing the - number of proposals generated by the RPN. `num_proposals` allows us - to keep track of which entries are to be treated as zero paddings and - which are not since we always pad the number of proposals to be - `self.max_num_proposals` for each image. - 10) proposal_boxes: A float32 tensor of shape - [batch_size, self.max_num_proposals, 4] representing - decoded proposal bounding boxes in absolute coordinates. - 11) mask_predictions: (optional) a 4-D tensor with shape - [total_num_padded_proposals, num_classes, mask_height, mask_width] - containing instance mask predictions. - """ - (rpn_box_predictor_features, rpn_features_to_crop, anchors_boxlist, - image_shape) = self._extract_rpn_feature_maps(preprocessed_inputs) - (rpn_box_encodings, rpn_objectness_predictions_with_background - ) = self._predict_rpn_proposals(rpn_box_predictor_features) - - # The Faster R-CNN paper recommends pruning anchors that venture outside - # the image window at training time and clipping at inference time. 
- clip_window = tf.to_float(tf.stack([0, 0, image_shape[1], image_shape[2]])) - if self._is_training: - (rpn_box_encodings, rpn_objectness_predictions_with_background, - anchors_boxlist) = self._remove_invalid_anchors_and_predictions( - rpn_box_encodings, rpn_objectness_predictions_with_background, - anchors_boxlist, clip_window) - else: - anchors_boxlist = box_list_ops.clip_to_window( - anchors_boxlist, clip_window) - - anchors = anchors_boxlist.get() - prediction_dict = { - 'rpn_box_predictor_features': rpn_box_predictor_features, - 'rpn_features_to_crop': rpn_features_to_crop, - 'image_shape': image_shape, - 'rpn_box_encodings': rpn_box_encodings, - 'rpn_objectness_predictions_with_background': - rpn_objectness_predictions_with_background, - 'anchors': anchors - } - - if not self._first_stage_only: - prediction_dict.update(self._predict_second_stage( - rpn_box_encodings, - rpn_objectness_predictions_with_background, - rpn_features_to_crop, - anchors, image_shape)) - return prediction_dict - - def _predict_second_stage(self, rpn_box_encodings, - rpn_objectness_predictions_with_background, - rpn_features_to_crop, - anchors, - image_shape): - """Predicts the output tensors from second stage of Faster R-CNN. - - Args: - rpn_box_encodings: 4-D float tensor of shape - [batch_size, num_valid_anchors, self._box_coder.code_size] containing - predicted boxes. - rpn_objectness_predictions_with_background: 2-D float tensor of shape - [batch_size, num_valid_anchors, 2] containing class - predictions (logits) for each of the anchors. Note that this - tensor *includes* background class predictions (at class index 0). - rpn_features_to_crop: A 4-D float32 tensor with shape - [batch_size, height, width, depth] representing image features to crop - using the proposal boxes predicted by the RPN. - anchors: 2-D float tensor of shape - [num_anchors, self._box_coder.code_size]. - image_shape: A 1D int32 tensors of size [4] containing the image shape. 
- - Returns: - prediction_dict: a dictionary holding "raw" prediction tensors: - 1) refined_box_encodings: a 3-D tensor with shape - [total_num_proposals, num_classes, 4] representing predicted - (final) refined box encodings, where - total_num_proposals=batch_size*self._max_num_proposals - 2) class_predictions_with_background: a 3-D tensor with shape - [total_num_proposals, num_classes + 1] containing class - predictions (logits) for each of the anchors, where - total_num_proposals=batch_size*self._max_num_proposals. - Note that this tensor *includes* background class predictions - (at class index 0). - 3) num_proposals: An int32 tensor of shape [batch_size] representing the - number of proposals generated by the RPN. `num_proposals` allows us - to keep track of which entries are to be treated as zero paddings and - which are not since we always pad the number of proposals to be - `self.max_num_proposals` for each image. - 4) proposal_boxes: A float32 tensor of shape - [batch_size, self.max_num_proposals, 4] representing - decoded proposal bounding boxes in absolute coordinates. - 5) proposal_boxes_normalized: A float32 tensor of shape - [batch_size, self.max_num_proposals, 4] representing decoded proposal - bounding boxes in normalized coordinates. Can be used to override the - boxes proposed by the RPN, thus enabling one to extract features and - get box classification and prediction for externally selected areas - of the image. - 6) box_classifier_features: a 4-D float32 tensor representing the - features for each proposal. - 7) mask_predictions: (optional) a 4-D tensor with shape - [total_num_padded_proposals, num_classes, mask_height, mask_width] - containing instance mask predictions. 
- """ - proposal_boxes_normalized, _, num_proposals = self._postprocess_rpn( - rpn_box_encodings, rpn_objectness_predictions_with_background, - anchors, image_shape) - - flattened_proposal_feature_maps = ( - self._compute_second_stage_input_feature_maps( - rpn_features_to_crop, proposal_boxes_normalized)) - - box_classifier_features = ( - self._feature_extractor.extract_box_classifier_features( - flattened_proposal_feature_maps, - scope=self.second_stage_feature_extractor_scope)) - - box_predictions = self._mask_rcnn_box_predictor.predict( - box_classifier_features, - num_predictions_per_location=1, - scope=self.second_stage_box_predictor_scope) - refined_box_encodings = tf.squeeze( - box_predictions[box_predictor.BOX_ENCODINGS], axis=1) - class_predictions_with_background = tf.squeeze(box_predictions[ - box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) - - absolute_proposal_boxes = ops.normalized_to_image_coordinates( - proposal_boxes_normalized, image_shape, self._parallel_iterations) - - prediction_dict = { - 'refined_box_encodings': refined_box_encodings, - 'class_predictions_with_background': - class_predictions_with_background, - 'num_proposals': num_proposals, - 'proposal_boxes': absolute_proposal_boxes, - 'box_classifier_features': box_classifier_features, - 'proposal_boxes_normalized': proposal_boxes_normalized, - } - if box_predictor.MASK_PREDICTIONS in box_predictions: - mask_predictions = tf.squeeze(box_predictions[ - box_predictor.MASK_PREDICTIONS], axis=1) - prediction_dict['mask_predictions'] = mask_predictions - - return prediction_dict - - def _extract_rpn_feature_maps(self, preprocessed_inputs): - """Extracts RPN features. - - This function extracts two feature maps: a feature map to be directly - fed to a box predictor (to predict location and objectness scores for - proposals) and a feature map from which to crop regions which will then - be sent to the second stage box classifier. 
- - Args: - preprocessed_inputs: a [batch, height, width, channels] image tensor. - - Returns: - rpn_box_predictor_features: A 4-D float32 tensor with shape - [batch, height, width, depth] to be used for predicting proposal boxes - and corresponding objectness scores. - rpn_features_to_crop: A 4-D float32 tensor with shape - [batch, height, width, depth] representing image features to crop using - the proposals boxes. - anchors: A BoxList representing anchors (for the RPN) in - absolute coordinates. - image_shape: A 1-D tensor representing the input image shape. - """ - image_shape = tf.shape(preprocessed_inputs) - rpn_features_to_crop = self._feature_extractor.extract_proposal_features( - preprocessed_inputs, scope=self.first_stage_feature_extractor_scope) - - feature_map_shape = tf.shape(rpn_features_to_crop) - anchors = self._first_stage_anchor_generator.generate( - [(feature_map_shape[1], feature_map_shape[2])]) - with slim.arg_scope(self._first_stage_box_predictor_arg_scope): - kernel_size = self._first_stage_box_predictor_kernel_size - rpn_box_predictor_features = slim.conv2d( - rpn_features_to_crop, - self._first_stage_box_predictor_depth, - kernel_size=[kernel_size, kernel_size], - rate=self._first_stage_atrous_rate, - activation_fn=tf.nn.relu6) - return (rpn_box_predictor_features, rpn_features_to_crop, - anchors, image_shape) - - def _predict_rpn_proposals(self, rpn_box_predictor_features): - """Adds box predictors to RPN feature map to predict proposals. - - Note resulting tensors will not have been postprocessed. - - Args: - rpn_box_predictor_features: A 4-D float32 tensor with shape - [batch, height, width, depth] to be used for predicting proposal boxes - and corresponding objectness scores. - - Returns: - box_encodings: 3-D float tensor of shape - [batch_size, num_anchors, self._box_coder.code_size] containing - predicted boxes. 
- objectness_predictions_with_background: 3-D float tensor of shape - [batch_size, num_anchors, 2] containing class - predictions (logits) for each of the anchors. Note that this - tensor *includes* background class predictions (at class index 0). - - Raises: - RuntimeError: if the anchor generator generates anchors corresponding to - multiple feature maps. We currently assume that a single feature map - is generated for the RPN. - """ - num_anchors_per_location = ( - self._first_stage_anchor_generator.num_anchors_per_location()) - if len(num_anchors_per_location) != 1: - raise RuntimeError('anchor_generator is expected to generate anchors ' - 'corresponding to a single feature map.') - box_predictions = self._first_stage_box_predictor.predict( - rpn_box_predictor_features, - num_anchors_per_location[0], - scope=self.first_stage_box_predictor_scope) - - box_encodings = box_predictions[box_predictor.BOX_ENCODINGS] - objectness_predictions_with_background = box_predictions[ - box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND] - return (tf.squeeze(box_encodings, axis=2), - objectness_predictions_with_background) - - def _remove_invalid_anchors_and_predictions( - self, - box_encodings, - objectness_predictions_with_background, - anchors_boxlist, - clip_window): - """Removes anchors that (partially) fall outside an image. - - Also removes associated box encodings and objectness predictions. - - Args: - box_encodings: 3-D float tensor of shape - [batch_size, num_anchors, self._box_coder.code_size] containing - predicted boxes. - objectness_predictions_with_background: 3-D float tensor of shape - [batch_size, num_anchors, 2] containing class - predictions (logits) for each of the anchors. Note that this - tensor *includes* background class predictions (at class index 0). - anchors_boxlist: A BoxList representing num_anchors anchors (for the RPN) - in absolute coordinates. 
- clip_window: a 1-D tensor representing the [ymin, xmin, ymax, xmax] - extent of the window to clip/prune to. - - Returns: - box_encodings: 4-D float tensor of shape - [batch_size, num_valid_anchors, self._box_coder.code_size] containing - predicted boxes, where num_valid_anchors <= num_anchors - objectness_predictions_with_background: 2-D float tensor of shape - [batch_size, num_valid_anchors, 2] containing class - predictions (logits) for each of the anchors, where - num_valid_anchors <= num_anchors. Note that this - tensor *includes* background class predictions (at class index 0). - anchors: A BoxList representing num_valid_anchors anchors (for the RPN) in - absolute coordinates. - """ - pruned_anchors_boxlist, keep_indices = box_list_ops.prune_outside_window( - anchors_boxlist, clip_window) - def _batch_gather_kept_indices(predictions_tensor): - return tf.map_fn( - partial(tf.gather, indices=keep_indices), - elems=predictions_tensor, - dtype=tf.float32, - parallel_iterations=self._parallel_iterations, - back_prop=True) - return (_batch_gather_kept_indices(box_encodings), - _batch_gather_kept_indices(objectness_predictions_with_background), - pruned_anchors_boxlist) - - def _flatten_first_two_dimensions(self, inputs): - """Flattens `K-d` tensor along batch dimension to be a `(K-1)-d` tensor. - - Converts `inputs` with shape [A, B, ..., depth] into a tensor of shape - [A * B, ..., depth]. - - Args: - inputs: A float tensor with shape [A, B, ..., depth]. Note that the first - two and last dimensions must be statically defined. - Returns: - A float tensor with shape [A * B, ..., depth] (where the first and last - dimension are statically defined. - """ - combined_shape = shape_utils.combined_static_and_dynamic_shape(inputs) - flattened_shape = tf.stack([combined_shape[0] * combined_shape[1]] + - combined_shape[2:]) - return tf.reshape(inputs, flattened_shape) - - def postprocess(self, prediction_dict): - """Convert prediction tensors to final detections. 
- - This function converts raw predictions tensors to final detection results. - See base class for output format conventions. Note also that by default, - scores are to be interpreted as logits, but if a score_converter is used, - then scores are remapped (and may thus have a different interpretation). - - If first_stage_only=True, the returned results represent proposals from the - first stage RPN and are padded to have self.max_num_proposals for each - image; otherwise, the results can be interpreted as multiclass detections - from the full two-stage model and are padded to self._max_detections. - - Args: - prediction_dict: a dictionary holding prediction tensors (see the - documentation for the predict method. If first_stage_only=True, we - expect prediction_dict to contain `rpn_box_encodings`, - `rpn_objectness_predictions_with_background`, `rpn_features_to_crop`, - `image_shape`, and `anchors` fields. Otherwise we expect - prediction_dict to additionally contain `refined_box_encodings`, - `class_predictions_with_background`, `num_proposals`, - `proposal_boxes` and, optionally, `mask_predictions` fields. 
- - Returns: - detections: a dictionary containing the following fields - detection_boxes: [batch, max_detection, 4] - detection_scores: [batch, max_detections] - detection_classes: [batch, max_detections] - (this entry is only created if rpn_mode=False) - num_detections: [batch] - """ - with tf.name_scope('FirstStagePostprocessor'): - image_shape = prediction_dict['image_shape'] - if self._first_stage_only: - proposal_boxes, proposal_scores, num_proposals = self._postprocess_rpn( - prediction_dict['rpn_box_encodings'], - prediction_dict['rpn_objectness_predictions_with_background'], - prediction_dict['anchors'], - image_shape) - return { - 'detection_boxes': proposal_boxes, - 'detection_scores': proposal_scores, - 'num_detections': tf.to_float(num_proposals) - } - with tf.name_scope('SecondStagePostprocessor'): - mask_predictions = prediction_dict.get(box_predictor.MASK_PREDICTIONS) - detections_dict = self._postprocess_box_classifier( - prediction_dict['refined_box_encodings'], - prediction_dict['class_predictions_with_background'], - prediction_dict['proposal_boxes'], - prediction_dict['num_proposals'], - image_shape, - mask_predictions=mask_predictions) - return detections_dict - - def _postprocess_rpn(self, - rpn_box_encodings_batch, - rpn_objectness_predictions_with_background_batch, - anchors, - image_shape): - """Converts first stage prediction tensors from the RPN to proposals. - - This function decodes the raw RPN predictions, runs non-max suppression - on the result. - - Note that the behavior of this function is slightly modified during - training --- specifically, we stop the gradient from passing through the - proposal boxes and we only return a balanced sampled subset of proposals - with size `second_stage_batch_size`. - - Args: - rpn_box_encodings_batch: A 3-D float32 tensor of shape - [batch_size, num_anchors, self._box_coder.code_size] containing - predicted proposal box encodings. 
- rpn_objectness_predictions_with_background_batch: A 3-D float tensor of - shape [batch_size, num_anchors, 2] containing objectness predictions - (logits) for each of the anchors with 0 corresponding to background - and 1 corresponding to object. - anchors: A 2-D tensor of shape [num_anchors, 4] representing anchors - for the first stage RPN. Note that `num_anchors` can differ depending - on whether the model is created in training or inference mode. - image_shape: A 1-D tensor representing the input image shape. - - Returns: - proposal_boxes: A float tensor with shape - [batch_size, max_num_proposals, 4] representing the (potentially zero - padded) proposal boxes for all images in the batch. These boxes are - represented as normalized coordinates. - proposal_scores: A float tensor with shape - [batch_size, max_num_proposals] representing the (potentially zero - padded) proposal objectness scores for all images in the batch. - num_proposals: A Tensor of type `int32`. A 1-D tensor of shape [batch] - representing the number of proposals predicted for each image in - the batch. 
- """ - rpn_box_encodings_batch = tf.expand_dims(rpn_box_encodings_batch, axis=2) - rpn_encodings_shape = shape_utils.combined_static_and_dynamic_shape( - rpn_box_encodings_batch) - tiled_anchor_boxes = tf.tile( - tf.expand_dims(anchors, 0), [rpn_encodings_shape[0], 1, 1]) - proposal_boxes = self._batch_decode_boxes(rpn_box_encodings_batch, - tiled_anchor_boxes) - proposal_boxes = tf.squeeze(proposal_boxes, axis=2) - rpn_objectness_softmax_without_background = tf.nn.softmax( - rpn_objectness_predictions_with_background_batch)[:, :, 1] - clip_window = tf.to_float(tf.stack([0, 0, image_shape[1], image_shape[2]])) - (proposal_boxes, proposal_scores, _, _, _, - num_proposals) = post_processing.batch_multiclass_non_max_suppression( - tf.expand_dims(proposal_boxes, axis=2), - tf.expand_dims(rpn_objectness_softmax_without_background, - axis=2), - self._first_stage_nms_score_threshold, - self._first_stage_nms_iou_threshold, - self._first_stage_max_proposals, - self._first_stage_max_proposals, - clip_window=clip_window) - if self._is_training: - proposal_boxes = tf.stop_gradient(proposal_boxes) - if not self._hard_example_miner: - (groundtruth_boxlists, groundtruth_classes_with_background_list, - _) = self._format_groundtruth_data(image_shape) - (proposal_boxes, proposal_scores, - num_proposals) = self._unpad_proposals_and_sample_box_classifier_batch( - proposal_boxes, proposal_scores, num_proposals, - groundtruth_boxlists, groundtruth_classes_with_background_list) - # normalize proposal boxes - proposal_boxes_reshaped = tf.reshape(proposal_boxes, [-1, 4]) - normalized_proposal_boxes_reshaped = box_list_ops.to_normalized_coordinates( - box_list.BoxList(proposal_boxes_reshaped), - image_shape[1], image_shape[2], check_range=False).get() - proposal_boxes = tf.reshape(normalized_proposal_boxes_reshaped, - [-1, proposal_boxes.shape[1].value, 4]) - return proposal_boxes, proposal_scores, num_proposals - - def _unpad_proposals_and_sample_box_classifier_batch( - self, - 
proposal_boxes, - proposal_scores, - num_proposals, - groundtruth_boxlists, - groundtruth_classes_with_background_list): - """Unpads proposals and samples a minibatch for second stage. - - Args: - proposal_boxes: A float tensor with shape - [batch_size, num_proposals, 4] representing the (potentially zero - padded) proposal boxes for all images in the batch. These boxes are - represented as normalized coordinates. - proposal_scores: A float tensor with shape - [batch_size, num_proposals] representing the (potentially zero - padded) proposal objectness scores for all images in the batch. - num_proposals: A Tensor of type `int32`. A 1-D tensor of shape [batch] - representing the number of proposals predicted for each image in - the batch. - groundtruth_boxlists: A list of BoxLists containing (absolute) coordinates - of the groundtruth boxes. - groundtruth_classes_with_background_list: A list of 2-D one-hot - (or k-hot) tensors of shape [num_boxes, num_classes+1] containing the - class targets with the 0th index assumed to map to the background class. - - Returns: - proposal_boxes: A float tensor with shape - [batch_size, second_stage_batch_size, 4] representing the (potentially - zero padded) proposal boxes for all images in the batch. These boxes - are represented as normalized coordinates. - proposal_scores: A float tensor with shape - [batch_size, second_stage_batch_size] representing the (potentially zero - padded) proposal objectness scores for all images in the batch. - num_proposals: A Tensor of type `int32`. A 1-D tensor of shape [batch] - representing the number of proposals predicted for each image in - the batch. 
- """ - single_image_proposal_box_sample = [] - single_image_proposal_score_sample = [] - single_image_num_proposals_sample = [] - for (single_image_proposal_boxes, - single_image_proposal_scores, - single_image_num_proposals, - single_image_groundtruth_boxlist, - single_image_groundtruth_classes_with_background) in zip( - tf.unstack(proposal_boxes), - tf.unstack(proposal_scores), - tf.unstack(num_proposals), - groundtruth_boxlists, - groundtruth_classes_with_background_list): - static_shape = single_image_proposal_boxes.get_shape() - sliced_static_shape = tf.TensorShape([tf.Dimension(None), - static_shape.dims[-1]]) - single_image_proposal_boxes = tf.slice( - single_image_proposal_boxes, - [0, 0], - [single_image_num_proposals, -1]) - single_image_proposal_boxes.set_shape(sliced_static_shape) - - single_image_proposal_scores = tf.slice(single_image_proposal_scores, - [0], - [single_image_num_proposals]) - single_image_boxlist = box_list.BoxList(single_image_proposal_boxes) - single_image_boxlist.add_field(fields.BoxListFields.scores, - single_image_proposal_scores) - sampled_boxlist = self._sample_box_classifier_minibatch( - single_image_boxlist, - single_image_groundtruth_boxlist, - single_image_groundtruth_classes_with_background) - sampled_padded_boxlist = box_list_ops.pad_or_clip_box_list( - sampled_boxlist, - num_boxes=self._second_stage_batch_size) - single_image_num_proposals_sample.append(tf.minimum( - sampled_boxlist.num_boxes(), - self._second_stage_batch_size)) - bb = sampled_padded_boxlist.get() - single_image_proposal_box_sample.append(bb) - single_image_proposal_score_sample.append( - sampled_padded_boxlist.get_field(fields.BoxListFields.scores)) - return (tf.stack(single_image_proposal_box_sample), - tf.stack(single_image_proposal_score_sample), - tf.stack(single_image_num_proposals_sample)) - - def _format_groundtruth_data(self, image_shape): - """Helper function for preparing groundtruth data for target assignment. 
- - In order to be consistent with the model.DetectionModel interface, - groundtruth boxes are specified in normalized coordinates and classes are - specified as label indices with no assumed background category. To prepare - for target assignment, we: - 1) convert boxes to absolute coordinates, - 2) add a background class at class index 0 - 3) groundtruth instance masks, if available, are resized to match - image_shape. - - Args: - image_shape: A 1-D int32 tensor of shape [4] representing the shape of the - input image batch. - - Returns: - groundtruth_boxlists: A list of BoxLists containing (absolute) coordinates - of the groundtruth boxes. - groundtruth_classes_with_background_list: A list of 2-D one-hot - (or k-hot) tensors of shape [num_boxes, num_classes+1] containing the - class targets with the 0th index assumed to map to the background class. - groundtruth_masks_list: If present, a list of 3-D tf.float32 tensors of - shape [num_boxes, image_height, image_width] containing instance masks. - This is set to None if no masks exist in the provided groundtruth. 
- """ - groundtruth_boxlists = [ - box_list_ops.to_absolute_coordinates( - box_list.BoxList(boxes), image_shape[1], image_shape[2]) - for boxes in self.groundtruth_lists(fields.BoxListFields.boxes)] - groundtruth_classes_with_background_list = [ - tf.to_float( - tf.pad(one_hot_encoding, [[0, 0], [1, 0]], mode='CONSTANT')) - for one_hot_encoding in self.groundtruth_lists( - fields.BoxListFields.classes)] - - groundtruth_masks_list = self._groundtruth_lists.get( - fields.BoxListFields.masks) - if groundtruth_masks_list is not None: - resized_masks_list = [] - for mask in groundtruth_masks_list: - resized_4d_mask = tf.image.resize_images( - tf.expand_dims(mask, axis=3), - image_shape[1:3], - method=tf.image.ResizeMethod.NEAREST_NEIGHBOR, - align_corners=True) - resized_masks_list.append(tf.squeeze(resized_4d_mask, axis=3)) - groundtruth_masks_list = resized_masks_list - - return (groundtruth_boxlists, groundtruth_classes_with_background_list, - groundtruth_masks_list) - - def _sample_box_classifier_minibatch(self, - proposal_boxlist, - groundtruth_boxlist, - groundtruth_classes_with_background): - """Samples a mini-batch of proposals to be sent to the box classifier. - - Helper function for self._postprocess_rpn. - - Args: - proposal_boxlist: A BoxList containing K proposal boxes in absolute - coordinates. - groundtruth_boxlist: A Boxlist containing N groundtruth object boxes in - absolute coordinates. - groundtruth_classes_with_background: A tensor with shape - `[N, self.num_classes + 1]` representing groundtruth classes. The - classes are assumed to be k-hot encoded, and include background as the - zero-th class. - - Returns: - a BoxList contained sampled proposals. - """ - (cls_targets, cls_weights, _, _, _) = self._detector_target_assigner.assign( - proposal_boxlist, groundtruth_boxlist, - groundtruth_classes_with_background) - # Selects all boxes as candidates if none of them is selected according - # to cls_weights. 
This could happen as boxes within certain IOU ranges - # are ignored. If triggered, the selected boxes will still be ignored - # during loss computation. - cls_weights += tf.to_float(tf.equal(tf.reduce_sum(cls_weights), 0)) - positive_indicator = tf.greater(tf.argmax(cls_targets, axis=1), 0) - sampled_indices = self._second_stage_sampler.subsample( - tf.cast(cls_weights, tf.bool), - self._second_stage_batch_size, - positive_indicator) - return box_list_ops.boolean_mask(proposal_boxlist, sampled_indices) - - def _compute_second_stage_input_feature_maps(self, features_to_crop, - proposal_boxes_normalized): - """Crops to a set of proposals from the feature map for a batch of images. - - Helper function for self._postprocess_rpn. This function calls - `tf.image.crop_and_resize` to create the feature map to be passed to the - second stage box classifier for each proposal. - - Args: - features_to_crop: A float32 tensor with shape - [batch_size, height, width, depth] - proposal_boxes_normalized: A float32 tensor with shape [batch_size, - num_proposals, box_code_size] containing proposal boxes in - normalized coordinates. - - Returns: - A float32 tensor with shape [K, new_height, new_width, depth]. 
- """ - def get_box_inds(proposals): - proposals_shape = proposals.get_shape().as_list() - if any(dim is None for dim in proposals_shape): - proposals_shape = tf.shape(proposals) - ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32) - multiplier = tf.expand_dims( - tf.range(start=0, limit=proposals_shape[0]), 1) - return tf.reshape(ones_mat * multiplier, [-1]) - - cropped_regions = tf.image.crop_and_resize( - features_to_crop, - self._flatten_first_two_dimensions(proposal_boxes_normalized), - get_box_inds(proposal_boxes_normalized), - (self._initial_crop_size, self._initial_crop_size)) - return slim.max_pool2d( - cropped_regions, - [self._maxpool_kernel_size, self._maxpool_kernel_size], - stride=self._maxpool_stride) - - def _postprocess_box_classifier(self, - refined_box_encodings, - class_predictions_with_background, - proposal_boxes, - num_proposals, - image_shape, - mask_predictions=None): - """Converts predictions from the second stage box classifier to detections. - - Args: - refined_box_encodings: a 3-D float tensor with shape - [total_num_padded_proposals, num_classes, 4] representing predicted - (final) refined box encodings. - class_predictions_with_background: a 3-D tensor float with shape - [total_num_padded_proposals, num_classes + 1] containing class - predictions (logits) for each of the proposals. Note that this tensor - *includes* background class predictions (at class index 0). - proposal_boxes: a 3-D float tensor with shape - [batch_size, self.max_num_proposals, 4] representing decoded proposal - bounding boxes in absolute coordinates. - num_proposals: a 1-D int32 tensor of shape [batch] representing the number - of proposals predicted for each image in the batch. - image_shape: a 1-D int32 tensor representing the input image shape. - mask_predictions: (optional) a 4-D float tensor with shape - [total_num_padded_proposals, num_classes, mask_height, mask_width] - containing instance mask prediction logits. 
- - Returns: - A dictionary containing: - `detection_boxes`: [batch, max_detection, 4] - `detection_scores`: [batch, max_detections] - `detection_classes`: [batch, max_detections] - `num_detections`: [batch] - `detection_masks`: - (optional) [batch, max_detections, mask_height, mask_width]. Note - that a pixel-wise sigmoid score converter is applied to the detection - masks. - """ - refined_box_encodings_batch = tf.reshape(refined_box_encodings, - [-1, self.max_num_proposals, - self.num_classes, - self._box_coder.code_size]) - class_predictions_with_background_batch = tf.reshape( - class_predictions_with_background, - [-1, self.max_num_proposals, self.num_classes + 1] - ) - refined_decoded_boxes_batch = self._batch_decode_boxes( - refined_box_encodings_batch, proposal_boxes) - class_predictions_with_background_batch = ( - self._second_stage_score_conversion_fn( - class_predictions_with_background_batch)) - class_predictions_batch = tf.reshape( - tf.slice(class_predictions_with_background_batch, - [0, 0, 1], [-1, -1, -1]), - [-1, self.max_num_proposals, self.num_classes]) - clip_window = tf.to_float(tf.stack([0, 0, image_shape[1], image_shape[2]])) - - mask_predictions_batch = None - if mask_predictions is not None: - mask_height = mask_predictions.shape[2].value - mask_width = mask_predictions.shape[3].value - mask_predictions = tf.sigmoid(mask_predictions) - mask_predictions_batch = tf.reshape( - mask_predictions, [-1, self.max_num_proposals, - self.num_classes, mask_height, mask_width]) - (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, _, - num_detections) = self._second_stage_nms_fn( - refined_decoded_boxes_batch, - class_predictions_batch, - clip_window=clip_window, - change_coordinate_frame=True, - num_valid_boxes=num_proposals, - masks=mask_predictions_batch) - detections = {'detection_boxes': nmsed_boxes, - 'detection_scores': nmsed_scores, - 'detection_classes': nmsed_classes, - 'num_detections': tf.to_float(num_detections)} - if nmsed_masks is not 
None: - detections['detection_masks'] = nmsed_masks - return detections - - def _batch_decode_boxes(self, box_encodings, anchor_boxes): - """Decodes box encodings with respect to the anchor boxes. - - Args: - box_encodings: a 4-D tensor with shape - [batch_size, num_anchors, num_classes, self._box_coder.code_size] - representing box encodings. - anchor_boxes: [batch_size, num_anchors, 4] representing - decoded bounding boxes. - - Returns: - decoded_boxes: a [batch_size, num_anchors, num_classes, 4] - float tensor representing bounding box predictions - (for each image in batch, proposal and class). - """ - combined_shape = shape_utils.combined_static_and_dynamic_shape( - box_encodings) - num_classes = combined_shape[2] - tiled_anchor_boxes = tf.tile( - tf.expand_dims(anchor_boxes, 2), [1, 1, num_classes, 1]) - tiled_anchors_boxlist = box_list.BoxList( - tf.reshape(tiled_anchor_boxes, [-1, 4])) - decoded_boxes = self._box_coder.decode( - tf.reshape(box_encodings, [-1, self._box_coder.code_size]), - tiled_anchors_boxlist) - return tf.reshape(decoded_boxes.get(), - tf.stack([combined_shape[0], combined_shape[1], - num_classes, 4])) - - def loss(self, prediction_dict, scope=None): - """Compute scalar loss tensors given prediction tensors. - - If first_stage_only=True, only RPN related losses are computed (i.e., - `rpn_localization_loss` and `rpn_objectness_loss`). Otherwise all - losses are computed. - - Args: - prediction_dict: a dictionary holding prediction tensors (see the - documentation for the predict method. If first_stage_only=True, we - expect prediction_dict to contain `rpn_box_encodings`, - `rpn_objectness_predictions_with_background`, `rpn_features_to_crop`, - `image_shape`, and `anchors` fields. Otherwise we expect - prediction_dict to additionally contain `refined_box_encodings`, - `class_predictions_with_background`, `num_proposals`, and - `proposal_boxes` fields. - scope: Optional scope name. 
- - Returns: - a dictionary mapping loss keys (`first_stage_localization_loss`, - `first_stage_objectness_loss`, 'second_stage_localization_loss', - 'second_stage_classification_loss') to scalar tensors representing - corresponding loss values. - """ - with tf.name_scope(scope, 'Loss', prediction_dict.values()): - (groundtruth_boxlists, groundtruth_classes_with_background_list, - groundtruth_masks_list - ) = self._format_groundtruth_data(prediction_dict['image_shape']) - loss_dict = self._loss_rpn( - prediction_dict['rpn_box_encodings'], - prediction_dict['rpn_objectness_predictions_with_background'], - prediction_dict['anchors'], - groundtruth_boxlists, - groundtruth_classes_with_background_list) - if not self._first_stage_only: - loss_dict.update( - self._loss_box_classifier( - prediction_dict['refined_box_encodings'], - prediction_dict['class_predictions_with_background'], - prediction_dict['proposal_boxes'], - prediction_dict['num_proposals'], - groundtruth_boxlists, - groundtruth_classes_with_background_list, - prediction_dict['image_shape'], - prediction_dict.get('mask_predictions'), - groundtruth_masks_list, - )) - return loss_dict - - def _loss_rpn(self, - rpn_box_encodings, - rpn_objectness_predictions_with_background, - anchors, - groundtruth_boxlists, - groundtruth_classes_with_background_list): - """Computes scalar RPN loss tensors. - - Uses self._proposal_target_assigner to obtain regression and classification - targets for the first stage RPN, samples a "minibatch" of anchors to - participate in the loss computation, and returns the RPN losses. - - Args: - rpn_box_encodings: A 4-D float tensor of shape - [batch_size, num_anchors, self._box_coder.code_size] containing - predicted proposal box encodings. - rpn_objectness_predictions_with_background: A 2-D float tensor of shape - [batch_size, num_anchors, 2] containing objectness predictions - (logits) for each of the anchors with 0 corresponding to background - and 1 corresponding to object. 
- anchors: A 2-D tensor of shape [num_anchors, 4] representing anchors - for the first stage RPN. Note that `num_anchors` can differ depending - on whether the model is created in training or inference mode. - groundtruth_boxlists: A list of BoxLists containing coordinates of the - groundtruth boxes. - groundtruth_classes_with_background_list: A list of 2-D one-hot - (or k-hot) tensors of shape [num_boxes, num_classes+1] containing the - class targets with the 0th index assumed to map to the background class. - - Returns: - a dictionary mapping loss keys (`first_stage_localization_loss`, - `first_stage_objectness_loss`) to scalar tensors representing - corresponding loss values. - """ - with tf.name_scope('RPNLoss'): - (batch_cls_targets, batch_cls_weights, batch_reg_targets, - batch_reg_weights, _) = target_assigner.batch_assign_targets( - self._proposal_target_assigner, box_list.BoxList(anchors), - groundtruth_boxlists, len(groundtruth_boxlists)*[None]) - batch_cls_targets = tf.squeeze(batch_cls_targets, axis=2) - - def _minibatch_subsample_fn(inputs): - cls_targets, cls_weights = inputs - return self._first_stage_sampler.subsample( - tf.cast(cls_weights, tf.bool), - self._first_stage_minibatch_size, tf.cast(cls_targets, tf.bool)) - batch_sampled_indices = tf.to_float(tf.map_fn( - _minibatch_subsample_fn, - [batch_cls_targets, batch_cls_weights], - dtype=tf.bool, - parallel_iterations=self._parallel_iterations, - back_prop=True)) - - # Normalize by number of examples in sampled minibatch - normalizer = tf.reduce_sum(batch_sampled_indices, axis=1) - batch_one_hot_targets = tf.one_hot( - tf.to_int32(batch_cls_targets), depth=2) - sampled_reg_indices = tf.multiply(batch_sampled_indices, - batch_reg_weights) - - localization_losses = self._first_stage_localization_loss( - rpn_box_encodings, batch_reg_targets, weights=sampled_reg_indices) - objectness_losses = self._first_stage_objectness_loss( - rpn_objectness_predictions_with_background, - batch_one_hot_targets, 
weights=batch_sampled_indices) - localization_loss = tf.reduce_mean( - tf.reduce_sum(localization_losses, axis=1) / normalizer) - objectness_loss = tf.reduce_mean( - tf.reduce_sum(objectness_losses, axis=1) / normalizer) - loss_dict = {} - - with tf.name_scope('localization_loss'): - loss_dict['first_stage_localization_loss'] = ( - self._first_stage_loc_loss_weight * localization_loss) - with tf.name_scope('objectness_loss'): - loss_dict['first_stage_objectness_loss'] = ( - self._first_stage_obj_loss_weight * objectness_loss) - return loss_dict - - def _loss_box_classifier(self, - refined_box_encodings, - class_predictions_with_background, - proposal_boxes, - num_proposals, - groundtruth_boxlists, - groundtruth_classes_with_background_list, - image_shape, - prediction_masks=None, - groundtruth_masks_list=None): - """Computes scalar box classifier loss tensors. - - Uses self._detector_target_assigner to obtain regression and classification - targets for the second stage box classifier, optionally performs - hard mining, and returns losses. All losses are computed independently - for each image and then averaged across the batch. - Please note that for boxes and masks with multiple labels, the box - regression and mask prediction losses are only computed for one label. - - This function assumes that the proposal boxes in the "padded" regions are - actually zero (and thus should not be matched to). - - - Args: - refined_box_encodings: a 3-D tensor with shape - [total_num_proposals, num_classes, box_coder.code_size] representing - predicted (final) refined box encodings. - class_predictions_with_background: a 2-D tensor with shape - [total_num_proposals, num_classes + 1] containing class - predictions (logits) for each of the anchors. Note that this tensor - *includes* background class predictions (at class index 0). - proposal_boxes: [batch_size, self.max_num_proposals, 4] representing - decoded proposal bounding boxes. - num_proposals: A Tensor of type `int32`. 
A 1-D tensor of shape [batch] - representing the number of proposals predicted for each image in - the batch. - groundtruth_boxlists: a list of BoxLists containing coordinates of the - groundtruth boxes. - groundtruth_classes_with_background_list: a list of 2-D one-hot - (or k-hot) tensors of shape [num_boxes, num_classes + 1] containing the - class targets with the 0th index assumed to map to the background class. - image_shape: a 1-D tensor of shape [4] representing the image shape. - prediction_masks: an optional 4-D tensor with shape [total_num_proposals, - num_classes, mask_height, mask_width] containing the instance masks for - each box. - groundtruth_masks_list: an optional list of 3-D tensors of shape - [num_boxes, image_height, image_width] containing the instance masks for - each of the boxes. - - Returns: - a dictionary mapping loss keys ('second_stage_localization_loss', - 'second_stage_classification_loss') to scalar tensors representing - corresponding loss values. - - Raises: - ValueError: if `predict_instance_masks` in - second_stage_mask_rcnn_box_predictor is True and - `groundtruth_masks_list` is not provided. 
- """ - with tf.name_scope('BoxClassifierLoss'): - paddings_indicator = self._padded_batched_proposals_indicator( - num_proposals, self.max_num_proposals) - proposal_boxlists = [ - box_list.BoxList(proposal_boxes_single_image) - for proposal_boxes_single_image in tf.unstack(proposal_boxes)] - batch_size = len(proposal_boxlists) - - num_proposals_or_one = tf.to_float(tf.expand_dims( - tf.maximum(num_proposals, tf.ones_like(num_proposals)), 1)) - normalizer = tf.tile(num_proposals_or_one, - [1, self.max_num_proposals]) * batch_size - - (batch_cls_targets_with_background, batch_cls_weights, batch_reg_targets, - batch_reg_weights, _) = target_assigner.batch_assign_targets( - self._detector_target_assigner, proposal_boxlists, - groundtruth_boxlists, groundtruth_classes_with_background_list) - - # We only predict refined location encodings for the non background - # classes, but we now pad it to make it compatible with the class - # predictions - flat_cls_targets_with_background = tf.reshape( - batch_cls_targets_with_background, - [batch_size * self.max_num_proposals, -1]) - refined_box_encodings_with_background = tf.pad( - refined_box_encodings, [[0, 0], [1, 0], [0, 0]]) - # For anchors with multiple labels, picks refined_location_encodings - # for just one class to avoid over-counting for regression loss and - # (optionally) mask loss. 
- one_hot_flat_cls_targets_with_background = tf.argmax( - flat_cls_targets_with_background, axis=1) - one_hot_flat_cls_targets_with_background = tf.one_hot( - one_hot_flat_cls_targets_with_background, - flat_cls_targets_with_background.get_shape()[1]) - refined_box_encodings_masked_by_class_targets = tf.boolean_mask( - refined_box_encodings_with_background, - tf.greater(one_hot_flat_cls_targets_with_background, 0)) - class_predictions_with_background = tf.reshape( - class_predictions_with_background, - [batch_size, self.max_num_proposals, -1]) - reshaped_refined_box_encodings = tf.reshape( - refined_box_encodings_masked_by_class_targets, - [batch_size, -1, 4]) - - second_stage_loc_losses = self._second_stage_localization_loss( - reshaped_refined_box_encodings, - batch_reg_targets, weights=batch_reg_weights) / normalizer - second_stage_cls_losses = self._second_stage_classification_loss( - class_predictions_with_background, - batch_cls_targets_with_background, - weights=batch_cls_weights) / normalizer - second_stage_loc_loss = tf.reduce_sum( - tf.boolean_mask(second_stage_loc_losses, paddings_indicator)) - second_stage_cls_loss = tf.reduce_sum( - tf.boolean_mask(second_stage_cls_losses, paddings_indicator)) - - if self._hard_example_miner: - (second_stage_loc_loss, second_stage_cls_loss - ) = self._unpad_proposals_and_apply_hard_mining( - proposal_boxlists, second_stage_loc_losses, - second_stage_cls_losses, num_proposals) - loss_dict = {} - with tf.name_scope('localization_loss'): - loss_dict['second_stage_localization_loss'] = ( - self._second_stage_loc_loss_weight * second_stage_loc_loss) - - with tf.name_scope('classification_loss'): - loss_dict['second_stage_classification_loss'] = ( - self._second_stage_cls_loss_weight * second_stage_cls_loss) - - second_stage_mask_loss = None - if prediction_masks is not None: - if groundtruth_masks_list is None: - raise ValueError('Groundtruth instance masks not provided. 
' - 'Please configure input reader.') - - # Create a new target assigner that matches the proposals to groundtruth - # and returns the mask targets. - # TODO: Move `unmatched_cls_target` from constructor to assign function. - # This will enable reuse of a single target assigner for both class - # targets and mask targets. - mask_target_assigner = target_assigner.create_target_assigner( - 'FasterRCNN', 'detection', - unmatched_cls_target=tf.zeros(image_shape[1:3], dtype=tf.float32)) - (batch_mask_targets, _, _, - batch_mask_target_weights, _) = target_assigner.batch_assign_targets( - mask_target_assigner, proposal_boxlists, - groundtruth_boxlists, groundtruth_masks_list) - - # Pad the prediction_masks with to add zeros for background class to be - # consistent with class predictions. - prediction_masks_with_background = tf.pad( - prediction_masks, [[0, 0], [1, 0], [0, 0], [0, 0]]) - prediction_masks_masked_by_class_targets = tf.boolean_mask( - prediction_masks_with_background, - tf.greater(one_hot_flat_cls_targets_with_background, 0)) - mask_height = prediction_masks.shape[2].value - mask_width = prediction_masks.shape[3].value - reshaped_prediction_masks = tf.reshape( - prediction_masks_masked_by_class_targets, - [batch_size, -1, mask_height * mask_width]) - - batch_mask_targets_shape = tf.shape(batch_mask_targets) - flat_gt_masks = tf.reshape(batch_mask_targets, - [-1, batch_mask_targets_shape[2], - batch_mask_targets_shape[3]]) - - # Use normalized proposals to crop mask targets from image masks. 
- flat_normalized_proposals = box_list_ops.to_normalized_coordinates( - box_list.BoxList(tf.reshape(proposal_boxes, [-1, 4])), - image_shape[1], image_shape[2]).get() - - flat_cropped_gt_mask = tf.image.crop_and_resize( - tf.expand_dims(flat_gt_masks, -1), - flat_normalized_proposals, - tf.range(flat_normalized_proposals.shape[0].value), - [mask_height, mask_width]) - - batch_cropped_gt_mask = tf.reshape( - flat_cropped_gt_mask, - [batch_size, -1, mask_height * mask_width]) - - second_stage_mask_losses = self._second_stage_mask_loss( - reshaped_prediction_masks, - batch_cropped_gt_mask, - weights=batch_mask_target_weights) / ( - mask_height * mask_width * - tf.maximum(tf.reduce_sum(batch_mask_target_weights, axis=1, - keep_dims=True), - tf.ones((batch_size, 1)))) - second_stage_mask_loss = tf.reduce_sum( - tf.boolean_mask(second_stage_mask_losses, paddings_indicator)) - - if second_stage_mask_loss is not None: - with tf.name_scope('mask_loss'): - loss_dict['second_stage_mask_loss'] = ( - self._second_stage_mask_loss_weight * second_stage_mask_loss) - return loss_dict - - def _padded_batched_proposals_indicator(self, - num_proposals, - max_num_proposals): - """Creates indicator matrix of non-pad elements of padded batch proposals. - - Args: - num_proposals: Tensor of type tf.int32 with shape [batch_size]. - max_num_proposals: Maximum number of proposals per image (integer). - - Returns: - A Tensor of type tf.bool with shape [batch_size, max_num_proposals]. - """ - batch_size = tf.size(num_proposals) - tiled_num_proposals = tf.tile( - tf.expand_dims(num_proposals, 1), [1, max_num_proposals]) - tiled_proposal_index = tf.tile( - tf.expand_dims(tf.range(max_num_proposals), 0), [batch_size, 1]) - return tf.greater(tiled_num_proposals, tiled_proposal_index) - - def _unpad_proposals_and_apply_hard_mining(self, - proposal_boxlists, - second_stage_loc_losses, - second_stage_cls_losses, - num_proposals): - """Unpads proposals and applies hard mining. 
- - Args: - proposal_boxlists: A list of `batch_size` BoxLists each representing - `self.max_num_proposals` representing decoded proposal bounding boxes - for each image. - second_stage_loc_losses: A Tensor of type `float32`. A tensor of shape - `[batch_size, self.max_num_proposals]` representing per-anchor - second stage localization loss values. - second_stage_cls_losses: A Tensor of type `float32`. A tensor of shape - `[batch_size, self.max_num_proposals]` representing per-anchor - second stage classification loss values. - num_proposals: A Tensor of type `int32`. A 1-D tensor of shape [batch] - representing the number of proposals predicted for each image in - the batch. - - Returns: - second_stage_loc_loss: A scalar float32 tensor representing the second - stage localization loss. - second_stage_cls_loss: A scalar float32 tensor representing the second - stage classification loss. - """ - for (proposal_boxlist, single_image_loc_loss, single_image_cls_loss, - single_image_num_proposals) in zip( - proposal_boxlists, - tf.unstack(second_stage_loc_losses), - tf.unstack(second_stage_cls_losses), - tf.unstack(num_proposals)): - proposal_boxlist = box_list.BoxList( - tf.slice(proposal_boxlist.get(), - [0, 0], [single_image_num_proposals, -1])) - single_image_loc_loss = tf.slice(single_image_loc_loss, - [0], [single_image_num_proposals]) - single_image_cls_loss = tf.slice(single_image_cls_loss, - [0], [single_image_num_proposals]) - return self._hard_example_miner( - location_losses=tf.expand_dims(single_image_loc_loss, 0), - cls_losses=tf.expand_dims(single_image_cls_loss, 0), - decoded_boxlist_list=[proposal_boxlist]) - - def restore_map(self, from_detection_checkpoint=True): - """Returns a map of variables to load from a foreign checkpoint. - - See parent class for details. 
- - Args: - from_detection_checkpoint: whether to restore from a full detection - checkpoint (with compatible variable names) or to restore from a - classification checkpoint for initialization prior to training. - - Returns: - A dict mapping variable names (to load from a checkpoint) to variables in - the model graph. - """ - if not from_detection_checkpoint: - return self._feature_extractor.restore_from_classification_checkpoint_fn( - self.first_stage_feature_extractor_scope, - self.second_stage_feature_extractor_scope) - - variables_to_restore = tf.global_variables() - variables_to_restore.append(slim.get_or_create_global_step()) - # Only load feature extractor variables to be consistent with loading from - # a classification checkpoint. - feature_extractor_variables = tf.contrib.framework.filter_variables( - variables_to_restore, - include_patterns=[self.first_stage_feature_extractor_scope, - self.second_stage_feature_extractor_scope]) - return {var.op.name: var for var in feature_extractor_variables} diff --git a/object_detection/meta_architectures/faster_rcnn_meta_arch_test.py b/object_detection/meta_architectures/faster_rcnn_meta_arch_test.py deleted file mode 100644 index b31a22db..00000000 --- a/object_detection/meta_architectures/faster_rcnn_meta_arch_test.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for object_detection.meta_architectures.faster_rcnn_meta_arch.""" - -import numpy as np -import tensorflow as tf - -from object_detection.meta_architectures import faster_rcnn_meta_arch_test_lib - - -class FasterRCNNMetaArchTest( - faster_rcnn_meta_arch_test_lib.FasterRCNNMetaArchTestBase): - - def test_postprocess_second_stage_only_inference_mode_with_masks(self): - model = self._build_model( - is_training=False, first_stage_only=False, second_stage_batch_size=6) - - batch_size = 2 - total_num_padded_proposals = batch_size * model.max_num_proposals - proposal_boxes = tf.constant( - [[[1, 1, 2, 3], - [0, 0, 1, 1], - [.5, .5, .6, .6], - 4*[0], 4*[0], 4*[0], 4*[0], 4*[0]], - [[2, 3, 6, 8], - [1, 2, 5, 3], - 4*[0], 4*[0], 4*[0], 4*[0], 4*[0], 4*[0]]], dtype=tf.float32) - num_proposals = tf.constant([3, 2], dtype=tf.int32) - refined_box_encodings = tf.zeros( - [total_num_padded_proposals, model.num_classes, 4], dtype=tf.float32) - class_predictions_with_background = tf.ones( - [total_num_padded_proposals, model.num_classes+1], dtype=tf.float32) - image_shape = tf.constant([batch_size, 36, 48, 3], dtype=tf.int32) - - mask_height = 2 - mask_width = 2 - mask_predictions = 30. 
* tf.ones( - [total_num_padded_proposals, model.num_classes, - mask_height, mask_width], dtype=tf.float32) - exp_detection_masks = np.array([[[[1, 1], [1, 1]], - [[1, 1], [1, 1]], - [[1, 1], [1, 1]], - [[1, 1], [1, 1]], - [[1, 1], [1, 1]]], - [[[1, 1], [1, 1]], - [[1, 1], [1, 1]], - [[1, 1], [1, 1]], - [[1, 1], [1, 1]], - [[0, 0], [0, 0]]]]) - - detections = model.postprocess({ - 'refined_box_encodings': refined_box_encodings, - 'class_predictions_with_background': class_predictions_with_background, - 'num_proposals': num_proposals, - 'proposal_boxes': proposal_boxes, - 'image_shape': image_shape, - 'mask_predictions': mask_predictions - }) - with self.test_session() as sess: - detections_out = sess.run(detections) - self.assertAllEqual(detections_out['detection_boxes'].shape, [2, 5, 4]) - self.assertAllClose(detections_out['detection_scores'], - [[1, 1, 1, 1, 1], [1, 1, 1, 1, 0]]) - self.assertAllClose(detections_out['detection_classes'], - [[0, 0, 0, 1, 1], [0, 0, 1, 1, 0]]) - self.assertAllClose(detections_out['num_detections'], [5, 4]) - self.assertAllClose(detections_out['detection_masks'], - exp_detection_masks) - - def _get_box_classifier_features_shape(self, - image_size, - batch_size, - max_num_proposals, - initial_crop_size, - maxpool_stride, - num_features): - return (batch_size * max_num_proposals, - initial_crop_size/maxpool_stride, - initial_crop_size/maxpool_stride, - num_features) - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py b/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py deleted file mode 100644 index 1e84dad3..00000000 --- a/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py +++ /dev/null @@ -1,1257 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.meta_architectures.faster_rcnn_meta_arch.""" -import numpy as np -import tensorflow as tf -from google.protobuf import text_format -from object_detection.anchor_generators import grid_anchor_generator -from object_detection.builders import box_predictor_builder -from object_detection.builders import hyperparams_builder -from object_detection.builders import post_processing_builder -from object_detection.core import losses -from object_detection.meta_architectures import faster_rcnn_meta_arch -from object_detection.protos import box_predictor_pb2 -from object_detection.protos import hyperparams_pb2 -from object_detection.protos import post_processing_pb2 - -slim = tf.contrib.slim -BOX_CODE_SIZE = 4 - - -class FakeFasterRCNNFeatureExtractor( - faster_rcnn_meta_arch.FasterRCNNFeatureExtractor): - """Fake feature extracture to use in tests.""" - - def __init__(self): - super(FakeFasterRCNNFeatureExtractor, self).__init__( - is_training=False, - first_stage_features_stride=32, - reuse_weights=None, - weight_decay=0.0) - - def preprocess(self, resized_inputs): - return tf.identity(resized_inputs) - - def _extract_proposal_features(self, preprocessed_inputs, scope): - with tf.variable_scope('mock_model'): - return 0 * slim.conv2d(preprocessed_inputs, - num_outputs=3, kernel_size=1, scope='layer1') - - def _extract_box_classifier_features(self, proposal_feature_maps, scope): - with tf.variable_scope('mock_model'): - return 0 * 
slim.conv2d(proposal_feature_maps, - num_outputs=3, kernel_size=1, scope='layer2') - - -class FasterRCNNMetaArchTestBase(tf.test.TestCase): - """Base class to test Faster R-CNN and R-FCN meta architectures.""" - - def _build_arg_scope_with_hyperparams(self, - hyperparams_text_proto, - is_training): - hyperparams = hyperparams_pb2.Hyperparams() - text_format.Merge(hyperparams_text_proto, hyperparams) - return hyperparams_builder.build(hyperparams, is_training=is_training) - - def _get_second_stage_box_predictor_text_proto(self): - box_predictor_text_proto = """ - mask_rcnn_box_predictor { - fc_hyperparams { - op: FC - activation: NONE - regularizer { - l2_regularizer { - weight: 0.0005 - } - } - initializer { - variance_scaling_initializer { - factor: 1.0 - uniform: true - mode: FAN_AVG - } - } - } - } - """ - return box_predictor_text_proto - - def _get_second_stage_box_predictor(self, num_classes, is_training): - box_predictor_proto = box_predictor_pb2.BoxPredictor() - text_format.Merge(self._get_second_stage_box_predictor_text_proto(), - box_predictor_proto) - return box_predictor_builder.build( - hyperparams_builder.build, - box_predictor_proto, - num_classes=num_classes, - is_training=is_training) - - def _get_model(self, box_predictor, **common_kwargs): - return faster_rcnn_meta_arch.FasterRCNNMetaArch( - initial_crop_size=3, - maxpool_kernel_size=1, - maxpool_stride=1, - second_stage_mask_rcnn_box_predictor=box_predictor, - **common_kwargs) - - def _build_model(self, - is_training, - first_stage_only, - second_stage_batch_size, - first_stage_max_proposals=8, - num_classes=2, - hard_mining=False, - softmax_second_stage_classification_loss=True): - - def image_resizer_fn(image): - return tf.identity(image) - - # anchors in this test are designed so that a subset of anchors are inside - # the image and a subset of anchors are outside. 
- first_stage_anchor_scales = (0.001, 0.005, 0.1) - first_stage_anchor_aspect_ratios = (0.5, 1.0, 2.0) - first_stage_anchor_strides = (1, 1) - first_stage_anchor_generator = grid_anchor_generator.GridAnchorGenerator( - first_stage_anchor_scales, - first_stage_anchor_aspect_ratios, - anchor_stride=first_stage_anchor_strides) - - fake_feature_extractor = FakeFasterRCNNFeatureExtractor() - - first_stage_box_predictor_hyperparams_text_proto = """ - op: CONV - activation: RELU - regularizer { - l2_regularizer { - weight: 0.00004 - } - } - initializer { - truncated_normal_initializer { - stddev: 0.03 - } - } - """ - first_stage_box_predictor_arg_scope = ( - self._build_arg_scope_with_hyperparams( - first_stage_box_predictor_hyperparams_text_proto, is_training)) - - first_stage_box_predictor_kernel_size = 3 - first_stage_atrous_rate = 1 - first_stage_box_predictor_depth = 512 - first_stage_minibatch_size = 3 - first_stage_positive_balance_fraction = .5 - - first_stage_nms_score_threshold = -1.0 - first_stage_nms_iou_threshold = 1.0 - first_stage_max_proposals = first_stage_max_proposals - - first_stage_localization_loss_weight = 1.0 - first_stage_objectness_loss_weight = 1.0 - - post_processing_text_proto = """ - batch_non_max_suppression { - score_threshold: -20.0 - iou_threshold: 1.0 - max_detections_per_class: 5 - max_total_detections: 5 - } - """ - post_processing_config = post_processing_pb2.PostProcessing() - text_format.Merge(post_processing_text_proto, post_processing_config) - second_stage_non_max_suppression_fn, _ = post_processing_builder.build( - post_processing_config) - second_stage_balance_fraction = 1.0 - - second_stage_score_conversion_fn = tf.identity - second_stage_localization_loss_weight = 1.0 - second_stage_classification_loss_weight = 1.0 - if softmax_second_stage_classification_loss: - second_stage_classification_loss = ( - losses.WeightedSoftmaxClassificationLoss(anchorwise_output=True)) - else: - second_stage_classification_loss = ( - 
losses.WeightedSigmoidClassificationLoss(anchorwise_output=True)) - - hard_example_miner = None - if hard_mining: - hard_example_miner = losses.HardExampleMiner( - num_hard_examples=1, - iou_threshold=0.99, - loss_type='both', - cls_loss_weight=second_stage_classification_loss_weight, - loc_loss_weight=second_stage_localization_loss_weight, - max_negatives_per_positive=None) - - common_kwargs = { - 'is_training': is_training, - 'num_classes': num_classes, - 'image_resizer_fn': image_resizer_fn, - 'feature_extractor': fake_feature_extractor, - 'first_stage_only': first_stage_only, - 'first_stage_anchor_generator': first_stage_anchor_generator, - 'first_stage_atrous_rate': first_stage_atrous_rate, - 'first_stage_box_predictor_arg_scope': - first_stage_box_predictor_arg_scope, - 'first_stage_box_predictor_kernel_size': - first_stage_box_predictor_kernel_size, - 'first_stage_box_predictor_depth': first_stage_box_predictor_depth, - 'first_stage_minibatch_size': first_stage_minibatch_size, - 'first_stage_positive_balance_fraction': - first_stage_positive_balance_fraction, - 'first_stage_nms_score_threshold': first_stage_nms_score_threshold, - 'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold, - 'first_stage_max_proposals': first_stage_max_proposals, - 'first_stage_localization_loss_weight': - first_stage_localization_loss_weight, - 'first_stage_objectness_loss_weight': - first_stage_objectness_loss_weight, - 'second_stage_batch_size': second_stage_batch_size, - 'second_stage_balance_fraction': second_stage_balance_fraction, - 'second_stage_non_max_suppression_fn': - second_stage_non_max_suppression_fn, - 'second_stage_score_conversion_fn': second_stage_score_conversion_fn, - 'second_stage_localization_loss_weight': - second_stage_localization_loss_weight, - 'second_stage_classification_loss_weight': - second_stage_classification_loss_weight, - 'second_stage_classification_loss': - second_stage_classification_loss, - 'hard_example_miner': hard_example_miner} 
- - return self._get_model(self._get_second_stage_box_predictor( - num_classes=num_classes, is_training=is_training), **common_kwargs) - - def test_predict_gives_correct_shapes_in_inference_mode_first_stage_only( - self): - test_graph = tf.Graph() - with test_graph.as_default(): - model = self._build_model( - is_training=False, first_stage_only=True, second_stage_batch_size=2) - batch_size = 2 - height = 10 - width = 12 - input_image_shape = (batch_size, height, width, 3) - - preprocessed_inputs = tf.placeholder(dtype=tf.float32, - shape=(batch_size, None, None, 3)) - prediction_dict = model.predict(preprocessed_inputs) - - # In inference mode, anchors are clipped to the image window, but not - # pruned. Since MockFasterRCNN.extract_proposal_features returns a - # tensor with the same shape as its input, the expected number of anchors - # is height * width * the number of anchors per location (i.e. 3x3). - expected_num_anchors = height * width * 3 * 3 - expected_output_keys = set([ - 'rpn_box_predictor_features', 'rpn_features_to_crop', 'image_shape', - 'rpn_box_encodings', 'rpn_objectness_predictions_with_background', - 'anchors']) - expected_output_shapes = { - 'rpn_box_predictor_features': (batch_size, height, width, 512), - 'rpn_features_to_crop': (batch_size, height, width, 3), - 'rpn_box_encodings': (batch_size, expected_num_anchors, 4), - 'rpn_objectness_predictions_with_background': - (batch_size, expected_num_anchors, 2), - 'anchors': (expected_num_anchors, 4) - } - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - prediction_out = sess.run(prediction_dict, - feed_dict={ - preprocessed_inputs: - np.zeros(input_image_shape) - }) - - self.assertEqual(set(prediction_out.keys()), expected_output_keys) - - self.assertAllEqual(prediction_out['image_shape'], input_image_shape) - for output_key, expected_shape in expected_output_shapes.items(): - self.assertAllEqual(prediction_out[output_key].shape, 
expected_shape) - - # Check that anchors are clipped to window. - anchors = prediction_out['anchors'] - self.assertTrue(np.all(np.greater_equal(anchors, 0))) - self.assertTrue(np.all(np.less_equal(anchors[:, 0], height))) - self.assertTrue(np.all(np.less_equal(anchors[:, 1], width))) - self.assertTrue(np.all(np.less_equal(anchors[:, 2], height))) - self.assertTrue(np.all(np.less_equal(anchors[:, 3], width))) - - def test_predict_gives_valid_anchors_in_training_mode_first_stage_only(self): - test_graph = tf.Graph() - with test_graph.as_default(): - model = self._build_model( - is_training=True, first_stage_only=True, second_stage_batch_size=2) - batch_size = 2 - height = 10 - width = 12 - input_image_shape = (batch_size, height, width, 3) - preprocessed_inputs = tf.placeholder(dtype=tf.float32, - shape=(batch_size, None, None, 3)) - prediction_dict = model.predict(preprocessed_inputs) - - expected_output_keys = set([ - 'rpn_box_predictor_features', 'rpn_features_to_crop', 'image_shape', - 'rpn_box_encodings', 'rpn_objectness_predictions_with_background', - 'anchors']) - # At training time, anchors that exceed image bounds are pruned. Thus - # the `expected_num_anchors` in the above inference mode test is now - # a strict upper bound on the number of anchors. - num_anchors_strict_upper_bound = height * width * 3 * 3 - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - prediction_out = sess.run(prediction_dict, - feed_dict={ - preprocessed_inputs: - np.zeros(input_image_shape) - }) - - self.assertEqual(set(prediction_out.keys()), expected_output_keys) - self.assertAllEqual(prediction_out['image_shape'], input_image_shape) - - # Check that anchors have less than the upper bound and - # are clipped to window. 
- anchors = prediction_out['anchors'] - self.assertTrue(len(anchors.shape) == 2 and anchors.shape[1] == 4) - num_anchors_out = anchors.shape[0] - self.assertTrue(num_anchors_out < num_anchors_strict_upper_bound) - - self.assertTrue(np.all(np.greater_equal(anchors, 0))) - self.assertTrue(np.all(np.less_equal(anchors[:, 0], height))) - self.assertTrue(np.all(np.less_equal(anchors[:, 1], width))) - self.assertTrue(np.all(np.less_equal(anchors[:, 2], height))) - self.assertTrue(np.all(np.less_equal(anchors[:, 3], width))) - - self.assertAllEqual(prediction_out['rpn_box_encodings'].shape, - (batch_size, num_anchors_out, 4)) - self.assertAllEqual( - prediction_out['rpn_objectness_predictions_with_background'].shape, - (batch_size, num_anchors_out, 2)) - - def test_predict_correct_shapes_in_inference_mode_both_stages( - self): - batch_size = 2 - image_size = 10 - max_num_proposals = 8 - initial_crop_size = 3 - maxpool_stride = 1 - - input_shapes = [(batch_size, image_size, image_size, 3), - (None, image_size, image_size, 3), - (batch_size, None, None, 3), - (None, None, None, 3)] - expected_num_anchors = image_size * image_size * 3 * 3 - expected_shapes = { - 'rpn_box_predictor_features': - (2, image_size, image_size, 512), - 'rpn_features_to_crop': (2, image_size, image_size, 3), - 'image_shape': (4,), - 'rpn_box_encodings': (2, expected_num_anchors, 4), - 'rpn_objectness_predictions_with_background': - (2, expected_num_anchors, 2), - 'anchors': (expected_num_anchors, 4), - 'refined_box_encodings': (2 * max_num_proposals, 2, 4), - 'class_predictions_with_background': (2 * max_num_proposals, 2 + 1), - 'num_proposals': (2,), - 'proposal_boxes': (2, max_num_proposals, 4), - 'proposal_boxes_normalized': (2, max_num_proposals, 4), - 'box_classifier_features': - self._get_box_classifier_features_shape(image_size, - batch_size, - max_num_proposals, - initial_crop_size, - maxpool_stride, - 3) - } - - for input_shape in input_shapes: - test_graph = tf.Graph() - with 
test_graph.as_default(): - model = self._build_model( - is_training=False, first_stage_only=False, - second_stage_batch_size=2) - preprocessed_inputs = tf.placeholder(tf.float32, shape=input_shape) - result_tensor_dict = model.predict(preprocessed_inputs) - init_op = tf.global_variables_initializer() - with self.test_session(graph=test_graph) as sess: - sess.run(init_op) - tensor_dict_out = sess.run(result_tensor_dict, feed_dict={ - preprocessed_inputs: - np.zeros((batch_size, image_size, image_size, 3))}) - self.assertEqual(set(tensor_dict_out.keys()), - set(expected_shapes.keys())) - for key in expected_shapes: - self.assertAllEqual(tensor_dict_out[key].shape, expected_shapes[key]) - - def test_predict_gives_correct_shapes_in_train_mode_both_stages(self): - test_graph = tf.Graph() - with test_graph.as_default(): - model = self._build_model( - is_training=True, first_stage_only=False, second_stage_batch_size=7) - - batch_size = 2 - image_size = 10 - max_num_proposals = 7 - initial_crop_size = 3 - maxpool_stride = 1 - - image_shape = (batch_size, image_size, image_size, 3) - preprocessed_inputs = tf.zeros(image_shape, dtype=tf.float32) - groundtruth_boxes_list = [ - tf.constant([[0, 0, .5, .5], [.5, .5, 1, 1]], dtype=tf.float32), - tf.constant([[0, .5, .5, 1], [.5, 0, 1, .5]], dtype=tf.float32)] - groundtruth_classes_list = [ - tf.constant([[1, 0], [0, 1]], dtype=tf.float32), - tf.constant([[1, 0], [1, 0]], dtype=tf.float32)] - - model.provide_groundtruth(groundtruth_boxes_list, - groundtruth_classes_list) - - result_tensor_dict = model.predict(preprocessed_inputs) - expected_shapes = { - 'rpn_box_predictor_features': - (2, image_size, image_size, 512), - 'rpn_features_to_crop': (2, image_size, image_size, 3), - 'image_shape': (4,), - 'refined_box_encodings': (2 * max_num_proposals, 2, 4), - 'class_predictions_with_background': (2 * max_num_proposals, 2 + 1), - 'num_proposals': (2,), - 'proposal_boxes': (2, max_num_proposals, 4), - 'proposal_boxes_normalized': (2, 
max_num_proposals, 4), - 'box_classifier_features': - self._get_box_classifier_features_shape(image_size, - batch_size, - max_num_proposals, - initial_crop_size, - maxpool_stride, - 3) - } - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - tensor_dict_out = sess.run(result_tensor_dict) - self.assertEqual(set(tensor_dict_out.keys()), - set(expected_shapes.keys()).union(set([ - 'rpn_box_encodings', - 'rpn_objectness_predictions_with_background', - 'anchors']))) - for key in expected_shapes: - self.assertAllEqual(tensor_dict_out[key].shape, expected_shapes[key]) - - anchors_shape_out = tensor_dict_out['anchors'].shape - self.assertEqual(2, len(anchors_shape_out)) - self.assertEqual(4, anchors_shape_out[1]) - num_anchors_out = anchors_shape_out[0] - self.assertAllEqual(tensor_dict_out['rpn_box_encodings'].shape, - (2, num_anchors_out, 4)) - self.assertAllEqual( - tensor_dict_out['rpn_objectness_predictions_with_background'].shape, - (2, num_anchors_out, 2)) - - def test_postprocess_first_stage_only_inference_mode(self): - model = self._build_model( - is_training=False, first_stage_only=True, second_stage_batch_size=6) - batch_size = 2 - anchors = tf.constant( - [[0, 0, 16, 16], - [0, 16, 16, 32], - [16, 0, 32, 16], - [16, 16, 32, 32]], dtype=tf.float32) - rpn_box_encodings = tf.zeros( - [batch_size, anchors.get_shape().as_list()[0], - BOX_CODE_SIZE], dtype=tf.float32) - # use different numbers for the objectness category to break ties in - # order of boxes returned by NMS - rpn_objectness_predictions_with_background = tf.constant([ - [[-10, 13], - [10, -10], - [10, -11], - [-10, 12]], - [[10, -10], - [-10, 13], - [-10, 12], - [10, -11]]], dtype=tf.float32) - rpn_features_to_crop = tf.ones((batch_size, 8, 8, 10), dtype=tf.float32) - image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32) - proposals = model.postprocess({ - 'rpn_box_encodings': rpn_box_encodings, - 
'rpn_objectness_predictions_with_background': - rpn_objectness_predictions_with_background, - 'rpn_features_to_crop': rpn_features_to_crop, - 'anchors': anchors, - 'image_shape': image_shape}) - expected_proposal_boxes = [ - [[0, 0, .5, .5], [.5, .5, 1, 1], [0, .5, .5, 1], [.5, 0, 1.0, .5]] - + 4 * [4 * [0]], - [[0, .5, .5, 1], [.5, 0, 1.0, .5], [0, 0, .5, .5], [.5, .5, 1, 1]] - + 4 * [4 * [0]]] - expected_proposal_scores = [[1, 1, 0, 0, 0, 0, 0, 0], - [1, 1, 0, 0, 0, 0, 0, 0]] - expected_num_proposals = [4, 4] - - expected_output_keys = set(['detection_boxes', 'detection_scores', - 'num_detections']) - self.assertEqual(set(proposals.keys()), expected_output_keys) - with self.test_session() as sess: - proposals_out = sess.run(proposals) - self.assertAllClose(proposals_out['detection_boxes'], - expected_proposal_boxes) - self.assertAllClose(proposals_out['detection_scores'], - expected_proposal_scores) - self.assertAllEqual(proposals_out['num_detections'], - expected_num_proposals) - - def test_postprocess_first_stage_only_train_mode(self): - model = self._build_model( - is_training=True, first_stage_only=True, second_stage_batch_size=2) - batch_size = 2 - anchors = tf.constant( - [[0, 0, 16, 16], - [0, 16, 16, 32], - [16, 0, 32, 16], - [16, 16, 32, 32]], dtype=tf.float32) - rpn_box_encodings = tf.zeros( - [batch_size, anchors.get_shape().as_list()[0], - BOX_CODE_SIZE], dtype=tf.float32) - # use different numbers for the objectness category to break ties in - # order of boxes returned by NMS - rpn_objectness_predictions_with_background = tf.constant([ - [[-10, 13], - [-10, 12], - [-10, 11], - [-10, 10]], - [[-10, 13], - [-10, 12], - [-10, 11], - [-10, 10]]], dtype=tf.float32) - rpn_features_to_crop = tf.ones((batch_size, 8, 8, 10), dtype=tf.float32) - image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32) - groundtruth_boxes_list = [ - tf.constant([[0, 0, .5, .5], [.5, .5, 1, 1]], dtype=tf.float32), - tf.constant([[0, .5, .5, 1], [.5, 0, 1, .5]], 
dtype=tf.float32)] - groundtruth_classes_list = [tf.constant([[1, 0], [0, 1]], dtype=tf.float32), - tf.constant([[1, 0], [1, 0]], dtype=tf.float32)] - - model.provide_groundtruth(groundtruth_boxes_list, - groundtruth_classes_list) - proposals = model.postprocess({ - 'rpn_box_encodings': rpn_box_encodings, - 'rpn_objectness_predictions_with_background': - rpn_objectness_predictions_with_background, - 'rpn_features_to_crop': rpn_features_to_crop, - 'anchors': anchors, - 'image_shape': image_shape}) - expected_proposal_boxes = [ - [[0, 0, .5, .5], [.5, .5, 1, 1]], [[0, .5, .5, 1], [.5, 0, 1, .5]]] - expected_proposal_scores = [[1, 1], - [1, 1]] - expected_num_proposals = [2, 2] - - expected_output_keys = set(['detection_boxes', 'detection_scores', - 'num_detections']) - self.assertEqual(set(proposals.keys()), expected_output_keys) - - with self.test_session() as sess: - proposals_out = sess.run(proposals) - self.assertAllClose(proposals_out['detection_boxes'], - expected_proposal_boxes) - self.assertAllClose(proposals_out['detection_scores'], - expected_proposal_scores) - self.assertAllEqual(proposals_out['num_detections'], - expected_num_proposals) - - def test_postprocess_second_stage_only_inference_mode(self): - num_proposals_shapes = [(2), (None)] - refined_box_encodings_shapes = [(16, 2, 4), (None, 2, 4)] - class_predictions_with_background_shapes = [(16, 3), (None, 3)] - proposal_boxes_shapes = [(2, 8, 4), (None, 8, 4)] - batch_size = 2 - image_shape = np.array((2, 36, 48, 3), dtype=np.int32) - for (num_proposals_shape, refined_box_encoding_shape, - class_predictions_with_background_shape, - proposal_boxes_shape) in zip(num_proposals_shapes, - refined_box_encodings_shapes, - class_predictions_with_background_shapes, - proposal_boxes_shapes): - tf_graph = tf.Graph() - with tf_graph.as_default(): - model = self._build_model( - is_training=False, first_stage_only=False, - second_stage_batch_size=6) - total_num_padded_proposals = batch_size * model.max_num_proposals 
- proposal_boxes = np.array( - [[[1, 1, 2, 3], - [0, 0, 1, 1], - [.5, .5, .6, .6], - 4*[0], 4*[0], 4*[0], 4*[0], 4*[0]], - [[2, 3, 6, 8], - [1, 2, 5, 3], - 4*[0], 4*[0], 4*[0], 4*[0], 4*[0], 4*[0]]]) - num_proposals = np.array([3, 2], dtype=np.int32) - refined_box_encodings = np.zeros( - [total_num_padded_proposals, model.num_classes, 4]) - class_predictions_with_background = np.ones( - [total_num_padded_proposals, model.num_classes+1]) - - num_proposals_placeholder = tf.placeholder(tf.int32, - shape=num_proposals_shape) - refined_box_encodings_placeholder = tf.placeholder( - tf.float32, shape=refined_box_encoding_shape) - class_predictions_with_background_placeholder = tf.placeholder( - tf.float32, shape=class_predictions_with_background_shape) - proposal_boxes_placeholder = tf.placeholder( - tf.float32, shape=proposal_boxes_shape) - image_shape_placeholder = tf.placeholder(tf.int32, shape=(4)) - - detections = model.postprocess({ - 'refined_box_encodings': refined_box_encodings_placeholder, - 'class_predictions_with_background': - class_predictions_with_background_placeholder, - 'num_proposals': num_proposals_placeholder, - 'proposal_boxes': proposal_boxes_placeholder, - 'image_shape': image_shape_placeholder, - }) - with self.test_session(graph=tf_graph) as sess: - detections_out = sess.run( - detections, - feed_dict={ - refined_box_encodings_placeholder: refined_box_encodings, - class_predictions_with_background_placeholder: - class_predictions_with_background, - num_proposals_placeholder: num_proposals, - proposal_boxes_placeholder: proposal_boxes, - image_shape_placeholder: image_shape - }) - self.assertAllEqual(detections_out['detection_boxes'].shape, [2, 5, 4]) - self.assertAllClose(detections_out['detection_scores'], - [[1, 1, 1, 1, 1], [1, 1, 1, 1, 0]]) - self.assertAllClose(detections_out['detection_classes'], - [[0, 0, 0, 1, 1], [0, 0, 1, 1, 0]]) - self.assertAllClose(detections_out['num_detections'], [5, 4]) - - def 
test_preprocess_preserves_input_shapes(self): - image_shapes = [(3, None, None, 3), - (None, 10, 10, 3), - (None, None, None, 3)] - for image_shape in image_shapes: - model = self._build_model( - is_training=False, first_stage_only=False, second_stage_batch_size=6) - image_placeholder = tf.placeholder(tf.float32, shape=image_shape) - preprocessed_inputs = model.preprocess(image_placeholder) - self.assertAllEqual(preprocessed_inputs.shape.as_list(), image_shape) - - # TODO: Split test into two - with and without masks. - def test_loss_first_stage_only_mode(self): - model = self._build_model( - is_training=True, first_stage_only=True, second_stage_batch_size=6) - batch_size = 2 - anchors = tf.constant( - [[0, 0, 16, 16], - [0, 16, 16, 32], - [16, 0, 32, 16], - [16, 16, 32, 32]], dtype=tf.float32) - - rpn_box_encodings = tf.zeros( - [batch_size, - anchors.get_shape().as_list()[0], - BOX_CODE_SIZE], dtype=tf.float32) - # use different numbers for the objectness category to break ties in - # order of boxes returned by NMS - rpn_objectness_predictions_with_background = tf.constant([ - [[-10, 13], - [10, -10], - [10, -11], - [-10, 12]], - [[10, -10], - [-10, 13], - [-10, 12], - [10, -11]]], dtype=tf.float32) - image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32) - - groundtruth_boxes_list = [ - tf.constant([[0, 0, .5, .5], [.5, .5, 1, 1]], dtype=tf.float32), - tf.constant([[0, .5, .5, 1], [.5, 0, 1, .5]], dtype=tf.float32)] - groundtruth_classes_list = [tf.constant([[1, 0], [0, 1]], dtype=tf.float32), - tf.constant([[1, 0], [1, 0]], dtype=tf.float32)] - - prediction_dict = { - 'rpn_box_encodings': rpn_box_encodings, - 'rpn_objectness_predictions_with_background': - rpn_objectness_predictions_with_background, - 'image_shape': image_shape, - 'anchors': anchors - } - model.provide_groundtruth(groundtruth_boxes_list, - groundtruth_classes_list) - loss_dict = model.loss(prediction_dict) - with self.test_session() as sess: - loss_dict_out = sess.run(loss_dict) - 
self.assertAllClose(loss_dict_out['first_stage_localization_loss'], 0) - self.assertAllClose(loss_dict_out['first_stage_objectness_loss'], 0) - self.assertTrue('second_stage_localization_loss' not in loss_dict_out) - self.assertTrue('second_stage_classification_loss' not in loss_dict_out) - - # TODO: Split test into two - with and without masks. - def test_loss_full(self): - model = self._build_model( - is_training=True, first_stage_only=False, second_stage_batch_size=6) - batch_size = 2 - anchors = tf.constant( - [[0, 0, 16, 16], - [0, 16, 16, 32], - [16, 0, 32, 16], - [16, 16, 32, 32]], dtype=tf.float32) - rpn_box_encodings = tf.zeros( - [batch_size, - anchors.get_shape().as_list()[0], - BOX_CODE_SIZE], dtype=tf.float32) - # use different numbers for the objectness category to break ties in - # order of boxes returned by NMS - rpn_objectness_predictions_with_background = tf.constant([ - [[-10, 13], - [10, -10], - [10, -11], - [-10, 12]], - [[10, -10], - [-10, 13], - [-10, 12], - [10, -11]]], dtype=tf.float32) - image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32) - - num_proposals = tf.constant([6, 6], dtype=tf.int32) - proposal_boxes = tf.constant( - 2 * [[[0, 0, 16, 16], - [0, 16, 16, 32], - [16, 0, 32, 16], - [16, 16, 32, 32], - [0, 0, 16, 16], - [0, 16, 16, 32]]], dtype=tf.float32) - refined_box_encodings = tf.zeros( - (batch_size * model.max_num_proposals, - model.num_classes, - BOX_CODE_SIZE), dtype=tf.float32) - class_predictions_with_background = tf.constant( - [[-10, 10, -10], # first image - [10, -10, -10], - [10, -10, -10], - [-10, -10, 10], - [-10, 10, -10], - [10, -10, -10], - [10, -10, -10], # second image - [-10, 10, -10], - [-10, 10, -10], - [10, -10, -10], - [10, -10, -10], - [-10, 10, -10]], dtype=tf.float32) - - mask_predictions_logits = 20 * tf.ones((batch_size * - model.max_num_proposals, - model.num_classes, - 14, 14), - dtype=tf.float32) - - groundtruth_boxes_list = [ - tf.constant([[0, 0, .5, .5], [.5, .5, 1, 1]], 
dtype=tf.float32), - tf.constant([[0, .5, .5, 1], [.5, 0, 1, .5]], dtype=tf.float32)] - groundtruth_classes_list = [tf.constant([[1, 0], [0, 1]], dtype=tf.float32), - tf.constant([[1, 0], [1, 0]], dtype=tf.float32)] - - # Set all elements of groundtruth mask to 1.0. In this case all proposal - # crops of the groundtruth masks should return a mask that covers the entire - # proposal. Thus, if mask_predictions_logits element values are all greater - # than 20, the loss should be zero. - groundtruth_masks_list = [tf.convert_to_tensor(np.ones((2, 32, 32)), - dtype=tf.float32), - tf.convert_to_tensor(np.ones((2, 32, 32)), - dtype=tf.float32)] - prediction_dict = { - 'rpn_box_encodings': rpn_box_encodings, - 'rpn_objectness_predictions_with_background': - rpn_objectness_predictions_with_background, - 'image_shape': image_shape, - 'anchors': anchors, - 'refined_box_encodings': refined_box_encodings, - 'class_predictions_with_background': class_predictions_with_background, - 'proposal_boxes': proposal_boxes, - 'num_proposals': num_proposals, - 'mask_predictions': mask_predictions_logits - } - model.provide_groundtruth(groundtruth_boxes_list, - groundtruth_classes_list, - groundtruth_masks_list) - loss_dict = model.loss(prediction_dict) - - with self.test_session() as sess: - loss_dict_out = sess.run(loss_dict) - self.assertAllClose(loss_dict_out['first_stage_localization_loss'], 0) - self.assertAllClose(loss_dict_out['first_stage_objectness_loss'], 0) - self.assertAllClose(loss_dict_out['second_stage_localization_loss'], 0) - self.assertAllClose(loss_dict_out['second_stage_classification_loss'], 0) - self.assertAllClose(loss_dict_out['second_stage_mask_loss'], 0) - - def test_loss_full_zero_padded_proposals(self): - model = self._build_model( - is_training=True, first_stage_only=False, second_stage_batch_size=6) - batch_size = 1 - anchors = tf.constant( - [[0, 0, 16, 16], - [0, 16, 16, 32], - [16, 0, 32, 16], - [16, 16, 32, 32]], dtype=tf.float32) - rpn_box_encodings = 
tf.zeros( - [batch_size, - anchors.get_shape().as_list()[0], - BOX_CODE_SIZE], dtype=tf.float32) - # use different numbers for the objectness category to break ties in - # order of boxes returned by NMS - rpn_objectness_predictions_with_background = tf.constant([ - [[-10, 13], - [10, -10], - [10, -11], - [10, -12]],], dtype=tf.float32) - image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32) - - # box_classifier_batch_size is 6, but here we assume that the number of - # actual proposals (not counting zero paddings) is fewer (3). - num_proposals = tf.constant([3], dtype=tf.int32) - proposal_boxes = tf.constant( - [[[0, 0, 16, 16], - [0, 16, 16, 32], - [16, 0, 32, 16], - [0, 0, 0, 0], # begin paddings - [0, 0, 0, 0], - [0, 0, 0, 0]]], dtype=tf.float32) - - refined_box_encodings = tf.zeros( - (batch_size * model.max_num_proposals, - model.num_classes, - BOX_CODE_SIZE), dtype=tf.float32) - class_predictions_with_background = tf.constant( - [[-10, 10, -10], - [10, -10, -10], - [10, -10, -10], - [0, 0, 0], # begin paddings - [0, 0, 0], - [0, 0, 0]], dtype=tf.float32) - - mask_predictions_logits = 20 * tf.ones((batch_size * - model.max_num_proposals, - model.num_classes, - 14, 14), - dtype=tf.float32) - - groundtruth_boxes_list = [ - tf.constant([[0, 0, .5, .5]], dtype=tf.float32)] - groundtruth_classes_list = [tf.constant([[1, 0]], dtype=tf.float32)] - - # Set all elements of groundtruth mask to 1.0. In this case all proposal - # crops of the groundtruth masks should return a mask that covers the entire - # proposal. Thus, if mask_predictions_logits element values are all greater - # than 20, the loss should be zero. 
- groundtruth_masks_list = [tf.convert_to_tensor(np.ones((1, 32, 32)), - dtype=tf.float32)] - - prediction_dict = { - 'rpn_box_encodings': rpn_box_encodings, - 'rpn_objectness_predictions_with_background': - rpn_objectness_predictions_with_background, - 'image_shape': image_shape, - 'anchors': anchors, - 'refined_box_encodings': refined_box_encodings, - 'class_predictions_with_background': class_predictions_with_background, - 'proposal_boxes': proposal_boxes, - 'num_proposals': num_proposals, - 'mask_predictions': mask_predictions_logits - } - model.provide_groundtruth(groundtruth_boxes_list, - groundtruth_classes_list, - groundtruth_masks_list) - loss_dict = model.loss(prediction_dict) - - with self.test_session() as sess: - loss_dict_out = sess.run(loss_dict) - self.assertAllClose(loss_dict_out['first_stage_localization_loss'], 0) - self.assertAllClose(loss_dict_out['first_stage_objectness_loss'], 0) - self.assertAllClose(loss_dict_out['second_stage_localization_loss'], 0) - self.assertAllClose(loss_dict_out['second_stage_classification_loss'], 0) - self.assertAllClose(loss_dict_out['second_stage_mask_loss'], 0) - - def test_loss_full_multiple_label_groundtruth(self): - model = self._build_model( - is_training=True, first_stage_only=False, second_stage_batch_size=6, - softmax_second_stage_classification_loss=False) - batch_size = 1 - anchors = tf.constant( - [[0, 0, 16, 16], - [0, 16, 16, 32], - [16, 0, 32, 16], - [16, 16, 32, 32]], dtype=tf.float32) - rpn_box_encodings = tf.zeros( - [batch_size, - anchors.get_shape().as_list()[0], - BOX_CODE_SIZE], dtype=tf.float32) - # use different numbers for the objectness category to break ties in - # order of boxes returned by NMS - rpn_objectness_predictions_with_background = tf.constant([ - [[-10, 13], - [10, -10], - [10, -11], - [10, -12]],], dtype=tf.float32) - image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32) - - # box_classifier_batch_size is 6, but here we assume that the number of - # actual 
proposals (not counting zero paddings) is fewer (3). - num_proposals = tf.constant([3], dtype=tf.int32) - proposal_boxes = tf.constant( - [[[0, 0, 16, 16], - [0, 16, 16, 32], - [16, 0, 32, 16], - [0, 0, 0, 0], # begin paddings - [0, 0, 0, 0], - [0, 0, 0, 0]]], dtype=tf.float32) - - # second_stage_localization_loss should only be computed for predictions - # that match groundtruth. For multiple label groundtruth boxes, the loss - # should only be computed once for the label with the smaller index. - refined_box_encodings = tf.constant( - [[[0, 0, 0, 0], [1, 1, -1, -1]], - [[1, 1, -1, -1], [1, 1, 1, 1]], - [[1, 1, -1, -1], [1, 1, 1, 1]], - [[1, 1, -1, -1], [1, 1, 1, 1]], - [[1, 1, -1, -1], [1, 1, 1, 1]], - [[1, 1, -1, -1], [1, 1, 1, 1]]], dtype=tf.float32) - class_predictions_with_background = tf.constant( - [[-100, 100, 100], - [100, -100, -100], - [100, -100, -100], - [0, 0, 0], # begin paddings - [0, 0, 0], - [0, 0, 0]], dtype=tf.float32) - - mask_predictions_logits = 20 * tf.ones((batch_size * - model.max_num_proposals, - model.num_classes, - 14, 14), - dtype=tf.float32) - - groundtruth_boxes_list = [ - tf.constant([[0, 0, .5, .5]], dtype=tf.float32)] - # Box contains two ground truth labels. - groundtruth_classes_list = [tf.constant([[1, 1]], dtype=tf.float32)] - - # Set all elements of groundtruth mask to 1.0. In this case all proposal - # crops of the groundtruth masks should return a mask that covers the entire - # proposal. Thus, if mask_predictions_logits element values are all greater - # than 20, the loss should be zero. 
- groundtruth_masks_list = [tf.convert_to_tensor(np.ones((1, 32, 32)), - dtype=tf.float32)] - - prediction_dict = { - 'rpn_box_encodings': rpn_box_encodings, - 'rpn_objectness_predictions_with_background': - rpn_objectness_predictions_with_background, - 'image_shape': image_shape, - 'anchors': anchors, - 'refined_box_encodings': refined_box_encodings, - 'class_predictions_with_background': class_predictions_with_background, - 'proposal_boxes': proposal_boxes, - 'num_proposals': num_proposals, - 'mask_predictions': mask_predictions_logits - } - model.provide_groundtruth(groundtruth_boxes_list, - groundtruth_classes_list, - groundtruth_masks_list) - loss_dict = model.loss(prediction_dict) - - with self.test_session() as sess: - loss_dict_out = sess.run(loss_dict) - self.assertAllClose(loss_dict_out['first_stage_localization_loss'], 0) - self.assertAllClose(loss_dict_out['first_stage_objectness_loss'], 0) - self.assertAllClose(loss_dict_out['second_stage_localization_loss'], 0) - self.assertAllClose(loss_dict_out['second_stage_classification_loss'], 0) - self.assertAllClose(loss_dict_out['second_stage_mask_loss'], 0) - - def test_loss_full_zero_padded_proposals_nonzero_loss_with_two_images(self): - model = self._build_model( - is_training=True, first_stage_only=False, second_stage_batch_size=6) - batch_size = 2 - anchors = tf.constant( - [[0, 0, 16, 16], - [0, 16, 16, 32], - [16, 0, 32, 16], - [16, 16, 32, 32]], dtype=tf.float32) - rpn_box_encodings = tf.zeros( - [batch_size, - anchors.get_shape().as_list()[0], - BOX_CODE_SIZE], dtype=tf.float32) - # use different numbers for the objectness category to break ties in - # order of boxes returned by NMS - rpn_objectness_predictions_with_background = tf.constant( - [[[-10, 13], - [10, -10], - [10, -11], - [10, -12]], - [[-10, 13], - [10, -10], - [10, -11], - [10, -12]]], dtype=tf.float32) - image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32) - - # box_classifier_batch_size is 6, but here we assume that the 
number of - # actual proposals (not counting zero paddings) is fewer. - num_proposals = tf.constant([3, 2], dtype=tf.int32) - proposal_boxes = tf.constant( - [[[0, 0, 16, 16], - [0, 16, 16, 32], - [16, 0, 32, 16], - [0, 0, 0, 0], # begin paddings - [0, 0, 0, 0], - [0, 0, 0, 0]], - [[0, 0, 16, 16], - [0, 16, 16, 32], - [0, 0, 0, 0], # begin paddings - [0, 0, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 0]]], dtype=tf.float32) - - refined_box_encodings = tf.zeros( - (batch_size * model.max_num_proposals, - model.num_classes, - BOX_CODE_SIZE), dtype=tf.float32) - class_predictions_with_background = tf.constant( - [[-10, 10, -10], # first image - [10, -10, -10], - [10, -10, -10], - [0, 0, 0], # begin paddings - [0, 0, 0], - [0, 0, 0], - [-10, -10, 10], # second image - [10, -10, -10], - [0, 0, 0], # begin paddings - [0, 0, 0], - [0, 0, 0], - [0, 0, 0],], dtype=tf.float32) - - # The first groundtruth box is 4/5 of the anchor size in both directions - # experiencing a loss of: - # 2 * SmoothL1(5 * log(4/5)) / num_proposals - # = 2 * (abs(5 * log(1/2)) - .5) / 3 - # The second groundtruth box is identical to the prediction and thus - # experiences zero loss. - # Total average loss is (abs(5 * log(1/2)) - .5) / 3. 
- groundtruth_boxes_list = [ - tf.constant([[0.05, 0.05, 0.45, 0.45]], dtype=tf.float32), - tf.constant([[0.0, 0.0, 0.5, 0.5]], dtype=tf.float32)] - groundtruth_classes_list = [tf.constant([[1, 0]], dtype=tf.float32), - tf.constant([[0, 1]], dtype=tf.float32)] - exp_loc_loss = (-5 * np.log(.8) - 0.5) / 3.0 - - prediction_dict = { - 'rpn_box_encodings': rpn_box_encodings, - 'rpn_objectness_predictions_with_background': - rpn_objectness_predictions_with_background, - 'image_shape': image_shape, - 'anchors': anchors, - 'refined_box_encodings': refined_box_encodings, - 'class_predictions_with_background': class_predictions_with_background, - 'proposal_boxes': proposal_boxes, - 'num_proposals': num_proposals - } - model.provide_groundtruth(groundtruth_boxes_list, - groundtruth_classes_list) - loss_dict = model.loss(prediction_dict) - - with self.test_session() as sess: - loss_dict_out = sess.run(loss_dict) - self.assertAllClose(loss_dict_out['first_stage_localization_loss'], - exp_loc_loss) - self.assertAllClose(loss_dict_out['first_stage_objectness_loss'], 0) - self.assertAllClose(loss_dict_out['second_stage_localization_loss'], - exp_loc_loss) - self.assertAllClose(loss_dict_out['second_stage_classification_loss'], 0) - - def test_loss_with_hard_mining(self): - model = self._build_model(is_training=True, - first_stage_only=False, - second_stage_batch_size=None, - first_stage_max_proposals=6, - hard_mining=True) - batch_size = 1 - anchors = tf.constant( - [[0, 0, 16, 16], - [0, 16, 16, 32], - [16, 0, 32, 16], - [16, 16, 32, 32]], dtype=tf.float32) - rpn_box_encodings = tf.zeros( - [batch_size, - anchors.get_shape().as_list()[0], - BOX_CODE_SIZE], dtype=tf.float32) - # use different numbers for the objectness category to break ties in - # order of boxes returned by NMS - rpn_objectness_predictions_with_background = tf.constant( - [[[-10, 13], - [-10, 12], - [10, -11], - [10, -12]]], dtype=tf.float32) - image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32) - 
- # box_classifier_batch_size is 6, but here we assume that the number of - # actual proposals (not counting zero paddings) is fewer (3). - num_proposals = tf.constant([3], dtype=tf.int32) - proposal_boxes = tf.constant( - [[[0, 0, 16, 16], - [0, 16, 16, 32], - [16, 0, 32, 16], - [0, 0, 0, 0], # begin paddings - [0, 0, 0, 0], - [0, 0, 0, 0]]], dtype=tf.float32) - - refined_box_encodings = tf.zeros( - (batch_size * model.max_num_proposals, - model.num_classes, - BOX_CODE_SIZE), dtype=tf.float32) - class_predictions_with_background = tf.constant( - [[-10, 10, -10], # first image - [-10, -10, 10], - [10, -10, -10], - [0, 0, 0], # begin paddings - [0, 0, 0], - [0, 0, 0]], dtype=tf.float32) - - # The first groundtruth box is 4/5 of the anchor size in both directions - # experiencing a loss of: - # 2 * SmoothL1(5 * log(4/5)) / num_proposals - # = 2 * (abs(5 * log(1/2)) - .5) / 3 - # The second groundtruth box is 46/50 of the anchor size in both directions - # experiencing a loss of: - # 2 * SmoothL1(5 * log(42/50)) / num_proposals - # = 2 * (.5(5 * log(.92))^2 - .5) / 3. - # Since the first groundtruth box experiences greater loss, and we have - # set num_hard_examples=1 in the HardMiner, the final localization loss - # corresponds to that of the first groundtruth box. 
- groundtruth_boxes_list = [ - tf.constant([[0.05, 0.05, 0.45, 0.45], - [0.02, 0.52, 0.48, 0.98],], dtype=tf.float32)] - groundtruth_classes_list = [tf.constant([[1, 0], [0, 1]], dtype=tf.float32)] - exp_loc_loss = 2 * (-5 * np.log(.8) - 0.5) / 3.0 - - prediction_dict = { - 'rpn_box_encodings': rpn_box_encodings, - 'rpn_objectness_predictions_with_background': - rpn_objectness_predictions_with_background, - 'image_shape': image_shape, - 'anchors': anchors, - 'refined_box_encodings': refined_box_encodings, - 'class_predictions_with_background': class_predictions_with_background, - 'proposal_boxes': proposal_boxes, - 'num_proposals': num_proposals - } - model.provide_groundtruth(groundtruth_boxes_list, - groundtruth_classes_list) - loss_dict = model.loss(prediction_dict) - - with self.test_session() as sess: - loss_dict_out = sess.run(loss_dict) - self.assertAllClose(loss_dict_out['second_stage_localization_loss'], - exp_loc_loss) - self.assertAllClose(loss_dict_out['second_stage_classification_loss'], 0) - - def test_restore_map_for_classification_ckpt(self): - # Define mock tensorflow classification graph and save variables. - test_graph_classification = tf.Graph() - with test_graph_classification.as_default(): - image = tf.placeholder(dtype=tf.float32, shape=[1, 20, 20, 3]) - with tf.variable_scope('mock_model'): - net = slim.conv2d(image, num_outputs=3, kernel_size=1, scope='layer1') - slim.conv2d(net, num_outputs=3, kernel_size=1, scope='layer2') - - init_op = tf.global_variables_initializer() - saver = tf.train.Saver() - save_path = self.get_temp_dir() - with self.test_session() as sess: - sess.run(init_op) - saved_model_path = saver.save(sess, save_path) - - # Create tensorflow detection graph and load variables from - # classification checkpoint. 
- test_graph_detection = tf.Graph() - with test_graph_detection.as_default(): - model = self._build_model( - is_training=False, first_stage_only=False, second_stage_batch_size=6) - - inputs_shape = (2, 20, 20, 3) - inputs = tf.to_float(tf.random_uniform( - inputs_shape, minval=0, maxval=255, dtype=tf.int32)) - preprocessed_inputs = model.preprocess(inputs) - prediction_dict = model.predict(preprocessed_inputs) - model.postprocess(prediction_dict) - var_map = model.restore_map(from_detection_checkpoint=False) - self.assertIsInstance(var_map, dict) - saver = tf.train.Saver(var_map) - with self.test_session() as sess: - saver.restore(sess, saved_model_path) - for var in sess.run(tf.report_uninitialized_variables()): - self.assertNotIn(model.first_stage_feature_extractor_scope, var.name) - self.assertNotIn(model.second_stage_feature_extractor_scope, - var.name) - - def test_restore_map_for_detection_ckpt(self): - # Define first detection graph and save variables. - test_graph_detection1 = tf.Graph() - with test_graph_detection1.as_default(): - model = self._build_model( - is_training=False, first_stage_only=False, second_stage_batch_size=6) - inputs_shape = (2, 20, 20, 3) - inputs = tf.to_float(tf.random_uniform( - inputs_shape, minval=0, maxval=255, dtype=tf.int32)) - preprocessed_inputs = model.preprocess(inputs) - prediction_dict = model.predict(preprocessed_inputs) - model.postprocess(prediction_dict) - init_op = tf.global_variables_initializer() - saver = tf.train.Saver() - save_path = self.get_temp_dir() - with self.test_session() as sess: - sess.run(init_op) - saved_model_path = saver.save(sess, save_path) - - # Define second detection graph and restore variables. 
- test_graph_detection2 = tf.Graph() - with test_graph_detection2.as_default(): - model2 = self._build_model(is_training=False, first_stage_only=False, - second_stage_batch_size=6, num_classes=42) - - inputs_shape2 = (2, 20, 20, 3) - inputs2 = tf.to_float(tf.random_uniform( - inputs_shape2, minval=0, maxval=255, dtype=tf.int32)) - preprocessed_inputs2 = model2.preprocess(inputs2) - prediction_dict2 = model2.predict(preprocessed_inputs2) - model2.postprocess(prediction_dict2) - var_map = model2.restore_map(from_detection_checkpoint=True) - self.assertIsInstance(var_map, dict) - saver = tf.train.Saver(var_map) - with self.test_session() as sess: - saver.restore(sess, saved_model_path) - for var in sess.run(tf.report_uninitialized_variables()): - self.assertNotIn(model2.first_stage_feature_extractor_scope, var.name) - self.assertNotIn(model2.second_stage_feature_extractor_scope, - var.name) - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/meta_architectures/rfcn_meta_arch.py b/object_detection/meta_architectures/rfcn_meta_arch.py deleted file mode 100644 index a1154555..00000000 --- a/object_detection/meta_architectures/rfcn_meta_arch.py +++ /dev/null @@ -1,283 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""R-FCN meta-architecture definition. - -R-FCN: Dai, Jifeng, et al. 
"R-FCN: Object Detection via Region-based -Fully Convolutional Networks." arXiv preprint arXiv:1605.06409 (2016). - -The R-FCN meta architecture is similar to Faster R-CNN and only differs in the -second stage. Hence this class inherits FasterRCNNMetaArch and overrides only -the `_predict_second_stage` method. - -Similar to Faster R-CNN we allow for two modes: first_stage_only=True and -first_stage_only=False. In the former setting, all of the user facing methods -(e.g., predict, postprocess, loss) can be used as if the model consisted -only of the RPN, returning class agnostic proposals (these can be thought of as -approximate detections with no associated class information). In the latter -setting, proposals are computed, then passed through a second stage -"box classifier" to yield (multi-class) detections. - -Implementations of R-FCN models must define a new FasterRCNNFeatureExtractor and -override three methods: `preprocess`, `_extract_proposal_features` (the first -stage of the model), and `_extract_box_classifier_features` (the second stage of -the model). Optionally, the `restore_fn` method can be overridden. See tests -for an example. - -See notes in the documentation of Faster R-CNN meta-architecture as they all -apply here. 
-""" -import tensorflow as tf - -from object_detection.core import box_predictor -from object_detection.meta_architectures import faster_rcnn_meta_arch -from object_detection.utils import ops - - -class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch): - """R-FCN Meta-architecture definition.""" - - def __init__(self, - is_training, - num_classes, - image_resizer_fn, - feature_extractor, - first_stage_only, - first_stage_anchor_generator, - first_stage_atrous_rate, - first_stage_box_predictor_arg_scope, - first_stage_box_predictor_kernel_size, - first_stage_box_predictor_depth, - first_stage_minibatch_size, - first_stage_positive_balance_fraction, - first_stage_nms_score_threshold, - first_stage_nms_iou_threshold, - first_stage_max_proposals, - first_stage_localization_loss_weight, - first_stage_objectness_loss_weight, - second_stage_rfcn_box_predictor, - second_stage_batch_size, - second_stage_balance_fraction, - second_stage_non_max_suppression_fn, - second_stage_score_conversion_fn, - second_stage_localization_loss_weight, - second_stage_classification_loss_weight, - second_stage_classification_loss, - hard_example_miner, - parallel_iterations=16): - """RFCNMetaArch Constructor. - - Args: - is_training: A boolean indicating whether the training version of the - computation graph should be constructed. - num_classes: Number of classes. Note that num_classes *does not* - include the background category, so if groundtruth labels take values - in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the - assigned classification targets can range from {0,... K}). - image_resizer_fn: A callable for image resizing. This callable always - takes a rank-3 image tensor (corresponding to a single image) and - returns a rank-3 image tensor, possibly with new spatial dimensions. - See builders/image_resizer_builder.py. - feature_extractor: A FasterRCNNFeatureExtractor object. 
- first_stage_only: Whether to construct only the Region Proposal Network - (RPN) part of the model. - first_stage_anchor_generator: An anchor_generator.AnchorGenerator object - (note that currently we only support - grid_anchor_generator.GridAnchorGenerator objects) - first_stage_atrous_rate: A single integer indicating the atrous rate for - the single convolution op which is applied to the `rpn_features_to_crop` - tensor to obtain a tensor to be used for box prediction. Some feature - extractors optionally allow for producing feature maps computed at - denser resolutions. The atrous rate is used to compensate for the - denser feature maps by using an effectively larger receptive field. - (This should typically be set to 1). - first_stage_box_predictor_arg_scope: Slim arg_scope for conv2d, - separable_conv2d and fully_connected ops for the RPN box predictor. - first_stage_box_predictor_kernel_size: Kernel size to use for the - convolution op just prior to RPN box predictions. - first_stage_box_predictor_depth: Output depth for the convolution op - just prior to RPN box predictions. - first_stage_minibatch_size: The "batch size" to use for computing the - objectness and location loss of the region proposal network. This - "batch size" refers to the number of anchors selected as contributing - to the loss function for any given image within the image batch and is - only called "batch_size" due to terminology from the Faster R-CNN paper. - first_stage_positive_balance_fraction: Fraction of positive examples - per image for the RPN. The recommended value for Faster RCNN is 0.5. - first_stage_nms_score_threshold: Score threshold for non max suppression - for the Region Proposal Network (RPN). This value is expected to be in - [0, 1] as it is applied directly after a softmax transformation. The - recommended value for Faster R-CNN is 0. 
- first_stage_nms_iou_threshold: The Intersection Over Union (IOU) threshold - for performing Non-Max Suppression (NMS) on the boxes predicted by the - Region Proposal Network (RPN). - first_stage_max_proposals: Maximum number of boxes to retain after - performing Non-Max Suppression (NMS) on the boxes predicted by the - Region Proposal Network (RPN). - first_stage_localization_loss_weight: A float - first_stage_objectness_loss_weight: A float - second_stage_rfcn_box_predictor: RFCN box predictor to use for - second stage. - second_stage_batch_size: The batch size used for computing the - classification and refined location loss of the box classifier. This - "batch size" refers to the number of proposals selected as contributing - to the loss function for any given image within the image batch and is - only called "batch_size" due to terminology from the Faster R-CNN paper. - second_stage_balance_fraction: Fraction of positive examples to use - per image for the box classifier. The recommended value for Faster RCNN - is 0.25. - second_stage_non_max_suppression_fn: batch_multiclass_non_max_suppression - callable that takes `boxes`, `scores`, optional `clip_window` and - optional (kwarg) `mask` inputs (with all other inputs already set) - and returns a dictionary containing tensors with keys: - `detection_boxes`, `detection_scores`, `detection_classes`, - `num_detections`, and (optionally) `detection_masks`. See - `post_processing.batch_multiclass_non_max_suppression` for the type and - shape of these tensors. - second_stage_score_conversion_fn: Callable elementwise nonlinearity - (that takes tensors as inputs and returns tensors). This is usually - used to convert logits to probabilities. - second_stage_localization_loss_weight: A float - second_stage_classification_loss_weight: A float - second_stage_classification_loss: A string indicating which loss function - to use, supports 'softmax' and 'sigmoid'. 
- hard_example_miner: A losses.HardExampleMiner object (can be None). - parallel_iterations: (Optional) The number of iterations allowed to run - in parallel for calls to tf.map_fn. - Raises: - ValueError: If `second_stage_batch_size` > `first_stage_max_proposals` - ValueError: If first_stage_anchor_generator is not of type - grid_anchor_generator.GridAnchorGenerator. - """ - super(RFCNMetaArch, self).__init__( - is_training, - num_classes, - image_resizer_fn, - feature_extractor, - first_stage_only, - first_stage_anchor_generator, - first_stage_atrous_rate, - first_stage_box_predictor_arg_scope, - first_stage_box_predictor_kernel_size, - first_stage_box_predictor_depth, - first_stage_minibatch_size, - first_stage_positive_balance_fraction, - first_stage_nms_score_threshold, - first_stage_nms_iou_threshold, - first_stage_max_proposals, - first_stage_localization_loss_weight, - first_stage_objectness_loss_weight, - None, # initial_crop_size is not used in R-FCN - None, # maxpool_kernel_size is not use in R-FCN - None, # maxpool_stride is not use in R-FCN - None, # fully_connected_box_predictor is not used in R-FCN. - second_stage_batch_size, - second_stage_balance_fraction, - second_stage_non_max_suppression_fn, - second_stage_score_conversion_fn, - second_stage_localization_loss_weight, - second_stage_classification_loss_weight, - second_stage_classification_loss, - 1.0, # second stage mask prediction loss weight isn't used in R-FCN. - hard_example_miner, - parallel_iterations) - - self._rfcn_box_predictor = second_stage_rfcn_box_predictor - - def _predict_second_stage(self, rpn_box_encodings, - rpn_objectness_predictions_with_background, - rpn_features, - anchors, - image_shape): - """Predicts the output tensors from 2nd stage of FasterRCNN. - - Args: - rpn_box_encodings: 4-D float tensor of shape - [batch_size, num_valid_anchors, self._box_coder.code_size] containing - predicted boxes. 
- rpn_objectness_predictions_with_background: 2-D float tensor of shape - [batch_size, num_valid_anchors, 2] containing class - predictions (logits) for each of the anchors. Note that this - tensor *includes* background class predictions (at class index 0). - rpn_features: A 4-D float32 tensor with shape - [batch_size, height, width, depth] representing image features from the - RPN. - anchors: 2-D float tensor of shape - [num_anchors, self._box_coder.code_size]. - image_shape: A 1D int32 tensors of size [4] containing the image shape. - - Returns: - prediction_dict: a dictionary holding "raw" prediction tensors: - 1) refined_box_encodings: a 3-D tensor with shape - [total_num_proposals, num_classes, 4] representing predicted - (final) refined box encodings, where - total_num_proposals=batch_size*self._max_num_proposals - 2) class_predictions_with_background: a 3-D tensor with shape - [total_num_proposals, num_classes + 1] containing class - predictions (logits) for each of the anchors, where - total_num_proposals=batch_size*self._max_num_proposals. - Note that this tensor *includes* background class predictions - (at class index 0). - 3) num_proposals: An int32 tensor of shape [batch_size] representing the - number of proposals generated by the RPN. `num_proposals` allows us - to keep track of which entries are to be treated as zero paddings and - which are not since we always pad the number of proposals to be - `self.max_num_proposals` for each image. - 4) proposal_boxes: A float32 tensor of shape - [batch_size, self.max_num_proposals, 4] representing - decoded proposal bounding boxes (in absolute coordinates). - 5) proposal_boxes_normalized: A float32 tensor of shape - [batch_size, self.max_num_proposals, 4] representing decoded proposal - bounding boxes (in normalized coordinates). Can be used to override - the boxes proposed by the RPN, thus enabling one to extract box - classification and prediction for externally selected areas of the - image. 
- 6) box_classifier_features: a 4-D float32 tensor, of shape - [batch_size, feature_map_height, feature_map_width, depth], - representing the box classifier features. - """ - proposal_boxes_normalized, _, num_proposals = self._postprocess_rpn( - rpn_box_encodings, rpn_objectness_predictions_with_background, - anchors, image_shape) - - box_classifier_features = ( - self._feature_extractor.extract_box_classifier_features( - rpn_features, - scope=self.second_stage_feature_extractor_scope)) - - box_predictions = self._rfcn_box_predictor.predict( - box_classifier_features, - num_predictions_per_location=1, - scope=self.second_stage_box_predictor_scope, - proposal_boxes=proposal_boxes_normalized) - refined_box_encodings = tf.squeeze( - box_predictions[box_predictor.BOX_ENCODINGS], axis=1) - class_predictions_with_background = tf.squeeze( - box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], - axis=1) - - absolute_proposal_boxes = ops.normalized_to_image_coordinates( - proposal_boxes_normalized, image_shape, - parallel_iterations=self._parallel_iterations) - - prediction_dict = { - 'refined_box_encodings': refined_box_encodings, - 'class_predictions_with_background': - class_predictions_with_background, - 'num_proposals': num_proposals, - 'proposal_boxes': absolute_proposal_boxes, - 'box_classifier_features': box_classifier_features, - 'proposal_boxes_normalized': proposal_boxes_normalized, - } - return prediction_dict diff --git a/object_detection/meta_architectures/rfcn_meta_arch_test.py b/object_detection/meta_architectures/rfcn_meta_arch_test.py deleted file mode 100644 index 829140ac..00000000 --- a/object_detection/meta_architectures/rfcn_meta_arch_test.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.meta_architectures.rfcn_meta_arch.""" - -import tensorflow as tf - -from object_detection.meta_architectures import faster_rcnn_meta_arch_test_lib -from object_detection.meta_architectures import rfcn_meta_arch - - -class RFCNMetaArchTest( - faster_rcnn_meta_arch_test_lib.FasterRCNNMetaArchTestBase): - - def _get_second_stage_box_predictor_text_proto(self): - box_predictor_text_proto = """ - rfcn_box_predictor { - conv_hyperparams { - op: CONV - activation: NONE - regularizer { - l2_regularizer { - weight: 0.0005 - } - } - initializer { - variance_scaling_initializer { - factor: 1.0 - uniform: true - mode: FAN_AVG - } - } - } - } - """ - return box_predictor_text_proto - - def _get_model(self, box_predictor, **common_kwargs): - return rfcn_meta_arch.RFCNMetaArch( - second_stage_rfcn_box_predictor=box_predictor, **common_kwargs) - - def _get_box_classifier_features_shape(self, - image_size, - batch_size, - max_num_proposals, - initial_crop_size, - maxpool_stride, - num_features): - return (batch_size, image_size, image_size, num_features) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/meta_architectures/ssd_meta_arch.py b/object_detection/meta_architectures/ssd_meta_arch.py deleted file mode 100644 index f15cc4af..00000000 --- a/object_detection/meta_architectures/ssd_meta_arch.py +++ /dev/null @@ -1,700 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""SSD Meta-architecture definition. - -General tensorflow implementation of convolutional Multibox/SSD detection -models. -""" -from abc import abstractmethod - -import re -import tensorflow as tf - -from object_detection.core import box_list -from object_detection.core import box_predictor as bpredictor -from object_detection.core import model -from object_detection.core import standard_fields as fields -from object_detection.core import target_assigner -from object_detection.utils import shape_utils -from object_detection.utils import visualization_utils - -slim = tf.contrib.slim - - -class SSDFeatureExtractor(object): - """SSD Feature Extractor definition.""" - - def __init__(self, - is_training, - depth_multiplier, - min_depth, - pad_to_multiple, - conv_hyperparams, - batch_norm_trainable=True, - reuse_weights=None): - """Constructor. - - Args: - is_training: whether the network is in training mode. - depth_multiplier: float depth multiplier for feature extractor. - min_depth: minimum feature extractor depth. - pad_to_multiple: the nearest multiple to zero pad the input height and - width dimensions to. - conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops. - batch_norm_trainable: Whether to update batch norm parameters during - training or not. When training with a small batch size - (e.g. 
1), it is desirable to disable batch norm update and use - pretrained batch norm params. - reuse_weights: whether to reuse variables. Default is None. - """ - self._is_training = is_training - self._depth_multiplier = depth_multiplier - self._min_depth = min_depth - self._pad_to_multiple = pad_to_multiple - self._conv_hyperparams = conv_hyperparams - self._batch_norm_trainable = batch_norm_trainable - self._reuse_weights = reuse_weights - - @abstractmethod - def preprocess(self, resized_inputs): - """Preprocesses images for feature extraction (minus image resizing). - - Args: - resized_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - - Returns: - preprocessed_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - """ - pass - - @abstractmethod - def extract_features(self, preprocessed_inputs): - """Extracts features from preprocessed inputs. - - This function is responsible for extracting feature maps from preprocessed - images. - - Args: - preprocessed_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - - Returns: - feature_maps: a list of tensors where the ith tensor has shape - [batch, height_i, width_i, depth_i] - """ - pass - - -class SSDMetaArch(model.DetectionModel): - """SSD Meta-architecture definition.""" - - def __init__(self, - is_training, - anchor_generator, - box_predictor, - box_coder, - feature_extractor, - matcher, - region_similarity_calculator, - image_resizer_fn, - non_max_suppression_fn, - score_conversion_fn, - classification_loss, - localization_loss, - classification_loss_weight, - localization_loss_weight, - normalize_loss_by_num_matches, - hard_example_miner, - add_summaries=True): - """SSDMetaArch Constructor. - - TODO: group NMS parameters + score converter into a class and loss - parameters into a class and write config protos for postprocessing - and losses. 
- - Args: - is_training: A boolean indicating whether the training version of the - computation graph should be constructed. - anchor_generator: an anchor_generator.AnchorGenerator object. - box_predictor: a box_predictor.BoxPredictor object. - box_coder: a box_coder.BoxCoder object. - feature_extractor: a SSDFeatureExtractor object. - matcher: a matcher.Matcher object. - region_similarity_calculator: a - region_similarity_calculator.RegionSimilarityCalculator object. - image_resizer_fn: a callable for image resizing. This callable always - takes a rank-3 image tensor (corresponding to a single image) and - returns a rank-3 image tensor, possibly with new spatial dimensions. - See builders/image_resizer_builder.py. - non_max_suppression_fn: batch_multiclass_non_max_suppression - callable that takes `boxes`, `scores` and optional `clip_window` - inputs (with all other inputs already set) and returns a dictionary - hold tensors with keys: `detection_boxes`, `detection_scores`, - `detection_classes` and `num_detections`. See `post_processing. - batch_multiclass_non_max_suppression` for the type and shape of these - tensors. - score_conversion_fn: callable elementwise nonlinearity (that takes tensors - as inputs and returns tensors). This is usually used to convert logits - to probabilities. - classification_loss: an object_detection.core.losses.Loss object. - localization_loss: a object_detection.core.losses.Loss object. - classification_loss_weight: float - localization_loss_weight: float - normalize_loss_by_num_matches: boolean - hard_example_miner: a losses.HardExampleMiner object (can be None) - add_summaries: boolean (default: True) controlling whether summary ops - should be added to tensorflow graph. - """ - super(SSDMetaArch, self).__init__(num_classes=box_predictor.num_classes) - self._is_training = is_training - - # Needed for fine-tuning from classification checkpoints whose - # variables do not have the feature extractor scope. 
- self._extract_features_scope = 'FeatureExtractor' - - self._anchor_generator = anchor_generator - self._box_predictor = box_predictor - - self._box_coder = box_coder - self._feature_extractor = feature_extractor - self._matcher = matcher - self._region_similarity_calculator = region_similarity_calculator - - # TODO: handle agnostic mode and positive/negative class weights - unmatched_cls_target = None - unmatched_cls_target = tf.constant([1] + self.num_classes * [0], tf.float32) - self._target_assigner = target_assigner.TargetAssigner( - self._region_similarity_calculator, - self._matcher, - self._box_coder, - positive_class_weight=1.0, - negative_class_weight=1.0, - unmatched_cls_target=unmatched_cls_target) - - self._classification_loss = classification_loss - self._localization_loss = localization_loss - self._classification_loss_weight = classification_loss_weight - self._localization_loss_weight = localization_loss_weight - self._normalize_loss_by_num_matches = normalize_loss_by_num_matches - self._hard_example_miner = hard_example_miner - - self._image_resizer_fn = image_resizer_fn - self._non_max_suppression_fn = non_max_suppression_fn - self._score_conversion_fn = score_conversion_fn - - self._anchors = None - self._add_summaries = add_summaries - - @property - def anchors(self): - if not self._anchors: - raise RuntimeError('anchors have not been constructed yet!') - if not isinstance(self._anchors, box_list.BoxList): - raise RuntimeError('anchors should be a BoxList object, but is not.') - return self._anchors - - def preprocess(self, inputs): - """Feature-extractor specific preprocessing. - - See base class. - - Args: - inputs: a [batch, height_in, width_in, channels] float tensor representing - a batch of images with values between 0 and 255.0. - - Returns: - preprocessed_inputs: a [batch, height_out, width_out, channels] float - tensor representing a batch of images. 
- Raises: - ValueError: if inputs tensor does not have type tf.float32 - """ - if inputs.dtype is not tf.float32: - raise ValueError('`preprocess` expects a tf.float32 tensor') - with tf.name_scope('Preprocessor'): - # TODO: revisit whether to always use batch size as the number of parallel - # iterations vs allow for dynamic batching. - resized_inputs = tf.map_fn(self._image_resizer_fn, - elems=inputs, - dtype=tf.float32) - return self._feature_extractor.preprocess(resized_inputs) - - def predict(self, preprocessed_inputs): - """Predicts unpostprocessed tensors from input tensor. - - This function takes an input batch of images and runs it through the forward - pass of the network to yield unpostprocessesed predictions. - - A side effect of calling the predict method is that self._anchors is - populated with a box_list.BoxList of anchors. These anchors must be - constructed before the postprocess or loss functions can be called. - - Args: - preprocessed_inputs: a [batch, height, width, channels] image tensor. - - Returns: - prediction_dict: a dictionary holding "raw" prediction tensors: - 1) box_encodings: 4-D float tensor of shape [batch_size, num_anchors, - box_code_dimension] containing predicted boxes. - 2) class_predictions_with_background: 3-D float tensor of shape - [batch_size, num_anchors, num_classes+1] containing class predictions - (logits) for each of the anchors. Note that this tensor *includes* - background class predictions (at class index 0). - 3) feature_maps: a list of tensors where the ith tensor has shape - [batch, height_i, width_i, depth_i]. - 4) anchors: 2-D float tensor of shape [num_anchors, 4] containing - the generated anchors in normalized coordinates. 
- """ - with tf.variable_scope(None, self._extract_features_scope, - [preprocessed_inputs]): - feature_maps = self._feature_extractor.extract_features( - preprocessed_inputs) - feature_map_spatial_dims = self._get_feature_map_spatial_dims(feature_maps) - image_shape = tf.shape(preprocessed_inputs) - self._anchors = self._anchor_generator.generate( - feature_map_spatial_dims, - im_height=image_shape[1], - im_width=image_shape[2]) - (box_encodings, class_predictions_with_background - ) = self._add_box_predictions_to_feature_maps(feature_maps) - predictions_dict = { - 'box_encodings': box_encodings, - 'class_predictions_with_background': class_predictions_with_background, - 'feature_maps': feature_maps, - 'anchors': self._anchors.get() - } - return predictions_dict - - def _add_box_predictions_to_feature_maps(self, feature_maps): - """Adds box predictors to each feature map and returns concatenated results. - - Args: - feature_maps: a list of tensors where the ith tensor has shape - [batch, height_i, width_i, depth_i] - - Returns: - box_encodings: 3-D float tensor of shape [batch_size, num_anchors, - box_code_dimension] containing predicted boxes. - class_predictions_with_background: 3-D float tensor of shape - [batch_size, num_anchors, num_classes+1] containing class predictions - (logits) for each of the anchors. Note that this tensor *includes* - background class predictions (at class index 0). - - Raises: - RuntimeError: if the number of feature maps extracted via the - extract_features method does not match the length of the - num_anchors_per_locations list that was passed to the constructor. - RuntimeError: if box_encodings from the box_predictor does not have - shape of the form [batch_size, num_anchors, 1, code_size]. 
- """ - num_anchors_per_location_list = ( - self._anchor_generator.num_anchors_per_location()) - if len(feature_maps) != len(num_anchors_per_location_list): - raise RuntimeError('the number of feature maps must match the ' - 'length of self.anchors.NumAnchorsPerLocation().') - box_encodings_list = [] - cls_predictions_with_background_list = [] - for idx, (feature_map, num_anchors_per_location - ) in enumerate(zip(feature_maps, num_anchors_per_location_list)): - box_predictor_scope = 'BoxPredictor_{}'.format(idx) - box_predictions = self._box_predictor.predict(feature_map, - num_anchors_per_location, - box_predictor_scope) - box_encodings = box_predictions[bpredictor.BOX_ENCODINGS] - cls_predictions_with_background = box_predictions[ - bpredictor.CLASS_PREDICTIONS_WITH_BACKGROUND] - - box_encodings_shape = box_encodings.get_shape().as_list() - if len(box_encodings_shape) != 4 or box_encodings_shape[2] != 1: - raise RuntimeError('box_encodings from the box_predictor must be of ' - 'shape `[batch_size, num_anchors, 1, code_size]`; ' - 'actual shape', box_encodings_shape) - box_encodings = tf.squeeze(box_encodings, axis=2) - box_encodings_list.append(box_encodings) - cls_predictions_with_background_list.append( - cls_predictions_with_background) - - num_predictions = sum( - [tf.shape(box_encodings)[1] for box_encodings in box_encodings_list]) - num_anchors = self.anchors.num_boxes() - anchors_assert = tf.assert_equal(num_anchors, num_predictions, [ - 'Mismatch: number of anchors vs number of predictions', num_anchors, - num_predictions - ]) - with tf.control_dependencies([anchors_assert]): - box_encodings = tf.concat(box_encodings_list, 1) - class_predictions_with_background = tf.concat( - cls_predictions_with_background_list, 1) - return box_encodings, class_predictions_with_background - - def _get_feature_map_spatial_dims(self, feature_maps): - """Return list of spatial dimensions for each feature map in a list. 
- - Args: - feature_maps: a list of tensors where the ith tensor has shape - [batch, height_i, width_i, depth_i]. - - Returns: - a list of pairs (height, width) for each feature map in feature_maps - """ - feature_map_shapes = [ - shape_utils.combined_static_and_dynamic_shape( - feature_map) for feature_map in feature_maps - ] - return [(shape[1], shape[2]) for shape in feature_map_shapes] - - def postprocess(self, prediction_dict): - """Converts prediction tensors to final detections. - - This function converts raw predictions tensors to final detection results by - slicing off the background class, decoding box predictions and applying - non max suppression and clipping to the image window. - - See base class for output format conventions. Note also that by default, - scores are to be interpreted as logits, but if a score_conversion_fn is - used, then scores are remapped (and may thus have a different - interpretation). - - Args: - prediction_dict: a dictionary holding prediction tensors with - 1) box_encodings: 3-D float tensor of shape [batch_size, num_anchors, - box_code_dimension] containing predicted boxes. - 2) class_predictions_with_background: 3-D float tensor of shape - [batch_size, num_anchors, num_classes+1] containing class predictions - (logits) for each of the anchors. Note that this tensor *includes* - background class predictions. - - Returns: - detections: a dictionary containing the following fields - detection_boxes: [batch, max_detections, 4] - detection_scores: [batch, max_detections] - detection_classes: [batch, max_detections] - detection_keypoints: [batch, max_detections, num_keypoints, 2] (if - encoded in the prediction_dict 'box_encodings') - num_detections: [batch] - Raises: - ValueError: if prediction_dict does not contain `box_encodings` or - `class_predictions_with_background` fields. 
- """ - if ('box_encodings' not in prediction_dict or - 'class_predictions_with_background' not in prediction_dict): - raise ValueError('prediction_dict does not contain expected entries.') - with tf.name_scope('Postprocessor'): - box_encodings = prediction_dict['box_encodings'] - class_predictions = prediction_dict['class_predictions_with_background'] - detection_boxes, detection_keypoints = self._batch_decode(box_encodings) - detection_boxes = tf.expand_dims(detection_boxes, axis=2) - - class_predictions_without_background = tf.slice(class_predictions, - [0, 0, 1], - [-1, -1, -1]) - detection_scores = self._score_conversion_fn( - class_predictions_without_background) - clip_window = tf.constant([0, 0, 1, 1], tf.float32) - additional_fields = None - if detection_keypoints is not None: - additional_fields = { - fields.BoxListFields.keypoints: detection_keypoints} - (nmsed_boxes, nmsed_scores, nmsed_classes, _, nmsed_additional_fields, - num_detections) = self._non_max_suppression_fn( - detection_boxes, - detection_scores, - clip_window=clip_window, - additional_fields=additional_fields) - detection_dict = {'detection_boxes': nmsed_boxes, - 'detection_scores': nmsed_scores, - 'detection_classes': nmsed_classes, - 'num_detections': tf.to_float(num_detections)} - if (nmsed_additional_fields is not None and - fields.BoxListFields.keypoints in nmsed_additional_fields): - detection_dict['detection_keypoints'] = nmsed_additional_fields[ - fields.BoxListFields.keypoints] - return detection_dict - - def loss(self, prediction_dict, scope=None): - """Compute scalar loss tensors with respect to provided groundtruth. - - Calling this function requires that groundtruth tensors have been - provided via the provide_groundtruth function. - - Args: - prediction_dict: a dictionary holding prediction tensors with - 1) box_encodings: 3-D float tensor of shape [batch_size, num_anchors, - box_code_dimension] containing predicted boxes. 
- 2) class_predictions_with_background: 3-D float tensor of shape - [batch_size, num_anchors, num_classes+1] containing class predictions - (logits) for each of the anchors. Note that this tensor *includes* - background class predictions. - scope: Optional scope name. - - Returns: - a dictionary mapping loss keys (`localization_loss` and - `classification_loss`) to scalar tensors representing corresponding loss - values. - """ - with tf.name_scope(scope, 'Loss', prediction_dict.values()): - keypoints = None - if self.groundtruth_has_field(fields.BoxListFields.keypoints): - keypoints = self.groundtruth_lists(fields.BoxListFields.keypoints) - (batch_cls_targets, batch_cls_weights, batch_reg_targets, - batch_reg_weights, match_list) = self._assign_targets( - self.groundtruth_lists(fields.BoxListFields.boxes), - self.groundtruth_lists(fields.BoxListFields.classes), - keypoints) - if self._add_summaries: - self._summarize_input( - self.groundtruth_lists(fields.BoxListFields.boxes), match_list) - num_matches = tf.stack( - [match.num_matched_columns() for match in match_list]) - location_losses = self._localization_loss( - prediction_dict['box_encodings'], - batch_reg_targets, - ignore_nan_targets=True, - weights=batch_reg_weights) - cls_losses = self._classification_loss( - prediction_dict['class_predictions_with_background'], - batch_cls_targets, - weights=batch_cls_weights) - - if self._hard_example_miner: - (localization_loss, classification_loss) = self._apply_hard_mining( - location_losses, cls_losses, prediction_dict, match_list) - if self._add_summaries: - self._hard_example_miner.summarize() - else: - if self._add_summaries: - class_ids = tf.argmax(batch_cls_targets, axis=2) - flattened_class_ids = tf.reshape(class_ids, [-1]) - flattened_classification_losses = tf.reshape(cls_losses, [-1]) - self._summarize_anchor_classification_loss( - flattened_class_ids, flattened_classification_losses) - localization_loss = tf.reduce_sum(location_losses) - classification_loss 
= tf.reduce_sum(cls_losses) - - # Optionally normalize by number of positive matches - normalizer = tf.constant(1.0, dtype=tf.float32) - if self._normalize_loss_by_num_matches: - normalizer = tf.maximum(tf.to_float(tf.reduce_sum(num_matches)), 1.0) - - with tf.name_scope('localization_loss'): - localization_loss = ((self._localization_loss_weight / normalizer) * - localization_loss) - with tf.name_scope('classification_loss'): - classification_loss = ((self._classification_loss_weight / normalizer) * - classification_loss) - - loss_dict = { - 'localization_loss': localization_loss, - 'classification_loss': classification_loss - } - return loss_dict - - def _summarize_anchor_classification_loss(self, class_ids, cls_losses): - positive_indices = tf.where(tf.greater(class_ids, 0)) - positive_anchor_cls_loss = tf.squeeze( - tf.gather(cls_losses, positive_indices), axis=1) - visualization_utils.add_cdf_image_summary(positive_anchor_cls_loss, - 'PositiveAnchorLossCDF') - negative_indices = tf.where(tf.equal(class_ids, 0)) - negative_anchor_cls_loss = tf.squeeze( - tf.gather(cls_losses, negative_indices), axis=1) - visualization_utils.add_cdf_image_summary(negative_anchor_cls_loss, - 'NegativeAnchorLossCDF') - - def _assign_targets(self, groundtruth_boxes_list, groundtruth_classes_list, - groundtruth_keypoints_list=None): - """Assign groundtruth targets. - - Adds a background class to each one-hot encoding of groundtruth classes - and uses target assigner to obtain regression and classification targets. - - Args: - groundtruth_boxes_list: a list of 2-D tensors of shape [num_boxes, 4] - containing coordinates of the groundtruth boxes. - Groundtruth boxes are provided in [y_min, x_min, y_max, x_max] - format and assumed to be normalized and clipped - relative to the image window with y_min <= y_max and x_min <= x_max. 
- groundtruth_classes_list: a list of 2-D one-hot (or k-hot) tensors of - shape [num_boxes, num_classes] containing the class targets with the 0th - index assumed to map to the first non-background class. - groundtruth_keypoints_list: (optional) a list of 3-D tensors of shape - [num_boxes, num_keypoints, 2] - - Returns: - batch_cls_targets: a tensor with shape [batch_size, num_anchors, - num_classes], - batch_cls_weights: a tensor with shape [batch_size, num_anchors], - batch_reg_targets: a tensor with shape [batch_size, num_anchors, - box_code_dimension] - batch_reg_weights: a tensor with shape [batch_size, num_anchors], - match_list: a list of matcher.Match objects encoding the match between - anchors and groundtruth boxes for each image of the batch, - with rows of the Match objects corresponding to groundtruth boxes - and columns corresponding to anchors. - """ - groundtruth_boxlists = [ - box_list.BoxList(boxes) for boxes in groundtruth_boxes_list - ] - groundtruth_classes_with_background_list = [ - tf.pad(one_hot_encoding, [[0, 0], [1, 0]], mode='CONSTANT') - for one_hot_encoding in groundtruth_classes_list - ] - if groundtruth_keypoints_list is not None: - for boxlist, keypoints in zip( - groundtruth_boxlists, groundtruth_keypoints_list): - boxlist.add_field(fields.BoxListFields.keypoints, keypoints) - return target_assigner.batch_assign_targets( - self._target_assigner, self.anchors, groundtruth_boxlists, - groundtruth_classes_with_background_list) - - def _summarize_input(self, groundtruth_boxes_list, match_list): - """Creates tensorflow summaries for the input boxes and anchors. - - This function creates four summaries corresponding to the average - number (over images in a batch) of (1) groundtruth boxes, (2) anchors - marked as positive, (3) anchors marked as negative, and (4) anchors marked - as ignored. - - Args: - groundtruth_boxes_list: a list of 2-D tensors of shape [num_boxes, 4] - containing corners of the groundtruth boxes. 
- match_list: a list of matcher.Match objects encoding the match between - anchors and groundtruth boxes for each image of the batch, - with rows of the Match objects corresponding to groundtruth boxes - and columns corresponding to anchors. - """ - num_boxes_per_image = tf.stack( - [tf.shape(x)[0] for x in groundtruth_boxes_list]) - pos_anchors_per_image = tf.stack( - [match.num_matched_columns() for match in match_list]) - neg_anchors_per_image = tf.stack( - [match.num_unmatched_columns() for match in match_list]) - ignored_anchors_per_image = tf.stack( - [match.num_ignored_columns() for match in match_list]) - tf.summary.scalar('Input/AvgNumGroundtruthBoxesPerImage', - tf.reduce_mean(tf.to_float(num_boxes_per_image))) - tf.summary.scalar('Input/AvgNumPositiveAnchorsPerImage', - tf.reduce_mean(tf.to_float(pos_anchors_per_image))) - tf.summary.scalar('Input/AvgNumNegativeAnchorsPerImage', - tf.reduce_mean(tf.to_float(neg_anchors_per_image))) - tf.summary.scalar('Input/AvgNumIgnoredAnchorsPerImage', - tf.reduce_mean(tf.to_float(ignored_anchors_per_image))) - - def _apply_hard_mining(self, location_losses, cls_losses, prediction_dict, - match_list): - """Applies hard mining to anchorwise losses. - - Args: - location_losses: Float tensor of shape [batch_size, num_anchors] - representing anchorwise location losses. - cls_losses: Float tensor of shape [batch_size, num_anchors] - representing anchorwise classification losses. - prediction_dict: p a dictionary holding prediction tensors with - 1) box_encodings: 3-D float tensor of shape [batch_size, num_anchors, - box_code_dimension] containing predicted boxes. - 2) class_predictions_with_background: 3-D float tensor of shape - [batch_size, num_anchors, num_classes+1] containing class predictions - (logits) for each of the anchors. Note that this tensor *includes* - background class predictions. 
- match_list: a list of matcher.Match objects encoding the match between - anchors and groundtruth boxes for each image of the batch, - with rows of the Match objects corresponding to groundtruth boxes - and columns corresponding to anchors. - - Returns: - mined_location_loss: a float scalar with sum of localization losses from - selected hard examples. - mined_cls_loss: a float scalar with sum of classification losses from - selected hard examples. - """ - class_predictions = tf.slice( - prediction_dict['class_predictions_with_background'], [0, 0, - 1], [-1, -1, -1]) - - decoded_boxes, _ = self._batch_decode(prediction_dict['box_encodings']) - decoded_box_tensors_list = tf.unstack(decoded_boxes) - class_prediction_list = tf.unstack(class_predictions) - decoded_boxlist_list = [] - for box_location, box_score in zip(decoded_box_tensors_list, - class_prediction_list): - decoded_boxlist = box_list.BoxList(box_location) - decoded_boxlist.add_field('scores', box_score) - decoded_boxlist_list.append(decoded_boxlist) - return self._hard_example_miner( - location_losses=location_losses, - cls_losses=cls_losses, - decoded_boxlist_list=decoded_boxlist_list, - match_list=match_list) - - def _batch_decode(self, box_encodings): - """Decodes a batch of box encodings with respect to the anchors. - - Args: - box_encodings: A float32 tensor of shape - [batch_size, num_anchors, box_code_size] containing box encodings. - - Returns: - decoded_boxes: A float32 tensor of shape - [batch_size, num_anchors, 4] containing the decoded boxes. - decoded_keypoints: A float32 tensor of shape - [batch_size, num_anchors, num_keypoints, 2] containing the decoded - keypoints if present in the input `box_encodings`, None otherwise. 
- """ - combined_shape = shape_utils.combined_static_and_dynamic_shape( - box_encodings) - batch_size = combined_shape[0] - tiled_anchor_boxes = tf.tile( - tf.expand_dims(self.anchors.get(), 0), [batch_size, 1, 1]) - tiled_anchors_boxlist = box_list.BoxList( - tf.reshape(tiled_anchor_boxes, [-1, 4])) - decoded_boxes = self._box_coder.decode( - tf.reshape(box_encodings, [-1, self._box_coder.code_size]), - tiled_anchors_boxlist) - decoded_keypoints = None - if decoded_boxes.has_field(fields.BoxListFields.keypoints): - decoded_keypoints = decoded_boxes.get_field( - fields.BoxListFields.keypoints) - num_keypoints = decoded_keypoints.get_shape()[1] - decoded_keypoints = tf.reshape( - decoded_keypoints, - tf.stack([combined_shape[0], combined_shape[1], num_keypoints, 2])) - decoded_boxes = tf.reshape(decoded_boxes.get(), tf.stack( - [combined_shape[0], combined_shape[1], 4])) - return decoded_boxes, decoded_keypoints - - def restore_map(self, from_detection_checkpoint=True): - """Returns a map of variables to load from a foreign checkpoint. - - See parent class for details. - - Args: - from_detection_checkpoint: whether to restore from a full detection - checkpoint (with compatible variable names) or to restore from a - classification checkpoint for initialization prior to training. - - Returns: - A dict mapping variable names (to load from a checkpoint) to variables in - the model graph. 
- """ - variables_to_restore = {} - for variable in tf.global_variables(): - if variable.op.name.startswith(self._extract_features_scope): - var_name = variable.op.name - if not from_detection_checkpoint: - var_name = (re.split('^' + self._extract_features_scope + '/', - var_name)[-1]) - variables_to_restore[var_name] = variable - return variables_to_restore diff --git a/object_detection/meta_architectures/ssd_meta_arch_test.py b/object_detection/meta_architectures/ssd_meta_arch_test.py deleted file mode 100644 index 9112ed09..00000000 --- a/object_detection/meta_architectures/ssd_meta_arch_test.py +++ /dev/null @@ -1,298 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for object_detection.meta_architectures.ssd_meta_arch.""" -import functools -import numpy as np -import tensorflow as tf - -from object_detection.core import anchor_generator -from object_detection.core import box_list -from object_detection.core import losses -from object_detection.core import post_processing -from object_detection.core import region_similarity_calculator as sim_calc -from object_detection.meta_architectures import ssd_meta_arch -from object_detection.utils import test_utils - -slim = tf.contrib.slim - - -class FakeSSDFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): - - def __init__(self): - super(FakeSSDFeatureExtractor, self).__init__( - is_training=True, - depth_multiplier=0, - min_depth=0, - pad_to_multiple=1, - batch_norm_trainable=True, - conv_hyperparams=None) - - def preprocess(self, resized_inputs): - return tf.identity(resized_inputs) - - def extract_features(self, preprocessed_inputs): - with tf.variable_scope('mock_model'): - features = slim.conv2d(inputs=preprocessed_inputs, num_outputs=32, - kernel_size=[1, 1], scope='layer1') - return [features] - - -class MockAnchorGenerator2x2(anchor_generator.AnchorGenerator): - """Sets up a simple 2x2 anchor grid on the unit square.""" - - def name_scope(self): - return 'MockAnchorGenerator' - - def num_anchors_per_location(self): - return [1] - - def _generate(self, feature_map_shape_list, im_height, im_width): - return box_list.BoxList( - tf.constant([[0, 0, .5, .5], - [0, .5, .5, 1], - [.5, 0, 1, .5], - [.5, .5, 1, 1]], tf.float32)) - - -class SsdMetaArchTest(tf.test.TestCase): - - def setUp(self): - """Set up mock SSD model. - - Here we set up a simple mock SSD model that will always predict 4 - detections that happen to always be exactly the anchors that are set up - in the above MockAnchorGenerator. 
Because we let max_detections=5, - we will also always end up with an extra padded row in the detection - results. - """ - is_training = False - self._num_classes = 1 - mock_anchor_generator = MockAnchorGenerator2x2() - mock_box_predictor = test_utils.MockBoxPredictor( - is_training, self._num_classes) - mock_box_coder = test_utils.MockBoxCoder() - fake_feature_extractor = FakeSSDFeatureExtractor() - mock_matcher = test_utils.MockMatcher() - region_similarity_calculator = sim_calc.IouSimilarity() - - def image_resizer_fn(image): - return tf.identity(image) - - classification_loss = losses.WeightedSigmoidClassificationLoss( - anchorwise_output=True) - localization_loss = losses.WeightedSmoothL1LocalizationLoss( - anchorwise_output=True) - non_max_suppression_fn = functools.partial( - post_processing.batch_multiclass_non_max_suppression, - score_thresh=-20.0, - iou_thresh=1.0, - max_size_per_class=5, - max_total_size=5) - classification_loss_weight = 1.0 - localization_loss_weight = 1.0 - normalize_loss_by_num_matches = False - - # This hard example miner is expected to be a no-op. 
- hard_example_miner = losses.HardExampleMiner( - num_hard_examples=None, - iou_threshold=1.0) - - self._num_anchors = 4 - self._code_size = 4 - self._model = ssd_meta_arch.SSDMetaArch( - is_training, mock_anchor_generator, mock_box_predictor, mock_box_coder, - fake_feature_extractor, mock_matcher, region_similarity_calculator, - image_resizer_fn, non_max_suppression_fn, tf.identity, - classification_loss, localization_loss, classification_loss_weight, - localization_loss_weight, normalize_loss_by_num_matches, - hard_example_miner) - - def test_preprocess_preserves_input_shapes(self): - image_shapes = [(3, None, None, 3), - (None, 10, 10, 3), - (None, None, None, 3)] - for image_shape in image_shapes: - image_placeholder = tf.placeholder(tf.float32, shape=image_shape) - preprocessed_inputs = self._model.preprocess(image_placeholder) - self.assertAllEqual(preprocessed_inputs.shape.as_list(), image_shape) - - def test_predict_results_have_correct_keys_and_shapes(self): - batch_size = 3 - image_size = 2 - input_shapes = [(batch_size, image_size, image_size, 3), - (None, image_size, image_size, 3), - (batch_size, None, None, 3), - (None, None, None, 3)] - expected_box_encodings_shape_out = ( - batch_size, self._num_anchors, self._code_size) - expected_class_predictions_with_background_shape_out = ( - batch_size, self._num_anchors, self._num_classes+1) - - for input_shape in input_shapes: - tf_graph = tf.Graph() - with tf_graph.as_default(): - preprocessed_input_placeholder = tf.placeholder(tf.float32, - shape=input_shape) - prediction_dict = self._model.predict(preprocessed_input_placeholder) - - self.assertTrue('box_encodings' in prediction_dict) - self.assertTrue('class_predictions_with_background' in prediction_dict) - self.assertTrue('feature_maps' in prediction_dict) - self.assertTrue('anchors' in prediction_dict) - - init_op = tf.global_variables_initializer() - with self.test_session(graph=tf_graph) as sess: - sess.run(init_op) - prediction_out = 
sess.run(prediction_dict, - feed_dict={ - preprocessed_input_placeholder: - np.random.uniform( - size=(batch_size, 2, 2, 3))}) - self.assertAllEqual(prediction_out['box_encodings'].shape, - expected_box_encodings_shape_out) - self.assertAllEqual( - prediction_out['class_predictions_with_background'].shape, - expected_class_predictions_with_background_shape_out) - - def test_postprocess_results_are_correct(self): - batch_size = 2 - image_size = 2 - input_shapes = [(batch_size, image_size, image_size, 3), - (None, image_size, image_size, 3), - (batch_size, None, None, 3), - (None, None, None, 3)] - - expected_boxes = np.array([[[0, 0, .5, .5], - [0, .5, .5, 1], - [.5, 0, 1, .5], - [.5, .5, 1, 1], - [0, 0, 0, 0]], - [[0, 0, .5, .5], - [0, .5, .5, 1], - [.5, 0, 1, .5], - [.5, .5, 1, 1], - [0, 0, 0, 0]]]) - expected_scores = np.array([[0, 0, 0, 0, 0], - [0, 0, 0, 0, 0]]) - expected_classes = np.array([[0, 0, 0, 0, 0], - [0, 0, 0, 0, 0]]) - expected_num_detections = np.array([4, 4]) - - for input_shape in input_shapes: - tf_graph = tf.Graph() - with tf_graph.as_default(): - preprocessed_input_placeholder = tf.placeholder(tf.float32, - shape=input_shape) - prediction_dict = self._model.predict(preprocessed_input_placeholder) - detections = self._model.postprocess(prediction_dict) - self.assertTrue('detection_boxes' in detections) - self.assertTrue('detection_scores' in detections) - self.assertTrue('detection_classes' in detections) - self.assertTrue('num_detections' in detections) - init_op = tf.global_variables_initializer() - with self.test_session(graph=tf_graph) as sess: - sess.run(init_op) - detections_out = sess.run(detections, - feed_dict={ - preprocessed_input_placeholder: - np.random.uniform( - size=(batch_size, 2, 2, 3))}) - self.assertAllClose(detections_out['detection_boxes'], expected_boxes) - self.assertAllClose(detections_out['detection_scores'], expected_scores) - self.assertAllClose(detections_out['detection_classes'], expected_classes) - 
self.assertAllClose(detections_out['num_detections'], - expected_num_detections) - - def test_loss_results_are_correct(self): - batch_size = 2 - preprocessed_input = tf.random_uniform((batch_size, 2, 2, 3), - dtype=tf.float32) - groundtruth_boxes_list = [tf.constant([[0, 0, .5, .5]], dtype=tf.float32), - tf.constant([[0, 0, .5, .5]], dtype=tf.float32)] - groundtruth_classes_list = [tf.constant([[1]], dtype=tf.float32), - tf.constant([[1]], dtype=tf.float32)] - self._model.provide_groundtruth(groundtruth_boxes_list, - groundtruth_classes_list) - prediction_dict = self._model.predict(preprocessed_input) - loss_dict = self._model.loss(prediction_dict) - - self.assertTrue('localization_loss' in loss_dict) - self.assertTrue('classification_loss' in loss_dict) - - expected_localization_loss = 0.0 - expected_classification_loss = (batch_size * self._num_anchors - * (self._num_classes+1) * np.log(2.0)) - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - losses_out = sess.run(loss_dict) - - self.assertAllClose(losses_out['localization_loss'], - expected_localization_loss) - self.assertAllClose(losses_out['classification_loss'], - expected_classification_loss) - - def test_restore_map_for_detection_ckpt(self): - init_op = tf.global_variables_initializer() - saver = tf.train.Saver() - save_path = self.get_temp_dir() - with self.test_session() as sess: - sess.run(init_op) - saved_model_path = saver.save(sess, save_path) - var_map = self._model.restore_map(from_detection_checkpoint=True) - self.assertIsInstance(var_map, dict) - saver = tf.train.Saver(var_map) - saver.restore(sess, saved_model_path) - for var in sess.run(tf.report_uninitialized_variables()): - self.assertNotIn('FeatureExtractor', var.name) - - def test_restore_map_for_classification_ckpt(self): - # Define mock tensorflow classification graph and save variables. 
- test_graph_classification = tf.Graph() - with test_graph_classification.as_default(): - image = tf.placeholder(dtype=tf.float32, shape=[1, 20, 20, 3]) - with tf.variable_scope('mock_model'): - net = slim.conv2d(image, num_outputs=32, kernel_size=1, scope='layer1') - slim.conv2d(net, num_outputs=3, kernel_size=1, scope='layer2') - - init_op = tf.global_variables_initializer() - saver = tf.train.Saver() - save_path = self.get_temp_dir() - with self.test_session() as sess: - sess.run(init_op) - saved_model_path = saver.save(sess, save_path) - - # Create tensorflow detection graph and load variables from - # classification checkpoint. - test_graph_detection = tf.Graph() - with test_graph_detection.as_default(): - inputs_shape = [2, 2, 2, 3] - inputs = tf.to_float(tf.random_uniform( - inputs_shape, minval=0, maxval=255, dtype=tf.int32)) - preprocessed_inputs = self._model.preprocess(inputs) - prediction_dict = self._model.predict(preprocessed_inputs) - self._model.postprocess(prediction_dict) - var_map = self._model.restore_map(from_detection_checkpoint=False) - self.assertIsInstance(var_map, dict) - saver = tf.train.Saver(var_map) - with self.test_session() as sess: - saver.restore(sess, saved_model_path) - for var in sess.run(tf.report_uninitialized_variables()): - self.assertNotIn('FeatureExtractor', var.name) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/metrics/BUILD b/object_detection/metrics/BUILD deleted file mode 100644 index 878f16a6..00000000 --- a/object_detection/metrics/BUILD +++ /dev/null @@ -1,55 +0,0 @@ -# Tensorflow Object Detection API: main runnables. 
- -package( - default_visibility = ["//visibility:public"], -) - -licenses(["notice"]) - -# Apache 2.0 - -py_binary( - name = "offline_eval_map_corloc", - srcs = [ - "offline_eval_map_corloc.py", - ], - deps = [ - ":tf_example_parser", - "//tensorflow_models/object_detection:evaluator", - "//tensorflow_models/object_detection/builders:input_reader_builder", - "//tensorflow_models/object_detection/core:standard_fields", - "//tensorflow_models/object_detection/utils:config_util", - "//tensorflow_models/object_detection/utils:label_map_util", - ], -) - -py_test( - name = "offline_eval_map_corloc_test", - srcs = [ - "offline_eval_map_corloc_test.py", - ], - deps = [ - ":offline_eval_map_corloc", - "//tensorflow", - ], -) - -py_library( - name = "tf_example_parser", - srcs = ["tf_example_parser.py"], - deps = [ - "//tensorflow", - "//tensorflow_models/object_detection/core:data_parser", - "//tensorflow_models/object_detection/core:standard_fields", - ], -) - -py_test( - name = "tf_example_parser_test", - srcs = ["tf_example_parser_test.py"], - deps = [ - ":tf_example_parser", - "//tensorflow", - "//tensorflow_models/object_detection/core:standard_fields", - ], -) diff --git a/object_detection/metrics/offline_eval_map_corloc.py b/object_detection/metrics/offline_eval_map_corloc.py deleted file mode 100644 index 421b4d1f..00000000 --- a/object_detection/metrics/offline_eval_map_corloc.py +++ /dev/null @@ -1,173 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -r"""Evaluation executable for detection data. - -This executable evaluates precomputed detections produced by a detection -model and writes the evaluation results into csv file metrics.csv, stored -in the directory, specified by --eval_dir. - -The evaluation metrics set is supplied in object_detection.protos.EvalConfig -in metrics_set field. -Currently two set of metrics are supported: -- pascal_voc_metrics: standard PASCAL VOC 2007 metric -- open_images_metrics: Open Image V2 metric -All other field of object_detection.protos.EvalConfig are ignored. - -Example usage: - ./compute_metrics \ - --eval_dir=path/to/eval_dir \ - --eval_config_path=path/to/evaluation/configuration/file \ - --input_config_path=path/to/input/configuration/file -""" -import csv -import os -import re -import tensorflow as tf - -from object_detection import evaluator -from object_detection.core import standard_fields -from object_detection.metrics import tf_example_parser -from object_detection.utils import config_util -from object_detection.utils import label_map_util - -flags = tf.app.flags -tf.logging.set_verbosity(tf.logging.INFO) - -flags.DEFINE_string('eval_dir', None, 'Directory to write eval summaries to.') -flags.DEFINE_string('eval_config_path', None, - 'Path to an eval_pb2.EvalConfig config file.') -flags.DEFINE_string('input_config_path', None, - 'Path to an eval_pb2.InputConfig config file.') - -FLAGS = flags.FLAGS - - -def _generate_sharded_filenames(filename): - m = re.search(r'@(\d{1,})', filename) - if m: - num_shards = int(m.group(1)) - return [ - re.sub(r'@(\d{1,})', '-%.5d-of-%.5d' % (i, num_shards), filename) - for i in range(num_shards) - ] - else: - return [filename] - - -def _generate_filenames(filenames): - result = [] - for filename in filenames: - result += 
_generate_sharded_filenames(filename) - return result - - -def read_data_and_evaluate(input_config, eval_config): - """Reads pre-computed object detections and groundtruth from tf_record. - - Args: - input_config: input config proto of type - object_detection.protos.InputReader. - eval_config: evaluation config proto of type - object_detection.protos.EvalConfig. - - Returns: - Evaluated detections metrics. - - Raises: - ValueError: if input_reader type is not supported or metric type is unknown. - """ - if input_config.WhichOneof('input_reader') == 'tf_record_input_reader': - input_paths = input_config.tf_record_input_reader.input_path - - label_map = label_map_util.load_labelmap(input_config.label_map_path) - max_num_classes = max([item.id for item in label_map.item]) - categories = label_map_util.convert_label_map_to_categories( - label_map, max_num_classes) - - object_detection_evaluators = evaluator.get_evaluators( - eval_config, categories) - # Support a single evaluator - object_detection_evaluator = object_detection_evaluators[0] - - skipped_images = 0 - processed_images = 0 - for input_path in _generate_filenames(input_paths): - tf.logging.info('Processing file: {0}'.format(input_path)) - - record_iterator = tf.python_io.tf_record_iterator(path=input_path) - data_parser = tf_example_parser.TfExampleDetectionAndGTParser() - - for string_record in record_iterator: - tf.logging.log_every_n(tf.logging.INFO, 'Processed %d images...', 1000, - processed_images) - processed_images += 1 - - example = tf.train.Example() - example.ParseFromString(string_record) - decoded_dict = data_parser.parse(example) - - if decoded_dict: - object_detection_evaluator.add_single_ground_truth_image_info( - decoded_dict[standard_fields.DetectionResultFields.key], - decoded_dict) - object_detection_evaluator.add_single_detected_image_info( - decoded_dict[standard_fields.DetectionResultFields.key], - decoded_dict) - else: - skipped_images += 1 - tf.logging.info('Skipped images: 
{0}'.format(skipped_images)) - - return object_detection_evaluator.evaluate() - - raise ValueError('Unsupported input_reader_config.') - - -def write_metrics(metrics, output_dir): - """Write metrics to the output directory. - - Args: - metrics: A dictionary containing metric names and values. - output_dir: Directory to write metrics to. - """ - tf.logging.info('Writing metrics.') - - with open(os.path.join(output_dir, 'metrics.csv'), 'w') as csvfile: - metrics_writer = csv.writer(csvfile, delimiter=',') - for metric_name, metric_value in metrics.items(): - metrics_writer.writerow([metric_name, str(metric_value)]) - - -def main(argv): - del argv - required_flags = ['input_config_path', 'eval_config_path', 'eval_dir'] - for flag_name in required_flags: - if not getattr(FLAGS, flag_name): - raise ValueError('Flag --{} is required'.format(flag_name)) - - configs = config_util.get_configs_from_multiple_files( - eval_input_config_path=FLAGS.input_config_path, - eval_config_path=FLAGS.eval_config_path) - - eval_config = configs['eval_config'] - input_config = configs['eval_input_config'] - - metrics = read_data_and_evaluate(input_config, eval_config) - - # Save metrics - write_metrics(metrics, FLAGS.eval_dir) - - -if __name__ == '__main__': - tf.app.run(main) diff --git a/object_detection/metrics/offline_eval_map_corloc_test.py b/object_detection/metrics/offline_eval_map_corloc_test.py deleted file mode 100644 index 68ac3893..00000000 --- a/object_detection/metrics/offline_eval_map_corloc_test.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for utilities in offline_eval_map_corloc binary.""" - -import tensorflow as tf - -from object_detection.metrics import offline_eval_map_corloc as offline_eval - - -class OfflineEvalMapCorlocTest(tf.test.TestCase): - - def test_generateShardedFilenames(self): - test_filename = '/path/to/file' - result = offline_eval._generate_sharded_filenames(test_filename) - self.assertEqual(result, [test_filename]) - - test_filename = '/path/to/file-00000-of-00050' - result = offline_eval._generate_sharded_filenames(test_filename) - self.assertEqual(result, [test_filename]) - - result = offline_eval._generate_sharded_filenames('/path/to/@3.record') - self.assertEqual(result, [ - '/path/to/-00000-of-00003.record', '/path/to/-00001-of-00003.record', - '/path/to/-00002-of-00003.record' - ]) - - result = offline_eval._generate_sharded_filenames('/path/to/abc@3') - self.assertEqual(result, [ - '/path/to/abc-00000-of-00003', '/path/to/abc-00001-of-00003', - '/path/to/abc-00002-of-00003' - ]) - - result = offline_eval._generate_sharded_filenames('/path/to/@1') - self.assertEqual(result, ['/path/to/-00000-of-00001']) - - def test_generateFilenames(self): - test_filenames = ['/path/to/file', '/path/to/@3.record'] - result = offline_eval._generate_filenames(test_filenames) - self.assertEqual(result, [ - '/path/to/file', '/path/to/-00000-of-00003.record', - '/path/to/-00001-of-00003.record', '/path/to/-00002-of-00003.record' - ]) - - -if __name__ == '__main__': - tf.test.main() diff --git 
a/object_detection/metrics/tf_example_parser.py b/object_detection/metrics/tf_example_parser.py deleted file mode 100644 index 5b8ab7af..00000000 --- a/object_detection/metrics/tf_example_parser.py +++ /dev/null @@ -1,155 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tensorflow Example proto parser for data loading. - -A parser to decode data containing serialized tensorflow.Example -protos into materialized tensors (numpy arrays). 
-""" - -import numpy as np - -from object_detection.core import data_parser -from object_detection.core import standard_fields as fields - - -class FloatParser(data_parser.DataToNumpyParser): - """Tensorflow Example float parser.""" - - def __init__(self, field_name): - self.field_name = field_name - - def parse(self, tf_example): - return np.array( - tf_example.features.feature[self.field_name].float_list.value, - dtype=np.float).transpose() if tf_example.features.feature[ - self.field_name].HasField("float_list") else None - - -class StringParser(data_parser.DataToNumpyParser): - """Tensorflow Example string parser.""" - - def __init__(self, field_name): - self.field_name = field_name - - def parse(self, tf_example): - return "".join(tf_example.features.feature[self.field_name] - .bytes_list.value) if tf_example.features.feature[ - self.field_name].HasField("bytes_list") else None - - -class Int64Parser(data_parser.DataToNumpyParser): - """Tensorflow Example int64 parser.""" - - def __init__(self, field_name): - self.field_name = field_name - - def parse(self, tf_example): - return np.array( - tf_example.features.feature[self.field_name].int64_list.value, - dtype=np.int64).transpose() if tf_example.features.feature[ - self.field_name].HasField("int64_list") else None - - -class BoundingBoxParser(data_parser.DataToNumpyParser): - """Tensorflow Example bounding box parser.""" - - def __init__(self, xmin_field_name, ymin_field_name, xmax_field_name, - ymax_field_name): - self.field_names = [ - ymin_field_name, xmin_field_name, ymax_field_name, xmax_field_name - ] - - def parse(self, tf_example): - result = [] - parsed = True - for field_name in self.field_names: - result.append(tf_example.features.feature[field_name].float_list.value) - parsed &= ( - tf_example.features.feature[field_name].HasField("float_list")) - - return np.array(result).transpose() if parsed else None - - -class TfExampleDetectionAndGTParser(data_parser.DataToNumpyParser): - """Tensorflow 
Example proto parser.""" - - def __init__(self): - self.items_to_handlers = { - fields.DetectionResultFields.key: - StringParser(fields.TfExampleFields.source_id), - # Object ground truth boxes and classes. - fields.InputDataFields.groundtruth_boxes: (BoundingBoxParser( - fields.TfExampleFields.object_bbox_xmin, - fields.TfExampleFields.object_bbox_ymin, - fields.TfExampleFields.object_bbox_xmax, - fields.TfExampleFields.object_bbox_ymax)), - fields.InputDataFields.groundtruth_classes: ( - Int64Parser(fields.TfExampleFields.object_class_label)), - # Object detections. - fields.DetectionResultFields.detection_boxes: (BoundingBoxParser( - fields.TfExampleFields.detection_bbox_xmin, - fields.TfExampleFields.detection_bbox_ymin, - fields.TfExampleFields.detection_bbox_xmax, - fields.TfExampleFields.detection_bbox_ymax)), - fields.DetectionResultFields.detection_classes: ( - Int64Parser(fields.TfExampleFields.detection_class_label)), - fields.DetectionResultFields.detection_scores: ( - FloatParser(fields.TfExampleFields.detection_score)), - } - - self.optional_items_to_handlers = { - fields.InputDataFields.groundtruth_difficult: - Int64Parser(fields.TfExampleFields.object_difficult), - fields.InputDataFields.groundtruth_group_of: - Int64Parser(fields.TfExampleFields.object_group_of) - } - - def parse(self, tf_example): - """Parses tensorflow example and returns a tensor dictionary. - - Args: - tf_example: a tf.Example object. - - Returns: - A dictionary of the following numpy arrays: - fields.DetectionResultFields.source_id - string containing original image - id. - fields.InputDataFields.groundtruth_boxes - a numpy array containing - groundtruth boxes. - fields.InputDataFields.groundtruth_classes - a numpy array containing - groundtruth classes. - fields.InputDataFields.groundtruth_group_of - a numpy array containing - groundtruth group of flag (optional, None if not specified). 
- fields.InputDataFields.groundtruth_difficult - a numpy array containing - groundtruth difficult flag (optional, None if not specified). - fields.DetectionResultFields.detection_boxes - a numpy array containing - detection boxes. - fields.DetectionResultFields.detection_classes - a numpy array containing - detection class labels. - fields.DetectionResultFields.detection_scores - a numpy array containing - detection scores. - Returns None if tf.Example was not parsed or non-optional fields were not - found. - """ - results_dict = {} - parsed = True - for key, parser in self.items_to_handlers.items(): - results_dict[key] = parser.parse(tf_example) - parsed &= (results_dict[key] is not None) - - for key, parser in self.optional_items_to_handlers.items(): - results_dict[key] = parser.parse(tf_example) - - return results_dict if parsed else None diff --git a/object_detection/metrics/tf_example_parser_test.py b/object_detection/metrics/tf_example_parser_test.py deleted file mode 100644 index 6d9ce748..00000000 --- a/object_detection/metrics/tf_example_parser_test.py +++ /dev/null @@ -1,186 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for object_detection.data_decoders.tf_example_parser.""" - -import numpy as np -import numpy.testing as np_testing -import tensorflow as tf - -from object_detection.core import standard_fields as fields -from object_detection.metrics import tf_example_parser - - -class TfExampleDecoderTest(tf.test.TestCase): - - def _Int64Feature(self, value): - return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) - - def _FloatFeature(self, value): - return tf.train.Feature(float_list=tf.train.FloatList(value=value)) - - def _BytesFeature(self, value): - return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) - - def testParseDetectionsAndGT(self): - source_id = 'abc.jpg' - # y_min, x_min, y_max, x_max - object_bb = np.array([[0.0, 0.5, 0.3], [0.0, 0.1, 0.6], [1.0, 0.6, 0.8], - [1.0, 0.6, 0.7]]).transpose() - detection_bb = np.array([[0.1, 0.2], [0.0, 0.8], [1.0, 0.6], - [1.0, 0.85]]).transpose() - - object_class_label = [1, 1, 2] - object_difficult = [1, 0, 0] - object_group_of = [0, 0, 1] - detection_class_label = [2, 1] - detection_score = [0.5, 0.3] - features = { - fields.TfExampleFields.source_id: - self._BytesFeature(source_id), - fields.TfExampleFields.object_bbox_ymin: - self._FloatFeature(object_bb[:, 0].tolist()), - fields.TfExampleFields.object_bbox_xmin: - self._FloatFeature(object_bb[:, 1].tolist()), - fields.TfExampleFields.object_bbox_ymax: - self._FloatFeature(object_bb[:, 2].tolist()), - fields.TfExampleFields.object_bbox_xmax: - self._FloatFeature(object_bb[:, 3].tolist()), - fields.TfExampleFields.detection_bbox_ymin: - self._FloatFeature(detection_bb[:, 0].tolist()), - fields.TfExampleFields.detection_bbox_xmin: - self._FloatFeature(detection_bb[:, 1].tolist()), - fields.TfExampleFields.detection_bbox_ymax: - self._FloatFeature(detection_bb[:, 2].tolist()), - fields.TfExampleFields.detection_bbox_xmax: - self._FloatFeature(detection_bb[:, 
3].tolist()), - fields.TfExampleFields.detection_class_label: - self._Int64Feature(detection_class_label), - fields.TfExampleFields.detection_score: - self._FloatFeature(detection_score), - } - - example = tf.train.Example(features=tf.train.Features(feature=features)) - parser = tf_example_parser.TfExampleDetectionAndGTParser() - - results_dict = parser.parse(example) - self.assertIsNone(results_dict) - - features[fields.TfExampleFields.object_class_label] = ( - self._Int64Feature(object_class_label)) - features[fields.TfExampleFields.object_difficult] = ( - self._Int64Feature(object_difficult)) - - example = tf.train.Example(features=tf.train.Features(feature=features)) - results_dict = parser.parse(example) - - self.assertIsNotNone(results_dict) - self.assertEqual(source_id, results_dict[fields.DetectionResultFields.key]) - np_testing.assert_almost_equal( - object_bb, results_dict[fields.InputDataFields.groundtruth_boxes]) - np_testing.assert_almost_equal( - detection_bb, - results_dict[fields.DetectionResultFields.detection_boxes]) - np_testing.assert_almost_equal( - detection_score, - results_dict[fields.DetectionResultFields.detection_scores]) - np_testing.assert_almost_equal( - detection_class_label, - results_dict[fields.DetectionResultFields.detection_classes]) - np_testing.assert_almost_equal( - object_difficult, - results_dict[fields.InputDataFields.groundtruth_difficult]) - np_testing.assert_almost_equal( - object_class_label, - results_dict[fields.InputDataFields.groundtruth_classes]) - - parser = tf_example_parser.TfExampleDetectionAndGTParser() - - features[fields.TfExampleFields.object_group_of] = ( - self._Int64Feature(object_group_of)) - - example = tf.train.Example(features=tf.train.Features(feature=features)) - results_dict = parser.parse(example) - self.assertIsNotNone(results_dict) - np_testing.assert_almost_equal( - object_group_of, - results_dict[fields.InputDataFields.groundtruth_group_of]) - - def testParseString(self): - string_val = 'abc' 
- features = {'string': self._BytesFeature(string_val)} - example = tf.train.Example(features=tf.train.Features(feature=features)) - - parser = tf_example_parser.StringParser('string') - result = parser.parse(example) - self.assertIsNotNone(result) - self.assertEqual(result, string_val) - - parser = tf_example_parser.StringParser('another_string') - result = parser.parse(example) - self.assertIsNone(result) - - def testParseFloat(self): - float_array_val = [1.5, 1.4, 2.0] - features = {'floats': self._FloatFeature(float_array_val)} - example = tf.train.Example(features=tf.train.Features(feature=features)) - - parser = tf_example_parser.FloatParser('floats') - result = parser.parse(example) - self.assertIsNotNone(result) - np_testing.assert_almost_equal(result, float_array_val) - - parser = tf_example_parser.StringParser('another_floats') - result = parser.parse(example) - self.assertIsNone(result) - - def testInt64Parser(self): - int_val = [1, 2, 3] - features = {'ints': self._Int64Feature(int_val)} - example = tf.train.Example(features=tf.train.Features(feature=features)) - - parser = tf_example_parser.Int64Parser('ints') - result = parser.parse(example) - self.assertIsNotNone(result) - np_testing.assert_almost_equal(result, int_val) - - parser = tf_example_parser.Int64Parser('another_ints') - result = parser.parse(example) - self.assertIsNone(result) - - def testBoundingBoxParser(self): - bounding_boxes = np.array([[0.0, 0.5, 0.3], [0.0, 0.1, 0.6], - [1.0, 0.6, 0.8], [1.0, 0.6, 0.7]]).transpose() - features = { - 'ymin': self._FloatFeature(bounding_boxes[:, 0]), - 'xmin': self._FloatFeature(bounding_boxes[:, 1]), - 'ymax': self._FloatFeature(bounding_boxes[:, 2]), - 'xmax': self._FloatFeature(bounding_boxes[:, 3]) - } - - example = tf.train.Example(features=tf.train.Features(feature=features)) - - parser = tf_example_parser.BoundingBoxParser('xmin', 'ymin', 'xmax', 'ymax') - result = parser.parse(example) - self.assertIsNotNone(result) - 
np_testing.assert_almost_equal(result, bounding_boxes) - - parser = tf_example_parser.BoundingBoxParser('xmin', 'ymin', 'xmax', - 'another_ymax') - result = parser.parse(example) - self.assertIsNone(result) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/models/BUILD b/object_detection/models/BUILD deleted file mode 100644 index 36efaba5..00000000 --- a/object_detection/models/BUILD +++ /dev/null @@ -1,229 +0,0 @@ -# Tensorflow Object Detection API: Models. - -package( - default_visibility = ["//visibility:public"], -) - -licenses(["notice"]) - -# Apache 2.0 - -py_library( - name = "feature_map_generators", - srcs = [ - "feature_map_generators.py", - ], - deps = [ - "//tensorflow", - ], -) - -py_test( - name = "feature_map_generators_test", - srcs = [ - "feature_map_generators_test.py", - ], - deps = [ - ":feature_map_generators", - "//tensorflow", - ], -) - -py_library( - name = "ssd_feature_extractor_test", - srcs = [ - "ssd_feature_extractor_test.py", - ], - deps = [ - "//tensorflow", - ], -) - -py_library( - name = "ssd_inception_v2_feature_extractor", - srcs = [ - "ssd_inception_v2_feature_extractor.py", - ], - deps = [ - ":feature_map_generators", - "//tensorflow", - "//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch", - "//tensorflow_models/object_detection/utils:ops", - "//tensorflow_models/slim:inception_v2", - ], -) - -py_library( - name = "ssd_inception_v3_feature_extractor", - srcs = [ - "ssd_inception_v3_feature_extractor.py", - ], - deps = [ - ":feature_map_generators", - "//tensorflow", - "//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch", - "//tensorflow_models/object_detection/utils:ops", - "//tensorflow_models/slim:inception_v3", - ], -) - -py_library( - name = "ssd_mobilenet_v1_feature_extractor", - srcs = ["ssd_mobilenet_v1_feature_extractor.py"], - deps = [ - ":feature_map_generators", - "//tensorflow", - 
"//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch", - "//tensorflow_models/object_detection/utils:ops", - "//tensorflow_models/slim:mobilenet_v1", - ], -) - -py_library( - name = "embedded_ssd_mobilenet_v1_feature_extractor", - srcs = ["embedded_ssd_mobilenet_v1_feature_extractor.py"], - deps = [ - ":feature_map_generators", - ":ssd_mobilenet_v1_feature_extractor", - "//tensorflow", - "//tensorflow_models/object_detection/utils:ops", - "//tensorflow_models/slim:mobilenet_v1", - ], -) - -py_test( - name = "ssd_inception_v2_feature_extractor_test", - srcs = [ - "ssd_inception_v2_feature_extractor_test.py", - ], - deps = [ - ":ssd_feature_extractor_test", - ":ssd_inception_v2_feature_extractor", - "//tensorflow", - ], -) - -py_test( - name = "ssd_inception_v3_feature_extractor_test", - srcs = [ - "ssd_inception_v3_feature_extractor_test.py", - ], - deps = [ - ":ssd_feature_extractor_test", - ":ssd_inception_v3_feature_extractor", - "//tensorflow", - ], -) - -py_test( - name = "ssd_mobilenet_v1_feature_extractor_test", - srcs = ["ssd_mobilenet_v1_feature_extractor_test.py"], - deps = [ - ":ssd_feature_extractor_test", - ":ssd_mobilenet_v1_feature_extractor", - "//tensorflow", - ], -) - -py_test( - name = "embedded_ssd_mobilenet_v1_feature_extractor_test", - srcs = ["embedded_ssd_mobilenet_v1_feature_extractor_test.py"], - deps = [ - ":embedded_ssd_mobilenet_v1_feature_extractor", - ":ssd_feature_extractor_test", - "//tensorflow", - ], -) - -py_test( - name = "faster_rcnn_nas_feature_extractor_test", - srcs = [ - "faster_rcnn_nas_feature_extractor_test.py", - ], - deps = [ - ":faster_rcnn_nas_feature_extractor", - "//tensorflow", - ], -) - -py_library( - name = "faster_rcnn_nas_feature_extractor", - srcs = [ - "faster_rcnn_nas_feature_extractor.py", - ], - deps = [ - "//tensorflow", - "//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch", - "//tensorflow_models/slim:nasnet", - ], -) - -py_library( - name = 
"faster_rcnn_inception_resnet_v2_feature_extractor", - srcs = [ - "faster_rcnn_inception_resnet_v2_feature_extractor.py", - ], - deps = [ - "//tensorflow", - "//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch", - "//tensorflow_models/slim:inception_resnet_v2", - ], -) - -py_test( - name = "faster_rcnn_inception_resnet_v2_feature_extractor_test", - srcs = [ - "faster_rcnn_inception_resnet_v2_feature_extractor_test.py", - ], - deps = [ - ":faster_rcnn_inception_resnet_v2_feature_extractor", - "//tensorflow", - ], -) - -py_library( - name = "faster_rcnn_inception_v2_feature_extractor", - srcs = [ - "faster_rcnn_inception_v2_feature_extractor.py", - ], - deps = [ - "//tensorflow", - "//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch", - "//tensorflow_models/slim:inception_v2", - ], -) - -py_test( - name = "faster_rcnn_inception_v2_feature_extractor_test", - srcs = [ - "faster_rcnn_inception_v2_feature_extractor_test.py", - ], - deps = [ - ":faster_rcnn_inception_v2_feature_extractor", - "//tensorflow", - ], -) - -py_library( - name = "faster_rcnn_resnet_v1_feature_extractor", - srcs = [ - "faster_rcnn_resnet_v1_feature_extractor.py", - ], - deps = [ - "//tensorflow", - "//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch", - "//tensorflow_models/slim:resnet_utils", - "//tensorflow_models/slim:resnet_v1", - ], -) - -py_test( - name = "faster_rcnn_resnet_v1_feature_extractor_test", - srcs = [ - "faster_rcnn_resnet_v1_feature_extractor_test.py", - ], - deps = [ - ":faster_rcnn_resnet_v1_feature_extractor", - "//tensorflow", - ], -) diff --git a/object_detection/models/__init__.py b/object_detection/models/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/object_detection/models/__pycache__/__init__.cpython-35.pyc b/object_detection/models/__pycache__/__init__.cpython-35.pyc deleted file mode 100644 index 944348c8..00000000 Binary files 
a/object_detection/models/__pycache__/__init__.cpython-35.pyc and /dev/null differ diff --git a/object_detection/models/__pycache__/embedded_ssd_mobilenet_v1_feature_extractor.cpython-35.pyc b/object_detection/models/__pycache__/embedded_ssd_mobilenet_v1_feature_extractor.cpython-35.pyc deleted file mode 100644 index 3771c902..00000000 Binary files a/object_detection/models/__pycache__/embedded_ssd_mobilenet_v1_feature_extractor.cpython-35.pyc and /dev/null differ diff --git a/object_detection/models/__pycache__/faster_rcnn_inception_resnet_v2_feature_extractor.cpython-35.pyc b/object_detection/models/__pycache__/faster_rcnn_inception_resnet_v2_feature_extractor.cpython-35.pyc deleted file mode 100644 index e8afff51..00000000 Binary files a/object_detection/models/__pycache__/faster_rcnn_inception_resnet_v2_feature_extractor.cpython-35.pyc and /dev/null differ diff --git a/object_detection/models/__pycache__/faster_rcnn_inception_v2_feature_extractor.cpython-35.pyc b/object_detection/models/__pycache__/faster_rcnn_inception_v2_feature_extractor.cpython-35.pyc deleted file mode 100644 index 75128279..00000000 Binary files a/object_detection/models/__pycache__/faster_rcnn_inception_v2_feature_extractor.cpython-35.pyc and /dev/null differ diff --git a/object_detection/models/__pycache__/faster_rcnn_nas_feature_extractor.cpython-35.pyc b/object_detection/models/__pycache__/faster_rcnn_nas_feature_extractor.cpython-35.pyc deleted file mode 100644 index bfd986a2..00000000 Binary files a/object_detection/models/__pycache__/faster_rcnn_nas_feature_extractor.cpython-35.pyc and /dev/null differ diff --git a/object_detection/models/__pycache__/faster_rcnn_resnet_v1_feature_extractor.cpython-35.pyc b/object_detection/models/__pycache__/faster_rcnn_resnet_v1_feature_extractor.cpython-35.pyc deleted file mode 100644 index a324c94c..00000000 Binary files a/object_detection/models/__pycache__/faster_rcnn_resnet_v1_feature_extractor.cpython-35.pyc and /dev/null differ diff --git 
a/object_detection/models/__pycache__/feature_map_generators.cpython-35.pyc b/object_detection/models/__pycache__/feature_map_generators.cpython-35.pyc deleted file mode 100644 index da26f4f0..00000000 Binary files a/object_detection/models/__pycache__/feature_map_generators.cpython-35.pyc and /dev/null differ diff --git a/object_detection/models/__pycache__/ssd_inception_v2_feature_extractor.cpython-35.pyc b/object_detection/models/__pycache__/ssd_inception_v2_feature_extractor.cpython-35.pyc deleted file mode 100644 index bba597f1..00000000 Binary files a/object_detection/models/__pycache__/ssd_inception_v2_feature_extractor.cpython-35.pyc and /dev/null differ diff --git a/object_detection/models/__pycache__/ssd_inception_v3_feature_extractor.cpython-35.pyc b/object_detection/models/__pycache__/ssd_inception_v3_feature_extractor.cpython-35.pyc deleted file mode 100644 index 7e884ee1..00000000 Binary files a/object_detection/models/__pycache__/ssd_inception_v3_feature_extractor.cpython-35.pyc and /dev/null differ diff --git a/object_detection/models/__pycache__/ssd_mobilenet_v1_feature_extractor.cpython-35.pyc b/object_detection/models/__pycache__/ssd_mobilenet_v1_feature_extractor.cpython-35.pyc deleted file mode 100644 index 3fb2c037..00000000 Binary files a/object_detection/models/__pycache__/ssd_mobilenet_v1_feature_extractor.cpython-35.pyc and /dev/null differ diff --git a/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor.py b/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor.py deleted file mode 100644 index a29cb84f..00000000 --- a/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor.py +++ /dev/null @@ -1,124 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Embedded-friendly SSDFeatureExtractor for MobilenetV1 features.""" - -import tensorflow as tf - -from object_detection.models import feature_map_generators -from object_detection.models import ssd_mobilenet_v1_feature_extractor -from object_detection.utils import ops -from nets import mobilenet_v1 - -slim = tf.contrib.slim - - -class EmbeddedSSDMobileNetV1FeatureExtractor( - ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor): - """Embedded-friendly SSD Feature Extractor using MobilenetV1 features. - - This feature extractor is similar to SSD MobileNetV1 feature extractor, and - it fixes input resolution to be 256x256, reduces the number of feature maps - used for box prediction and ensures convolution kernel to be no larger - than input tensor in spatial dimensions. - - This feature extractor requires support of the following ops if used in - embedded devices: - - Conv - - DepthwiseConv - - Relu6 - - All conv/depthwiseconv use SAME padding, and no additional spatial padding is - needed. - """ - - def __init__(self, - is_training, - depth_multiplier, - min_depth, - pad_to_multiple, - conv_hyperparams, - batch_norm_trainable=True, - reuse_weights=None): - """MobileNetV1 Feature Extractor for Embedded-friendly SSD Models. - - Args: - is_training: whether the network is in training mode. - depth_multiplier: float depth multiplier for feature extractor. - min_depth: minimum feature extractor depth. 
- pad_to_multiple: the nearest multiple to zero pad the input height and - width dimensions to. For EmbeddedSSD it must be set to 1. - conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops. - batch_norm_trainable: Whether to update batch norm parameters during - training or not. When training with a small batch size - (e.g. 1), it is desirable to disable batch norm update and use - pretrained batch norm params. - reuse_weights: Whether to reuse variables. Default is None. - - Raises: - ValueError: upon invalid `pad_to_multiple` values. - """ - if pad_to_multiple != 1: - raise ValueError('Embedded-specific SSD only supports `pad_to_multiple` ' - 'of 1.') - - super(EmbeddedSSDMobileNetV1FeatureExtractor, self).__init__( - is_training, depth_multiplier, min_depth, pad_to_multiple, - conv_hyperparams, batch_norm_trainable, reuse_weights) - - def extract_features(self, preprocessed_inputs): - """Extract features from preprocessed inputs. - - Args: - preprocessed_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. 
- - Returns: - feature_maps: a list of tensors where the ith tensor has shape - [batch, height_i, width_i, depth_i] - """ - preprocessed_inputs.get_shape().assert_has_rank(4) - shape_assert = tf.Assert( - tf.logical_and( - tf.equal(tf.shape(preprocessed_inputs)[1], 256), - tf.equal(tf.shape(preprocessed_inputs)[2], 256)), - ['image size must be 256 in both height and width.']) - - feature_map_layout = { - 'from_layer': [ - 'Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', '' - ], - 'layer_depth': [-1, -1, 512, 256, 256], - 'conv_kernel_size': [-1, -1, 3, 3, 2], - } - - with tf.control_dependencies([shape_assert]): - with slim.arg_scope(self._conv_hyperparams): - with tf.variable_scope('MobilenetV1', - reuse=self._reuse_weights) as scope: - _, image_features = mobilenet_v1.mobilenet_v1_base( - ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), - final_endpoint='Conv2d_13_pointwise', - min_depth=self._min_depth, - depth_multiplier=self._depth_multiplier, - scope=scope) - feature_maps = feature_map_generators.multi_resolution_feature_maps( - feature_map_layout=feature_map_layout, - depth_multiplier=self._depth_multiplier, - min_depth=self._min_depth, - insert_1x1_conv=True, - image_features=image_features) - - return feature_maps.values() diff --git a/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor_test.py b/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor_test.py deleted file mode 100644 index cef5de51..00000000 --- a/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor_test.py +++ /dev/null @@ -1,119 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for embedded_ssd_mobilenet_v1_feature_extractor.""" -import numpy as np -import tensorflow as tf - -from object_detection.models import embedded_ssd_mobilenet_v1_feature_extractor -from object_detection.models import ssd_feature_extractor_test - - -class EmbeddedSSDMobileNetV1FeatureExtractorTest( - ssd_feature_extractor_test.SsdFeatureExtractorTestBase, tf.test.TestCase): - - def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, - is_training=True, batch_norm_trainable=True): - """Constructs a new feature extractor. - - Args: - depth_multiplier: float depth multiplier for feature extractor - pad_to_multiple: the nearest multiple to zero pad the input height and - width dimensions to. - is_training: whether the network is in training mode. - batch_norm_trainable: whether to update batch norm parameters during - training. - - Returns: - an ssd_meta_arch.SSDFeatureExtractor object. - """ - min_depth = 32 - conv_hyperparams = {} - return (embedded_ssd_mobilenet_v1_feature_extractor. 
- EmbeddedSSDMobileNetV1FeatureExtractor( - is_training, depth_multiplier, min_depth, pad_to_multiple, - conv_hyperparams, batch_norm_trainable)) - - def test_extract_features_returns_correct_shapes_256(self): - image_height = 256 - image_width = 256 - depth_multiplier = 1.0 - pad_to_multiple = 1 - expected_feature_map_shape = [(4, 16, 16, 512), (4, 8, 8, 1024), - (4, 4, 4, 512), (4, 2, 2, 256), - (4, 1, 1, 256)] - self.check_extract_features_returns_correct_shape( - image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_returns_correct_shapes_enforcing_min_depth(self): - image_height = 256 - image_width = 256 - depth_multiplier = 0.5**12 - pad_to_multiple = 1 - expected_feature_map_shape = [(4, 16, 16, 32), (4, 8, 8, 32), (4, 4, 4, 32), - (4, 2, 2, 32), (4, 1, 1, 32)] - self.check_extract_features_returns_correct_shape( - image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_returns_correct_shapes_with_pad_to_multiple_of_1( - self): - image_height = 256 - image_width = 256 - depth_multiplier = 1.0 - pad_to_multiple = 1 - expected_feature_map_shape = [(4, 16, 16, 512), (4, 8, 8, 1024), - (4, 4, 4, 512), (4, 2, 2, 256), - (4, 1, 1, 256)] - self.check_extract_features_returns_correct_shape( - image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_raises_error_with_pad_to_multiple_not_1(self): - depth_multiplier = 1.0 - pad_to_multiple = 2 - with self.assertRaises(ValueError): - _ = self._create_feature_extractor(depth_multiplier, pad_to_multiple) - - def test_extract_features_raises_error_with_invalid_image_size(self): - image_height = 128 - image_width = 128 - depth_multiplier = 1.0 - pad_to_multiple = 1 - self.check_extract_features_raises_error_with_invalid_image_size( - image_height, image_width, depth_multiplier, pad_to_multiple) - - def 
test_preprocess_returns_correct_value_range(self): - image_height = 256 - image_width = 256 - depth_multiplier = 1 - pad_to_multiple = 1 - test_image = np.random.rand(4, image_height, image_width, 3) - feature_extractor = self._create_feature_extractor(depth_multiplier, - pad_to_multiple) - preprocessed_image = feature_extractor.preprocess(test_image) - self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0))) - - def test_variables_only_created_in_scope(self): - depth_multiplier = 1 - pad_to_multiple = 1 - scope_name = 'MobilenetV1' - self.check_feature_extractor_variables_under_scope( - depth_multiplier, pad_to_multiple, scope_name) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor.py b/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor.py deleted file mode 100644 index 29430d86..00000000 --- a/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor.py +++ /dev/null @@ -1,215 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Inception Resnet v2 Faster R-CNN implementation. - -See "Inception-v4, Inception-ResNet and the Impact of Residual Connections on -Learning" by Szegedy et al. 
(https://arxiv.org/abs/1602.07261) -as well as -"Speed/accuracy trade-offs for modern convolutional object detectors" by -Huang et al. (https://arxiv.org/abs/1611.10012) -""" - -import tensorflow as tf - -from object_detection.meta_architectures import faster_rcnn_meta_arch -from nets import inception_resnet_v2 - -slim = tf.contrib.slim - - -class FasterRCNNInceptionResnetV2FeatureExtractor( - faster_rcnn_meta_arch.FasterRCNNFeatureExtractor): - """Faster R-CNN with Inception Resnet v2 feature extractor implementation.""" - - def __init__(self, - is_training, - first_stage_features_stride, - batch_norm_trainable=False, - reuse_weights=None, - weight_decay=0.0): - """Constructor. - - Args: - is_training: See base class. - first_stage_features_stride: See base class. - batch_norm_trainable: See base class. - reuse_weights: See base class. - weight_decay: See base class. - - Raises: - ValueError: If `first_stage_features_stride` is not 8 or 16. - """ - if first_stage_features_stride != 8 and first_stage_features_stride != 16: - raise ValueError('`first_stage_features_stride` must be 8 or 16.') - super(FasterRCNNInceptionResnetV2FeatureExtractor, self).__init__( - is_training, first_stage_features_stride, batch_norm_trainable, - reuse_weights, weight_decay) - - def preprocess(self, resized_inputs): - """Faster R-CNN with Inception Resnet v2 preprocessing. - - Maps pixel values to the range [-1, 1]. - - Args: - resized_inputs: A [batch, height_in, width_in, channels] float32 tensor - representing a batch of images with values between 0 and 255.0. - - Returns: - preprocessed_inputs: A [batch, height_out, width_out, channels] float32 - tensor representing a batch of images. - - """ - return (2.0 / 255.0) * resized_inputs - 1.0 - - def _extract_proposal_features(self, preprocessed_inputs, scope): - """Extracts first stage RPN features. - - Extracts features using the first half of the Inception Resnet v2 network. 
- We construct the network in `align_feature_maps=True` mode, which means - that all VALID paddings in the network are changed to SAME padding so that - the feature maps are aligned. - - Args: - preprocessed_inputs: A [batch, height, width, channels] float32 tensor - representing a batch of images. - scope: A scope name. - - Returns: - rpn_feature_map: A tensor with shape [batch, height, width, depth] - Raises: - InvalidArgumentError: If the spatial size of `preprocessed_inputs` - (height or width) is less than 33. - ValueError: If the created network is missing the required activation. - """ - if len(preprocessed_inputs.get_shape().as_list()) != 4: - raise ValueError('`preprocessed_inputs` must be 4 dimensional, got a ' - 'tensor of shape %s' % preprocessed_inputs.get_shape()) - - with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope( - weight_decay=self._weight_decay)): - # Forces is_training to False to disable batch norm update. - with slim.arg_scope([slim.batch_norm], - is_training=self._train_batch_norm): - with tf.variable_scope('InceptionResnetV2', - reuse=self._reuse_weights) as scope: - rpn_feature_map, _ = ( - inception_resnet_v2.inception_resnet_v2_base( - preprocessed_inputs, final_endpoint='PreAuxLogits', - scope=scope, output_stride=self._first_stage_features_stride, - align_feature_maps=True)) - return rpn_feature_map - - def _extract_box_classifier_features(self, proposal_feature_maps, scope): - """Extracts second stage box classifier features. - - This function reconstructs the "second half" of the Inception ResNet v2 - network after the part defined in `_extract_proposal_features`. - - Args: - proposal_feature_maps: A 4-D float tensor with shape - [batch_size * self.max_num_proposals, crop_height, crop_width, depth] - representing the feature map cropped to each proposal. - scope: A scope name. 
- - Returns: - proposal_classifier_features: A 4-D float tensor with shape - [batch_size * self.max_num_proposals, height, width, depth] - representing box classifier features for each proposal. - """ - with tf.variable_scope('InceptionResnetV2', reuse=self._reuse_weights): - with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope( - weight_decay=self._weight_decay)): - # Forces is_training to False to disable batch norm update. - with slim.arg_scope([slim.batch_norm], - is_training=self._train_batch_norm): - with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], - stride=1, padding='SAME'): - with tf.variable_scope('Mixed_7a'): - with tf.variable_scope('Branch_0'): - tower_conv = slim.conv2d(proposal_feature_maps, - 256, 1, scope='Conv2d_0a_1x1') - tower_conv_1 = slim.conv2d( - tower_conv, 384, 3, stride=2, - padding='VALID', scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_1'): - tower_conv1 = slim.conv2d( - proposal_feature_maps, 256, 1, scope='Conv2d_0a_1x1') - tower_conv1_1 = slim.conv2d( - tower_conv1, 288, 3, stride=2, - padding='VALID', scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_2'): - tower_conv2 = slim.conv2d( - proposal_feature_maps, 256, 1, scope='Conv2d_0a_1x1') - tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3, - scope='Conv2d_0b_3x3') - tower_conv2_2 = slim.conv2d( - tower_conv2_1, 320, 3, stride=2, - padding='VALID', scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_3'): - tower_pool = slim.max_pool2d( - proposal_feature_maps, 3, stride=2, padding='VALID', - scope='MaxPool_1a_3x3') - net = tf.concat( - [tower_conv_1, tower_conv1_1, tower_conv2_2, tower_pool], 3) - net = slim.repeat(net, 9, inception_resnet_v2.block8, scale=0.20) - net = inception_resnet_v2.block8(net, activation_fn=None) - proposal_classifier_features = slim.conv2d( - net, 1536, 1, scope='Conv2d_7b_1x1') - return proposal_classifier_features - - def restore_from_classification_checkpoint_fn( - self, - 
first_stage_feature_extractor_scope, - second_stage_feature_extractor_scope): - """Returns a map of variables to load from a foreign checkpoint. - - Note that this overrides the default implementation in - faster_rcnn_meta_arch.FasterRCNNFeatureExtractor which does not work for - InceptionResnetV2 checkpoints. - - TODO: revisit whether it's possible to force the - `Repeat` namescope as created in `_extract_box_classifier_features` to - start counting at 2 (e.g. `Repeat_2`) so that the default restore_fn can - be used. - - Args: - first_stage_feature_extractor_scope: A scope name for the first stage - feature extractor. - second_stage_feature_extractor_scope: A scope name for the second stage - feature extractor. - - Returns: - A dict mapping variable names (to load from a checkpoint) to variables in - the model graph. - """ - - variables_to_restore = {} - for variable in tf.global_variables(): - if variable.op.name.startswith( - first_stage_feature_extractor_scope): - var_name = variable.op.name.replace( - first_stage_feature_extractor_scope + '/', '') - variables_to_restore[var_name] = variable - if variable.op.name.startswith( - second_stage_feature_extractor_scope): - var_name = variable.op.name.replace( - second_stage_feature_extractor_scope - + '/InceptionResnetV2/Repeat', 'InceptionResnetV2/Repeat_2') - var_name = var_name.replace( - second_stage_feature_extractor_scope + '/', '') - variables_to_restore[var_name] = variable - return variables_to_restore - diff --git a/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor_test.py b/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor_test.py deleted file mode 100644 index 600efe65..00000000 --- a/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor_test.py +++ /dev/null @@ -1,109 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for models.faster_rcnn_inception_resnet_v2_feature_extractor.""" - -import tensorflow as tf - -from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res - - -class FasterRcnnInceptionResnetV2FeatureExtractorTest(tf.test.TestCase): - - def _build_feature_extractor(self, first_stage_features_stride): - return frcnn_inc_res.FasterRCNNInceptionResnetV2FeatureExtractor( - is_training=False, - first_stage_features_stride=first_stage_features_stride, - batch_norm_trainable=False, - reuse_weights=None, - weight_decay=0.0) - - def test_extract_proposal_features_returns_expected_size(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.random_uniform( - [1, 299, 299, 3], maxval=255, dtype=tf.float32) - rpn_feature_map = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [1, 19, 19, 1088]) - - def test_extract_proposal_features_stride_eight(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=8) - preprocessed_inputs = 
tf.random_uniform( - [1, 224, 224, 3], maxval=255, dtype=tf.float32) - rpn_feature_map = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [1, 28, 28, 1088]) - - def test_extract_proposal_features_half_size_input(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.random_uniform( - [1, 112, 112, 3], maxval=255, dtype=tf.float32) - rpn_feature_map = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [1, 7, 7, 1088]) - - def test_extract_proposal_features_dies_on_invalid_stride(self): - with self.assertRaises(ValueError): - self._build_feature_extractor(first_stage_features_stride=99) - - def test_extract_proposal_features_dies_with_incorrect_rank_inputs(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.random_uniform( - [224, 224, 3], maxval=255, dtype=tf.float32) - with self.assertRaises(ValueError): - feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - - def test_extract_box_classifier_features_returns_expected_size(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - proposal_feature_maps = tf.random_uniform( - [2, 17, 17, 1088], maxval=255, dtype=tf.float32) - proposal_classifier_features = ( - feature_extractor.extract_box_classifier_features( - proposal_feature_maps, scope='TestScope')) - features_shape 
= tf.shape(proposal_classifier_features) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [2, 8, 8, 1536]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/models/faster_rcnn_inception_v2_feature_extractor.py b/object_detection/models/faster_rcnn_inception_v2_feature_extractor.py deleted file mode 100644 index ff00a2b3..00000000 --- a/object_detection/models/faster_rcnn_inception_v2_feature_extractor.py +++ /dev/null @@ -1,251 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Inception V2 Faster R-CNN implementation. 
- -See "Rethinking the Inception Architecture for Computer Vision" -https://arxiv.org/abs/1512.00567 -""" -import tensorflow as tf - -from object_detection.meta_architectures import faster_rcnn_meta_arch -from nets import inception_v2 - -slim = tf.contrib.slim - - -def _batch_norm_arg_scope(list_ops, - use_batch_norm=True, - batch_norm_decay=0.9997, - batch_norm_epsilon=0.001, - batch_norm_scale=False, - train_batch_norm=False): - """Slim arg scope for InceptionV2 batch norm.""" - if use_batch_norm: - batch_norm_params = { - 'is_training': train_batch_norm, - 'scale': batch_norm_scale, - 'decay': batch_norm_decay, - 'epsilon': batch_norm_epsilon - } - normalizer_fn = slim.batch_norm - else: - normalizer_fn = None - batch_norm_params = None - - return slim.arg_scope(list_ops, - normalizer_fn=normalizer_fn, - normalizer_params=batch_norm_params) - - -class FasterRCNNInceptionV2FeatureExtractor( - faster_rcnn_meta_arch.FasterRCNNFeatureExtractor): - """Faster R-CNN Inception V2 feature extractor implementation.""" - - def __init__(self, - is_training, - first_stage_features_stride, - batch_norm_trainable=False, - reuse_weights=None, - weight_decay=0.0, - depth_multiplier=1.0, - min_depth=16): - """Constructor. - - Args: - is_training: See base class. - first_stage_features_stride: See base class. - batch_norm_trainable: See base class. - reuse_weights: See base class. - weight_decay: See base class. - depth_multiplier: float depth multiplier for feature extractor. - min_depth: minimum feature extractor depth. - - Raises: - ValueError: If `first_stage_features_stride` is not 8 or 16. 
- """ - if first_stage_features_stride != 8 and first_stage_features_stride != 16: - raise ValueError('`first_stage_features_stride` must be 8 or 16.') - self._depth_multiplier = depth_multiplier - self._min_depth = min_depth - super(FasterRCNNInceptionV2FeatureExtractor, self).__init__( - is_training, first_stage_features_stride, batch_norm_trainable, - reuse_weights, weight_decay) - - def preprocess(self, resized_inputs): - """Faster R-CNN Inception V2 preprocessing. - - Maps pixel values to the range [-1, 1]. - - Args: - resized_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - - Returns: - preprocessed_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - """ - return (2.0 / 255.0) * resized_inputs - 1.0 - - def _extract_proposal_features(self, preprocessed_inputs, scope): - """Extracts first stage RPN features. - - Args: - preprocessed_inputs: A [batch, height, width, channels] float32 tensor - representing a batch of images. - scope: A scope name. - - Returns: - rpn_feature_map: A tensor with shape [batch, height, width, depth] - Raises: - InvalidArgumentError: If the spatial size of `preprocessed_inputs` - (height or width) is less than 33. - ValueError: If the created network is missing the required activation. 
- """ - - preprocessed_inputs.get_shape().assert_has_rank(4) - shape_assert = tf.Assert( - tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33), - tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)), - ['image size must at least be 33 in both height and width.']) - - with tf.control_dependencies([shape_assert]): - with tf.variable_scope('InceptionV2', - reuse=self._reuse_weights) as scope: - with _batch_norm_arg_scope([slim.conv2d, slim.separable_conv2d], - batch_norm_scale=True, - train_batch_norm=self._train_batch_norm): - _, activations = inception_v2.inception_v2_base( - preprocessed_inputs, - final_endpoint='Mixed_4e', - min_depth=self._min_depth, - depth_multiplier=self._depth_multiplier, - scope=scope) - - return activations['Mixed_4e'] - - def _extract_box_classifier_features(self, proposal_feature_maps, scope): - """Extracts second stage box classifier features. - - Args: - proposal_feature_maps: A 4-D float tensor with shape - [batch_size * self.max_num_proposals, crop_height, crop_width, depth] - representing the feature map cropped to each proposal. - scope: A scope name (unused). - - Returns: - proposal_classifier_features: A 4-D float tensor with shape - [batch_size * self.max_num_proposals, height, width, depth] - representing box classifier features for each proposal. 
- """ - net = proposal_feature_maps - - depth = lambda d: max(int(d * self._depth_multiplier), self._min_depth) - trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev) - - data_format = 'NHWC' - concat_dim = 3 if data_format == 'NHWC' else 1 - - with tf.variable_scope('InceptionV2', reuse=self._reuse_weights): - with slim.arg_scope( - [slim.conv2d, slim.max_pool2d, slim.avg_pool2d], - stride=1, - padding='SAME', - data_format=data_format): - with _batch_norm_arg_scope([slim.conv2d, slim.separable_conv2d], - batch_norm_scale=True, - train_batch_norm=self._train_batch_norm): - - with tf.variable_scope('Mixed_5a'): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d( - net, depth(128), [1, 1], - weights_initializer=trunc_normal(0.09), - scope='Conv2d_0a_1x1') - branch_0 = slim.conv2d(branch_0, depth(192), [3, 3], stride=2, - scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d( - net, depth(192), [1, 1], - weights_initializer=trunc_normal(0.09), - scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, depth(256), [3, 3], - scope='Conv2d_0b_3x3') - branch_1 = slim.conv2d(branch_1, depth(256), [3, 3], stride=2, - scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_2'): - branch_2 = slim.max_pool2d(net, [3, 3], stride=2, - scope='MaxPool_1a_3x3') - net = tf.concat([branch_0, branch_1, branch_2], concat_dim) - - with tf.variable_scope('Mixed_5b'): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, depth(352), [1, 1], - scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d( - net, depth(192), [1, 1], - weights_initializer=trunc_normal(0.09), - scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, depth(320), [3, 3], - scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d( - net, depth(160), [1, 1], - weights_initializer=trunc_normal(0.09), - scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, depth(224), [3, 3], 
- scope='Conv2d_0b_3x3') - branch_2 = slim.conv2d(branch_2, depth(224), [3, 3], - scope='Conv2d_0c_3x3') - with tf.variable_scope('Branch_3'): - branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') - branch_3 = slim.conv2d( - branch_3, depth(128), [1, 1], - weights_initializer=trunc_normal(0.1), - scope='Conv2d_0b_1x1') - net = tf.concat([branch_0, branch_1, branch_2, branch_3], - concat_dim) - - with tf.variable_scope('Mixed_5c'): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, depth(352), [1, 1], - scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d( - net, depth(192), [1, 1], - weights_initializer=trunc_normal(0.09), - scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, depth(320), [3, 3], - scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d( - net, depth(192), [1, 1], - weights_initializer=trunc_normal(0.09), - scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, depth(224), [3, 3], - scope='Conv2d_0b_3x3') - branch_2 = slim.conv2d(branch_2, depth(224), [3, 3], - scope='Conv2d_0c_3x3') - with tf.variable_scope('Branch_3'): - branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3') - branch_3 = slim.conv2d( - branch_3, depth(128), [1, 1], - weights_initializer=trunc_normal(0.1), - scope='Conv2d_0b_1x1') - proposal_classifier_features = tf.concat( - [branch_0, branch_1, branch_2, branch_3], concat_dim) - - return proposal_classifier_features diff --git a/object_detection/models/faster_rcnn_inception_v2_feature_extractor_test.py b/object_detection/models/faster_rcnn_inception_v2_feature_extractor_test.py deleted file mode 100644 index 06b7478f..00000000 --- a/object_detection/models/faster_rcnn_inception_v2_feature_extractor_test.py +++ /dev/null @@ -1,126 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for faster_rcnn_inception_v2_feature_extractor.""" - -import numpy as np -import tensorflow as tf - -from object_detection.models import faster_rcnn_inception_v2_feature_extractor as faster_rcnn_inception_v2 - - -class FasterRcnnInceptionV2FeatureExtractorTest(tf.test.TestCase): - - def _build_feature_extractor(self, first_stage_features_stride): - return faster_rcnn_inception_v2.FasterRCNNInceptionV2FeatureExtractor( - is_training=False, - first_stage_features_stride=first_stage_features_stride, - batch_norm_trainable=False, - reuse_weights=None, - weight_decay=0.0) - - def test_extract_proposal_features_returns_expected_size(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.random_uniform( - [4, 224, 224, 3], maxval=255, dtype=tf.float32) - rpn_feature_map = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [4, 14, 14, 576]) - - def test_extract_proposal_features_stride_eight(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=8) - 
preprocessed_inputs = tf.random_uniform( - [4, 224, 224, 3], maxval=255, dtype=tf.float32) - rpn_feature_map = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [4, 14, 14, 576]) - - def test_extract_proposal_features_half_size_input(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.random_uniform( - [1, 112, 112, 3], maxval=255, dtype=tf.float32) - rpn_feature_map = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [1, 7, 7, 576]) - - def test_extract_proposal_features_dies_on_invalid_stride(self): - with self.assertRaises(ValueError): - self._build_feature_extractor(first_stage_features_stride=99) - - def test_extract_proposal_features_dies_on_very_small_images(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3)) - rpn_feature_map = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - with self.assertRaises(tf.errors.InvalidArgumentError): - sess.run( - features_shape, - feed_dict={preprocessed_inputs: np.random.rand(4, 32, 32, 3)}) - - def test_extract_proposal_features_dies_with_incorrect_rank_inputs(self): - feature_extractor = self._build_feature_extractor( 
- first_stage_features_stride=16) - preprocessed_inputs = tf.random_uniform( - [224, 224, 3], maxval=255, dtype=tf.float32) - with self.assertRaises(ValueError): - feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - - def test_extract_box_classifier_features_returns_expected_size(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - proposal_feature_maps = tf.random_uniform( - [3, 14, 14, 576], maxval=255, dtype=tf.float32) - proposal_classifier_features = ( - feature_extractor.extract_box_classifier_features( - proposal_feature_maps, scope='TestScope')) - features_shape = tf.shape(proposal_classifier_features) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [3, 7, 7, 1024]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/models/faster_rcnn_nas_feature_extractor.py b/object_detection/models/faster_rcnn_nas_feature_extractor.py deleted file mode 100644 index 5abedebd..00000000 --- a/object_detection/models/faster_rcnn_nas_feature_extractor.py +++ /dev/null @@ -1,316 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""NASNet Faster R-CNN implementation. 
- -Learning Transferable Architectures for Scalable Image Recognition -Barret Zoph, Vijay Vasudevan, Jonathon Shlens, Quoc V. Le -https://arxiv.org/abs/1707.07012 -""" - -import tensorflow as tf - -from object_detection.meta_architectures import faster_rcnn_meta_arch -from nets.nasnet import nasnet -from nets.nasnet import nasnet_utils - -arg_scope = tf.contrib.framework.arg_scope -slim = tf.contrib.slim - - -def nasnet_large_arg_scope_for_detection(is_batch_norm_training=False): - """Defines the default arg scope for the NASNet-A Large for object detection. - - This provides a small edit to switch batch norm training on and off. - - Args: - is_batch_norm_training: Boolean indicating whether to train with batch norm. - - Returns: - An `arg_scope` to use for the NASNet Large Model. - """ - imagenet_scope = nasnet.nasnet_large_arg_scope() - with arg_scope(imagenet_scope): - with arg_scope([slim.batch_norm], is_training=is_batch_norm_training) as sc: - return sc - - -# Note: This is largely a copy of _build_nasnet_base inside nasnet.py but -# with special edits to remove instantiation of the stem and the special -# ability to receive as input a pair of hidden states. -def _build_nasnet_base(hidden_previous, - hidden, - normal_cell, - reduction_cell, - hparams, - true_cell_num, - start_cell_num): - """Constructs a NASNet image model.""" - - # Find where to place the reduction cells or stride normal cells - reduction_indices = nasnet_utils.calc_reduction_layers( - hparams.num_cells, hparams.num_reduction_layers) - - # Note: The None is prepended to match the behavior of _imagenet_stem() - cell_outputs = [None, hidden_previous, hidden] - net = hidden - - # NOTE: In the nasnet.py code, filter_scaling starts at 1.0. We instead - # start at 2.0 because 1 reduction cell has been created which would - # update the filter_scaling to 2.0. 
- filter_scaling = 2.0 - - # Run the cells - for cell_num in range(start_cell_num, hparams.num_cells): - stride = 1 - if hparams.skip_reduction_layer_input: - prev_layer = cell_outputs[-2] - if cell_num in reduction_indices: - filter_scaling *= hparams.filter_scaling_rate - net = reduction_cell( - net, - scope='reduction_cell_{}'.format(reduction_indices.index(cell_num)), - filter_scaling=filter_scaling, - stride=2, - prev_layer=cell_outputs[-2], - cell_num=true_cell_num) - true_cell_num += 1 - cell_outputs.append(net) - if not hparams.skip_reduction_layer_input: - prev_layer = cell_outputs[-2] - net = normal_cell( - net, - scope='cell_{}'.format(cell_num), - filter_scaling=filter_scaling, - stride=stride, - prev_layer=prev_layer, - cell_num=true_cell_num) - true_cell_num += 1 - cell_outputs.append(net) - - # Final nonlinearity. - # Note that we have dropped the final pooling, dropout and softmax layers - # from the default nasnet version. - with tf.variable_scope('final_layer'): - net = tf.nn.relu(net) - return net - - -# TODO: Only fixed_shape_resizer is currently supported for NASNet -# featurization. The reason for this is that nasnet.py only supports -# inputs with fully known shapes. We need to update nasnet.py to handle -# shapes not known at compile time. -class FasterRCNNNASFeatureExtractor( - faster_rcnn_meta_arch.FasterRCNNFeatureExtractor): - """Faster R-CNN with NASNet-A feature extractor implementation.""" - - def __init__(self, - is_training, - first_stage_features_stride, - batch_norm_trainable=False, - reuse_weights=None, - weight_decay=0.0): - """Constructor. - - Args: - is_training: See base class. - first_stage_features_stride: See base class. - batch_norm_trainable: See base class. - reuse_weights: See base class. - weight_decay: See base class. - - Raises: - ValueError: If `first_stage_features_stride` is not 16. 
- """ - if first_stage_features_stride != 16: - raise ValueError('`first_stage_features_stride` must be 16.') - super(FasterRCNNNASFeatureExtractor, self).__init__( - is_training, first_stage_features_stride, batch_norm_trainable, - reuse_weights, weight_decay) - - def preprocess(self, resized_inputs): - """Faster R-CNN with NAS preprocessing. - - Maps pixel values to the range [-1, 1]. - - Args: - resized_inputs: A [batch, height_in, width_in, channels] float32 tensor - representing a batch of images with values between 0 and 255.0. - - Returns: - preprocessed_inputs: A [batch, height_out, width_out, channels] float32 - tensor representing a batch of images. - - """ - return (2.0 / 255.0) * resized_inputs - 1.0 - - def _extract_proposal_features(self, preprocessed_inputs, scope): - """Extracts first stage RPN features. - - Extracts features using the first half of the NASNet network. - We construct the network in `align_feature_maps=True` mode, which means - that all VALID paddings in the network are changed to SAME padding so that - the feature maps are aligned. - - Args: - preprocessed_inputs: A [batch, height, width, channels] float32 tensor - representing a batch of images. - scope: A scope name. - - Returns: - rpn_feature_map: A tensor with shape [batch, height, width, depth] - Raises: - ValueError: If the created network is missing the required activation. - """ - del scope - - if len(preprocessed_inputs.get_shape().as_list()) != 4: - raise ValueError('`preprocessed_inputs` must be 4 dimensional, got a ' - 'tensor of shape %s' % preprocessed_inputs.get_shape()) - - with slim.arg_scope(nasnet_large_arg_scope_for_detection( - is_batch_norm_training=self._train_batch_norm)): - _, end_points = nasnet.build_nasnet_large( - preprocessed_inputs, num_classes=None, - is_training=self._is_training, - final_endpoint='Cell_11') - - # Note that both 'Cell_10' and 'Cell_11' have equal depth = 2016. 
- rpn_feature_map = tf.concat([end_points['Cell_10'], - end_points['Cell_11']], 3) - - # nasnet.py does not maintain the batch size in the first dimension. - # This work around permits us retaining the batch for below. - batch = preprocessed_inputs.get_shape().as_list()[0] - shape_without_batch = rpn_feature_map.get_shape().as_list()[1:] - rpn_feature_map_shape = [batch] + shape_without_batch - rpn_feature_map.set_shape(rpn_feature_map_shape) - - return rpn_feature_map - - def _extract_box_classifier_features(self, proposal_feature_maps, scope): - """Extracts second stage box classifier features. - - This function reconstructs the "second half" of the NASNet-A - network after the part defined in `_extract_proposal_features`. - - Args: - proposal_feature_maps: A 4-D float tensor with shape - [batch_size * self.max_num_proposals, crop_height, crop_width, depth] - representing the feature map cropped to each proposal. - scope: A scope name. - - Returns: - proposal_classifier_features: A 4-D float tensor with shape - [batch_size * self.max_num_proposals, height, width, depth] - representing box classifier features for each proposal. - """ - del scope - - # Note that we always feed into 2 layers of equal depth - # where the first N channels corresponds to previous hidden layer - # and the second N channels correspond to the final hidden layer. - hidden_previous, hidden = tf.split(proposal_feature_maps, 2, axis=3) - - # Note that what follows is largely a copy of build_nasnet_large() within - # nasnet.py. We are copying to minimize code pollution in slim. - - # pylint: disable=protected-access - hparams = nasnet._large_imagenet_config(is_training=self._is_training) - # pylint: enable=protected-access - - # Calculate the total number of cells in the network - # -- Add 2 for the reduction cells. - total_num_cells = hparams.num_cells + 2 - # -- And add 2 for the stem cells for ImageNet training. 
- total_num_cells += 2 - - normal_cell = nasnet_utils.NasNetANormalCell( - hparams.num_conv_filters, hparams.drop_path_keep_prob, - total_num_cells, hparams.total_training_steps) - reduction_cell = nasnet_utils.NasNetAReductionCell( - hparams.num_conv_filters, hparams.drop_path_keep_prob, - total_num_cells, hparams.total_training_steps) - with arg_scope([slim.dropout, nasnet_utils.drop_path], - is_training=self._is_training): - with arg_scope([slim.batch_norm], is_training=self._train_batch_norm): - with arg_scope([slim.avg_pool2d, - slim.max_pool2d, - slim.conv2d, - slim.batch_norm, - slim.separable_conv2d, - nasnet_utils.factorized_reduction, - nasnet_utils.global_avg_pool, - nasnet_utils.get_channel_index, - nasnet_utils.get_channel_dim], - data_format=hparams.data_format): - - # This corresponds to the cell number just past 'Cell_11' used by - # by _extract_proposal_features(). - start_cell_num = 12 - # Note that this number equals: - # start_cell_num + 2 stem cells + 1 reduction cell - true_cell_num = 15 - - with slim.arg_scope(nasnet.nasnet_large_arg_scope()): - net = _build_nasnet_base(hidden_previous, - hidden, - normal_cell=normal_cell, - reduction_cell=reduction_cell, - hparams=hparams, - true_cell_num=true_cell_num, - start_cell_num=start_cell_num) - - proposal_classifier_features = net - return proposal_classifier_features - - def restore_from_classification_checkpoint_fn( - self, - first_stage_feature_extractor_scope, - second_stage_feature_extractor_scope): - """Returns a map of variables to load from a foreign checkpoint. - - Note that this overrides the default implementation in - faster_rcnn_meta_arch.FasterRCNNFeatureExtractor which does not work for - NASNet-A checkpoints. - - Args: - first_stage_feature_extractor_scope: A scope name for the first stage - feature extractor. - second_stage_feature_extractor_scope: A scope name for the second stage - feature extractor. 
- - Returns: - A dict mapping variable names (to load from a checkpoint) to variables in - the model graph. - """ - # Note that the NAS checkpoint only contains the moving average version of - # the Variables so we need to generate an appropriate dictionary mapping. - variables_to_restore = {} - for variable in tf.global_variables(): - if variable.op.name.startswith( - first_stage_feature_extractor_scope): - var_name = variable.op.name.replace( - first_stage_feature_extractor_scope + '/', '') - var_name += '/ExponentialMovingAverage' - variables_to_restore[var_name] = variable - if variable.op.name.startswith( - second_stage_feature_extractor_scope): - var_name = variable.op.name.replace( - second_stage_feature_extractor_scope + '/', '') - var_name += '/ExponentialMovingAverage' - variables_to_restore[var_name] = variable - return variables_to_restore - diff --git a/object_detection/models/faster_rcnn_nas_feature_extractor_test.py b/object_detection/models/faster_rcnn_nas_feature_extractor_test.py deleted file mode 100644 index 84268d55..00000000 --- a/object_detection/models/faster_rcnn_nas_feature_extractor_test.py +++ /dev/null @@ -1,109 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for models.faster_rcnn_nas_feature_extractor.""" - -import tensorflow as tf - -from object_detection.models import faster_rcnn_nas_feature_extractor as frcnn_nas - - -class FasterRcnnNASFeatureExtractorTest(tf.test.TestCase): - - def _build_feature_extractor(self, first_stage_features_stride): - return frcnn_nas.FasterRCNNNASFeatureExtractor( - is_training=False, - first_stage_features_stride=first_stage_features_stride, - batch_norm_trainable=False, - reuse_weights=None, - weight_decay=0.0) - - def test_extract_proposal_features_returns_expected_size(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.random_uniform( - [1, 299, 299, 3], maxval=255, dtype=tf.float32) - rpn_feature_map = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [1, 19, 19, 4032]) - - def test_extract_proposal_features_input_size_224(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.random_uniform( - [1, 224, 224, 3], maxval=255, dtype=tf.float32) - rpn_feature_map = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [1, 14, 14, 4032]) - - def test_extract_proposal_features_input_size_112(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = 
tf.random_uniform( - [1, 112, 112, 3], maxval=255, dtype=tf.float32) - rpn_feature_map = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [1, 7, 7, 4032]) - - def test_extract_proposal_features_dies_on_invalid_stride(self): - with self.assertRaises(ValueError): - self._build_feature_extractor(first_stage_features_stride=99) - - def test_extract_proposal_features_dies_with_incorrect_rank_inputs(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.random_uniform( - [224, 224, 3], maxval=255, dtype=tf.float32) - with self.assertRaises(ValueError): - feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - - def test_extract_box_classifier_features_returns_expected_size(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - proposal_feature_maps = tf.random_uniform( - [2, 17, 17, 1088], maxval=255, dtype=tf.float32) - proposal_classifier_features = ( - feature_extractor.extract_box_classifier_features( - proposal_feature_maps, scope='TestScope')) - features_shape = tf.shape(proposal_classifier_features) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [2, 9, 9, 4032]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/models/faster_rcnn_resnet_v1_feature_extractor.py b/object_detection/models/faster_rcnn_resnet_v1_feature_extractor.py deleted file mode 100644 index 8575bf33..00000000 --- a/object_detection/models/faster_rcnn_resnet_v1_feature_extractor.py +++ /dev/null @@ -1,249 +0,0 @@ -# 
Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Resnet V1 Faster R-CNN implementation. - -See "Deep Residual Learning for Image Recognition" by He et al., 2015. -https://arxiv.org/abs/1512.03385 - -Note: this implementation assumes that the classification checkpoint used -to finetune this model is trained using the same configuration as that of -the MSRA provided checkpoints -(see https://github.com/KaimingHe/deep-residual-networks), e.g., with -same preprocessing, batch norm scaling, etc. -""" -import tensorflow as tf - -from object_detection.meta_architectures import faster_rcnn_meta_arch -from nets import resnet_utils -from nets import resnet_v1 - -slim = tf.contrib.slim - - -class FasterRCNNResnetV1FeatureExtractor( - faster_rcnn_meta_arch.FasterRCNNFeatureExtractor): - """Faster R-CNN Resnet V1 feature extractor implementation.""" - - def __init__(self, - architecture, - resnet_model, - is_training, - first_stage_features_stride, - batch_norm_trainable=False, - reuse_weights=None, - weight_decay=0.0): - """Constructor. - - Args: - architecture: Architecture name of the Resnet V1 model. - resnet_model: Definition of the Resnet V1 model. - is_training: See base class. - first_stage_features_stride: See base class. - batch_norm_trainable: See base class. - reuse_weights: See base class. - weight_decay: See base class. 
- - Raises: - ValueError: If `first_stage_features_stride` is not 8 or 16. - """ - if first_stage_features_stride != 8 and first_stage_features_stride != 16: - raise ValueError('`first_stage_features_stride` must be 8 or 16.') - self._architecture = architecture - self._resnet_model = resnet_model - super(FasterRCNNResnetV1FeatureExtractor, self).__init__( - is_training, first_stage_features_stride, batch_norm_trainable, - reuse_weights, weight_decay) - - def preprocess(self, resized_inputs): - """Faster R-CNN Resnet V1 preprocessing. - - VGG style channel mean subtraction as described here: - https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-md - - Args: - resized_inputs: A [batch, height_in, width_in, channels] float32 tensor - representing a batch of images with values between 0 and 255.0. - - Returns: - preprocessed_inputs: A [batch, height_out, width_out, channels] float32 - tensor representing a batch of images. - - """ - channel_means = [123.68, 116.779, 103.939] - return resized_inputs - [[channel_means]] - - def _extract_proposal_features(self, preprocessed_inputs, scope): - """Extracts first stage RPN features. - - Args: - preprocessed_inputs: A [batch, height, width, channels] float32 tensor - representing a batch of images. - scope: A scope name. - - Returns: - rpn_feature_map: A tensor with shape [batch, height, width, depth] - Raises: - InvalidArgumentError: If the spatial size of `preprocessed_inputs` - (height or width) is less than 33. - ValueError: If the created network is missing the required activation. 
- """ - if len(preprocessed_inputs.get_shape().as_list()) != 4: - raise ValueError('`preprocessed_inputs` must be 4 dimensional, got a ' - 'tensor of shape %s' % preprocessed_inputs.get_shape()) - shape_assert = tf.Assert( - tf.logical_and( - tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33), - tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)), - ['image size must at least be 33 in both height and width.']) - - with tf.control_dependencies([shape_assert]): - # Disables batchnorm for fine-tuning with smaller batch sizes. - # TODO: Figure out if it is needed when image batch size is bigger. - with slim.arg_scope( - resnet_utils.resnet_arg_scope( - batch_norm_epsilon=1e-5, - batch_norm_scale=True, - weight_decay=self._weight_decay)): - with tf.variable_scope( - self._architecture, reuse=self._reuse_weights) as var_scope: - _, activations = self._resnet_model( - preprocessed_inputs, - num_classes=None, - is_training=self._train_batch_norm, - global_pool=False, - output_stride=self._first_stage_features_stride, - spatial_squeeze=False, - scope=var_scope) - - handle = scope + '/%s/block3' % self._architecture - return activations[handle] - - def _extract_box_classifier_features(self, proposal_feature_maps, scope): - """Extracts second stage box classifier features. - - Args: - proposal_feature_maps: A 4-D float tensor with shape - [batch_size * self.max_num_proposals, crop_height, crop_width, depth] - representing the feature map cropped to each proposal. - scope: A scope name (unused). - - Returns: - proposal_classifier_features: A 4-D float tensor with shape - [batch_size * self.max_num_proposals, height, width, depth] - representing box classifier features for each proposal. 
- """ - with tf.variable_scope(self._architecture, reuse=self._reuse_weights): - with slim.arg_scope( - resnet_utils.resnet_arg_scope( - batch_norm_epsilon=1e-5, - batch_norm_scale=True, - weight_decay=self._weight_decay)): - with slim.arg_scope([slim.batch_norm], - is_training=self._train_batch_norm): - blocks = [ - resnet_utils.Block('block4', resnet_v1.bottleneck, [{ - 'depth': 2048, - 'depth_bottleneck': 512, - 'stride': 1 - }] * 3) - ] - proposal_classifier_features = resnet_utils.stack_blocks_dense( - proposal_feature_maps, blocks) - return proposal_classifier_features - - -class FasterRCNNResnet50FeatureExtractor(FasterRCNNResnetV1FeatureExtractor): - """Faster R-CNN Resnet 50 feature extractor implementation.""" - - def __init__(self, - is_training, - first_stage_features_stride, - batch_norm_trainable=False, - reuse_weights=None, - weight_decay=0.0): - """Constructor. - - Args: - is_training: See base class. - first_stage_features_stride: See base class. - batch_norm_trainable: See base class. - reuse_weights: See base class. - weight_decay: See base class. - - Raises: - ValueError: If `first_stage_features_stride` is not 8 or 16, - or if `architecture` is not supported. - """ - super(FasterRCNNResnet50FeatureExtractor, self).__init__( - 'resnet_v1_50', resnet_v1.resnet_v1_50, is_training, - first_stage_features_stride, batch_norm_trainable, - reuse_weights, weight_decay) - - -class FasterRCNNResnet101FeatureExtractor(FasterRCNNResnetV1FeatureExtractor): - """Faster R-CNN Resnet 101 feature extractor implementation.""" - - def __init__(self, - is_training, - first_stage_features_stride, - batch_norm_trainable=False, - reuse_weights=None, - weight_decay=0.0): - """Constructor. - - Args: - is_training: See base class. - first_stage_features_stride: See base class. - batch_norm_trainable: See base class. - reuse_weights: See base class. - weight_decay: See base class. 
- - Raises: - ValueError: If `first_stage_features_stride` is not 8 or 16, - or if `architecture` is not supported. - """ - super(FasterRCNNResnet101FeatureExtractor, self).__init__( - 'resnet_v1_101', resnet_v1.resnet_v1_101, is_training, - first_stage_features_stride, batch_norm_trainable, - reuse_weights, weight_decay) - - -class FasterRCNNResnet152FeatureExtractor(FasterRCNNResnetV1FeatureExtractor): - """Faster R-CNN Resnet 152 feature extractor implementation.""" - - def __init__(self, - is_training, - first_stage_features_stride, - batch_norm_trainable=False, - reuse_weights=None, - weight_decay=0.0): - """Constructor. - - Args: - is_training: See base class. - first_stage_features_stride: See base class. - batch_norm_trainable: See base class. - reuse_weights: See base class. - weight_decay: See base class. - - Raises: - ValueError: If `first_stage_features_stride` is not 8 or 16, - or if `architecture` is not supported. - """ - super(FasterRCNNResnet152FeatureExtractor, self).__init__( - 'resnet_v1_152', resnet_v1.resnet_v1_152, is_training, - first_stage_features_stride, batch_norm_trainable, - reuse_weights, weight_decay) diff --git a/object_detection/models/faster_rcnn_resnet_v1_feature_extractor_test.py b/object_detection/models/faster_rcnn_resnet_v1_feature_extractor_test.py deleted file mode 100644 index 8f77ee26..00000000 --- a/object_detection/models/faster_rcnn_resnet_v1_feature_extractor_test.py +++ /dev/null @@ -1,137 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.models.faster_rcnn_resnet_v1_feature_extractor.""" - -import numpy as np -import tensorflow as tf - -from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as faster_rcnn_resnet_v1 - - -class FasterRcnnResnetV1FeatureExtractorTest(tf.test.TestCase): - - def _build_feature_extractor(self, - first_stage_features_stride, - architecture='resnet_v1_101'): - feature_extractor_map = { - 'resnet_v1_50': - faster_rcnn_resnet_v1.FasterRCNNResnet50FeatureExtractor, - 'resnet_v1_101': - faster_rcnn_resnet_v1.FasterRCNNResnet101FeatureExtractor, - 'resnet_v1_152': - faster_rcnn_resnet_v1.FasterRCNNResnet152FeatureExtractor - } - return feature_extractor_map[architecture]( - is_training=False, - first_stage_features_stride=first_stage_features_stride, - batch_norm_trainable=False, - reuse_weights=None, - weight_decay=0.0) - - def test_extract_proposal_features_returns_expected_size(self): - for architecture in ['resnet_v1_50', 'resnet_v1_101', 'resnet_v1_152']: - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16, architecture=architecture) - preprocessed_inputs = tf.random_uniform( - [4, 224, 224, 3], maxval=255, dtype=tf.float32) - rpn_feature_map = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [4, 14, 14, 1024]) - - def test_extract_proposal_features_stride_eight(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=8) - preprocessed_inputs = tf.random_uniform( - [4, 224, 224, 3], maxval=255, 
dtype=tf.float32) - rpn_feature_map = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [4, 28, 28, 1024]) - - def test_extract_proposal_features_half_size_input(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.random_uniform( - [1, 112, 112, 3], maxval=255, dtype=tf.float32) - rpn_feature_map = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [1, 7, 7, 1024]) - - def test_extract_proposal_features_dies_on_invalid_stride(self): - with self.assertRaises(ValueError): - self._build_feature_extractor(first_stage_features_stride=99) - - def test_extract_proposal_features_dies_on_very_small_images(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3)) - rpn_feature_map = feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - features_shape = tf.shape(rpn_feature_map) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - with self.assertRaises(tf.errors.InvalidArgumentError): - sess.run( - features_shape, - feed_dict={preprocessed_inputs: np.random.rand(4, 32, 32, 3)}) - - def test_extract_proposal_features_dies_with_incorrect_rank_inputs(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - preprocessed_inputs = 
tf.random_uniform( - [224, 224, 3], maxval=255, dtype=tf.float32) - with self.assertRaises(ValueError): - feature_extractor.extract_proposal_features( - preprocessed_inputs, scope='TestScope') - - def test_extract_box_classifier_features_returns_expected_size(self): - feature_extractor = self._build_feature_extractor( - first_stage_features_stride=16) - proposal_feature_maps = tf.random_uniform( - [3, 7, 7, 1024], maxval=255, dtype=tf.float32) - proposal_classifier_features = ( - feature_extractor.extract_box_classifier_features( - proposal_feature_maps, scope='TestScope')) - features_shape = tf.shape(proposal_classifier_features) - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - features_shape_out = sess.run(features_shape) - self.assertAllEqual(features_shape_out, [3, 7, 7, 2048]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/models/feature_map_generators.py b/object_detection/models/feature_map_generators.py deleted file mode 100644 index 8eb7e621..00000000 --- a/object_detection/models/feature_map_generators.py +++ /dev/null @@ -1,170 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Functions to generate a list of feature maps based on image features. 
- -Provides several feature map generators that can be used to build object -detection feature extractors. - -Object detection feature extractors usually are built by stacking two components -- A base feature extractor such as Inception V3 and a feature map generator. -Feature map generators build on the base feature extractors and produce a list -of final feature maps. -""" -import collections -import tensorflow as tf -slim = tf.contrib.slim - - -def get_depth_fn(depth_multiplier, min_depth): - """Builds a callable to compute depth (output channels) of conv filters. - - Args: - depth_multiplier: a multiplier for the nominal depth. - min_depth: a lower bound on the depth of filters. - - Returns: - A callable that takes in a nominal depth and returns the depth to use. - """ - def multiply_depth(depth): - new_depth = int(depth * depth_multiplier) - return max(new_depth, min_depth) - return multiply_depth - - -def multi_resolution_feature_maps(feature_map_layout, depth_multiplier, - min_depth, insert_1x1_conv, image_features): - """Generates multi resolution feature maps from input image features. - - Generates multi-scale feature maps for detection as in the SSD papers by - Liu et al: https://arxiv.org/pdf/1512.02325v2.pdf, See Sec 2.1. - - More specifically, it performs the following two tasks: - 1) If a layer name is provided in the configuration, returns that layer as a - feature map. - 2) If a layer name is left as an empty string, constructs a new feature map - based on the spatial shape and depth configuration. Note that the current - implementation only supports generating new layers using convolution of - stride 2 resulting in a spatial resolution reduction by a factor of 2. - By default convolution kernel size is set to 3, and it can be customized - by caller. 
- - An example of the configuration for Inception V3: - { - 'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''], - 'layer_depth': [-1, -1, -1, 512, 256, 128] - } - - Args: - feature_map_layout: Dictionary of specifications for the feature map - layouts in the following format (Inception V2/V3 respectively): - { - 'from_layer': ['Mixed_3c', 'Mixed_4c', 'Mixed_5c', '', '', ''], - 'layer_depth': [-1, -1, -1, 512, 256, 128] - } - or - { - 'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', '', ''], - 'layer_depth': [-1, -1, -1, 512, 256, 128] - } - If 'from_layer' is specified, the specified feature map is directly used - as a box predictor layer, and the layer_depth is directly infered from the - feature map (instead of using the provided 'layer_depth' parameter). In - this case, our convention is to set 'layer_depth' to -1 for clarity. - Otherwise, if 'from_layer' is an empty string, then the box predictor - layer will be built from the previous layer using convolution operations. - Note that the current implementation only supports generating new layers - using convolutions of stride 2 (resulting in a spatial resolution - reduction by a factor of 2), and will be extended to a more flexible - design. Convolution kernel size is set to 3 by default, and can be - customized by 'conv_kernel_size' parameter (similarily, 'conv_kernel_size' - should be set to -1 if 'from_layer' is specified). The created convolution - operation will be a normal 2D convolution by default, and a depthwise - convolution followed by 1x1 convolution if 'use_depthwise' is set to True. - depth_multiplier: Depth multiplier for convolutional layers. - min_depth: Minimum depth for convolutional layers. - insert_1x1_conv: A boolean indicating whether an additional 1x1 convolution - should be inserted before shrinking the feature map. - image_features: A dictionary of handles to activation tensors from the - base feature extractor. 
- - Returns: - feature_maps: an OrderedDict mapping keys (feature map names) to - tensors where each tensor has shape [batch, height_i, width_i, depth_i]. - - Raises: - ValueError: if the number entries in 'from_layer' and - 'layer_depth' do not match. - ValueError: if the generated layer does not have the same resolution - as specified. - """ - depth_fn = get_depth_fn(depth_multiplier, min_depth) - - feature_map_keys = [] - feature_maps = [] - base_from_layer = '' - use_depthwise = False - if 'use_depthwise' in feature_map_layout: - use_depthwise = feature_map_layout['use_depthwise'] - for index, from_layer in enumerate(feature_map_layout['from_layer']): - layer_depth = feature_map_layout['layer_depth'][index] - conv_kernel_size = 3 - if 'conv_kernel_size' in feature_map_layout: - conv_kernel_size = feature_map_layout['conv_kernel_size'][index] - if from_layer: - feature_map = image_features[from_layer] - base_from_layer = from_layer - feature_map_keys.append(from_layer) - else: - pre_layer = feature_maps[-1] - intermediate_layer = pre_layer - if insert_1x1_conv: - layer_name = '{}_1_Conv2d_{}_1x1_{}'.format( - base_from_layer, index, depth_fn(layer_depth / 2)) - intermediate_layer = slim.conv2d( - pre_layer, - depth_fn(layer_depth / 2), [1, 1], - padding='SAME', - stride=1, - scope=layer_name) - stride = 2 - layer_name = '{}_2_Conv2d_{}_{}x{}_s2_{}'.format( - base_from_layer, index, conv_kernel_size, conv_kernel_size, - depth_fn(layer_depth)) - if use_depthwise: - feature_map = slim.separable_conv2d( - intermediate_layer, - None, [conv_kernel_size, conv_kernel_size], - depth_multiplier=1, - padding='SAME', - stride=stride, - scope=layer_name + '_depthwise') - feature_map = slim.conv2d( - feature_map, - depth_fn(layer_depth), [1, 1], - padding='SAME', - stride=1, - scope=layer_name) - else: - feature_map = slim.conv2d( - intermediate_layer, - depth_fn(layer_depth), [conv_kernel_size, conv_kernel_size], - padding='SAME', - stride=stride, - scope=layer_name) - 
feature_map_keys.append(layer_name) - feature_maps.append(feature_map) - return collections.OrderedDict( - [(x, y) for (x, y) in zip(feature_map_keys, feature_maps)]) diff --git a/object_detection/models/feature_map_generators_test.py b/object_detection/models/feature_map_generators_test.py deleted file mode 100644 index cb69f0e4..00000000 --- a/object_detection/models/feature_map_generators_test.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for feature map generators.""" - -import tensorflow as tf - -from object_detection.models import feature_map_generators - -INCEPTION_V2_LAYOUT = { - 'from_layer': ['Mixed_3c', 'Mixed_4c', 'Mixed_5c', '', '', ''], - 'layer_depth': [-1, -1, -1, 512, 256, 256], - 'anchor_strides': [16, 32, 64, -1, -1, -1], - 'layer_target_norm': [20.0, -1, -1, -1, -1, -1], -} - -INCEPTION_V3_LAYOUT = { - 'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''], - 'layer_depth': [-1, -1, -1, 512, 256, 128], - 'anchor_strides': [16, 32, 64, -1, -1, -1], - 'aspect_ratios': [1.0, 2.0, 1.0/2, 3.0, 1.0/3] -} - -EMBEDDED_SSD_MOBILENET_V1_LAYOUT = { - 'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', ''], - 'layer_depth': [-1, -1, 512, 256, 256], - 'conv_kernel_size': [-1, -1, 3, 3, 2], -} - - -# TODO(rathodv): add tests with different anchor strides. -class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase): - - def test_get_expected_feature_map_shapes_with_inception_v2(self): - image_features = { - 'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32), - 'Mixed_4c': tf.random_uniform([4, 14, 14, 576], dtype=tf.float32), - 'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32) - } - feature_maps = feature_map_generators.multi_resolution_feature_maps( - feature_map_layout=INCEPTION_V2_LAYOUT, - depth_multiplier=1, - min_depth=32, - insert_1x1_conv=True, - image_features=image_features) - - expected_feature_map_shapes = { - 'Mixed_3c': (4, 28, 28, 256), - 'Mixed_4c': (4, 14, 14, 576), - 'Mixed_5c': (4, 7, 7, 1024), - 'Mixed_5c_2_Conv2d_3_3x3_s2_512': (4, 4, 4, 512), - 'Mixed_5c_2_Conv2d_4_3x3_s2_256': (4, 2, 2, 256), - 'Mixed_5c_2_Conv2d_5_3x3_s2_256': (4, 1, 1, 256)} - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - out_feature_maps = sess.run(feature_maps) - out_feature_map_shapes = dict( - 
(key, value.shape) for key, value in out_feature_maps.items()) - self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes) - - def test_get_expected_feature_map_shapes_with_inception_v3(self): - image_features = { - 'Mixed_5d': tf.random_uniform([4, 35, 35, 256], dtype=tf.float32), - 'Mixed_6e': tf.random_uniform([4, 17, 17, 576], dtype=tf.float32), - 'Mixed_7c': tf.random_uniform([4, 8, 8, 1024], dtype=tf.float32) - } - - feature_maps = feature_map_generators.multi_resolution_feature_maps( - feature_map_layout=INCEPTION_V3_LAYOUT, - depth_multiplier=1, - min_depth=32, - insert_1x1_conv=True, - image_features=image_features) - - expected_feature_map_shapes = { - 'Mixed_5d': (4, 35, 35, 256), - 'Mixed_6e': (4, 17, 17, 576), - 'Mixed_7c': (4, 8, 8, 1024), - 'Mixed_7c_2_Conv2d_3_3x3_s2_512': (4, 4, 4, 512), - 'Mixed_7c_2_Conv2d_4_3x3_s2_256': (4, 2, 2, 256), - 'Mixed_7c_2_Conv2d_5_3x3_s2_128': (4, 1, 1, 128)} - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - out_feature_maps = sess.run(feature_maps) - out_feature_map_shapes = dict( - (key, value.shape) for key, value in out_feature_maps.items()) - self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes) - - def test_get_expected_feature_map_shapes_with_embedded_ssd_mobilenet_v1( - self): - image_features = { - 'Conv2d_11_pointwise': tf.random_uniform([4, 16, 16, 512], - dtype=tf.float32), - 'Conv2d_13_pointwise': tf.random_uniform([4, 8, 8, 1024], - dtype=tf.float32), - } - - feature_maps = feature_map_generators.multi_resolution_feature_maps( - feature_map_layout=EMBEDDED_SSD_MOBILENET_V1_LAYOUT, - depth_multiplier=1, - min_depth=32, - insert_1x1_conv=True, - image_features=image_features) - - expected_feature_map_shapes = { - 'Conv2d_11_pointwise': (4, 16, 16, 512), - 'Conv2d_13_pointwise': (4, 8, 8, 1024), - 'Conv2d_13_pointwise_2_Conv2d_2_3x3_s2_512': (4, 4, 4, 512), - 'Conv2d_13_pointwise_2_Conv2d_3_3x3_s2_256': (4, 2, 2, 
256), - 'Conv2d_13_pointwise_2_Conv2d_4_2x2_s2_256': (4, 1, 1, 256)} - - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - out_feature_maps = sess.run(feature_maps) - out_feature_map_shapes = dict( - (key, value.shape) for key, value in out_feature_maps.items()) - self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes) - - -class GetDepthFunctionTest(tf.test.TestCase): - - def test_return_min_depth_when_multiplier_is_small(self): - depth_fn = feature_map_generators.get_depth_fn(depth_multiplier=0.5, - min_depth=16) - self.assertEqual(depth_fn(16), 16) - - def test_return_correct_depth_with_multiplier(self): - depth_fn = feature_map_generators.get_depth_fn(depth_multiplier=0.5, - min_depth=16) - self.assertEqual(depth_fn(64), 32) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/models/ssd_feature_extractor_test.py b/object_detection/models/ssd_feature_extractor_test.py deleted file mode 100644 index 0b3da468..00000000 --- a/object_detection/models/ssd_feature_extractor_test.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Base test class SSDFeatureExtractors.""" - -from abc import abstractmethod - -import numpy as np -import tensorflow as tf - - -class SsdFeatureExtractorTestBase(object): - - def _validate_features_shape(self, - feature_extractor, - preprocessed_inputs, - expected_feature_map_shapes): - """Checks the extracted features are of correct shape. - - Args: - feature_extractor: The feature extractor to test. - preprocessed_inputs: A [batch, height, width, 3] tensor to extract - features with. - expected_feature_map_shapes: The expected shape of the extracted features. - """ - feature_maps = feature_extractor.extract_features(preprocessed_inputs) - feature_map_shapes = [tf.shape(feature_map) for feature_map in feature_maps] - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - feature_map_shapes_out = sess.run(feature_map_shapes) - for shape_out, exp_shape_out in zip( - feature_map_shapes_out, expected_feature_map_shapes): - self.assertAllEqual(shape_out, exp_shape_out) - - @abstractmethod - def _create_feature_extractor(self, depth_multiplier, pad_to_multiple): - """Constructs a new feature extractor. - - Args: - depth_multiplier: float depth multiplier for feature extractor - pad_to_multiple: the nearest multiple to zero pad the input height and - width dimensions to. - Returns: - an ssd_meta_arch.SSDFeatureExtractor object. 
- """ - pass - - def check_extract_features_returns_correct_shape( - self, image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shapes_out): - feature_extractor = self._create_feature_extractor(depth_multiplier, - pad_to_multiple) - preprocessed_inputs = tf.random_uniform( - [4, image_height, image_width, 3], dtype=tf.float32) - self._validate_features_shape( - feature_extractor, preprocessed_inputs, expected_feature_map_shapes_out) - - def check_extract_features_raises_error_with_invalid_image_size( - self, image_height, image_width, depth_multiplier, pad_to_multiple): - feature_extractor = self._create_feature_extractor(depth_multiplier, - pad_to_multiple) - preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3)) - feature_maps = feature_extractor.extract_features(preprocessed_inputs) - test_preprocessed_image = np.random.rand(4, image_height, image_width, 3) - with self.test_session() as sess: - sess.run(tf.global_variables_initializer()) - with self.assertRaises(tf.errors.InvalidArgumentError): - sess.run(feature_maps, - feed_dict={preprocessed_inputs: test_preprocessed_image}) - - def check_feature_extractor_variables_under_scope( - self, depth_multiplier, pad_to_multiple, scope_name): - g = tf.Graph() - with g.as_default(): - feature_extractor = self._create_feature_extractor( - depth_multiplier, pad_to_multiple) - preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3)) - feature_extractor.extract_features(preprocessed_inputs) - variables = g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) - for variable in variables: - self.assertTrue(variable.name.startswith(scope_name)) diff --git a/object_detection/models/ssd_inception_v2_feature_extractor.py b/object_detection/models/ssd_inception_v2_feature_extractor.py deleted file mode 100644 index d1685d7f..00000000 --- a/object_detection/models/ssd_inception_v2_feature_extractor.py +++ /dev/null @@ -1,111 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. 
All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""SSDFeatureExtractor for InceptionV2 features.""" -import tensorflow as tf - -from object_detection.meta_architectures import ssd_meta_arch -from object_detection.models import feature_map_generators -from object_detection.utils import ops -from nets import inception_v2 - -slim = tf.contrib.slim - - -class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): - """SSD Feature Extractor using InceptionV2 features.""" - - def __init__(self, - is_training, - depth_multiplier, - min_depth, - pad_to_multiple, - conv_hyperparams, - batch_norm_trainable=True, - reuse_weights=None): - """InceptionV2 Feature Extractor for SSD Models. - - Args: - is_training: whether the network is in training mode. - depth_multiplier: float depth multiplier for feature extractor. - min_depth: minimum feature extractor depth. - pad_to_multiple: the nearest multiple to zero pad the input height and - width dimensions to. - conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops. - batch_norm_trainable: Whether to update batch norm parameters during - training or not. When training with a small batch size - (e.g. 1), it is desirable to disable batch norm update and use - pretrained batch norm params. - reuse_weights: Whether to reuse variables. Default is None. 
- """ - super(SSDInceptionV2FeatureExtractor, self).__init__( - is_training, depth_multiplier, min_depth, pad_to_multiple, - conv_hyperparams, batch_norm_trainable, reuse_weights) - - def preprocess(self, resized_inputs): - """SSD preprocessing. - - Maps pixel values to the range [-1, 1]. - - Args: - resized_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - - Returns: - preprocessed_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - """ - return (2.0 / 255.0) * resized_inputs - 1.0 - - def extract_features(self, preprocessed_inputs): - """Extract features from preprocessed inputs. - - Args: - preprocessed_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - - Returns: - feature_maps: a list of tensors where the ith tensor has shape - [batch, height_i, width_i, depth_i] - """ - preprocessed_inputs.get_shape().assert_has_rank(4) - shape_assert = tf.Assert( - tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33), - tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)), - ['image size must at least be 33 in both height and width.']) - - feature_map_layout = { - 'from_layer': ['Mixed_4c', 'Mixed_5c', '', '', '', ''], - 'layer_depth': [-1, -1, 512, 256, 256, 128], - } - - with tf.control_dependencies([shape_assert]): - with slim.arg_scope(self._conv_hyperparams): - with tf.variable_scope('InceptionV2', - reuse=self._reuse_weights) as scope: - _, image_features = inception_v2.inception_v2_base( - ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), - final_endpoint='Mixed_5c', - min_depth=self._min_depth, - depth_multiplier=self._depth_multiplier, - scope=scope) - feature_maps = feature_map_generators.multi_resolution_feature_maps( - feature_map_layout=feature_map_layout, - depth_multiplier=self._depth_multiplier, - min_depth=self._min_depth, - insert_1x1_conv=True, - image_features=image_features) - - return 
feature_maps.values() diff --git a/object_detection/models/ssd_inception_v2_feature_extractor_test.py b/object_detection/models/ssd_inception_v2_feature_extractor_test.py deleted file mode 100644 index b265ccb0..00000000 --- a/object_detection/models/ssd_inception_v2_feature_extractor_test.py +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.models.ssd_inception_v2_feature_extractor.""" -import numpy as np -import tensorflow as tf - -from object_detection.models import ssd_feature_extractor_test -from object_detection.models import ssd_inception_v2_feature_extractor - - -class SsdInceptionV2FeatureExtractorTest( - ssd_feature_extractor_test.SsdFeatureExtractorTestBase, tf.test.TestCase): - - def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, - is_training=True, batch_norm_trainable=True): - """Constructs a SsdInceptionV2FeatureExtractor. - - Args: - depth_multiplier: float depth multiplier for feature extractor - pad_to_multiple: the nearest multiple to zero pad the input height and - width dimensions to. - is_training: whether the network is in training mode. - batch_norm_trainable: Whether to update batch norm parameters during - training or not - Returns: - an ssd_inception_v2_feature_extractor.SsdInceptionV2FeatureExtractor. 
- """ - min_depth = 32 - conv_hyperparams = {} - return ssd_inception_v2_feature_extractor.SSDInceptionV2FeatureExtractor( - is_training, depth_multiplier, min_depth, pad_to_multiple, - conv_hyperparams, batch_norm_trainable) - - def test_extract_features_returns_correct_shapes_128(self): - image_height = 128 - image_width = 128 - depth_multiplier = 1.0 - pad_to_multiple = 1 - expected_feature_map_shape = [(4, 8, 8, 576), (4, 4, 4, 1024), - (4, 2, 2, 512), (4, 1, 1, 256), - (4, 1, 1, 256), (4, 1, 1, 128)] - self.check_extract_features_returns_correct_shape( - image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_returns_correct_shapes_299(self): - image_height = 299 - image_width = 299 - depth_multiplier = 1.0 - pad_to_multiple = 1 - expected_feature_map_shape = [(4, 19, 19, 576), (4, 10, 10, 1024), - (4, 5, 5, 512), (4, 3, 3, 256), - (4, 2, 2, 256), (4, 1, 1, 128)] - self.check_extract_features_returns_correct_shape( - image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_returns_correct_shapes_enforcing_min_depth(self): - image_height = 299 - image_width = 299 - depth_multiplier = 0.5**12 - pad_to_multiple = 1 - expected_feature_map_shape = [(4, 19, 19, 128), (4, 10, 10, 128), - (4, 5, 5, 32), (4, 3, 3, 32), - (4, 2, 2, 32), (4, 1, 1, 32)] - self.check_extract_features_returns_correct_shape( - image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self): - image_height = 299 - image_width = 299 - depth_multiplier = 1.0 - pad_to_multiple = 32 - expected_feature_map_shape = [(4, 20, 20, 576), (4, 10, 10, 1024), - (4, 5, 5, 512), (4, 3, 3, 256), - (4, 2, 2, 256), (4, 1, 1, 128)] - self.check_extract_features_returns_correct_shape( - image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) 
- - def test_extract_features_raises_error_with_invalid_image_size(self): - image_height = 32 - image_width = 32 - depth_multiplier = 1.0 - pad_to_multiple = 1 - self.check_extract_features_raises_error_with_invalid_image_size( - image_height, image_width, depth_multiplier, pad_to_multiple) - - def test_preprocess_returns_correct_value_range(self): - image_height = 128 - image_width = 128 - depth_multiplier = 1 - pad_to_multiple = 1 - test_image = np.random.rand(4, image_height, image_width, 3) - feature_extractor = self._create_feature_extractor(depth_multiplier, - pad_to_multiple) - preprocessed_image = feature_extractor.preprocess(test_image) - self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0))) - - def test_variables_only_created_in_scope(self): - depth_multiplier = 1 - pad_to_multiple = 1 - scope_name = 'InceptionV2' - self.check_feature_extractor_variables_under_scope( - depth_multiplier, pad_to_multiple, scope_name) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/models/ssd_inception_v3_feature_extractor.py b/object_detection/models/ssd_inception_v3_feature_extractor.py deleted file mode 100644 index 3a782eb2..00000000 --- a/object_detection/models/ssd_inception_v3_feature_extractor.py +++ /dev/null @@ -1,111 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""SSDFeatureExtractor for InceptionV3 features.""" -import tensorflow as tf - -from object_detection.meta_architectures import ssd_meta_arch -from object_detection.models import feature_map_generators -from object_detection.utils import ops -from nets import inception_v3 - -slim = tf.contrib.slim - - -class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): - """SSD Feature Extractor using InceptionV3 features.""" - - def __init__(self, - is_training, - depth_multiplier, - min_depth, - pad_to_multiple, - conv_hyperparams, - batch_norm_trainable=True, - reuse_weights=None): - """InceptionV3 Feature Extractor for SSD Models. - - Args: - is_training: whether the network is in training mode. - depth_multiplier: float depth multiplier for feature extractor. - min_depth: minimum feature extractor depth. - pad_to_multiple: the nearest multiple to zero pad the input height and - width dimensions to. - conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops. - batch_norm_trainable: Whether to update batch norm parameters during - training or not. When training with a small batch size - (e.g. 1), it is desirable to disable batch norm update and use - pretrained batch norm params. - reuse_weights: Whether to reuse variables. Default is None. - """ - super(SSDInceptionV3FeatureExtractor, self).__init__( - is_training, depth_multiplier, min_depth, pad_to_multiple, - conv_hyperparams, batch_norm_trainable, reuse_weights) - - def preprocess(self, resized_inputs): - """SSD preprocessing. - - Maps pixel values to the range [-1, 1]. - - Args: - resized_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - - Returns: - preprocessed_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. 
- """ - return (2.0 / 255.0) * resized_inputs - 1.0 - - def extract_features(self, preprocessed_inputs): - """Extract features from preprocessed inputs. - - Args: - preprocessed_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - - Returns: - feature_maps: a list of tensors where the ith tensor has shape - [batch, height_i, width_i, depth_i] - """ - preprocessed_inputs.get_shape().assert_has_rank(4) - shape_assert = tf.Assert( - tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33), - tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)), - ['image size must at least be 33 in both height and width.']) - - feature_map_layout = { - 'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''], - 'layer_depth': [-1, -1, -1, 512, 256, 128], - } - - with tf.control_dependencies([shape_assert]): - with slim.arg_scope(self._conv_hyperparams): - with tf.variable_scope('InceptionV3', - reuse=self._reuse_weights) as scope: - _, image_features = inception_v3.inception_v3_base( - ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), - final_endpoint='Mixed_7c', - min_depth=self._min_depth, - depth_multiplier=self._depth_multiplier, - scope=scope) - feature_maps = feature_map_generators.multi_resolution_feature_maps( - feature_map_layout=feature_map_layout, - depth_multiplier=self._depth_multiplier, - min_depth=self._min_depth, - insert_1x1_conv=True, - image_features=image_features) - - return feature_maps.values() diff --git a/object_detection/models/ssd_inception_v3_feature_extractor_test.py b/object_detection/models/ssd_inception_v3_feature_extractor_test.py deleted file mode 100644 index 89c1a288..00000000 --- a/object_detection/models/ssd_inception_v3_feature_extractor_test.py +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.models.ssd_inception_v3_feature_extractor.""" -import numpy as np -import tensorflow as tf - -from object_detection.models import ssd_feature_extractor_test -from object_detection.models import ssd_inception_v3_feature_extractor - - -class SsdInceptionV3FeatureExtractorTest( - ssd_feature_extractor_test.SsdFeatureExtractorTestBase, tf.test.TestCase): - - def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, - is_training=True, batch_norm_trainable=True): - """Constructs a SsdInceptionV3FeatureExtractor. - - Args: - depth_multiplier: float depth multiplier for feature extractor - pad_to_multiple: the nearest multiple to zero pad the input height and - width dimensions to. - is_training: whether the network is in training mode. - batch_norm_trainable: Whether to update batch norm parameters during - training or not - Returns: - an ssd_inception_v3_feature_extractor.SsdInceptionV3FeatureExtractor. 
- """ - min_depth = 32 - conv_hyperparams = {} - return ssd_inception_v3_feature_extractor.SSDInceptionV3FeatureExtractor( - is_training, depth_multiplier, min_depth, pad_to_multiple, - conv_hyperparams, batch_norm_trainable) - - def test_extract_features_returns_correct_shapes_128(self): - image_height = 128 - image_width = 128 - depth_multiplier = 1.0 - pad_to_multiple = 1 - expected_feature_map_shape = [(4, 13, 13, 288), (4, 6, 6, 768), - (4, 2, 2, 2048), (4, 1, 1, 512), - (4, 1, 1, 256), (4, 1, 1, 128)] - self.check_extract_features_returns_correct_shape( - image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_returns_correct_shapes_299(self): - image_height = 299 - image_width = 299 - depth_multiplier = 1.0 - pad_to_multiple = 1 - expected_feature_map_shape = [(4, 35, 35, 288), (4, 17, 17, 768), - (4, 8, 8, 2048), (4, 4, 4, 512), - (4, 2, 2, 256), (4, 1, 1, 128)] - self.check_extract_features_returns_correct_shape( - image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_returns_correct_shapes_enforcing_min_depth(self): - image_height = 299 - image_width = 299 - depth_multiplier = 0.5**12 - pad_to_multiple = 1 - expected_feature_map_shape = [(4, 35, 35, 128), (4, 17, 17, 128), - (4, 8, 8, 192), (4, 4, 4, 32), - (4, 2, 2, 32), (4, 1, 1, 32)] - self.check_extract_features_returns_correct_shape( - image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self): - image_height = 299 - image_width = 299 - depth_multiplier = 1.0 - pad_to_multiple = 32 - expected_feature_map_shape = [(4, 37, 37, 288), (4, 18, 18, 768), - (4, 8, 8, 2048), (4, 4, 4, 512), - (4, 2, 2, 256), (4, 1, 1, 128)] - self.check_extract_features_returns_correct_shape( - image_height, image_width, depth_multiplier, pad_to_multiple, - 
expected_feature_map_shape) - - def test_extract_features_raises_error_with_invalid_image_size(self): - image_height = 32 - image_width = 32 - depth_multiplier = 1.0 - pad_to_multiple = 1 - self.check_extract_features_raises_error_with_invalid_image_size( - image_height, image_width, depth_multiplier, pad_to_multiple) - - def test_preprocess_returns_correct_value_range(self): - image_height = 128 - image_width = 128 - depth_multiplier = 1 - pad_to_multiple = 1 - test_image = np.random.rand(4, image_height, image_width, 3) - feature_extractor = self._create_feature_extractor(depth_multiplier, - pad_to_multiple) - preprocessed_image = feature_extractor.preprocess(test_image) - self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0))) - - def test_variables_only_created_in_scope(self): - depth_multiplier = 1 - pad_to_multiple = 1 - scope_name = 'InceptionV3' - self.check_feature_extractor_variables_under_scope( - depth_multiplier, pad_to_multiple, scope_name) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/models/ssd_mobilenet_v1_feature_extractor.py b/object_detection/models/ssd_mobilenet_v1_feature_extractor.py deleted file mode 100644 index 456e2d1d..00000000 --- a/object_detection/models/ssd_mobilenet_v1_feature_extractor.py +++ /dev/null @@ -1,114 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""SSDFeatureExtractor for MobilenetV1 features.""" - -import tensorflow as tf - -from object_detection.meta_architectures import ssd_meta_arch -from object_detection.models import feature_map_generators -from object_detection.utils import ops -from nets import mobilenet_v1 - -slim = tf.contrib.slim - - -class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): - """SSD Feature Extractor using MobilenetV1 features.""" - - def __init__(self, - is_training, - depth_multiplier, - min_depth, - pad_to_multiple, - conv_hyperparams, - batch_norm_trainable=True, - reuse_weights=None): - """MobileNetV1 Feature Extractor for SSD Models. - - Args: - is_training: whether the network is in training mode. - depth_multiplier: float depth multiplier for feature extractor. - min_depth: minimum feature extractor depth. - pad_to_multiple: the nearest multiple to zero pad the input height and - width dimensions to. - conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops. - batch_norm_trainable: Whether to update batch norm parameters during - training or not. When training with a small batch size - (e.g. 1), it is desirable to disable batch norm update and use - pretrained batch norm params. - reuse_weights: Whether to reuse variables. Default is None. - """ - super(SSDMobileNetV1FeatureExtractor, self).__init__( - is_training, depth_multiplier, min_depth, pad_to_multiple, - conv_hyperparams, batch_norm_trainable, reuse_weights) - - def preprocess(self, resized_inputs): - """SSD preprocessing. - - Maps pixel values to the range [-1, 1]. - - Args: - resized_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - - Returns: - preprocessed_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. 
- """ - return (2.0 / 255.0) * resized_inputs - 1.0 - - def extract_features(self, preprocessed_inputs): - """Extract features from preprocessed inputs. - - Args: - preprocessed_inputs: a [batch, height, width, channels] float tensor - representing a batch of images. - - Returns: - feature_maps: a list of tensors where the ith tensor has shape - [batch, height_i, width_i, depth_i] - """ - preprocessed_inputs.get_shape().assert_has_rank(4) - shape_assert = tf.Assert( - tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33), - tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)), - ['image size must at least be 33 in both height and width.']) - - feature_map_layout = { - 'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', - '', ''], - 'layer_depth': [-1, -1, 512, 256, 256, 128], - } - - with tf.control_dependencies([shape_assert]): - with slim.arg_scope(self._conv_hyperparams): - with slim.arg_scope([slim.batch_norm], fused=False): - with tf.variable_scope('MobilenetV1', - reuse=self._reuse_weights) as scope: - _, image_features = mobilenet_v1.mobilenet_v1_base( - ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), - final_endpoint='Conv2d_13_pointwise', - min_depth=self._min_depth, - depth_multiplier=self._depth_multiplier, - scope=scope) - feature_maps = feature_map_generators.multi_resolution_feature_maps( - feature_map_layout=feature_map_layout, - depth_multiplier=self._depth_multiplier, - min_depth=self._min_depth, - insert_1x1_conv=True, - image_features=image_features) - - return feature_maps.values() diff --git a/object_detection/models/ssd_mobilenet_v1_feature_extractor_test.py b/object_detection/models/ssd_mobilenet_v1_feature_extractor_test.py deleted file mode 100644 index 9159ceb1..00000000 --- a/object_detection/models/ssd_mobilenet_v1_feature_extractor_test.py +++ /dev/null @@ -1,139 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for ssd_mobilenet_v1_feature_extractor.""" -import numpy as np -import tensorflow as tf - -from object_detection.models import ssd_feature_extractor_test -from object_detection.models import ssd_mobilenet_v1_feature_extractor - -slim = tf.contrib.slim - - -class SsdMobilenetV1FeatureExtractorTest( - ssd_feature_extractor_test.SsdFeatureExtractorTestBase, tf.test.TestCase): - - def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, - is_training=True, batch_norm_trainable=True): - """Constructs a new feature extractor. - - Args: - depth_multiplier: float depth multiplier for feature extractor - pad_to_multiple: the nearest multiple to zero pad the input height and - width dimensions to. - is_training: whether the network is in training mode. - batch_norm_trainable: Whether to update batch norm parameters during - training or not. - Returns: - an ssd_meta_arch.SSDFeatureExtractor object. 
- """ - min_depth = 32 - with slim.arg_scope([slim.conv2d], normalizer_fn=slim.batch_norm) as sc: - conv_hyperparams = sc - return ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor( - is_training, depth_multiplier, min_depth, pad_to_multiple, - conv_hyperparams, batch_norm_trainable) - - def test_extract_features_returns_correct_shapes_128(self): - image_height = 128 - image_width = 128 - depth_multiplier = 1.0 - pad_to_multiple = 1 - expected_feature_map_shape = [(4, 8, 8, 512), (4, 4, 4, 1024), - (4, 2, 2, 512), (4, 1, 1, 256), - (4, 1, 1, 256), (4, 1, 1, 128)] - self.check_extract_features_returns_correct_shape( - image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_returns_correct_shapes_299(self): - image_height = 299 - image_width = 299 - depth_multiplier = 1.0 - pad_to_multiple = 1 - expected_feature_map_shape = [(4, 19, 19, 512), (4, 10, 10, 1024), - (4, 5, 5, 512), (4, 3, 3, 256), - (4, 2, 2, 256), (4, 1, 1, 128)] - self.check_extract_features_returns_correct_shape( - image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_returns_correct_shapes_enforcing_min_depth(self): - image_height = 299 - image_width = 299 - depth_multiplier = 0.5**12 - pad_to_multiple = 1 - expected_feature_map_shape = [(4, 19, 19, 32), (4, 10, 10, 32), - (4, 5, 5, 32), (4, 3, 3, 32), - (4, 2, 2, 32), (4, 1, 1, 32)] - self.check_extract_features_returns_correct_shape( - image_height, image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self): - image_height = 299 - image_width = 299 - depth_multiplier = 1.0 - pad_to_multiple = 32 - expected_feature_map_shape = [(4, 20, 20, 512), (4, 10, 10, 1024), - (4, 5, 5, 512), (4, 3, 3, 256), - (4, 2, 2, 256), (4, 1, 1, 128)] - self.check_extract_features_returns_correct_shape( - image_height, 
image_width, depth_multiplier, pad_to_multiple, - expected_feature_map_shape) - - def test_extract_features_raises_error_with_invalid_image_size(self): - image_height = 32 - image_width = 32 - depth_multiplier = 1.0 - pad_to_multiple = 1 - self.check_extract_features_raises_error_with_invalid_image_size( - image_height, image_width, depth_multiplier, pad_to_multiple) - - def test_preprocess_returns_correct_value_range(self): - image_height = 128 - image_width = 128 - depth_multiplier = 1 - pad_to_multiple = 1 - test_image = np.random.rand(4, image_height, image_width, 3) - feature_extractor = self._create_feature_extractor(depth_multiplier, - pad_to_multiple) - preprocessed_image = feature_extractor.preprocess(test_image) - self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0))) - - def test_variables_only_created_in_scope(self): - depth_multiplier = 1 - pad_to_multiple = 1 - scope_name = 'MobilenetV1' - self.check_feature_extractor_variables_under_scope( - depth_multiplier, pad_to_multiple, scope_name) - - def test_nofused_batchnorm(self): - image_height = 40 - image_width = 40 - depth_multiplier = 1 - pad_to_multiple = 1 - image_placeholder = tf.placeholder(tf.float32, - [1, image_height, image_width, 3]) - feature_extractor = self._create_feature_extractor(depth_multiplier, - pad_to_multiple) - preprocessed_image = feature_extractor.preprocess(image_placeholder) - _ = feature_extractor.extract_features(preprocessed_image) - self.assertFalse(any(op.type == 'FusedBatchNorm' - for op in tf.get_default_graph().get_operations())) - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/object_detection_tutorial.ipynb b/object_detection/object_detection_tutorial.ipynb deleted file mode 100644 index 6e251ff4..00000000 --- a/object_detection/object_detection_tutorial.ipynb +++ /dev/null @@ -1,298 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Object Detection Demo\n", - "Welcome to the 
object detection inference walkthrough! This notebook will walk you step by step through the process of using a pre-trained model to detect objects in an image. Make sure to follow the [installation instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md) before you start." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Imports" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "import os\n", - "import six.moves.urllib as urllib\n", - "import sys\n", - "import tarfile\n", - "import tensorflow as tf\n", - "import zipfile\n", - "\n", - "from collections import defaultdict\n", - "from io import StringIO\n", - "from matplotlib import pyplot as plt\n", - "from PIL import Image\n", - "\n", - "if tf.__version__ < '1.4.0':\n", - " raise ImportError('Please upgrade your tensorflow installation to v1.4.* or later!')\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Env setup" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# This is needed to display the images.\n", - "%matplotlib inline\n", - "\n", - "# This is needed since the notebook is stored in the object_detection folder.\n", - "sys.path.append(\"..\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Object detection imports\n", - "Here are the imports from the object detection module." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from utils import label_map_util\n", - "\n", - "from utils import visualization_utils as vis_util" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Model preparation " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Variables\n", - "\n", - "Any model exported using the `export_inference_graph.py` tool can be loaded here simply by changing `PATH_TO_CKPT` to point to a new .pb file. \n", - "\n", - "By default we use an \"SSD with Mobilenet\" model here. See the [detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md) for a list of other models that can be run out-of-the-box with varying speeds and accuracies." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# What model to download.\n", - "MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'\n", - "MODEL_FILE = MODEL_NAME + '.tar.gz'\n", - "DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'\n", - "\n", - "# Path to frozen detection graph. 
This is the actual model that is used for the object detection.\n", - "PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'\n", - "\n", - "# List of the strings that is used to add correct label for each box.\n", - "PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')\n", - "\n", - "NUM_CLASSES = 90" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Download Model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "opener = urllib.request.URLopener()\n", - "opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)\n", - "tar_file = tarfile.open(MODEL_FILE)\n", - "for file in tar_file.getmembers():\n", - " file_name = os.path.basename(file.name)\n", - " if 'frozen_inference_graph.pb' in file_name:\n", - " tar_file.extract(file, os.getcwd())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Load a (frozen) Tensorflow model into memory." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "detection_graph = tf.Graph()\n", - "with detection_graph.as_default():\n", - " od_graph_def = tf.GraphDef()\n", - " with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:\n", - " serialized_graph = fid.read()\n", - " od_graph_def.ParseFromString(serialized_graph)\n", - " tf.import_graph_def(od_graph_def, name='')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Loading label map\n", - "Label maps map indices to category names, so that when our convolution network predicts `5`, we know that this corresponds to `airplane`. 
Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "label_map = label_map_util.load_labelmap(PATH_TO_LABELS)\n", - "categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)\n", - "category_index = label_map_util.create_category_index(categories)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Helper code" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def load_image_into_numpy_array(image):\n", - " (im_width, im_height) = image.size\n", - " return np.array(image.getdata()).reshape(\n", - " (im_height, im_width, 3)).astype(np.uint8)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Detection" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# For the sake of simplicity we will use only 2 images:\n", - "# image1.jpg\n", - "# image2.jpg\n", - "# If you want to test the code with your images, just add path to the images to the TEST_IMAGE_PATHS.\n", - "PATH_TO_TEST_IMAGES_DIR = 'test_images'\n", - "TEST_IMAGE_PATHS = [ os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i)) for i in range(1, 3) ]\n", - "\n", - "# Size, in inches, of the output images.\n", - "IMAGE_SIZE = (12, 8)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "with detection_graph.as_default():\n", - " with tf.Session(graph=detection_graph) as sess:\n", - " # Definite input and output Tensors for detection_graph\n", - " image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')\n", - " # Each box represents a part of the image where a particular 
object was detected.\n", - " detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')\n", - " # Each score represent how level of confidence for each of the objects.\n", - " # Score is shown on the result image, together with the class label.\n", - " detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')\n", - " detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')\n", - " num_detections = detection_graph.get_tensor_by_name('num_detections:0')\n", - " for image_path in TEST_IMAGE_PATHS:\n", - " image = Image.open(image_path)\n", - " # the array based representation of the image will be used later in order to prepare the\n", - " # result image with boxes and labels on it.\n", - " image_np = load_image_into_numpy_array(image)\n", - " # Expand dimensions since the model expects images to have shape: [1, None, None, 3]\n", - " image_np_expanded = np.expand_dims(image_np, axis=0)\n", - " # Actual detection.\n", - " (boxes, scores, classes, num) = sess.run(\n", - " [detection_boxes, detection_scores, detection_classes, num_detections],\n", - " feed_dict={image_tensor: image_np_expanded})\n", - " # Visualization of the results of a detection.\n", - " vis_util.visualize_boxes_and_labels_on_image_array(\n", - " image_np,\n", - " np.squeeze(boxes),\n", - " np.squeeze(classes).astype(np.int32),\n", - " np.squeeze(scores),\n", - " category_index,\n", - " use_normalized_coordinates=True,\n", - " line_thickness=8)\n", - " plt.figure(figsize=IMAGE_SIZE)\n", - " plt.imshow(image_np)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - 
"pygments_lexer": "ipython2", - "version": "2.7.10" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/object_detection/protos/BUILD b/object_detection/protos/BUILD deleted file mode 100644 index 1b7eb148..00000000 --- a/object_detection/protos/BUILD +++ /dev/null @@ -1,341 +0,0 @@ -# Tensorflow Object Detection API: Configuration protos. - -package( - default_visibility = ["//visibility:public"], -) - -licenses(["notice"]) - -proto_library( - name = "argmax_matcher_proto", - srcs = ["argmax_matcher.proto"], -) - -py_proto_library( - name = "argmax_matcher_py_pb2", - api_version = 2, - deps = [":argmax_matcher_proto"], -) - -proto_library( - name = "bipartite_matcher_proto", - srcs = ["bipartite_matcher.proto"], -) - -py_proto_library( - name = "bipartite_matcher_py_pb2", - api_version = 2, - deps = [":bipartite_matcher_proto"], -) - -proto_library( - name = "matcher_proto", - srcs = ["matcher.proto"], - deps = [ - ":argmax_matcher_proto", - ":bipartite_matcher_proto", - ], -) - -py_proto_library( - name = "matcher_py_pb2", - api_version = 2, - deps = [":matcher_proto"], -) - -proto_library( - name = "faster_rcnn_box_coder_proto", - srcs = ["faster_rcnn_box_coder.proto"], -) - -py_proto_library( - name = "faster_rcnn_box_coder_py_pb2", - api_version = 2, - deps = [":faster_rcnn_box_coder_proto"], -) - -proto_library( - name = "keypoint_box_coder_proto", - srcs = ["keypoint_box_coder.proto"], -) - -py_proto_library( - name = "keypoint_box_coder_py_pb2", - api_version = 2, - deps = [":keypoint_box_coder_proto"], -) - -proto_library( - name = "mean_stddev_box_coder_proto", - srcs = ["mean_stddev_box_coder.proto"], -) - -py_proto_library( - name = "mean_stddev_box_coder_py_pb2", - api_version = 2, - deps = [":mean_stddev_box_coder_proto"], -) - -proto_library( - name = "square_box_coder_proto", - srcs = ["square_box_coder.proto"], -) - -py_proto_library( - name = "square_box_coder_py_pb2", - api_version = 2, - deps = [":square_box_coder_proto"], -) - 
-proto_library( - name = "box_coder_proto", - srcs = ["box_coder.proto"], - deps = [ - ":faster_rcnn_box_coder_proto", - ":keypoint_box_coder_proto", - ":mean_stddev_box_coder_proto", - ":square_box_coder_proto", - ], -) - -py_proto_library( - name = "box_coder_py_pb2", - api_version = 2, - deps = [":box_coder_proto"], -) - -proto_library( - name = "grid_anchor_generator_proto", - srcs = ["grid_anchor_generator.proto"], -) - -py_proto_library( - name = "grid_anchor_generator_py_pb2", - api_version = 2, - deps = [":grid_anchor_generator_proto"], -) - -proto_library( - name = "ssd_anchor_generator_proto", - srcs = ["ssd_anchor_generator.proto"], -) - -py_proto_library( - name = "ssd_anchor_generator_py_pb2", - api_version = 2, - deps = [":ssd_anchor_generator_proto"], -) - -proto_library( - name = "anchor_generator_proto", - srcs = ["anchor_generator.proto"], - deps = [ - ":grid_anchor_generator_proto", - ":ssd_anchor_generator_proto", - ], -) - -py_proto_library( - name = "anchor_generator_py_pb2", - api_version = 2, - deps = [":anchor_generator_proto"], -) - -proto_library( - name = "input_reader_proto", - srcs = ["input_reader.proto"], -) - -py_proto_library( - name = "input_reader_py_pb2", - api_version = 2, - deps = [":input_reader_proto"], -) - -proto_library( - name = "losses_proto", - srcs = ["losses.proto"], -) - -py_proto_library( - name = "losses_py_pb2", - api_version = 2, - deps = [":losses_proto"], -) - -proto_library( - name = "optimizer_proto", - srcs = ["optimizer.proto"], -) - -py_proto_library( - name = "optimizer_py_pb2", - api_version = 2, - deps = [":optimizer_proto"], -) - -proto_library( - name = "post_processing_proto", - srcs = ["post_processing.proto"], -) - -py_proto_library( - name = "post_processing_py_pb2", - api_version = 2, - deps = [":post_processing_proto"], -) - -proto_library( - name = "hyperparams_proto", - srcs = ["hyperparams.proto"], -) - -py_proto_library( - name = "hyperparams_py_pb2", - api_version = 2, - deps = 
[":hyperparams_proto"], -) - -proto_library( - name = "box_predictor_proto", - srcs = ["box_predictor.proto"], - deps = [":hyperparams_proto"], -) - -py_proto_library( - name = "box_predictor_py_pb2", - api_version = 2, - deps = [":box_predictor_proto"], -) - -proto_library( - name = "region_similarity_calculator_proto", - srcs = ["region_similarity_calculator.proto"], - deps = [], -) - -py_proto_library( - name = "region_similarity_calculator_py_pb2", - api_version = 2, - deps = [":region_similarity_calculator_proto"], -) - -proto_library( - name = "preprocessor_proto", - srcs = ["preprocessor.proto"], -) - -py_proto_library( - name = "preprocessor_py_pb2", - api_version = 2, - deps = [":preprocessor_proto"], -) - -proto_library( - name = "train_proto", - srcs = ["train.proto"], - deps = [ - ":optimizer_proto", - ":preprocessor_proto", - ], -) - -py_proto_library( - name = "train_py_pb2", - api_version = 2, - deps = [":train_proto"], -) - -proto_library( - name = "eval_proto", - srcs = ["eval.proto"], -) - -py_proto_library( - name = "eval_py_pb2", - api_version = 2, - deps = [":eval_proto"], -) - -proto_library( - name = "image_resizer_proto", - srcs = ["image_resizer.proto"], -) - -py_proto_library( - name = "image_resizer_py_pb2", - api_version = 2, - deps = [":image_resizer_proto"], -) - -proto_library( - name = "faster_rcnn_proto", - srcs = ["faster_rcnn.proto"], - deps = [ - ":box_predictor_proto", - "//object_detection/protos:anchor_generator_proto", - "//object_detection/protos:hyperparams_proto", - "//object_detection/protos:image_resizer_proto", - "//object_detection/protos:losses_proto", - "//object_detection/protos:post_processing_proto", - ], -) - -proto_library( - name = "ssd_proto", - srcs = ["ssd.proto"], - deps = [ - ":anchor_generator_proto", - ":box_coder_proto", - ":box_predictor_proto", - ":hyperparams_proto", - ":image_resizer_proto", - ":losses_proto", - ":matcher_proto", - ":post_processing_proto", - ":region_similarity_calculator_proto", - 
], -) - -proto_library( - name = "model_proto", - srcs = ["model.proto"], - deps = [ - ":faster_rcnn_proto", - ":ssd_proto", - ], -) - -py_proto_library( - name = "model_py_pb2", - api_version = 2, - deps = [":model_proto"], -) - -proto_library( - name = "pipeline_proto", - srcs = ["pipeline.proto"], - deps = [ - ":eval_proto", - ":input_reader_proto", - ":model_proto", - ":train_proto", - ], -) - -py_proto_library( - name = "pipeline_py_pb2", - api_version = 2, - deps = [":pipeline_proto"], -) - -proto_library( - name = "string_int_label_map_proto", - srcs = ["string_int_label_map.proto"], -) - -py_proto_library( - name = "string_int_label_map_py_pb2", - api_version = 2, - deps = [":string_int_label_map_proto"], -) diff --git a/object_detection/protos/__init__.py b/object_detection/protos/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/object_detection/protos/__pycache__/__init__.cpython-35.pyc b/object_detection/protos/__pycache__/__init__.cpython-35.pyc deleted file mode 100644 index 4feb7bdd..00000000 Binary files a/object_detection/protos/__pycache__/__init__.cpython-35.pyc and /dev/null differ diff --git a/object_detection/protos/__pycache__/anchor_generator_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/anchor_generator_pb2.cpython-35.pyc deleted file mode 100644 index e2124087..00000000 Binary files a/object_detection/protos/__pycache__/anchor_generator_pb2.cpython-35.pyc and /dev/null differ diff --git a/object_detection/protos/__pycache__/argmax_matcher_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/argmax_matcher_pb2.cpython-35.pyc deleted file mode 100644 index 9bf3b1ca..00000000 Binary files a/object_detection/protos/__pycache__/argmax_matcher_pb2.cpython-35.pyc and /dev/null differ diff --git a/object_detection/protos/__pycache__/bipartite_matcher_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/bipartite_matcher_pb2.cpython-35.pyc deleted file mode 100644 index 50042354..00000000 Binary files 
a/object_detection/protos/__pycache__/bipartite_matcher_pb2.cpython-35.pyc and /dev/null differ diff --git a/object_detection/protos/__pycache__/box_coder_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/box_coder_pb2.cpython-35.pyc deleted file mode 100644 index 59ba53c1..00000000 Binary files a/object_detection/protos/__pycache__/box_coder_pb2.cpython-35.pyc and /dev/null differ diff --git a/object_detection/protos/__pycache__/box_predictor_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/box_predictor_pb2.cpython-35.pyc deleted file mode 100644 index 5fa2b6d0..00000000 Binary files a/object_detection/protos/__pycache__/box_predictor_pb2.cpython-35.pyc and /dev/null differ diff --git a/object_detection/protos/__pycache__/eval_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/eval_pb2.cpython-35.pyc deleted file mode 100644 index 204e286b..00000000 Binary files a/object_detection/protos/__pycache__/eval_pb2.cpython-35.pyc and /dev/null differ diff --git a/object_detection/protos/__pycache__/faster_rcnn_box_coder_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/faster_rcnn_box_coder_pb2.cpython-35.pyc deleted file mode 100644 index 373d5d2d..00000000 Binary files a/object_detection/protos/__pycache__/faster_rcnn_box_coder_pb2.cpython-35.pyc and /dev/null differ diff --git a/object_detection/protos/__pycache__/faster_rcnn_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/faster_rcnn_pb2.cpython-35.pyc deleted file mode 100644 index ab8f8fa6..00000000 Binary files a/object_detection/protos/__pycache__/faster_rcnn_pb2.cpython-35.pyc and /dev/null differ diff --git a/object_detection/protos/__pycache__/grid_anchor_generator_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/grid_anchor_generator_pb2.cpython-35.pyc deleted file mode 100644 index 39621fcb..00000000 Binary files a/object_detection/protos/__pycache__/grid_anchor_generator_pb2.cpython-35.pyc and /dev/null differ diff --git 
a/object_detection/protos/__pycache__/hyperparams_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/hyperparams_pb2.cpython-35.pyc deleted file mode 100644 index be194d05..00000000 Binary files a/object_detection/protos/__pycache__/hyperparams_pb2.cpython-35.pyc and /dev/null differ diff --git a/object_detection/protos/__pycache__/image_resizer_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/image_resizer_pb2.cpython-35.pyc deleted file mode 100644 index cafe3f70..00000000 Binary files a/object_detection/protos/__pycache__/image_resizer_pb2.cpython-35.pyc and /dev/null differ diff --git a/object_detection/protos/__pycache__/input_reader_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/input_reader_pb2.cpython-35.pyc deleted file mode 100644 index c09e601b..00000000 Binary files a/object_detection/protos/__pycache__/input_reader_pb2.cpython-35.pyc and /dev/null differ diff --git a/object_detection/protos/__pycache__/keypoint_box_coder_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/keypoint_box_coder_pb2.cpython-35.pyc deleted file mode 100644 index 805f3829..00000000 Binary files a/object_detection/protos/__pycache__/keypoint_box_coder_pb2.cpython-35.pyc and /dev/null differ diff --git a/object_detection/protos/__pycache__/losses_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/losses_pb2.cpython-35.pyc deleted file mode 100644 index 030d1a3b..00000000 Binary files a/object_detection/protos/__pycache__/losses_pb2.cpython-35.pyc and /dev/null differ diff --git a/object_detection/protos/__pycache__/matcher_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/matcher_pb2.cpython-35.pyc deleted file mode 100644 index c7983bf9..00000000 Binary files a/object_detection/protos/__pycache__/matcher_pb2.cpython-35.pyc and /dev/null differ diff --git a/object_detection/protos/__pycache__/mean_stddev_box_coder_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/mean_stddev_box_coder_pb2.cpython-35.pyc deleted file mode 100644 
index 1154fa6b..00000000 Binary files a/object_detection/protos/__pycache__/mean_stddev_box_coder_pb2.cpython-35.pyc and /dev/null differ diff --git a/object_detection/protos/__pycache__/model_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/model_pb2.cpython-35.pyc deleted file mode 100644 index 80e7e0e4..00000000 Binary files a/object_detection/protos/__pycache__/model_pb2.cpython-35.pyc and /dev/null differ diff --git a/object_detection/protos/__pycache__/optimizer_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/optimizer_pb2.cpython-35.pyc deleted file mode 100644 index ce52d0f8..00000000 Binary files a/object_detection/protos/__pycache__/optimizer_pb2.cpython-35.pyc and /dev/null differ diff --git a/object_detection/protos/__pycache__/pipeline_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/pipeline_pb2.cpython-35.pyc deleted file mode 100644 index 3c1c062c..00000000 Binary files a/object_detection/protos/__pycache__/pipeline_pb2.cpython-35.pyc and /dev/null differ diff --git a/object_detection/protos/__pycache__/post_processing_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/post_processing_pb2.cpython-35.pyc deleted file mode 100644 index 9091ba12..00000000 Binary files a/object_detection/protos/__pycache__/post_processing_pb2.cpython-35.pyc and /dev/null differ diff --git a/object_detection/protos/__pycache__/preprocessor_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/preprocessor_pb2.cpython-35.pyc deleted file mode 100644 index dd4491c8..00000000 Binary files a/object_detection/protos/__pycache__/preprocessor_pb2.cpython-35.pyc and /dev/null differ diff --git a/object_detection/protos/__pycache__/region_similarity_calculator_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/region_similarity_calculator_pb2.cpython-35.pyc deleted file mode 100644 index bf9f25f2..00000000 Binary files a/object_detection/protos/__pycache__/region_similarity_calculator_pb2.cpython-35.pyc and /dev/null differ diff --git 
a/object_detection/protos/__pycache__/square_box_coder_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/square_box_coder_pb2.cpython-35.pyc deleted file mode 100644 index 1c38e2ee..00000000 Binary files a/object_detection/protos/__pycache__/square_box_coder_pb2.cpython-35.pyc and /dev/null differ diff --git a/object_detection/protos/__pycache__/ssd_anchor_generator_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/ssd_anchor_generator_pb2.cpython-35.pyc deleted file mode 100644 index 19f6c085..00000000 Binary files a/object_detection/protos/__pycache__/ssd_anchor_generator_pb2.cpython-35.pyc and /dev/null differ diff --git a/object_detection/protos/__pycache__/ssd_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/ssd_pb2.cpython-35.pyc deleted file mode 100644 index 9a4405d9..00000000 Binary files a/object_detection/protos/__pycache__/ssd_pb2.cpython-35.pyc and /dev/null differ diff --git a/object_detection/protos/__pycache__/string_int_label_map_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/string_int_label_map_pb2.cpython-35.pyc deleted file mode 100644 index 5c608546..00000000 Binary files a/object_detection/protos/__pycache__/string_int_label_map_pb2.cpython-35.pyc and /dev/null differ diff --git a/object_detection/protos/__pycache__/train_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/train_pb2.cpython-35.pyc deleted file mode 100644 index aa7ae131..00000000 Binary files a/object_detection/protos/__pycache__/train_pb2.cpython-35.pyc and /dev/null differ diff --git a/object_detection/protos/anchor_generator.proto b/object_detection/protos/anchor_generator.proto deleted file mode 100644 index 4b7b1d62..00000000 --- a/object_detection/protos/anchor_generator.proto +++ /dev/null @@ -1,15 +0,0 @@ -syntax = "proto2"; - -package object_detection.protos; - -import "object_detection/protos/grid_anchor_generator.proto"; -import "object_detection/protos/ssd_anchor_generator.proto"; - -// Configuration proto for the anchor 
generator to use in the object detection -// pipeline. See core/anchor_generator.py for details. -message AnchorGenerator { - oneof anchor_generator_oneof { - GridAnchorGenerator grid_anchor_generator = 1; - SsdAnchorGenerator ssd_anchor_generator = 2; - } -} diff --git a/object_detection/protos/anchor_generator_pb2.py b/object_detection/protos/anchor_generator_pb2.py deleted file mode 100644 index 748848a9..00000000 --- a/object_detection/protos/anchor_generator_pb2.py +++ /dev/null @@ -1,90 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: object_detection/protos/anchor_generator.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from object_detection.protos import grid_anchor_generator_pb2 as object__detection_dot_protos_dot_grid__anchor__generator__pb2 -from object_detection.protos import ssd_anchor_generator_pb2 as object__detection_dot_protos_dot_ssd__anchor__generator__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='object_detection/protos/anchor_generator.proto', - package='object_detection.protos', - syntax='proto2', - serialized_pb=_b('\n.object_detection/protos/anchor_generator.proto\x12\x17object_detection.protos\x1a\x33object_detection/protos/grid_anchor_generator.proto\x1a\x32object_detection/protos/ssd_anchor_generator.proto\"\xc7\x01\n\x0f\x41nchorGenerator\x12M\n\x15grid_anchor_generator\x18\x01 \x01(\x0b\x32,.object_detection.protos.GridAnchorGeneratorH\x00\x12K\n\x14ssd_anchor_generator\x18\x02 \x01(\x0b\x32+.object_detection.protos.SsdAnchorGeneratorH\x00\x42\x18\n\x16\x61nchor_generator_oneof') - , - 
dependencies=[object__detection_dot_protos_dot_grid__anchor__generator__pb2.DESCRIPTOR,object__detection_dot_protos_dot_ssd__anchor__generator__pb2.DESCRIPTOR,]) - - - - -_ANCHORGENERATOR = _descriptor.Descriptor( - name='AnchorGenerator', - full_name='object_detection.protos.AnchorGenerator', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='grid_anchor_generator', full_name='object_detection.protos.AnchorGenerator.grid_anchor_generator', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='ssd_anchor_generator', full_name='object_detection.protos.AnchorGenerator.ssd_anchor_generator', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name='anchor_generator_oneof', full_name='object_detection.protos.AnchorGenerator.anchor_generator_oneof', - index=0, containing_type=None, fields=[]), - ], - serialized_start=181, - serialized_end=380, -) - -_ANCHORGENERATOR.fields_by_name['grid_anchor_generator'].message_type = object__detection_dot_protos_dot_grid__anchor__generator__pb2._GRIDANCHORGENERATOR -_ANCHORGENERATOR.fields_by_name['ssd_anchor_generator'].message_type = object__detection_dot_protos_dot_ssd__anchor__generator__pb2._SSDANCHORGENERATOR -_ANCHORGENERATOR.oneofs_by_name['anchor_generator_oneof'].fields.append( - _ANCHORGENERATOR.fields_by_name['grid_anchor_generator']) 
-_ANCHORGENERATOR.fields_by_name['grid_anchor_generator'].containing_oneof = _ANCHORGENERATOR.oneofs_by_name['anchor_generator_oneof'] -_ANCHORGENERATOR.oneofs_by_name['anchor_generator_oneof'].fields.append( - _ANCHORGENERATOR.fields_by_name['ssd_anchor_generator']) -_ANCHORGENERATOR.fields_by_name['ssd_anchor_generator'].containing_oneof = _ANCHORGENERATOR.oneofs_by_name['anchor_generator_oneof'] -DESCRIPTOR.message_types_by_name['AnchorGenerator'] = _ANCHORGENERATOR -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -AnchorGenerator = _reflection.GeneratedProtocolMessageType('AnchorGenerator', (_message.Message,), dict( - DESCRIPTOR = _ANCHORGENERATOR, - __module__ = 'object_detection.protos.anchor_generator_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.AnchorGenerator) - )) -_sym_db.RegisterMessage(AnchorGenerator) - - -# @@protoc_insertion_point(module_scope) diff --git a/object_detection/protos/argmax_matcher.proto b/object_detection/protos/argmax_matcher.proto deleted file mode 100644 index 88c50318..00000000 --- a/object_detection/protos/argmax_matcher.proto +++ /dev/null @@ -1,25 +0,0 @@ -syntax = "proto2"; - -package object_detection.protos; - -// Configuration proto for ArgMaxMatcher. See -// matchers/argmax_matcher.py for details. -message ArgMaxMatcher { - // Threshold for positive matches. - optional float matched_threshold = 1 [default = 0.5]; - - // Threshold for negative matches. - optional float unmatched_threshold = 2 [default = 0.5]; - - // Whether to construct ArgMaxMatcher without thresholds. - optional bool ignore_thresholds = 3 [default = false]; - - // If True then negative matches are the ones below the unmatched_threshold, - // whereas ignored matches are in between the matched and umatched - // threshold. If False, then negative matches are in between the matched - // and unmatched threshold, and everything lower than unmatched is ignored. 
- optional bool negatives_lower_than_unmatched = 4 [default = true]; - - // Whether to ensure each row is matched to at least one column. - optional bool force_match_for_each_row = 5 [default = false]; -} diff --git a/object_detection/protos/argmax_matcher_pb2.py b/object_detection/protos/argmax_matcher_pb2.py deleted file mode 100644 index 8c78f3ab..00000000 --- a/object_detection/protos/argmax_matcher_pb2.py +++ /dev/null @@ -1,97 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: object_detection/protos/argmax_matcher.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='object_detection/protos/argmax_matcher.proto', - package='object_detection.protos', - syntax='proto2', - serialized_pb=_b('\n,object_detection/protos/argmax_matcher.proto\x12\x17object_detection.protos\"\xca\x01\n\rArgMaxMatcher\x12\x1e\n\x11matched_threshold\x18\x01 \x01(\x02:\x03\x30.5\x12 \n\x13unmatched_threshold\x18\x02 \x01(\x02:\x03\x30.5\x12 \n\x11ignore_thresholds\x18\x03 \x01(\x08:\x05\x66\x61lse\x12,\n\x1enegatives_lower_than_unmatched\x18\x04 \x01(\x08:\x04true\x12\'\n\x18\x66orce_match_for_each_row\x18\x05 \x01(\x08:\x05\x66\x61lse') -) - - - - -_ARGMAXMATCHER = _descriptor.Descriptor( - name='ArgMaxMatcher', - full_name='object_detection.protos.ArgMaxMatcher', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='matched_threshold', full_name='object_detection.protos.ArgMaxMatcher.matched_threshold', index=0, - number=1, type=2, cpp_type=6, label=1, - 
has_default_value=True, default_value=float(0.5), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='unmatched_threshold', full_name='object_detection.protos.ArgMaxMatcher.unmatched_threshold', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.5), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='ignore_thresholds', full_name='object_detection.protos.ArgMaxMatcher.ignore_thresholds', index=2, - number=3, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='negatives_lower_than_unmatched', full_name='object_detection.protos.ArgMaxMatcher.negatives_lower_than_unmatched', index=3, - number=4, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=True, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='force_match_for_each_row', full_name='object_detection.protos.ArgMaxMatcher.force_match_for_each_row', index=4, - number=5, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=74, - serialized_end=276, -) - -DESCRIPTOR.message_types_by_name['ArgMaxMatcher'] = _ARGMAXMATCHER 
-_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -ArgMaxMatcher = _reflection.GeneratedProtocolMessageType('ArgMaxMatcher', (_message.Message,), dict( - DESCRIPTOR = _ARGMAXMATCHER, - __module__ = 'object_detection.protos.argmax_matcher_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.ArgMaxMatcher) - )) -_sym_db.RegisterMessage(ArgMaxMatcher) - - -# @@protoc_insertion_point(module_scope) diff --git a/object_detection/protos/bipartite_matcher.proto b/object_detection/protos/bipartite_matcher.proto deleted file mode 100644 index 7e5a9e5c..00000000 --- a/object_detection/protos/bipartite_matcher.proto +++ /dev/null @@ -1,8 +0,0 @@ -syntax = "proto2"; - -package object_detection.protos; - -// Configuration proto for bipartite matcher. See -// matchers/bipartite_matcher.py for details. -message BipartiteMatcher { -} diff --git a/object_detection/protos/bipartite_matcher_pb2.py b/object_detection/protos/bipartite_matcher_pb2.py deleted file mode 100644 index dc258ec8..00000000 --- a/object_detection/protos/bipartite_matcher_pb2.py +++ /dev/null @@ -1,62 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: object_detection/protos/bipartite_matcher.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='object_detection/protos/bipartite_matcher.proto', - package='object_detection.protos', - syntax='proto2', - serialized_pb=_b('\n/object_detection/protos/bipartite_matcher.proto\x12\x17object_detection.protos\"\x12\n\x10\x42ipartiteMatcher') -) - - - - -_BIPARTITEMATCHER = _descriptor.Descriptor( - name='BipartiteMatcher', - full_name='object_detection.protos.BipartiteMatcher', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=76, - serialized_end=94, -) - -DESCRIPTOR.message_types_by_name['BipartiteMatcher'] = _BIPARTITEMATCHER -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -BipartiteMatcher = _reflection.GeneratedProtocolMessageType('BipartiteMatcher', (_message.Message,), dict( - DESCRIPTOR = _BIPARTITEMATCHER, - __module__ = 'object_detection.protos.bipartite_matcher_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.BipartiteMatcher) - )) -_sym_db.RegisterMessage(BipartiteMatcher) - - -# @@protoc_insertion_point(module_scope) diff --git a/object_detection/protos/box_coder.proto b/object_detection/protos/box_coder.proto deleted file mode 100644 index 79b81812..00000000 --- a/object_detection/protos/box_coder.proto +++ /dev/null @@ -1,19 +0,0 @@ -syntax = "proto2"; - -package object_detection.protos; - 
-import "object_detection/protos/faster_rcnn_box_coder.proto"; -import "object_detection/protos/keypoint_box_coder.proto"; -import "object_detection/protos/mean_stddev_box_coder.proto"; -import "object_detection/protos/square_box_coder.proto"; - -// Configuration proto for the box coder to be used in the object detection -// pipeline. See core/box_coder.py for details. -message BoxCoder { - oneof box_coder_oneof { - FasterRcnnBoxCoder faster_rcnn_box_coder = 1; - MeanStddevBoxCoder mean_stddev_box_coder = 2; - SquareBoxCoder square_box_coder = 3; - KeypointBoxCoder keypoint_box_coder = 4; - } -} diff --git a/object_detection/protos/box_coder_pb2.py b/object_detection/protos/box_coder_pb2.py deleted file mode 100644 index 6e91d095..00000000 --- a/object_detection/protos/box_coder_pb2.py +++ /dev/null @@ -1,114 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: object_detection/protos/box_coder.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from object_detection.protos import faster_rcnn_box_coder_pb2 as object__detection_dot_protos_dot_faster__rcnn__box__coder__pb2 -from object_detection.protos import keypoint_box_coder_pb2 as object__detection_dot_protos_dot_keypoint__box__coder__pb2 -from object_detection.protos import mean_stddev_box_coder_pb2 as object__detection_dot_protos_dot_mean__stddev__box__coder__pb2 -from object_detection.protos import square_box_coder_pb2 as object__detection_dot_protos_dot_square__box__coder__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='object_detection/protos/box_coder.proto', - 
package='object_detection.protos', - syntax='proto2', - serialized_pb=_b('\n\'object_detection/protos/box_coder.proto\x12\x17object_detection.protos\x1a\x33object_detection/protos/faster_rcnn_box_coder.proto\x1a\x30object_detection/protos/keypoint_box_coder.proto\x1a\x33object_detection/protos/mean_stddev_box_coder.proto\x1a.object_detection/protos/square_box_coder.proto\"\xc7\x02\n\x08\x42oxCoder\x12L\n\x15\x66\x61ster_rcnn_box_coder\x18\x01 \x01(\x0b\x32+.object_detection.protos.FasterRcnnBoxCoderH\x00\x12L\n\x15mean_stddev_box_coder\x18\x02 \x01(\x0b\x32+.object_detection.protos.MeanStddevBoxCoderH\x00\x12\x43\n\x10square_box_coder\x18\x03 \x01(\x0b\x32\'.object_detection.protos.SquareBoxCoderH\x00\x12G\n\x12keypoint_box_coder\x18\x04 \x01(\x0b\x32).object_detection.protos.KeypointBoxCoderH\x00\x42\x11\n\x0f\x62ox_coder_oneof') - , - dependencies=[object__detection_dot_protos_dot_faster__rcnn__box__coder__pb2.DESCRIPTOR,object__detection_dot_protos_dot_keypoint__box__coder__pb2.DESCRIPTOR,object__detection_dot_protos_dot_mean__stddev__box__coder__pb2.DESCRIPTOR,object__detection_dot_protos_dot_square__box__coder__pb2.DESCRIPTOR,]) - - - - -_BOXCODER = _descriptor.Descriptor( - name='BoxCoder', - full_name='object_detection.protos.BoxCoder', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='faster_rcnn_box_coder', full_name='object_detection.protos.BoxCoder.faster_rcnn_box_coder', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='mean_stddev_box_coder', full_name='object_detection.protos.BoxCoder.mean_stddev_box_coder', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - 
is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='square_box_coder', full_name='object_detection.protos.BoxCoder.square_box_coder', index=2, - number=3, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='keypoint_box_coder', full_name='object_detection.protos.BoxCoder.keypoint_box_coder', index=3, - number=4, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name='box_coder_oneof', full_name='object_detection.protos.BoxCoder.box_coder_oneof', - index=0, containing_type=None, fields=[]), - ], - serialized_start=273, - serialized_end=600, -) - -_BOXCODER.fields_by_name['faster_rcnn_box_coder'].message_type = object__detection_dot_protos_dot_faster__rcnn__box__coder__pb2._FASTERRCNNBOXCODER -_BOXCODER.fields_by_name['mean_stddev_box_coder'].message_type = object__detection_dot_protos_dot_mean__stddev__box__coder__pb2._MEANSTDDEVBOXCODER -_BOXCODER.fields_by_name['square_box_coder'].message_type = object__detection_dot_protos_dot_square__box__coder__pb2._SQUAREBOXCODER -_BOXCODER.fields_by_name['keypoint_box_coder'].message_type = object__detection_dot_protos_dot_keypoint__box__coder__pb2._KEYPOINTBOXCODER -_BOXCODER.oneofs_by_name['box_coder_oneof'].fields.append( - _BOXCODER.fields_by_name['faster_rcnn_box_coder']) -_BOXCODER.fields_by_name['faster_rcnn_box_coder'].containing_oneof = _BOXCODER.oneofs_by_name['box_coder_oneof'] 
-_BOXCODER.oneofs_by_name['box_coder_oneof'].fields.append( - _BOXCODER.fields_by_name['mean_stddev_box_coder']) -_BOXCODER.fields_by_name['mean_stddev_box_coder'].containing_oneof = _BOXCODER.oneofs_by_name['box_coder_oneof'] -_BOXCODER.oneofs_by_name['box_coder_oneof'].fields.append( - _BOXCODER.fields_by_name['square_box_coder']) -_BOXCODER.fields_by_name['square_box_coder'].containing_oneof = _BOXCODER.oneofs_by_name['box_coder_oneof'] -_BOXCODER.oneofs_by_name['box_coder_oneof'].fields.append( - _BOXCODER.fields_by_name['keypoint_box_coder']) -_BOXCODER.fields_by_name['keypoint_box_coder'].containing_oneof = _BOXCODER.oneofs_by_name['box_coder_oneof'] -DESCRIPTOR.message_types_by_name['BoxCoder'] = _BOXCODER -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -BoxCoder = _reflection.GeneratedProtocolMessageType('BoxCoder', (_message.Message,), dict( - DESCRIPTOR = _BOXCODER, - __module__ = 'object_detection.protos.box_coder_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.BoxCoder) - )) -_sym_db.RegisterMessage(BoxCoder) - - -# @@protoc_insertion_point(module_scope) diff --git a/object_detection/protos/box_predictor.proto b/object_detection/protos/box_predictor.proto deleted file mode 100644 index 4aa445cc..00000000 --- a/object_detection/protos/box_predictor.proto +++ /dev/null @@ -1,101 +0,0 @@ -syntax = "proto2"; - -package object_detection.protos; - -import "object_detection/protos/hyperparams.proto"; - - -// Configuration proto for box predictor. See core/box_predictor.py for details. -message BoxPredictor { - oneof box_predictor_oneof { - ConvolutionalBoxPredictor convolutional_box_predictor = 1; - MaskRCNNBoxPredictor mask_rcnn_box_predictor = 2; - RfcnBoxPredictor rfcn_box_predictor = 3; - } -} - -// Configuration proto for Convolutional box predictor. -message ConvolutionalBoxPredictor { - // Hyperparameters for convolution ops used in the box predictor. 
- optional Hyperparams conv_hyperparams = 1; - - // Minumum feature depth prior to predicting box encodings and class - // predictions. - optional int32 min_depth = 2 [default = 0]; - - // Maximum feature depth prior to predicting box encodings and class - // predictions. If max_depth is set to 0, no additional feature map will be - // inserted before location and class predictions. - optional int32 max_depth = 3 [default = 0]; - - // Number of the additional conv layers before the predictor. - optional int32 num_layers_before_predictor = 4 [default = 0]; - - // Whether to use dropout for class prediction. - optional bool use_dropout = 5 [default = true]; - - // Keep probability for dropout - optional float dropout_keep_probability = 6 [default = 0.8]; - - // Size of final convolution kernel. If the spatial resolution of the feature - // map is smaller than the kernel size, then the kernel size is set to - // min(feature_width, feature_height). - optional int32 kernel_size = 7 [default = 1]; - - // Size of the encoding for boxes. - optional int32 box_code_size = 8 [default = 4]; - - // Whether to apply sigmoid to the output of class predictions. - // TODO: Do we need this since we have a post processing module.? - optional bool apply_sigmoid_to_scores = 9 [default = false]; - - optional float class_prediction_bias_init = 10 [default = 0.0]; -} - -message MaskRCNNBoxPredictor { - // Hyperparameters for fully connected ops used in the box predictor. - optional Hyperparams fc_hyperparams = 1; - - // Whether to use dropout op prior to the both box and class predictions. - optional bool use_dropout = 2 [default= false]; - - // Keep probability for dropout. This is only used if use_dropout is true. - optional float dropout_keep_probability = 3 [default = 0.5]; - - // Size of the encoding for the boxes. - optional int32 box_code_size = 4 [default = 4]; - - // Hyperparameters for convolution ops used in the box predictor. 
- optional Hyperparams conv_hyperparams = 5; - - // Whether to predict instance masks inside detection boxes. - optional bool predict_instance_masks = 6 [default = false]; - - // The depth for the first conv2d_transpose op applied to the - // image_features in the mask prediciton branch - optional int32 mask_prediction_conv_depth = 7 [default = 256]; - - // Whether to predict keypoints inside detection boxes. - optional bool predict_keypoints = 8 [default = false]; -} - -message RfcnBoxPredictor { - // Hyperparameters for convolution ops used in the box predictor. - optional Hyperparams conv_hyperparams = 1; - - // Bin sizes for RFCN crops. - optional int32 num_spatial_bins_height = 2 [default = 3]; - - optional int32 num_spatial_bins_width = 3 [default = 3]; - - // Target depth to reduce the input image features to. - optional int32 depth = 4 [default=1024]; - - // Size of the encoding for the boxes. - optional int32 box_code_size = 5 [default = 4]; - - // Size to resize the rfcn crops to. - optional int32 crop_height = 6 [default= 12]; - - optional int32 crop_width = 7 [default=12]; -} diff --git a/object_detection/protos/box_predictor_pb2.py b/object_detection/protos/box_predictor_pb2.py deleted file mode 100644 index 79525e47..00000000 --- a/object_detection/protos/box_predictor_pb2.py +++ /dev/null @@ -1,375 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: object_detection/protos/box_predictor.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from object_detection.protos import hyperparams_pb2 as object__detection_dot_protos_dot_hyperparams__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='object_detection/protos/box_predictor.proto', - package='object_detection.protos', - syntax='proto2', - serialized_pb=_b('\n+object_detection/protos/box_predictor.proto\x12\x17object_detection.protos\x1a)object_detection/protos/hyperparams.proto\"\x9b\x02\n\x0c\x42oxPredictor\x12Y\n\x1b\x63onvolutional_box_predictor\x18\x01 \x01(\x0b\x32\x32.object_detection.protos.ConvolutionalBoxPredictorH\x00\x12P\n\x17mask_rcnn_box_predictor\x18\x02 \x01(\x0b\x32-.object_detection.protos.MaskRCNNBoxPredictorH\x00\x12G\n\x12rfcn_box_predictor\x18\x03 \x01(\x0b\x32).object_detection.protos.RfcnBoxPredictorH\x00\x42\x15\n\x13\x62ox_predictor_oneof\"\xf2\x02\n\x19\x43onvolutionalBoxPredictor\x12>\n\x10\x63onv_hyperparams\x18\x01 \x01(\x0b\x32$.object_detection.protos.Hyperparams\x12\x14\n\tmin_depth\x18\x02 \x01(\x05:\x01\x30\x12\x14\n\tmax_depth\x18\x03 \x01(\x05:\x01\x30\x12&\n\x1bnum_layers_before_predictor\x18\x04 \x01(\x05:\x01\x30\x12\x19\n\x0buse_dropout\x18\x05 \x01(\x08:\x04true\x12%\n\x18\x64ropout_keep_probability\x18\x06 \x01(\x02:\x03\x30.8\x12\x16\n\x0bkernel_size\x18\x07 \x01(\x05:\x01\x31\x12\x18\n\rbox_code_size\x18\x08 \x01(\x05:\x01\x34\x12&\n\x17\x61pply_sigmoid_to_scores\x18\t \x01(\x08:\x05\x66\x61lse\x12%\n\x1a\x63lass_prediction_bias_init\x18\n 
\x01(\x02:\x01\x30\"\xe3\x02\n\x14MaskRCNNBoxPredictor\x12<\n\x0e\x66\x63_hyperparams\x18\x01 \x01(\x0b\x32$.object_detection.protos.Hyperparams\x12\x1a\n\x0buse_dropout\x18\x02 \x01(\x08:\x05\x66\x61lse\x12%\n\x18\x64ropout_keep_probability\x18\x03 \x01(\x02:\x03\x30.5\x12\x18\n\rbox_code_size\x18\x04 \x01(\x05:\x01\x34\x12>\n\x10\x63onv_hyperparams\x18\x05 \x01(\x0b\x32$.object_detection.protos.Hyperparams\x12%\n\x16predict_instance_masks\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\'\n\x1amask_prediction_conv_depth\x18\x07 \x01(\x05:\x03\x32\x35\x36\x12 \n\x11predict_keypoints\x18\x08 \x01(\x08:\x05\x66\x61lse\"\xf9\x01\n\x10RfcnBoxPredictor\x12>\n\x10\x63onv_hyperparams\x18\x01 \x01(\x0b\x32$.object_detection.protos.Hyperparams\x12\"\n\x17num_spatial_bins_height\x18\x02 \x01(\x05:\x01\x33\x12!\n\x16num_spatial_bins_width\x18\x03 \x01(\x05:\x01\x33\x12\x13\n\x05\x64\x65pth\x18\x04 \x01(\x05:\x04\x31\x30\x32\x34\x12\x18\n\rbox_code_size\x18\x05 \x01(\x05:\x01\x34\x12\x17\n\x0b\x63rop_height\x18\x06 \x01(\x05:\x02\x31\x32\x12\x16\n\ncrop_width\x18\x07 \x01(\x05:\x02\x31\x32') - , - dependencies=[object__detection_dot_protos_dot_hyperparams__pb2.DESCRIPTOR,]) - - - - -_BOXPREDICTOR = _descriptor.Descriptor( - name='BoxPredictor', - full_name='object_detection.protos.BoxPredictor', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='convolutional_box_predictor', full_name='object_detection.protos.BoxPredictor.convolutional_box_predictor', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='mask_rcnn_box_predictor', full_name='object_detection.protos.BoxPredictor.mask_rcnn_box_predictor', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - 
message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='rfcn_box_predictor', full_name='object_detection.protos.BoxPredictor.rfcn_box_predictor', index=2, - number=3, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name='box_predictor_oneof', full_name='object_detection.protos.BoxPredictor.box_predictor_oneof', - index=0, containing_type=None, fields=[]), - ], - serialized_start=116, - serialized_end=399, -) - - -_CONVOLUTIONALBOXPREDICTOR = _descriptor.Descriptor( - name='ConvolutionalBoxPredictor', - full_name='object_detection.protos.ConvolutionalBoxPredictor', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='conv_hyperparams', full_name='object_detection.protos.ConvolutionalBoxPredictor.conv_hyperparams', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='min_depth', full_name='object_detection.protos.ConvolutionalBoxPredictor.min_depth', index=1, - number=2, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='max_depth', full_name='object_detection.protos.ConvolutionalBoxPredictor.max_depth', index=2, - number=3, type=5, 
cpp_type=1, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='num_layers_before_predictor', full_name='object_detection.protos.ConvolutionalBoxPredictor.num_layers_before_predictor', index=3, - number=4, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='use_dropout', full_name='object_detection.protos.ConvolutionalBoxPredictor.use_dropout', index=4, - number=5, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=True, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='dropout_keep_probability', full_name='object_detection.protos.ConvolutionalBoxPredictor.dropout_keep_probability', index=5, - number=6, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.8), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='kernel_size', full_name='object_detection.protos.ConvolutionalBoxPredictor.kernel_size', index=6, - number=7, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=1, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='box_code_size', full_name='object_detection.protos.ConvolutionalBoxPredictor.box_code_size', index=7, - number=8, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=4, - message_type=None, enum_type=None, containing_type=None, - 
is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='apply_sigmoid_to_scores', full_name='object_detection.protos.ConvolutionalBoxPredictor.apply_sigmoid_to_scores', index=8, - number=9, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='class_prediction_bias_init', full_name='object_detection.protos.ConvolutionalBoxPredictor.class_prediction_bias_init', index=9, - number=10, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=402, - serialized_end=772, -) - - -_MASKRCNNBOXPREDICTOR = _descriptor.Descriptor( - name='MaskRCNNBoxPredictor', - full_name='object_detection.protos.MaskRCNNBoxPredictor', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='fc_hyperparams', full_name='object_detection.protos.MaskRCNNBoxPredictor.fc_hyperparams', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='use_dropout', full_name='object_detection.protos.MaskRCNNBoxPredictor.use_dropout', index=1, - number=2, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, 
file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='dropout_keep_probability', full_name='object_detection.protos.MaskRCNNBoxPredictor.dropout_keep_probability', index=2, - number=3, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.5), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='box_code_size', full_name='object_detection.protos.MaskRCNNBoxPredictor.box_code_size', index=3, - number=4, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=4, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='conv_hyperparams', full_name='object_detection.protos.MaskRCNNBoxPredictor.conv_hyperparams', index=4, - number=5, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='predict_instance_masks', full_name='object_detection.protos.MaskRCNNBoxPredictor.predict_instance_masks', index=5, - number=6, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='mask_prediction_conv_depth', full_name='object_detection.protos.MaskRCNNBoxPredictor.mask_prediction_conv_depth', index=6, - number=7, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=256, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='predict_keypoints', 
full_name='object_detection.protos.MaskRCNNBoxPredictor.predict_keypoints', index=7, - number=8, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=775, - serialized_end=1130, -) - - -_RFCNBOXPREDICTOR = _descriptor.Descriptor( - name='RfcnBoxPredictor', - full_name='object_detection.protos.RfcnBoxPredictor', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='conv_hyperparams', full_name='object_detection.protos.RfcnBoxPredictor.conv_hyperparams', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='num_spatial_bins_height', full_name='object_detection.protos.RfcnBoxPredictor.num_spatial_bins_height', index=1, - number=2, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=3, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='num_spatial_bins_width', full_name='object_detection.protos.RfcnBoxPredictor.num_spatial_bins_width', index=2, - number=3, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=3, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='depth', full_name='object_detection.protos.RfcnBoxPredictor.depth', index=3, - number=4, type=5, cpp_type=1, label=1, - 
has_default_value=True, default_value=1024, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='box_code_size', full_name='object_detection.protos.RfcnBoxPredictor.box_code_size', index=4, - number=5, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=4, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='crop_height', full_name='object_detection.protos.RfcnBoxPredictor.crop_height', index=5, - number=6, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=12, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='crop_width', full_name='object_detection.protos.RfcnBoxPredictor.crop_width', index=6, - number=7, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=12, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1133, - serialized_end=1382, -) - -_BOXPREDICTOR.fields_by_name['convolutional_box_predictor'].message_type = _CONVOLUTIONALBOXPREDICTOR -_BOXPREDICTOR.fields_by_name['mask_rcnn_box_predictor'].message_type = _MASKRCNNBOXPREDICTOR -_BOXPREDICTOR.fields_by_name['rfcn_box_predictor'].message_type = _RFCNBOXPREDICTOR -_BOXPREDICTOR.oneofs_by_name['box_predictor_oneof'].fields.append( - _BOXPREDICTOR.fields_by_name['convolutional_box_predictor']) -_BOXPREDICTOR.fields_by_name['convolutional_box_predictor'].containing_oneof = _BOXPREDICTOR.oneofs_by_name['box_predictor_oneof'] 
-_BOXPREDICTOR.oneofs_by_name['box_predictor_oneof'].fields.append( - _BOXPREDICTOR.fields_by_name['mask_rcnn_box_predictor']) -_BOXPREDICTOR.fields_by_name['mask_rcnn_box_predictor'].containing_oneof = _BOXPREDICTOR.oneofs_by_name['box_predictor_oneof'] -_BOXPREDICTOR.oneofs_by_name['box_predictor_oneof'].fields.append( - _BOXPREDICTOR.fields_by_name['rfcn_box_predictor']) -_BOXPREDICTOR.fields_by_name['rfcn_box_predictor'].containing_oneof = _BOXPREDICTOR.oneofs_by_name['box_predictor_oneof'] -_CONVOLUTIONALBOXPREDICTOR.fields_by_name['conv_hyperparams'].message_type = object__detection_dot_protos_dot_hyperparams__pb2._HYPERPARAMS -_MASKRCNNBOXPREDICTOR.fields_by_name['fc_hyperparams'].message_type = object__detection_dot_protos_dot_hyperparams__pb2._HYPERPARAMS -_MASKRCNNBOXPREDICTOR.fields_by_name['conv_hyperparams'].message_type = object__detection_dot_protos_dot_hyperparams__pb2._HYPERPARAMS -_RFCNBOXPREDICTOR.fields_by_name['conv_hyperparams'].message_type = object__detection_dot_protos_dot_hyperparams__pb2._HYPERPARAMS -DESCRIPTOR.message_types_by_name['BoxPredictor'] = _BOXPREDICTOR -DESCRIPTOR.message_types_by_name['ConvolutionalBoxPredictor'] = _CONVOLUTIONALBOXPREDICTOR -DESCRIPTOR.message_types_by_name['MaskRCNNBoxPredictor'] = _MASKRCNNBOXPREDICTOR -DESCRIPTOR.message_types_by_name['RfcnBoxPredictor'] = _RFCNBOXPREDICTOR -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -BoxPredictor = _reflection.GeneratedProtocolMessageType('BoxPredictor', (_message.Message,), dict( - DESCRIPTOR = _BOXPREDICTOR, - __module__ = 'object_detection.protos.box_predictor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.BoxPredictor) - )) -_sym_db.RegisterMessage(BoxPredictor) - -ConvolutionalBoxPredictor = _reflection.GeneratedProtocolMessageType('ConvolutionalBoxPredictor', (_message.Message,), dict( - DESCRIPTOR = _CONVOLUTIONALBOXPREDICTOR, - __module__ = 'object_detection.protos.box_predictor_pb2' - # 
@@protoc_insertion_point(class_scope:object_detection.protos.ConvolutionalBoxPredictor) - )) -_sym_db.RegisterMessage(ConvolutionalBoxPredictor) - -MaskRCNNBoxPredictor = _reflection.GeneratedProtocolMessageType('MaskRCNNBoxPredictor', (_message.Message,), dict( - DESCRIPTOR = _MASKRCNNBOXPREDICTOR, - __module__ = 'object_detection.protos.box_predictor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.MaskRCNNBoxPredictor) - )) -_sym_db.RegisterMessage(MaskRCNNBoxPredictor) - -RfcnBoxPredictor = _reflection.GeneratedProtocolMessageType('RfcnBoxPredictor', (_message.Message,), dict( - DESCRIPTOR = _RFCNBOXPREDICTOR, - __module__ = 'object_detection.protos.box_predictor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.RfcnBoxPredictor) - )) -_sym_db.RegisterMessage(RfcnBoxPredictor) - - -# @@protoc_insertion_point(module_scope) diff --git a/object_detection/protos/eval.proto b/object_detection/protos/eval.proto deleted file mode 100644 index c5a30ec6..00000000 --- a/object_detection/protos/eval.proto +++ /dev/null @@ -1,47 +0,0 @@ -syntax = "proto2"; - -package object_detection.protos; - -// Message for configuring DetectionModel evaluation jobs (eval.py). -message EvalConfig { - // Number of visualization images to generate. - optional uint32 num_visualizations = 1 [default=10]; - - // Number of examples to process of evaluation. - optional uint32 num_examples = 2 [default=5000]; - - // How often to run evaluation. - optional uint32 eval_interval_secs = 3 [default=300]; - - // Maximum number of times to run evaluation. If set to 0, will run forever. - optional uint32 max_evals = 4 [default=0]; - - // Whether the TensorFlow graph used for evaluation should be saved to disk. - optional bool save_graph = 5 [default=false]; - - // Path to directory to store visualizations in. If empty, visualization - // images are not exported (only shown on Tensorboard). 
- optional string visualization_export_dir = 6 [default=""]; - - // BNS name of the TensorFlow master. - optional string eval_master = 7 [default=""]; - - // Type of metrics to use for evaluation. Currently supports only Pascal VOC - // detection metrics. - optional string metrics_set = 8 [default="pascal_voc_metrics"]; - - // Path to export detections to COCO compatible JSON format. - optional string export_path = 9 [default='']; - - // Option to not read groundtruth labels and only export detections to - // COCO-compatible JSON file. - optional bool ignore_groundtruth = 10 [default=false]; - - // Use exponential moving averages of variables for evaluation. - optional bool use_moving_averages = 11 [default=false]; - - // Whether to evaluate instance masks. - // Note that since there is no evaluation code currently for instance - // segmenation this option is unused. - optional bool eval_instance_masks = 12 [default=false]; -} diff --git a/object_detection/protos/eval_pb2.py b/object_detection/protos/eval_pb2.py deleted file mode 100644 index 9c33244a..00000000 --- a/object_detection/protos/eval_pb2.py +++ /dev/null @@ -1,146 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: object_detection/protos/eval.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='object_detection/protos/eval.proto', - package='object_detection.protos', - syntax='proto2', - serialized_pb=_b('\n\"object_detection/protos/eval.proto\x12\x17object_detection.protos\"\x80\x03\n\nEvalConfig\x12\x1e\n\x12num_visualizations\x18\x01 \x01(\r:\x02\x31\x30\x12\x1a\n\x0cnum_examples\x18\x02 \x01(\r:\x04\x35\x30\x30\x30\x12\x1f\n\x12\x65val_interval_secs\x18\x03 \x01(\r:\x03\x33\x30\x30\x12\x14\n\tmax_evals\x18\x04 \x01(\r:\x01\x30\x12\x19\n\nsave_graph\x18\x05 \x01(\x08:\x05\x66\x61lse\x12\"\n\x18visualization_export_dir\x18\x06 \x01(\t:\x00\x12\x15\n\x0b\x65val_master\x18\x07 \x01(\t:\x00\x12\'\n\x0bmetrics_set\x18\x08 \x01(\t:\x12pascal_voc_metrics\x12\x15\n\x0b\x65xport_path\x18\t \x01(\t:\x00\x12!\n\x12ignore_groundtruth\x18\n \x01(\x08:\x05\x66\x61lse\x12\"\n\x13use_moving_averages\x18\x0b \x01(\x08:\x05\x66\x61lse\x12\"\n\x13\x65val_instance_masks\x18\x0c \x01(\x08:\x05\x66\x61lse') -) - - - - -_EVALCONFIG = _descriptor.Descriptor( - name='EvalConfig', - full_name='object_detection.protos.EvalConfig', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='num_visualizations', full_name='object_detection.protos.EvalConfig.num_visualizations', index=0, - number=1, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=10, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, 
file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='num_examples', full_name='object_detection.protos.EvalConfig.num_examples', index=1, - number=2, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=5000, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='eval_interval_secs', full_name='object_detection.protos.EvalConfig.eval_interval_secs', index=2, - number=3, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=300, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='max_evals', full_name='object_detection.protos.EvalConfig.max_evals', index=3, - number=4, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='save_graph', full_name='object_detection.protos.EvalConfig.save_graph', index=4, - number=5, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='visualization_export_dir', full_name='object_detection.protos.EvalConfig.visualization_export_dir', index=5, - number=6, type=9, cpp_type=9, label=1, - has_default_value=True, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='eval_master', full_name='object_detection.protos.EvalConfig.eval_master', index=6, - number=7, type=9, cpp_type=9, label=1, - has_default_value=True, 
default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='metrics_set', full_name='object_detection.protos.EvalConfig.metrics_set', index=7, - number=8, type=9, cpp_type=9, label=1, - has_default_value=True, default_value=_b("pascal_voc_metrics").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='export_path', full_name='object_detection.protos.EvalConfig.export_path', index=8, - number=9, type=9, cpp_type=9, label=1, - has_default_value=True, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='ignore_groundtruth', full_name='object_detection.protos.EvalConfig.ignore_groundtruth', index=9, - number=10, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='use_moving_averages', full_name='object_detection.protos.EvalConfig.use_moving_averages', index=10, - number=11, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='eval_instance_masks', full_name='object_detection.protos.EvalConfig.eval_instance_masks', index=11, - number=12, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, 
file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=64, - serialized_end=448, -) - -DESCRIPTOR.message_types_by_name['EvalConfig'] = _EVALCONFIG -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -EvalConfig = _reflection.GeneratedProtocolMessageType('EvalConfig', (_message.Message,), dict( - DESCRIPTOR = _EVALCONFIG, - __module__ = 'object_detection.protos.eval_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.EvalConfig) - )) -_sym_db.RegisterMessage(EvalConfig) - - -# @@protoc_insertion_point(module_scope) diff --git a/object_detection/protos/faster_rcnn.proto b/object_detection/protos/faster_rcnn.proto deleted file mode 100644 index 20c859e2..00000000 --- a/object_detection/protos/faster_rcnn.proto +++ /dev/null @@ -1,149 +0,0 @@ -syntax = "proto2"; - -package object_detection.protos; - -import "object_detection/protos/anchor_generator.proto"; -import "object_detection/protos/box_predictor.proto"; -import "object_detection/protos/hyperparams.proto"; -import "object_detection/protos/image_resizer.proto"; -import "object_detection/protos/losses.proto"; -import "object_detection/protos/post_processing.proto"; - -// Configuration for Faster R-CNN models. -// See meta_architectures/faster_rcnn_meta_arch.py and models/model_builder.py -// -// Naming conventions: -// Faster R-CNN models have two stages: a first stage region proposal network -// (or RPN) and a second stage box classifier. We thus use the prefixes -// `first_stage_` and `second_stage_` to indicate the stage to which each -// parameter pertains when relevant. -message FasterRcnn { - - // Whether to construct only the Region Proposal Network (RPN). - optional bool first_stage_only = 1 [default=false]; - - // Number of classes to predict. - optional int32 num_classes = 3; - - // Image resizer for preprocessing the input image. 
- optional ImageResizer image_resizer = 4; - - // Feature extractor config. - optional FasterRcnnFeatureExtractor feature_extractor = 5; - - - // (First stage) region proposal network (RPN) parameters. - - // Anchor generator to compute RPN anchors. - optional AnchorGenerator first_stage_anchor_generator = 6; - - // Atrous rate for the convolution op applied to the - // `first_stage_features_to_crop` tensor to obtain box predictions. - optional int32 first_stage_atrous_rate = 7 [default=1]; - - // Hyperparameters for the convolutional RPN box predictor. - optional Hyperparams first_stage_box_predictor_conv_hyperparams = 8; - - // Kernel size to use for the convolution op just prior to RPN box - // predictions. - optional int32 first_stage_box_predictor_kernel_size = 9 [default=3]; - - // Output depth for the convolution op just prior to RPN box predictions. - optional int32 first_stage_box_predictor_depth = 10 [default=512]; - - // The batch size to use for computing the first stage objectness and - // location losses. - optional int32 first_stage_minibatch_size = 11 [default=256]; - - // Fraction of positive examples per image for the RPN. - optional float first_stage_positive_balance_fraction = 12 [default=0.5]; - - // Non max suppression score threshold applied to first stage RPN proposals. - optional float first_stage_nms_score_threshold = 13 [default=0.0]; - - // Non max suppression IOU threshold applied to first stage RPN proposals. - optional float first_stage_nms_iou_threshold = 14 [default=0.7]; - - // Maximum number of RPN proposals retained after first stage postprocessing. - optional int32 first_stage_max_proposals = 15 [default=300]; - - // First stage RPN localization loss weight. - optional float first_stage_localization_loss_weight = 16 [default=1.0]; - - // First stage RPN objectness loss weight. - optional float first_stage_objectness_loss_weight = 17 [default=1.0]; - - - // Per-region cropping parameters. 
- // Note that if a R-FCN model is constructed the per region cropping - // parameters below are ignored. - - // Output size (width and height are set to be the same) of the initial - // bilinear interpolation based cropping during ROI pooling. - optional int32 initial_crop_size = 18; - - // Kernel size of the max pool op on the cropped feature map during - // ROI pooling. - optional int32 maxpool_kernel_size = 19; - - // Stride of the max pool op on the cropped feature map during ROI pooling. - optional int32 maxpool_stride = 20; - - - // (Second stage) box classifier parameters - - // Hyperparameters for the second stage box predictor. If box predictor type - // is set to rfcn_box_predictor, a R-FCN model is constructed, otherwise a - // Faster R-CNN model is constructed. - optional BoxPredictor second_stage_box_predictor = 21; - - // The batch size per image used for computing the classification and refined - // location loss of the box classifier. - // Note that this field is ignored if `hard_example_miner` is configured. - optional int32 second_stage_batch_size = 22 [default=64]; - - // Fraction of positive examples to use per image for the box classifier. - optional float second_stage_balance_fraction = 23 [default=0.25]; - - // Post processing to apply on the second stage box classifier predictions. - // Note: the `score_converter` provided to the FasterRCNNMetaArch constructor - // is taken from this `second_stage_post_processing` proto. - optional PostProcessing second_stage_post_processing = 24; - - // Second stage refined localization loss weight. - optional float second_stage_localization_loss_weight = 25 [default=1.0]; - - // Second stage classification loss weight - optional float second_stage_classification_loss_weight = 26 [default=1.0]; - - // Second stage instance mask loss weight. Note that this is only applicable - // when `MaskRCNNBoxPredictor` is selected for second stage and configured to - // predict instance masks. 
- optional float second_stage_mask_prediction_loss_weight = 27 [default=1.0]; - - // If not left to default, applies hard example mining only to classification - // and localization loss.. - optional HardExampleMiner hard_example_miner = 28; - - // Loss for second stage box classifers, supports Softmax and Sigmoid. - // Note that score converter must be consistent with loss type. - // When there are multiple labels assigned to the same boxes, recommend - // to use sigmoid loss and enable merge_multiple_label_boxes. - // If not specified, Softmax loss is used as default. - optional ClassificationLoss second_stage_classification_loss = 29; -} - - -message FasterRcnnFeatureExtractor { - // Type of Faster R-CNN model (e.g., 'faster_rcnn_resnet101'; - // See builders/model_builder.py for expected types). - optional string type = 1; - - // Output stride of extracted RPN feature map. - optional int32 first_stage_features_stride = 2 [default=16]; - - // Whether to update batch norm parameters during training or not. - // When training with a relative large batch size (e.g. 8), it could be - // desirable to enable batch norm update. - optional bool batch_norm_trainable = 3 [default=false]; -} diff --git a/object_detection/protos/faster_rcnn_box_coder.proto b/object_detection/protos/faster_rcnn_box_coder.proto deleted file mode 100644 index 512a20a1..00000000 --- a/object_detection/protos/faster_rcnn_box_coder.proto +++ /dev/null @@ -1,17 +0,0 @@ -syntax = "proto2"; - -package object_detection.protos; - -// Configuration proto for FasterRCNNBoxCoder. See -// box_coders/faster_rcnn_box_coder.py for details. -message FasterRcnnBoxCoder { - // Scale factor for anchor encoded box center. - optional float y_scale = 1 [default = 10.0]; - optional float x_scale = 2 [default = 10.0]; - - // Scale factor for anchor encoded box height. - optional float height_scale = 3 [default = 5.0]; - - // Scale factor for anchor encoded box width. 
- optional float width_scale = 4 [default = 5.0]; -} diff --git a/object_detection/protos/faster_rcnn_box_coder_pb2.py b/object_detection/protos/faster_rcnn_box_coder_pb2.py deleted file mode 100644 index 6faee77d..00000000 --- a/object_detection/protos/faster_rcnn_box_coder_pb2.py +++ /dev/null @@ -1,90 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: object_detection/protos/faster_rcnn_box_coder.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='object_detection/protos/faster_rcnn_box_coder.proto', - package='object_detection.protos', - syntax='proto2', - serialized_pb=_b('\n3object_detection/protos/faster_rcnn_box_coder.proto\x12\x17object_detection.protos\"o\n\x12\x46\x61sterRcnnBoxCoder\x12\x13\n\x07y_scale\x18\x01 \x01(\x02:\x02\x31\x30\x12\x13\n\x07x_scale\x18\x02 \x01(\x02:\x02\x31\x30\x12\x17\n\x0cheight_scale\x18\x03 \x01(\x02:\x01\x35\x12\x16\n\x0bwidth_scale\x18\x04 \x01(\x02:\x01\x35') -) - - - - -_FASTERRCNNBOXCODER = _descriptor.Descriptor( - name='FasterRcnnBoxCoder', - full_name='object_detection.protos.FasterRcnnBoxCoder', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='y_scale', full_name='object_detection.protos.FasterRcnnBoxCoder.y_scale', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(10), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - 
name='x_scale', full_name='object_detection.protos.FasterRcnnBoxCoder.x_scale', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(10), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='height_scale', full_name='object_detection.protos.FasterRcnnBoxCoder.height_scale', index=2, - number=3, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(5), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='width_scale', full_name='object_detection.protos.FasterRcnnBoxCoder.width_scale', index=3, - number=4, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(5), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=80, - serialized_end=191, -) - -DESCRIPTOR.message_types_by_name['FasterRcnnBoxCoder'] = _FASTERRCNNBOXCODER -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -FasterRcnnBoxCoder = _reflection.GeneratedProtocolMessageType('FasterRcnnBoxCoder', (_message.Message,), dict( - DESCRIPTOR = _FASTERRCNNBOXCODER, - __module__ = 'object_detection.protos.faster_rcnn_box_coder_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.FasterRcnnBoxCoder) - )) -_sym_db.RegisterMessage(FasterRcnnBoxCoder) - - -# @@protoc_insertion_point(module_scope) diff --git a/object_detection/protos/faster_rcnn_pb2.py b/object_detection/protos/faster_rcnn_pb2.py deleted file mode 100644 index 2e98f9f9..00000000 --- a/object_detection/protos/faster_rcnn_pb2.py +++ /dev/null @@ -1,326 +0,0 @@ 
-# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: object_detection/protos/faster_rcnn.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from object_detection.protos import anchor_generator_pb2 as object__detection_dot_protos_dot_anchor__generator__pb2 -from object_detection.protos import box_predictor_pb2 as object__detection_dot_protos_dot_box__predictor__pb2 -from object_detection.protos import hyperparams_pb2 as object__detection_dot_protos_dot_hyperparams__pb2 -from object_detection.protos import image_resizer_pb2 as object__detection_dot_protos_dot_image__resizer__pb2 -from object_detection.protos import losses_pb2 as object__detection_dot_protos_dot_losses__pb2 -from object_detection.protos import post_processing_pb2 as object__detection_dot_protos_dot_post__processing__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='object_detection/protos/faster_rcnn.proto', - package='object_detection.protos', - syntax='proto2', - serialized_pb=_b('\n)object_detection/protos/faster_rcnn.proto\x12\x17object_detection.protos\x1a.object_detection/protos/anchor_generator.proto\x1a+object_detection/protos/box_predictor.proto\x1a)object_detection/protos/hyperparams.proto\x1a+object_detection/protos/image_resizer.proto\x1a$object_detection/protos/losses.proto\x1a-object_detection/protos/post_processing.proto\"\xb0\x0b\n\nFasterRcnn\x12\x1f\n\x10\x66irst_stage_only\x18\x01 \x01(\x08:\x05\x66\x61lse\x12\x13\n\x0bnum_classes\x18\x03 \x01(\x05\x12<\n\rimage_resizer\x18\x04 
\x01(\x0b\x32%.object_detection.protos.ImageResizer\x12N\n\x11\x66\x65\x61ture_extractor\x18\x05 \x01(\x0b\x32\x33.object_detection.protos.FasterRcnnFeatureExtractor\x12N\n\x1c\x66irst_stage_anchor_generator\x18\x06 \x01(\x0b\x32(.object_detection.protos.AnchorGenerator\x12\"\n\x17\x66irst_stage_atrous_rate\x18\x07 \x01(\x05:\x01\x31\x12X\n*first_stage_box_predictor_conv_hyperparams\x18\x08 \x01(\x0b\x32$.object_detection.protos.Hyperparams\x12\x30\n%first_stage_box_predictor_kernel_size\x18\t \x01(\x05:\x01\x33\x12,\n\x1f\x66irst_stage_box_predictor_depth\x18\n \x01(\x05:\x03\x35\x31\x32\x12\'\n\x1a\x66irst_stage_minibatch_size\x18\x0b \x01(\x05:\x03\x32\x35\x36\x12\x32\n%first_stage_positive_balance_fraction\x18\x0c \x01(\x02:\x03\x30.5\x12*\n\x1f\x66irst_stage_nms_score_threshold\x18\r \x01(\x02:\x01\x30\x12*\n\x1d\x66irst_stage_nms_iou_threshold\x18\x0e \x01(\x02:\x03\x30.7\x12&\n\x19\x66irst_stage_max_proposals\x18\x0f \x01(\x05:\x03\x33\x30\x30\x12/\n$first_stage_localization_loss_weight\x18\x10 \x01(\x02:\x01\x31\x12-\n\"first_stage_objectness_loss_weight\x18\x11 \x01(\x02:\x01\x31\x12\x19\n\x11initial_crop_size\x18\x12 \x01(\x05\x12\x1b\n\x13maxpool_kernel_size\x18\x13 \x01(\x05\x12\x16\n\x0emaxpool_stride\x18\x14 \x01(\x05\x12I\n\x1asecond_stage_box_predictor\x18\x15 \x01(\x0b\x32%.object_detection.protos.BoxPredictor\x12#\n\x17second_stage_batch_size\x18\x16 \x01(\x05:\x02\x36\x34\x12+\n\x1dsecond_stage_balance_fraction\x18\x17 \x01(\x02:\x04\x30.25\x12M\n\x1csecond_stage_post_processing\x18\x18 \x01(\x0b\x32\'.object_detection.protos.PostProcessing\x12\x30\n%second_stage_localization_loss_weight\x18\x19 \x01(\x02:\x01\x31\x12\x32\n\'second_stage_classification_loss_weight\x18\x1a \x01(\x02:\x01\x31\x12\x33\n(second_stage_mask_prediction_loss_weight\x18\x1b \x01(\x02:\x01\x31\x12\x45\n\x12hard_example_miner\x18\x1c \x01(\x0b\x32).object_detection.protos.HardExampleMiner\x12U\n second_stage_classification_loss\x18\x1d 
\x01(\x0b\x32+.object_detection.protos.ClassificationLoss\"x\n\x1a\x46\x61sterRcnnFeatureExtractor\x12\x0c\n\x04type\x18\x01 \x01(\t\x12\'\n\x1b\x66irst_stage_features_stride\x18\x02 \x01(\x05:\x02\x31\x36\x12#\n\x14\x62\x61tch_norm_trainable\x18\x03 \x01(\x08:\x05\x66\x61lse') - , - dependencies=[object__detection_dot_protos_dot_anchor__generator__pb2.DESCRIPTOR,object__detection_dot_protos_dot_box__predictor__pb2.DESCRIPTOR,object__detection_dot_protos_dot_hyperparams__pb2.DESCRIPTOR,object__detection_dot_protos_dot_image__resizer__pb2.DESCRIPTOR,object__detection_dot_protos_dot_losses__pb2.DESCRIPTOR,object__detection_dot_protos_dot_post__processing__pb2.DESCRIPTOR,]) - - - - -_FASTERRCNN = _descriptor.Descriptor( - name='FasterRcnn', - full_name='object_detection.protos.FasterRcnn', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='first_stage_only', full_name='object_detection.protos.FasterRcnn.first_stage_only', index=0, - number=1, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='num_classes', full_name='object_detection.protos.FasterRcnn.num_classes', index=1, - number=3, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='image_resizer', full_name='object_detection.protos.FasterRcnn.image_resizer', index=2, - number=4, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='feature_extractor', 
full_name='object_detection.protos.FasterRcnn.feature_extractor', index=3, - number=5, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='first_stage_anchor_generator', full_name='object_detection.protos.FasterRcnn.first_stage_anchor_generator', index=4, - number=6, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='first_stage_atrous_rate', full_name='object_detection.protos.FasterRcnn.first_stage_atrous_rate', index=5, - number=7, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=1, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='first_stage_box_predictor_conv_hyperparams', full_name='object_detection.protos.FasterRcnn.first_stage_box_predictor_conv_hyperparams', index=6, - number=8, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='first_stage_box_predictor_kernel_size', full_name='object_detection.protos.FasterRcnn.first_stage_box_predictor_kernel_size', index=7, - number=9, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=3, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='first_stage_box_predictor_depth', 
full_name='object_detection.protos.FasterRcnn.first_stage_box_predictor_depth', index=8, - number=10, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=512, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='first_stage_minibatch_size', full_name='object_detection.protos.FasterRcnn.first_stage_minibatch_size', index=9, - number=11, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=256, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='first_stage_positive_balance_fraction', full_name='object_detection.protos.FasterRcnn.first_stage_positive_balance_fraction', index=10, - number=12, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.5), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='first_stage_nms_score_threshold', full_name='object_detection.protos.FasterRcnn.first_stage_nms_score_threshold', index=11, - number=13, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='first_stage_nms_iou_threshold', full_name='object_detection.protos.FasterRcnn.first_stage_nms_iou_threshold', index=12, - number=14, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.7), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='first_stage_max_proposals', 
full_name='object_detection.protos.FasterRcnn.first_stage_max_proposals', index=13, - number=15, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=300, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='first_stage_localization_loss_weight', full_name='object_detection.protos.FasterRcnn.first_stage_localization_loss_weight', index=14, - number=16, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='first_stage_objectness_loss_weight', full_name='object_detection.protos.FasterRcnn.first_stage_objectness_loss_weight', index=15, - number=17, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='initial_crop_size', full_name='object_detection.protos.FasterRcnn.initial_crop_size', index=16, - number=18, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='maxpool_kernel_size', full_name='object_detection.protos.FasterRcnn.maxpool_kernel_size', index=17, - number=19, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='maxpool_stride', full_name='object_detection.protos.FasterRcnn.maxpool_stride', index=18, - number=20, type=5, cpp_type=1, label=1, 
- has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='second_stage_box_predictor', full_name='object_detection.protos.FasterRcnn.second_stage_box_predictor', index=19, - number=21, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='second_stage_batch_size', full_name='object_detection.protos.FasterRcnn.second_stage_batch_size', index=20, - number=22, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=64, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='second_stage_balance_fraction', full_name='object_detection.protos.FasterRcnn.second_stage_balance_fraction', index=21, - number=23, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.25), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='second_stage_post_processing', full_name='object_detection.protos.FasterRcnn.second_stage_post_processing', index=22, - number=24, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='second_stage_localization_loss_weight', full_name='object_detection.protos.FasterRcnn.second_stage_localization_loss_weight', index=23, - number=25, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, 
enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='second_stage_classification_loss_weight', full_name='object_detection.protos.FasterRcnn.second_stage_classification_loss_weight', index=24, - number=26, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='second_stage_mask_prediction_loss_weight', full_name='object_detection.protos.FasterRcnn.second_stage_mask_prediction_loss_weight', index=25, - number=27, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='hard_example_miner', full_name='object_detection.protos.FasterRcnn.hard_example_miner', index=26, - number=28, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='second_stage_classification_loss', full_name='object_detection.protos.FasterRcnn.second_stage_classification_loss', index=27, - number=29, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=337, - serialized_end=1793, -) - - -_FASTERRCNNFEATUREEXTRACTOR = _descriptor.Descriptor( - name='FasterRcnnFeatureExtractor', 
- full_name='object_detection.protos.FasterRcnnFeatureExtractor', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='type', full_name='object_detection.protos.FasterRcnnFeatureExtractor.type', index=0, - number=1, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='first_stage_features_stride', full_name='object_detection.protos.FasterRcnnFeatureExtractor.first_stage_features_stride', index=1, - number=2, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=16, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='batch_norm_trainable', full_name='object_detection.protos.FasterRcnnFeatureExtractor.batch_norm_trainable', index=2, - number=3, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1795, - serialized_end=1915, -) - -_FASTERRCNN.fields_by_name['image_resizer'].message_type = object__detection_dot_protos_dot_image__resizer__pb2._IMAGERESIZER -_FASTERRCNN.fields_by_name['feature_extractor'].message_type = _FASTERRCNNFEATUREEXTRACTOR -_FASTERRCNN.fields_by_name['first_stage_anchor_generator'].message_type = object__detection_dot_protos_dot_anchor__generator__pb2._ANCHORGENERATOR -_FASTERRCNN.fields_by_name['first_stage_box_predictor_conv_hyperparams'].message_type = 
object__detection_dot_protos_dot_hyperparams__pb2._HYPERPARAMS -_FASTERRCNN.fields_by_name['second_stage_box_predictor'].message_type = object__detection_dot_protos_dot_box__predictor__pb2._BOXPREDICTOR -_FASTERRCNN.fields_by_name['second_stage_post_processing'].message_type = object__detection_dot_protos_dot_post__processing__pb2._POSTPROCESSING -_FASTERRCNN.fields_by_name['hard_example_miner'].message_type = object__detection_dot_protos_dot_losses__pb2._HARDEXAMPLEMINER -_FASTERRCNN.fields_by_name['second_stage_classification_loss'].message_type = object__detection_dot_protos_dot_losses__pb2._CLASSIFICATIONLOSS -DESCRIPTOR.message_types_by_name['FasterRcnn'] = _FASTERRCNN -DESCRIPTOR.message_types_by_name['FasterRcnnFeatureExtractor'] = _FASTERRCNNFEATUREEXTRACTOR -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -FasterRcnn = _reflection.GeneratedProtocolMessageType('FasterRcnn', (_message.Message,), dict( - DESCRIPTOR = _FASTERRCNN, - __module__ = 'object_detection.protos.faster_rcnn_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.FasterRcnn) - )) -_sym_db.RegisterMessage(FasterRcnn) - -FasterRcnnFeatureExtractor = _reflection.GeneratedProtocolMessageType('FasterRcnnFeatureExtractor', (_message.Message,), dict( - DESCRIPTOR = _FASTERRCNNFEATUREEXTRACTOR, - __module__ = 'object_detection.protos.faster_rcnn_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.FasterRcnnFeatureExtractor) - )) -_sym_db.RegisterMessage(FasterRcnnFeatureExtractor) - - -# @@protoc_insertion_point(module_scope) diff --git a/object_detection/protos/grid_anchor_generator.proto b/object_detection/protos/grid_anchor_generator.proto deleted file mode 100644 index 85168f8f..00000000 --- a/object_detection/protos/grid_anchor_generator.proto +++ /dev/null @@ -1,34 +0,0 @@ -syntax = "proto2"; - -package object_detection.protos; - -// Configuration proto for GridAnchorGenerator. See -// anchor_generators/grid_anchor_generator.py for details. 
-message GridAnchorGenerator { - // Anchor height in pixels. - optional int32 height = 1 [default = 256]; - - // Anchor width in pixels. - optional int32 width = 2 [default = 256]; - - // Anchor stride in height dimension in pixels. - optional int32 height_stride = 3 [default = 16]; - - // Anchor stride in width dimension in pixels. - optional int32 width_stride = 4 [default = 16]; - - // Anchor height offset in pixels. - optional int32 height_offset = 5 [default = 0]; - - // Anchor width offset in pixels. - optional int32 width_offset = 6 [default = 0]; - - // At any given location, len(scales) * len(aspect_ratios) anchors are - // generated with all possible combinations of scales and aspect ratios. - - // List of scales for the anchors. - repeated float scales = 7; - - // List of aspect ratios for the anchors. - repeated float aspect_ratios = 8; -} diff --git a/object_detection/protos/grid_anchor_generator_pb2.py b/object_detection/protos/grid_anchor_generator_pb2.py deleted file mode 100644 index 83b6011c..00000000 --- a/object_detection/protos/grid_anchor_generator_pb2.py +++ /dev/null @@ -1,118 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: object_detection/protos/grid_anchor_generator.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='object_detection/protos/grid_anchor_generator.proto', - package='object_detection.protos', - syntax='proto2', - serialized_pb=_b('\n3object_detection/protos/grid_anchor_generator.proto\x12\x17object_detection.protos\"\xcd\x01\n\x13GridAnchorGenerator\x12\x13\n\x06height\x18\x01 \x01(\x05:\x03\x32\x35\x36\x12\x12\n\x05width\x18\x02 \x01(\x05:\x03\x32\x35\x36\x12\x19\n\rheight_stride\x18\x03 \x01(\x05:\x02\x31\x36\x12\x18\n\x0cwidth_stride\x18\x04 \x01(\x05:\x02\x31\x36\x12\x18\n\rheight_offset\x18\x05 \x01(\x05:\x01\x30\x12\x17\n\x0cwidth_offset\x18\x06 \x01(\x05:\x01\x30\x12\x0e\n\x06scales\x18\x07 \x03(\x02\x12\x15\n\raspect_ratios\x18\x08 \x03(\x02') -) - - - - -_GRIDANCHORGENERATOR = _descriptor.Descriptor( - name='GridAnchorGenerator', - full_name='object_detection.protos.GridAnchorGenerator', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='height', full_name='object_detection.protos.GridAnchorGenerator.height', index=0, - number=1, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=256, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='width', full_name='object_detection.protos.GridAnchorGenerator.width', index=1, - number=2, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=256, - 
message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='height_stride', full_name='object_detection.protos.GridAnchorGenerator.height_stride', index=2, - number=3, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=16, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='width_stride', full_name='object_detection.protos.GridAnchorGenerator.width_stride', index=3, - number=4, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=16, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='height_offset', full_name='object_detection.protos.GridAnchorGenerator.height_offset', index=4, - number=5, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='width_offset', full_name='object_detection.protos.GridAnchorGenerator.width_offset', index=5, - number=6, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='scales', full_name='object_detection.protos.GridAnchorGenerator.scales', index=6, - number=7, type=2, cpp_type=6, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='aspect_ratios', 
full_name='object_detection.protos.GridAnchorGenerator.aspect_ratios', index=7, - number=8, type=2, cpp_type=6, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=81, - serialized_end=286, -) - -DESCRIPTOR.message_types_by_name['GridAnchorGenerator'] = _GRIDANCHORGENERATOR -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -GridAnchorGenerator = _reflection.GeneratedProtocolMessageType('GridAnchorGenerator', (_message.Message,), dict( - DESCRIPTOR = _GRIDANCHORGENERATOR, - __module__ = 'object_detection.protos.grid_anchor_generator_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.GridAnchorGenerator) - )) -_sym_db.RegisterMessage(GridAnchorGenerator) - - -# @@protoc_insertion_point(module_scope) diff --git a/object_detection/protos/hyperparams.proto b/object_detection/protos/hyperparams.proto deleted file mode 100644 index b8b9972e..00000000 --- a/object_detection/protos/hyperparams.proto +++ /dev/null @@ -1,103 +0,0 @@ -syntax = "proto2"; - -package object_detection.protos; - -// Configuration proto for the convolution op hyperparameters to use in the -// object detection pipeline. -message Hyperparams { - - // Operations affected by hyperparameters. - enum Op { - // Convolution, Separable Convolution, Convolution transpose. - CONV = 1; - - // Fully connected - FC = 2; - } - optional Op op = 1 [default = CONV]; - - // Regularizer for the weights of the convolution op. - optional Regularizer regularizer = 2; - - // Initializer for the weights of the convolution op. - optional Initializer initializer = 3; - - // Type of activation to apply after convolution. 
- enum Activation { - // Use None (no activation) - NONE = 0; - - // Use tf.nn.relu - RELU = 1; - - // Use tf.nn.relu6 - RELU_6 = 2; - } - optional Activation activation = 4 [default = RELU]; - - // BatchNorm hyperparameters. If this parameter is NOT set then BatchNorm is - // not applied! - optional BatchNorm batch_norm = 5; -} - -// Proto with one-of field for regularizers. -message Regularizer { - oneof regularizer_oneof { - L1Regularizer l1_regularizer = 1; - L2Regularizer l2_regularizer = 2; - } -} - -// Configuration proto for L1 Regularizer. -// See https://www.tensorflow.org/api_docs/python/tf/contrib/layers/l1_regularizer -message L1Regularizer { - optional float weight = 1 [default = 1.0]; -} - -// Configuration proto for L2 Regularizer. -// See https://www.tensorflow.org/api_docs/python/tf/contrib/layers/l2_regularizer -message L2Regularizer { - optional float weight = 1 [default = 1.0]; -} - -// Proto with one-of field for initializers. -message Initializer { - oneof initializer_oneof { - TruncatedNormalInitializer truncated_normal_initializer = 1; - VarianceScalingInitializer variance_scaling_initializer = 2; - } -} - -// Configuration proto for truncated normal initializer. See -// https://www.tensorflow.org/api_docs/python/tf/truncated_normal_initializer -message TruncatedNormalInitializer { - optional float mean = 1 [default = 0.0]; - optional float stddev = 2 [default = 1.0]; -} - -// Configuration proto for variance scaling initializer. See -// https://www.tensorflow.org/api_docs/python/tf/contrib/layers/ -// variance_scaling_initializer -message VarianceScalingInitializer { - optional float factor = 1 [default = 2.0]; - optional bool uniform = 2 [default = false]; - enum Mode { - FAN_IN = 0; - FAN_OUT = 1; - FAN_AVG = 2; - } - optional Mode mode = 3 [default = FAN_IN]; -} - -// Configuration proto for batch norm to apply after convolution op. 
See -// https://www.tensorflow.org/api_docs/python/tf/contrib/layers/batch_norm -message BatchNorm { - optional float decay = 1 [default = 0.999]; - optional bool center = 2 [default = true]; - optional bool scale = 3 [default = false]; - optional float epsilon = 4 [default = 0.001]; - // Whether to train the batch norm variables. If this is set to false during - // training, the current value of the batch_norm variables are used for - // forward pass but they are never updated. - optional bool train = 5 [default = true]; -} diff --git a/object_detection/protos/hyperparams_pb2.py b/object_detection/protos/hyperparams_pb2.py deleted file mode 100644 index 9aaaa032..00000000 --- a/object_detection/protos/hyperparams_pb2.py +++ /dev/null @@ -1,541 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: object_detection/protos/hyperparams.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='object_detection/protos/hyperparams.proto', - package='object_detection.protos', - syntax='proto2', - serialized_pb=_b('\n)object_detection/protos/hyperparams.proto\x12\x17object_detection.protos\"\x87\x03\n\x0bHyperparams\x12\x39\n\x02op\x18\x01 \x01(\x0e\x32\'.object_detection.protos.Hyperparams.Op:\x04\x43ONV\x12\x39\n\x0bregularizer\x18\x02 \x01(\x0b\x32$.object_detection.protos.Regularizer\x12\x39\n\x0binitializer\x18\x03 \x01(\x0b\x32$.object_detection.protos.Initializer\x12I\n\nactivation\x18\x04 \x01(\x0e\x32/.object_detection.protos.Hyperparams.Activation:\x04RELU\x12\x36\n\nbatch_norm\x18\x05 
\x01(\x0b\x32\".object_detection.protos.BatchNorm\"\x16\n\x02Op\x12\x08\n\x04\x43ONV\x10\x01\x12\x06\n\x02\x46\x43\x10\x02\",\n\nActivation\x12\x08\n\x04NONE\x10\x00\x12\x08\n\x04RELU\x10\x01\x12\n\n\x06RELU_6\x10\x02\"\xa6\x01\n\x0bRegularizer\x12@\n\x0el1_regularizer\x18\x01 \x01(\x0b\x32&.object_detection.protos.L1RegularizerH\x00\x12@\n\x0el2_regularizer\x18\x02 \x01(\x0b\x32&.object_detection.protos.L2RegularizerH\x00\x42\x13\n\x11regularizer_oneof\"\"\n\rL1Regularizer\x12\x11\n\x06weight\x18\x01 \x01(\x02:\x01\x31\"\"\n\rL2Regularizer\x12\x11\n\x06weight\x18\x01 \x01(\x02:\x01\x31\"\xdc\x01\n\x0bInitializer\x12[\n\x1ctruncated_normal_initializer\x18\x01 \x01(\x0b\x32\x33.object_detection.protos.TruncatedNormalInitializerH\x00\x12[\n\x1cvariance_scaling_initializer\x18\x02 \x01(\x0b\x32\x33.object_detection.protos.VarianceScalingInitializerH\x00\x42\x13\n\x11initializer_oneof\"@\n\x1aTruncatedNormalInitializer\x12\x0f\n\x04mean\x18\x01 \x01(\x02:\x01\x30\x12\x11\n\x06stddev\x18\x02 \x01(\x02:\x01\x31\"\xc5\x01\n\x1aVarianceScalingInitializer\x12\x11\n\x06\x66\x61\x63tor\x18\x01 \x01(\x02:\x01\x32\x12\x16\n\x07uniform\x18\x02 \x01(\x08:\x05\x66\x61lse\x12N\n\x04mode\x18\x03 \x01(\x0e\x32\x38.object_detection.protos.VarianceScalingInitializer.Mode:\x06\x46\x41N_IN\",\n\x04Mode\x12\n\n\x06\x46\x41N_IN\x10\x00\x12\x0b\n\x07\x46\x41N_OUT\x10\x01\x12\x0b\n\x07\x46\x41N_AVG\x10\x02\"z\n\tBatchNorm\x12\x14\n\x05\x64\x65\x63\x61y\x18\x01 \x01(\x02:\x05\x30.999\x12\x14\n\x06\x63\x65nter\x18\x02 \x01(\x08:\x04true\x12\x14\n\x05scale\x18\x03 \x01(\x08:\x05\x66\x61lse\x12\x16\n\x07\x65psilon\x18\x04 \x01(\x02:\x05\x30.001\x12\x13\n\x05train\x18\x05 \x01(\x08:\x04true') -) - - - -_HYPERPARAMS_OP = _descriptor.EnumDescriptor( - name='Op', - full_name='object_detection.protos.Hyperparams.Op', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='CONV', index=0, number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - 
name='FC', index=1, number=2, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=394, - serialized_end=416, -) -_sym_db.RegisterEnumDescriptor(_HYPERPARAMS_OP) - -_HYPERPARAMS_ACTIVATION = _descriptor.EnumDescriptor( - name='Activation', - full_name='object_detection.protos.Hyperparams.Activation', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='NONE', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='RELU', index=1, number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='RELU_6', index=2, number=2, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=418, - serialized_end=462, -) -_sym_db.RegisterEnumDescriptor(_HYPERPARAMS_ACTIVATION) - -_VARIANCESCALINGINITIALIZER_MODE = _descriptor.EnumDescriptor( - name='Mode', - full_name='object_detection.protos.VarianceScalingInitializer.Mode', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='FAN_IN', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='FAN_OUT', index=1, number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='FAN_AVG', index=2, number=2, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=1148, - serialized_end=1192, -) -_sym_db.RegisterEnumDescriptor(_VARIANCESCALINGINITIALIZER_MODE) - - -_HYPERPARAMS = _descriptor.Descriptor( - name='Hyperparams', - full_name='object_detection.protos.Hyperparams', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='op', full_name='object_detection.protos.Hyperparams.op', index=0, - number=1, type=14, cpp_type=8, label=1, - has_default_value=True, default_value=1, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - 
options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='regularizer', full_name='object_detection.protos.Hyperparams.regularizer', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='initializer', full_name='object_detection.protos.Hyperparams.initializer', index=2, - number=3, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='activation', full_name='object_detection.protos.Hyperparams.activation', index=3, - number=4, type=14, cpp_type=8, label=1, - has_default_value=True, default_value=1, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='batch_norm', full_name='object_detection.protos.Hyperparams.batch_norm', index=4, - number=5, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - _HYPERPARAMS_OP, - _HYPERPARAMS_ACTIVATION, - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=71, - serialized_end=462, -) - - -_REGULARIZER = _descriptor.Descriptor( - name='Regularizer', - full_name='object_detection.protos.Regularizer', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='l1_regularizer', full_name='object_detection.protos.Regularizer.l1_regularizer', index=0, - 
number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='l2_regularizer', full_name='object_detection.protos.Regularizer.l2_regularizer', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name='regularizer_oneof', full_name='object_detection.protos.Regularizer.regularizer_oneof', - index=0, containing_type=None, fields=[]), - ], - serialized_start=465, - serialized_end=631, -) - - -_L1REGULARIZER = _descriptor.Descriptor( - name='L1Regularizer', - full_name='object_detection.protos.L1Regularizer', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='weight', full_name='object_detection.protos.L1Regularizer.weight', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=633, - serialized_end=667, -) - - -_L2REGULARIZER = _descriptor.Descriptor( - name='L2Regularizer', - full_name='object_detection.protos.L2Regularizer', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='weight', 
full_name='object_detection.protos.L2Regularizer.weight', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=669, - serialized_end=703, -) - - -_INITIALIZER = _descriptor.Descriptor( - name='Initializer', - full_name='object_detection.protos.Initializer', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='truncated_normal_initializer', full_name='object_detection.protos.Initializer.truncated_normal_initializer', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='variance_scaling_initializer', full_name='object_detection.protos.Initializer.variance_scaling_initializer', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name='initializer_oneof', full_name='object_detection.protos.Initializer.initializer_oneof', - index=0, containing_type=None, fields=[]), - ], - serialized_start=706, - serialized_end=926, -) - - -_TRUNCATEDNORMALINITIALIZER = _descriptor.Descriptor( - name='TruncatedNormalInitializer', - 
full_name='object_detection.protos.TruncatedNormalInitializer', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='mean', full_name='object_detection.protos.TruncatedNormalInitializer.mean', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='stddev', full_name='object_detection.protos.TruncatedNormalInitializer.stddev', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=928, - serialized_end=992, -) - - -_VARIANCESCALINGINITIALIZER = _descriptor.Descriptor( - name='VarianceScalingInitializer', - full_name='object_detection.protos.VarianceScalingInitializer', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='factor', full_name='object_detection.protos.VarianceScalingInitializer.factor', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(2), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='uniform', full_name='object_detection.protos.VarianceScalingInitializer.uniform', index=1, - number=2, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - 
_descriptor.FieldDescriptor( - name='mode', full_name='object_detection.protos.VarianceScalingInitializer.mode', index=2, - number=3, type=14, cpp_type=8, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - _VARIANCESCALINGINITIALIZER_MODE, - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=995, - serialized_end=1192, -) - - -_BATCHNORM = _descriptor.Descriptor( - name='BatchNorm', - full_name='object_detection.protos.BatchNorm', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='decay', full_name='object_detection.protos.BatchNorm.decay', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.999), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='center', full_name='object_detection.protos.BatchNorm.center', index=1, - number=2, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=True, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='scale', full_name='object_detection.protos.BatchNorm.scale', index=2, - number=3, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='epsilon', full_name='object_detection.protos.BatchNorm.epsilon', index=3, - number=4, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.001), - 
message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='train', full_name='object_detection.protos.BatchNorm.train', index=4, - number=5, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=True, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1194, - serialized_end=1316, -) - -_HYPERPARAMS.fields_by_name['op'].enum_type = _HYPERPARAMS_OP -_HYPERPARAMS.fields_by_name['regularizer'].message_type = _REGULARIZER -_HYPERPARAMS.fields_by_name['initializer'].message_type = _INITIALIZER -_HYPERPARAMS.fields_by_name['activation'].enum_type = _HYPERPARAMS_ACTIVATION -_HYPERPARAMS.fields_by_name['batch_norm'].message_type = _BATCHNORM -_HYPERPARAMS_OP.containing_type = _HYPERPARAMS -_HYPERPARAMS_ACTIVATION.containing_type = _HYPERPARAMS -_REGULARIZER.fields_by_name['l1_regularizer'].message_type = _L1REGULARIZER -_REGULARIZER.fields_by_name['l2_regularizer'].message_type = _L2REGULARIZER -_REGULARIZER.oneofs_by_name['regularizer_oneof'].fields.append( - _REGULARIZER.fields_by_name['l1_regularizer']) -_REGULARIZER.fields_by_name['l1_regularizer'].containing_oneof = _REGULARIZER.oneofs_by_name['regularizer_oneof'] -_REGULARIZER.oneofs_by_name['regularizer_oneof'].fields.append( - _REGULARIZER.fields_by_name['l2_regularizer']) -_REGULARIZER.fields_by_name['l2_regularizer'].containing_oneof = _REGULARIZER.oneofs_by_name['regularizer_oneof'] -_INITIALIZER.fields_by_name['truncated_normal_initializer'].message_type = _TRUNCATEDNORMALINITIALIZER -_INITIALIZER.fields_by_name['variance_scaling_initializer'].message_type = _VARIANCESCALINGINITIALIZER 
-_INITIALIZER.oneofs_by_name['initializer_oneof'].fields.append( - _INITIALIZER.fields_by_name['truncated_normal_initializer']) -_INITIALIZER.fields_by_name['truncated_normal_initializer'].containing_oneof = _INITIALIZER.oneofs_by_name['initializer_oneof'] -_INITIALIZER.oneofs_by_name['initializer_oneof'].fields.append( - _INITIALIZER.fields_by_name['variance_scaling_initializer']) -_INITIALIZER.fields_by_name['variance_scaling_initializer'].containing_oneof = _INITIALIZER.oneofs_by_name['initializer_oneof'] -_VARIANCESCALINGINITIALIZER.fields_by_name['mode'].enum_type = _VARIANCESCALINGINITIALIZER_MODE -_VARIANCESCALINGINITIALIZER_MODE.containing_type = _VARIANCESCALINGINITIALIZER -DESCRIPTOR.message_types_by_name['Hyperparams'] = _HYPERPARAMS -DESCRIPTOR.message_types_by_name['Regularizer'] = _REGULARIZER -DESCRIPTOR.message_types_by_name['L1Regularizer'] = _L1REGULARIZER -DESCRIPTOR.message_types_by_name['L2Regularizer'] = _L2REGULARIZER -DESCRIPTOR.message_types_by_name['Initializer'] = _INITIALIZER -DESCRIPTOR.message_types_by_name['TruncatedNormalInitializer'] = _TRUNCATEDNORMALINITIALIZER -DESCRIPTOR.message_types_by_name['VarianceScalingInitializer'] = _VARIANCESCALINGINITIALIZER -DESCRIPTOR.message_types_by_name['BatchNorm'] = _BATCHNORM -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -Hyperparams = _reflection.GeneratedProtocolMessageType('Hyperparams', (_message.Message,), dict( - DESCRIPTOR = _HYPERPARAMS, - __module__ = 'object_detection.protos.hyperparams_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.Hyperparams) - )) -_sym_db.RegisterMessage(Hyperparams) - -Regularizer = _reflection.GeneratedProtocolMessageType('Regularizer', (_message.Message,), dict( - DESCRIPTOR = _REGULARIZER, - __module__ = 'object_detection.protos.hyperparams_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.Regularizer) - )) -_sym_db.RegisterMessage(Regularizer) - -L1Regularizer = 
_reflection.GeneratedProtocolMessageType('L1Regularizer', (_message.Message,), dict( - DESCRIPTOR = _L1REGULARIZER, - __module__ = 'object_detection.protos.hyperparams_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.L1Regularizer) - )) -_sym_db.RegisterMessage(L1Regularizer) - -L2Regularizer = _reflection.GeneratedProtocolMessageType('L2Regularizer', (_message.Message,), dict( - DESCRIPTOR = _L2REGULARIZER, - __module__ = 'object_detection.protos.hyperparams_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.L2Regularizer) - )) -_sym_db.RegisterMessage(L2Regularizer) - -Initializer = _reflection.GeneratedProtocolMessageType('Initializer', (_message.Message,), dict( - DESCRIPTOR = _INITIALIZER, - __module__ = 'object_detection.protos.hyperparams_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.Initializer) - )) -_sym_db.RegisterMessage(Initializer) - -TruncatedNormalInitializer = _reflection.GeneratedProtocolMessageType('TruncatedNormalInitializer', (_message.Message,), dict( - DESCRIPTOR = _TRUNCATEDNORMALINITIALIZER, - __module__ = 'object_detection.protos.hyperparams_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.TruncatedNormalInitializer) - )) -_sym_db.RegisterMessage(TruncatedNormalInitializer) - -VarianceScalingInitializer = _reflection.GeneratedProtocolMessageType('VarianceScalingInitializer', (_message.Message,), dict( - DESCRIPTOR = _VARIANCESCALINGINITIALIZER, - __module__ = 'object_detection.protos.hyperparams_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.VarianceScalingInitializer) - )) -_sym_db.RegisterMessage(VarianceScalingInitializer) - -BatchNorm = _reflection.GeneratedProtocolMessageType('BatchNorm', (_message.Message,), dict( - DESCRIPTOR = _BATCHNORM, - __module__ = 'object_detection.protos.hyperparams_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.BatchNorm) - )) -_sym_db.RegisterMessage(BatchNorm) - - -# 
@@protoc_insertion_point(module_scope) diff --git a/object_detection/protos/image_resizer.proto b/object_detection/protos/image_resizer.proto deleted file mode 100644 index 67f6cacd..00000000 --- a/object_detection/protos/image_resizer.proto +++ /dev/null @@ -1,44 +0,0 @@ -syntax = "proto2"; - -package object_detection.protos; - -// Configuration proto for image resizing operations. -// See builders/image_resizer_builder.py for details. -message ImageResizer { - oneof image_resizer_oneof { - KeepAspectRatioResizer keep_aspect_ratio_resizer = 1; - FixedShapeResizer fixed_shape_resizer = 2; - } -} - -// Enumeration type for image resizing methods provided in TensorFlow. -enum ResizeType { - BILINEAR = 0; // Corresponds to tf.image.ResizeMethod.BILINEAR - NEAREST_NEIGHBOR = 1; // Corresponds to tf.image.ResizeMethod.NEAREST_NEIGHBOR - BICUBIC = 2; // Corresponds to tf.image.ResizeMethod.BICUBIC - AREA = 3; // Corresponds to tf.image.ResizeMethod.AREA -} - -// Configuration proto for image resizer that keeps aspect ratio. -message KeepAspectRatioResizer { - // Desired size of the smaller image dimension in pixels. - optional int32 min_dimension = 1 [default = 600]; - - // Desired size of the larger image dimension in pixels. - optional int32 max_dimension = 2 [default = 1024]; - - // Desired method when resizing image. - optional ResizeType resize_method = 3 [default = BILINEAR]; -} - -// Configuration proto for image resizer that resizes to a fixed shape. -message FixedShapeResizer { - // Desired height of image in pixels. - optional int32 height = 1 [default = 300]; - - // Desired width of image in pixels. - optional int32 width = 2 [default = 300]; - - // Desired method when resizing image. 
- optional ResizeType resize_method = 3 [default = BILINEAR]; -} diff --git a/object_detection/protos/image_resizer_pb2.py b/object_detection/protos/image_resizer_pb2.py deleted file mode 100644 index 2c953b77..00000000 --- a/object_detection/protos/image_resizer_pb2.py +++ /dev/null @@ -1,232 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: object_detection/protos/image_resizer.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf.internal import enum_type_wrapper -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='object_detection/protos/image_resizer.proto', - package='object_detection.protos', - syntax='proto2', - serialized_pb=_b('\n+object_detection/protos/image_resizer.proto\x12\x17object_detection.protos\"\xc6\x01\n\x0cImageResizer\x12T\n\x19keep_aspect_ratio_resizer\x18\x01 \x01(\x0b\x32/.object_detection.protos.KeepAspectRatioResizerH\x00\x12I\n\x13\x66ixed_shape_resizer\x18\x02 \x01(\x0b\x32*.object_detection.protos.FixedShapeResizerH\x00\x42\x15\n\x13image_resizer_oneof\"\x97\x01\n\x16KeepAspectRatioResizer\x12\x1a\n\rmin_dimension\x18\x01 \x01(\x05:\x03\x36\x30\x30\x12\x1b\n\rmax_dimension\x18\x02 \x01(\x05:\x04\x31\x30\x32\x34\x12\x44\n\rresize_method\x18\x03 \x01(\x0e\x32#.object_detection.protos.ResizeType:\x08\x42ILINEAR\"\x82\x01\n\x11\x46ixedShapeResizer\x12\x13\n\x06height\x18\x01 \x01(\x05:\x03\x33\x30\x30\x12\x12\n\x05width\x18\x02 \x01(\x05:\x03\x33\x30\x30\x12\x44\n\rresize_method\x18\x03 
\x01(\x0e\x32#.object_detection.protos.ResizeType:\x08\x42ILINEAR*G\n\nResizeType\x12\x0c\n\x08\x42ILINEAR\x10\x00\x12\x14\n\x10NEAREST_NEIGHBOR\x10\x01\x12\x0b\n\x07\x42ICUBIC\x10\x02\x12\x08\n\x04\x41REA\x10\x03') -) - -_RESIZETYPE = _descriptor.EnumDescriptor( - name='ResizeType', - full_name='object_detection.protos.ResizeType', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='BILINEAR', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='NEAREST_NEIGHBOR', index=1, number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='BICUBIC', index=2, number=2, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='AREA', index=3, number=3, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=560, - serialized_end=631, -) -_sym_db.RegisterEnumDescriptor(_RESIZETYPE) - -ResizeType = enum_type_wrapper.EnumTypeWrapper(_RESIZETYPE) -BILINEAR = 0 -NEAREST_NEIGHBOR = 1 -BICUBIC = 2 -AREA = 3 - - - -_IMAGERESIZER = _descriptor.Descriptor( - name='ImageResizer', - full_name='object_detection.protos.ImageResizer', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='keep_aspect_ratio_resizer', full_name='object_detection.protos.ImageResizer.keep_aspect_ratio_resizer', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='fixed_shape_resizer', full_name='object_detection.protos.ImageResizer.fixed_shape_resizer', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - 
], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name='image_resizer_oneof', full_name='object_detection.protos.ImageResizer.image_resizer_oneof', - index=0, containing_type=None, fields=[]), - ], - serialized_start=73, - serialized_end=271, -) - - -_KEEPASPECTRATIORESIZER = _descriptor.Descriptor( - name='KeepAspectRatioResizer', - full_name='object_detection.protos.KeepAspectRatioResizer', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='min_dimension', full_name='object_detection.protos.KeepAspectRatioResizer.min_dimension', index=0, - number=1, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=600, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='max_dimension', full_name='object_detection.protos.KeepAspectRatioResizer.max_dimension', index=1, - number=2, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=1024, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='resize_method', full_name='object_detection.protos.KeepAspectRatioResizer.resize_method', index=2, - number=3, type=14, cpp_type=8, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=274, - serialized_end=425, -) - - -_FIXEDSHAPERESIZER = _descriptor.Descriptor( - name='FixedShapeResizer', - 
full_name='object_detection.protos.FixedShapeResizer', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='height', full_name='object_detection.protos.FixedShapeResizer.height', index=0, - number=1, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=300, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='width', full_name='object_detection.protos.FixedShapeResizer.width', index=1, - number=2, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=300, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='resize_method', full_name='object_detection.protos.FixedShapeResizer.resize_method', index=2, - number=3, type=14, cpp_type=8, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=428, - serialized_end=558, -) - -_IMAGERESIZER.fields_by_name['keep_aspect_ratio_resizer'].message_type = _KEEPASPECTRATIORESIZER -_IMAGERESIZER.fields_by_name['fixed_shape_resizer'].message_type = _FIXEDSHAPERESIZER -_IMAGERESIZER.oneofs_by_name['image_resizer_oneof'].fields.append( - _IMAGERESIZER.fields_by_name['keep_aspect_ratio_resizer']) -_IMAGERESIZER.fields_by_name['keep_aspect_ratio_resizer'].containing_oneof = _IMAGERESIZER.oneofs_by_name['image_resizer_oneof'] -_IMAGERESIZER.oneofs_by_name['image_resizer_oneof'].fields.append( - _IMAGERESIZER.fields_by_name['fixed_shape_resizer']) 
-_IMAGERESIZER.fields_by_name['fixed_shape_resizer'].containing_oneof = _IMAGERESIZER.oneofs_by_name['image_resizer_oneof'] -_KEEPASPECTRATIORESIZER.fields_by_name['resize_method'].enum_type = _RESIZETYPE -_FIXEDSHAPERESIZER.fields_by_name['resize_method'].enum_type = _RESIZETYPE -DESCRIPTOR.message_types_by_name['ImageResizer'] = _IMAGERESIZER -DESCRIPTOR.message_types_by_name['KeepAspectRatioResizer'] = _KEEPASPECTRATIORESIZER -DESCRIPTOR.message_types_by_name['FixedShapeResizer'] = _FIXEDSHAPERESIZER -DESCRIPTOR.enum_types_by_name['ResizeType'] = _RESIZETYPE -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -ImageResizer = _reflection.GeneratedProtocolMessageType('ImageResizer', (_message.Message,), dict( - DESCRIPTOR = _IMAGERESIZER, - __module__ = 'object_detection.protos.image_resizer_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.ImageResizer) - )) -_sym_db.RegisterMessage(ImageResizer) - -KeepAspectRatioResizer = _reflection.GeneratedProtocolMessageType('KeepAspectRatioResizer', (_message.Message,), dict( - DESCRIPTOR = _KEEPASPECTRATIORESIZER, - __module__ = 'object_detection.protos.image_resizer_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.KeepAspectRatioResizer) - )) -_sym_db.RegisterMessage(KeepAspectRatioResizer) - -FixedShapeResizer = _reflection.GeneratedProtocolMessageType('FixedShapeResizer', (_message.Message,), dict( - DESCRIPTOR = _FIXEDSHAPERESIZER, - __module__ = 'object_detection.protos.image_resizer_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.FixedShapeResizer) - )) -_sym_db.RegisterMessage(FixedShapeResizer) - - -# @@protoc_insertion_point(module_scope) diff --git a/object_detection/protos/input_reader.proto b/object_detection/protos/input_reader.proto deleted file mode 100644 index ed460dad..00000000 --- a/object_detection/protos/input_reader.proto +++ /dev/null @@ -1,60 +0,0 @@ -syntax = "proto2"; - -package object_detection.protos; - -// Configuration proto for 
defining input readers that generate Object Detection -// Examples from input sources. Input readers are expected to generate a -// dictionary of tensors, with the following fields populated: -// -// 'image': an [image_height, image_width, channels] image tensor that detection -// will be run on. -// 'groundtruth_classes': a [num_boxes] int32 tensor storing the class -// labels of detected boxes in the image. -// 'groundtruth_boxes': a [num_boxes, 4] float tensor storing the coordinates of -// detected boxes in the image. -// 'groundtruth_instance_masks': (Optional), a [num_boxes, image_height, -// image_width] float tensor storing binary mask of the objects in boxes. - -message InputReader { - // Path to StringIntLabelMap pbtxt file specifying the mapping from string - // labels to integer ids. - optional string label_map_path = 1 [default=""]; - - // Whether data should be processed in the order they are read in, or - // shuffled randomly. - optional bool shuffle = 2 [default=true]; - - // Maximum number of records to keep in reader queue. - optional uint32 queue_capacity = 3 [default=2000]; - - // Minimum number of records to keep in reader queue. A large value is needed - // to generate a good random shuffle. - optional uint32 min_after_dequeue = 4 [default=1000]; - - // The number of times a data source is read. If set to zero, the data source - // will be reused indefinitely. - optional uint32 num_epochs = 5 [default=0]; - - // Number of reader instances to create. - optional uint32 num_readers = 6 [default=8]; - - // Whether to load groundtruth instance masks. - optional bool load_instance_masks = 7 [default = false]; - - oneof input_reader { - TFRecordInputReader tf_record_input_reader = 8; - ExternalInputReader external_input_reader = 9; - } -} - -// An input reader that reads TF Example protos from local TFRecord files. -message TFRecordInputReader { - // Path(s) to `TFRecordFile`s. 
- repeated string input_path = 1; -} - -// An externally defined input reader. Users may define an extension to this -// proto to interface their own input readers. -message ExternalInputReader { - extensions 1 to 999; -} diff --git a/object_detection/protos/input_reader_pb2.py b/object_detection/protos/input_reader_pb2.py deleted file mode 100644 index 1e8022b9..00000000 --- a/object_detection/protos/input_reader_pb2.py +++ /dev/null @@ -1,207 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: object_detection/protos/input_reader.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='object_detection/protos/input_reader.proto', - package='object_detection.protos', - syntax='proto2', - serialized_pb=_b('\n*object_detection/protos/input_reader.proto\x12\x17object_detection.protos\"\xff\x02\n\x0bInputReader\x12\x18\n\x0elabel_map_path\x18\x01 \x01(\t:\x00\x12\x15\n\x07shuffle\x18\x02 \x01(\x08:\x04true\x12\x1c\n\x0equeue_capacity\x18\x03 \x01(\r:\x04\x32\x30\x30\x30\x12\x1f\n\x11min_after_dequeue\x18\x04 \x01(\r:\x04\x31\x30\x30\x30\x12\x15\n\nnum_epochs\x18\x05 \x01(\r:\x01\x30\x12\x16\n\x0bnum_readers\x18\x06 \x01(\r:\x01\x38\x12\"\n\x13load_instance_masks\x18\x07 \x01(\x08:\x05\x66\x61lse\x12N\n\x16tf_record_input_reader\x18\x08 \x01(\x0b\x32,.object_detection.protos.TFRecordInputReaderH\x00\x12M\n\x15\x65xternal_input_reader\x18\t \x01(\x0b\x32,.object_detection.protos.ExternalInputReaderH\x00\x42\x0e\n\x0cinput_reader\")\n\x13TFRecordInputReader\x12\x12\n\ninput_path\x18\x01 
\x03(\t\"\x1c\n\x13\x45xternalInputReader*\x05\x08\x01\x10\xe8\x07') -) - - - - -_INPUTREADER = _descriptor.Descriptor( - name='InputReader', - full_name='object_detection.protos.InputReader', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='label_map_path', full_name='object_detection.protos.InputReader.label_map_path', index=0, - number=1, type=9, cpp_type=9, label=1, - has_default_value=True, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='shuffle', full_name='object_detection.protos.InputReader.shuffle', index=1, - number=2, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=True, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='queue_capacity', full_name='object_detection.protos.InputReader.queue_capacity', index=2, - number=3, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=2000, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='min_after_dequeue', full_name='object_detection.protos.InputReader.min_after_dequeue', index=3, - number=4, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=1000, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='num_epochs', full_name='object_detection.protos.InputReader.num_epochs', index=4, - number=5, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - 
options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='num_readers', full_name='object_detection.protos.InputReader.num_readers', index=5, - number=6, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=8, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='load_instance_masks', full_name='object_detection.protos.InputReader.load_instance_masks', index=6, - number=7, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='tf_record_input_reader', full_name='object_detection.protos.InputReader.tf_record_input_reader', index=7, - number=8, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='external_input_reader', full_name='object_detection.protos.InputReader.external_input_reader', index=8, - number=9, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name='input_reader', full_name='object_detection.protos.InputReader.input_reader', - index=0, containing_type=None, fields=[]), - ], - serialized_start=72, - serialized_end=455, -) - - -_TFRECORDINPUTREADER = _descriptor.Descriptor( - name='TFRecordInputReader', - full_name='object_detection.protos.TFRecordInputReader', - 
filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='input_path', full_name='object_detection.protos.TFRecordInputReader.input_path', index=0, - number=1, type=9, cpp_type=9, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=457, - serialized_end=498, -) - - -_EXTERNALINPUTREADER = _descriptor.Descriptor( - name='ExternalInputReader', - full_name='object_detection.protos.ExternalInputReader', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=True, - syntax='proto2', - extension_ranges=[(1, 1000), ], - oneofs=[ - ], - serialized_start=500, - serialized_end=528, -) - -_INPUTREADER.fields_by_name['tf_record_input_reader'].message_type = _TFRECORDINPUTREADER -_INPUTREADER.fields_by_name['external_input_reader'].message_type = _EXTERNALINPUTREADER -_INPUTREADER.oneofs_by_name['input_reader'].fields.append( - _INPUTREADER.fields_by_name['tf_record_input_reader']) -_INPUTREADER.fields_by_name['tf_record_input_reader'].containing_oneof = _INPUTREADER.oneofs_by_name['input_reader'] -_INPUTREADER.oneofs_by_name['input_reader'].fields.append( - _INPUTREADER.fields_by_name['external_input_reader']) -_INPUTREADER.fields_by_name['external_input_reader'].containing_oneof = _INPUTREADER.oneofs_by_name['input_reader'] -DESCRIPTOR.message_types_by_name['InputReader'] = _INPUTREADER -DESCRIPTOR.message_types_by_name['TFRecordInputReader'] = _TFRECORDINPUTREADER -DESCRIPTOR.message_types_by_name['ExternalInputReader'] = _EXTERNALINPUTREADER -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -InputReader = 
_reflection.GeneratedProtocolMessageType('InputReader', (_message.Message,), dict( - DESCRIPTOR = _INPUTREADER, - __module__ = 'object_detection.protos.input_reader_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.InputReader) - )) -_sym_db.RegisterMessage(InputReader) - -TFRecordInputReader = _reflection.GeneratedProtocolMessageType('TFRecordInputReader', (_message.Message,), dict( - DESCRIPTOR = _TFRECORDINPUTREADER, - __module__ = 'object_detection.protos.input_reader_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.TFRecordInputReader) - )) -_sym_db.RegisterMessage(TFRecordInputReader) - -ExternalInputReader = _reflection.GeneratedProtocolMessageType('ExternalInputReader', (_message.Message,), dict( - DESCRIPTOR = _EXTERNALINPUTREADER, - __module__ = 'object_detection.protos.input_reader_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.ExternalInputReader) - )) -_sym_db.RegisterMessage(ExternalInputReader) - - -# @@protoc_insertion_point(module_scope) diff --git a/object_detection/protos/keypoint_box_coder.proto b/object_detection/protos/keypoint_box_coder.proto deleted file mode 100644 index 542ebbfb..00000000 --- a/object_detection/protos/keypoint_box_coder.proto +++ /dev/null @@ -1,19 +0,0 @@ -syntax = "proto2"; - -package object_detection.protos; - -// Configuration proto for KeypointBoxCoder. See -// box_coders/keypoint_box_coder.py for details. -message KeypointBoxCoder { - optional int32 num_keypoints = 1; - - // Scale factor for anchor encoded box center and keypoints. - optional float y_scale = 2 [default = 10.0]; - optional float x_scale = 3 [default = 10.0]; - - // Scale factor for anchor encoded box height. - optional float height_scale = 4 [default = 5.0]; - - // Scale factor for anchor encoded box width. 
- optional float width_scale = 5 [default = 5.0]; -} diff --git a/object_detection/protos/keypoint_box_coder_pb2.py b/object_detection/protos/keypoint_box_coder_pb2.py deleted file mode 100644 index d473bc9d..00000000 --- a/object_detection/protos/keypoint_box_coder_pb2.py +++ /dev/null @@ -1,97 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: object_detection/protos/keypoint_box_coder.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='object_detection/protos/keypoint_box_coder.proto', - package='object_detection.protos', - syntax='proto2', - serialized_pb=_b('\n0object_detection/protos/keypoint_box_coder.proto\x12\x17object_detection.protos\"\x84\x01\n\x10KeypointBoxCoder\x12\x15\n\rnum_keypoints\x18\x01 \x01(\x05\x12\x13\n\x07y_scale\x18\x02 \x01(\x02:\x02\x31\x30\x12\x13\n\x07x_scale\x18\x03 \x01(\x02:\x02\x31\x30\x12\x17\n\x0cheight_scale\x18\x04 \x01(\x02:\x01\x35\x12\x16\n\x0bwidth_scale\x18\x05 \x01(\x02:\x01\x35') -) - - - - -_KEYPOINTBOXCODER = _descriptor.Descriptor( - name='KeypointBoxCoder', - full_name='object_detection.protos.KeypointBoxCoder', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='num_keypoints', full_name='object_detection.protos.KeypointBoxCoder.num_keypoints', index=0, - number=1, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - 
_descriptor.FieldDescriptor( - name='y_scale', full_name='object_detection.protos.KeypointBoxCoder.y_scale', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(10), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='x_scale', full_name='object_detection.protos.KeypointBoxCoder.x_scale', index=2, - number=3, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(10), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='height_scale', full_name='object_detection.protos.KeypointBoxCoder.height_scale', index=3, - number=4, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(5), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='width_scale', full_name='object_detection.protos.KeypointBoxCoder.width_scale', index=4, - number=5, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(5), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=78, - serialized_end=210, -) - -DESCRIPTOR.message_types_by_name['KeypointBoxCoder'] = _KEYPOINTBOXCODER -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -KeypointBoxCoder = _reflection.GeneratedProtocolMessageType('KeypointBoxCoder', (_message.Message,), dict( - DESCRIPTOR = _KEYPOINTBOXCODER, - __module__ = 'object_detection.protos.keypoint_box_coder_pb2' - # 
@@protoc_insertion_point(class_scope:object_detection.protos.KeypointBoxCoder) - )) -_sym_db.RegisterMessage(KeypointBoxCoder) - - -# @@protoc_insertion_point(module_scope) diff --git a/object_detection/protos/losses.proto b/object_detection/protos/losses.proto deleted file mode 100644 index e2d189b5..00000000 --- a/object_detection/protos/losses.proto +++ /dev/null @@ -1,130 +0,0 @@ -syntax = "proto2"; - -package object_detection.protos; - -// Message for configuring the localization loss, classification loss and hard -// example miner used for training object detection models. See core/losses.py -// for details -message Loss { - // Localization loss to use. - optional LocalizationLoss localization_loss = 1; - - // Classification loss to use. - optional ClassificationLoss classification_loss = 2; - - // If not left to default, applies hard example mining. - optional HardExampleMiner hard_example_miner = 3; - - // Classification loss weight. - optional float classification_weight = 4 [default=1.0]; - - // Localization loss weight. - optional float localization_weight = 5 [default=1.0]; -} - -// Configuration for bounding box localization loss function. -message LocalizationLoss { - oneof localization_loss { - WeightedL2LocalizationLoss weighted_l2 = 1; - WeightedSmoothL1LocalizationLoss weighted_smooth_l1 = 2; - WeightedIOULocalizationLoss weighted_iou = 3; - } -} - -// L2 location loss: 0.5 * ||weight * (a - b)|| ^ 2 -message WeightedL2LocalizationLoss { - // Output loss per anchor. - optional bool anchorwise_output = 1 [default=false]; -} - -// SmoothL1 (Huber) location loss: .5 * x ^ 2 if |x| < 1 else |x| - .5 -message WeightedSmoothL1LocalizationLoss { - // Output loss per anchor. - optional bool anchorwise_output = 1 [default=false]; -} - -// Intersection over union location loss: 1 - IOU -message WeightedIOULocalizationLoss { -} - -// Configuration for class prediction loss function. 
-message ClassificationLoss { - oneof classification_loss { - WeightedSigmoidClassificationLoss weighted_sigmoid = 1; - WeightedSoftmaxClassificationLoss weighted_softmax = 2; - BootstrappedSigmoidClassificationLoss bootstrapped_sigmoid = 3; - SigmoidFocalClassificationLoss weighted_sigmoid_focal = 4; - } -} - -// Classification loss using a sigmoid function over class predictions. -message WeightedSigmoidClassificationLoss { - // Output loss per anchor. - optional bool anchorwise_output = 1 [default=false]; -} - -// Sigmoid Focal cross entropy loss as described in -// https://arxiv.org/abs/1708.02002 -message SigmoidFocalClassificationLoss { - optional bool anchorwise_output = 1 [default = false]; - // modulating factor for the loss. - optional float gamma = 2 [default = 2.0]; - // alpha weighting factor for the loss. - optional float alpha = 3; -} - -// Classification loss using a softmax function over class predictions. -message WeightedSoftmaxClassificationLoss { - // Output loss per anchor. - optional bool anchorwise_output = 1 [default=false]; - // Scale logit (input) value before calculating softmax classification loss. - // Typically used for softmax distillation. - optional float logit_scale = 2 [default = 1.0]; -} - -// Classification loss using a sigmoid function over the class prediction with -// the highest prediction score. -message BootstrappedSigmoidClassificationLoss { - // Interpolation weight between 0 and 1. - optional float alpha = 1; - - // Whether hard boot strapping should be used or not. If true, will only use - // one class favored by model. Othewise, will use all predicted class - // probabilities. - optional bool hard_bootstrap = 2 [default=false]; - - // Output loss per anchor. - optional bool anchorwise_output = 3 [default=false]; -} - -// Configuation for hard example miner. -message HardExampleMiner { - // Maximum number of hard examples to be selected per image (prior to - // enforcing max negative to positive ratio constraint). 
If set to 0, - // all examples obtained after NMS are considered. - optional int32 num_hard_examples = 1 [default=64]; - - // Minimum intersection over union for an example to be discarded during NMS. - optional float iou_threshold = 2 [default=0.7]; - - // Whether to use classification losses ('cls', default), localization losses - // ('loc') or both losses ('both'). In the case of 'both', cls_loss_weight and - // loc_loss_weight are used to compute weighted sum of the two losses. - enum LossType { - BOTH = 0; - CLASSIFICATION = 1; - LOCALIZATION = 2; - } - optional LossType loss_type = 3 [default=BOTH]; - - // Maximum number of negatives to retain for each positive anchor. If - // num_negatives_per_positive is 0 no prespecified negative:positive ratio is - // enforced. - optional int32 max_negatives_per_positive = 4 [default=0]; - - // Minimum number of negative anchors to sample for a given image. Setting - // this to a positive number samples negatives in an image without any - // positive anchors and thus not bias the model towards having at least one - // detection per image. - optional int32 min_negatives_per_image = 5 [default=0]; -} diff --git a/object_detection/protos/losses_pb2.py b/object_detection/protos/losses_pb2.py deleted file mode 100644 index 51077a3c..00000000 --- a/object_detection/protos/losses_pb2.py +++ /dev/null @@ -1,644 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: object_detection/protos/losses.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='object_detection/protos/losses.proto', - package='object_detection.protos', - syntax='proto2', - serialized_pb=_b('\n$object_detection/protos/losses.proto\x12\x17object_detection.protos\"\x9f\x02\n\x04Loss\x12\x44\n\x11localization_loss\x18\x01 \x01(\x0b\x32).object_detection.protos.LocalizationLoss\x12H\n\x13\x63lassification_loss\x18\x02 \x01(\x0b\x32+.object_detection.protos.ClassificationLoss\x12\x45\n\x12hard_example_miner\x18\x03 \x01(\x0b\x32).object_detection.protos.HardExampleMiner\x12 \n\x15\x63lassification_weight\x18\x04 \x01(\x02:\x01\x31\x12\x1e\n\x13localization_weight\x18\x05 \x01(\x02:\x01\x31\"\x9a\x02\n\x10LocalizationLoss\x12J\n\x0bweighted_l2\x18\x01 \x01(\x0b\x32\x33.object_detection.protos.WeightedL2LocalizationLossH\x00\x12W\n\x12weighted_smooth_l1\x18\x02 \x01(\x0b\x32\x39.object_detection.protos.WeightedSmoothL1LocalizationLossH\x00\x12L\n\x0cweighted_iou\x18\x03 \x01(\x0b\x32\x34.object_detection.protos.WeightedIOULocalizationLossH\x00\x42\x13\n\x11localization_loss\">\n\x1aWeightedL2LocalizationLoss\x12 \n\x11\x61nchorwise_output\x18\x01 \x01(\x08:\x05\x66\x61lse\"D\n WeightedSmoothL1LocalizationLoss\x12 \n\x11\x61nchorwise_output\x18\x01 \x01(\x08:\x05\x66\x61lse\"\x1d\n\x1bWeightedIOULocalizationLoss\"\x96\x03\n\x12\x43lassificationLoss\x12V\n\x10weighted_sigmoid\x18\x01 \x01(\x0b\x32:.object_detection.protos.WeightedSigmoidClassificationLossH\x00\x12V\n\x10weighted_softmax\x18\x02 
\x01(\x0b\x32:.object_detection.protos.WeightedSoftmaxClassificationLossH\x00\x12^\n\x14\x62ootstrapped_sigmoid\x18\x03 \x01(\x0b\x32>.object_detection.protos.BootstrappedSigmoidClassificationLossH\x00\x12Y\n\x16weighted_sigmoid_focal\x18\x04 \x01(\x0b\x32\x37.object_detection.protos.SigmoidFocalClassificationLossH\x00\x42\x15\n\x13\x63lassification_loss\"E\n!WeightedSigmoidClassificationLoss\x12 \n\x11\x61nchorwise_output\x18\x01 \x01(\x08:\x05\x66\x61lse\"c\n\x1eSigmoidFocalClassificationLoss\x12 \n\x11\x61nchorwise_output\x18\x01 \x01(\x08:\x05\x66\x61lse\x12\x10\n\x05gamma\x18\x02 \x01(\x02:\x01\x32\x12\r\n\x05\x61lpha\x18\x03 \x01(\x02\"]\n!WeightedSoftmaxClassificationLoss\x12 \n\x11\x61nchorwise_output\x18\x01 \x01(\x08:\x05\x66\x61lse\x12\x16\n\x0blogit_scale\x18\x02 \x01(\x02:\x01\x31\"w\n%BootstrappedSigmoidClassificationLoss\x12\r\n\x05\x61lpha\x18\x01 \x01(\x02\x12\x1d\n\x0ehard_bootstrap\x18\x02 \x01(\x08:\x05\x66\x61lse\x12 \n\x11\x61nchorwise_output\x18\x03 \x01(\x08:\x05\x66\x61lse\"\xa1\x02\n\x10HardExampleMiner\x12\x1d\n\x11num_hard_examples\x18\x01 \x01(\x05:\x02\x36\x34\x12\x1a\n\riou_threshold\x18\x02 \x01(\x02:\x03\x30.7\x12K\n\tloss_type\x18\x03 \x01(\x0e\x32\x32.object_detection.protos.HardExampleMiner.LossType:\x04\x42OTH\x12%\n\x1amax_negatives_per_positive\x18\x04 \x01(\x05:\x01\x30\x12\"\n\x17min_negatives_per_image\x18\x05 \x01(\x05:\x01\x30\":\n\x08LossType\x12\x08\n\x04\x42OTH\x10\x00\x12\x12\n\x0e\x43LASSIFICATION\x10\x01\x12\x10\n\x0cLOCALIZATION\x10\x02') -) - - - -_HARDEXAMPLEMINER_LOSSTYPE = _descriptor.EnumDescriptor( - name='LossType', - full_name='object_detection.protos.HardExampleMiner.LossType', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='BOTH', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='CLASSIFICATION', index=1, number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='LOCALIZATION', index=2, number=2, - 
options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=1834, - serialized_end=1892, -) -_sym_db.RegisterEnumDescriptor(_HARDEXAMPLEMINER_LOSSTYPE) - - -_LOSS = _descriptor.Descriptor( - name='Loss', - full_name='object_detection.protos.Loss', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='localization_loss', full_name='object_detection.protos.Loss.localization_loss', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='classification_loss', full_name='object_detection.protos.Loss.classification_loss', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='hard_example_miner', full_name='object_detection.protos.Loss.hard_example_miner', index=2, - number=3, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='classification_weight', full_name='object_detection.protos.Loss.classification_weight', index=3, - number=4, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='localization_weight', full_name='object_detection.protos.Loss.localization_weight', index=4, - number=5, type=2, cpp_type=6, label=1, - has_default_value=True, 
default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=66, - serialized_end=353, -) - - -_LOCALIZATIONLOSS = _descriptor.Descriptor( - name='LocalizationLoss', - full_name='object_detection.protos.LocalizationLoss', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='weighted_l2', full_name='object_detection.protos.LocalizationLoss.weighted_l2', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='weighted_smooth_l1', full_name='object_detection.protos.LocalizationLoss.weighted_smooth_l1', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='weighted_iou', full_name='object_detection.protos.LocalizationLoss.weighted_iou', index=2, - number=3, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name='localization_loss', full_name='object_detection.protos.LocalizationLoss.localization_loss', - index=0, containing_type=None, fields=[]), - ], - 
serialized_start=356, - serialized_end=638, -) - - -_WEIGHTEDL2LOCALIZATIONLOSS = _descriptor.Descriptor( - name='WeightedL2LocalizationLoss', - full_name='object_detection.protos.WeightedL2LocalizationLoss', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='anchorwise_output', full_name='object_detection.protos.WeightedL2LocalizationLoss.anchorwise_output', index=0, - number=1, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=640, - serialized_end=702, -) - - -_WEIGHTEDSMOOTHL1LOCALIZATIONLOSS = _descriptor.Descriptor( - name='WeightedSmoothL1LocalizationLoss', - full_name='object_detection.protos.WeightedSmoothL1LocalizationLoss', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='anchorwise_output', full_name='object_detection.protos.WeightedSmoothL1LocalizationLoss.anchorwise_output', index=0, - number=1, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=704, - serialized_end=772, -) - - -_WEIGHTEDIOULOCALIZATIONLOSS = _descriptor.Descriptor( - name='WeightedIOULocalizationLoss', - full_name='object_detection.protos.WeightedIOULocalizationLoss', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - ], - extensions=[ - ], - nested_types=[], - 
enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=774, - serialized_end=803, -) - - -_CLASSIFICATIONLOSS = _descriptor.Descriptor( - name='ClassificationLoss', - full_name='object_detection.protos.ClassificationLoss', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='weighted_sigmoid', full_name='object_detection.protos.ClassificationLoss.weighted_sigmoid', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='weighted_softmax', full_name='object_detection.protos.ClassificationLoss.weighted_softmax', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='bootstrapped_sigmoid', full_name='object_detection.protos.ClassificationLoss.bootstrapped_sigmoid', index=2, - number=3, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='weighted_sigmoid_focal', full_name='object_detection.protos.ClassificationLoss.weighted_sigmoid_focal', index=3, - number=4, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - 
extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name='classification_loss', full_name='object_detection.protos.ClassificationLoss.classification_loss', - index=0, containing_type=None, fields=[]), - ], - serialized_start=806, - serialized_end=1212, -) - - -_WEIGHTEDSIGMOIDCLASSIFICATIONLOSS = _descriptor.Descriptor( - name='WeightedSigmoidClassificationLoss', - full_name='object_detection.protos.WeightedSigmoidClassificationLoss', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='anchorwise_output', full_name='object_detection.protos.WeightedSigmoidClassificationLoss.anchorwise_output', index=0, - number=1, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1214, - serialized_end=1283, -) - - -_SIGMOIDFOCALCLASSIFICATIONLOSS = _descriptor.Descriptor( - name='SigmoidFocalClassificationLoss', - full_name='object_detection.protos.SigmoidFocalClassificationLoss', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='anchorwise_output', full_name='object_detection.protos.SigmoidFocalClassificationLoss.anchorwise_output', index=0, - number=1, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='gamma', full_name='object_detection.protos.SigmoidFocalClassificationLoss.gamma', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(2), - message_type=None, enum_type=None, 
containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='alpha', full_name='object_detection.protos.SigmoidFocalClassificationLoss.alpha', index=2, - number=3, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1285, - serialized_end=1384, -) - - -_WEIGHTEDSOFTMAXCLASSIFICATIONLOSS = _descriptor.Descriptor( - name='WeightedSoftmaxClassificationLoss', - full_name='object_detection.protos.WeightedSoftmaxClassificationLoss', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='anchorwise_output', full_name='object_detection.protos.WeightedSoftmaxClassificationLoss.anchorwise_output', index=0, - number=1, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='logit_scale', full_name='object_detection.protos.WeightedSoftmaxClassificationLoss.logit_scale', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1386, - serialized_end=1479, -) - - -_BOOTSTRAPPEDSIGMOIDCLASSIFICATIONLOSS = _descriptor.Descriptor( - 
name='BootstrappedSigmoidClassificationLoss', - full_name='object_detection.protos.BootstrappedSigmoidClassificationLoss', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='alpha', full_name='object_detection.protos.BootstrappedSigmoidClassificationLoss.alpha', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='hard_bootstrap', full_name='object_detection.protos.BootstrappedSigmoidClassificationLoss.hard_bootstrap', index=1, - number=2, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='anchorwise_output', full_name='object_detection.protos.BootstrappedSigmoidClassificationLoss.anchorwise_output', index=2, - number=3, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1481, - serialized_end=1600, -) - - -_HARDEXAMPLEMINER = _descriptor.Descriptor( - name='HardExampleMiner', - full_name='object_detection.protos.HardExampleMiner', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='num_hard_examples', full_name='object_detection.protos.HardExampleMiner.num_hard_examples', index=0, - number=1, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=64, - message_type=None, 
enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='iou_threshold', full_name='object_detection.protos.HardExampleMiner.iou_threshold', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.7), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='loss_type', full_name='object_detection.protos.HardExampleMiner.loss_type', index=2, - number=3, type=14, cpp_type=8, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='max_negatives_per_positive', full_name='object_detection.protos.HardExampleMiner.max_negatives_per_positive', index=3, - number=4, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='min_negatives_per_image', full_name='object_detection.protos.HardExampleMiner.min_negatives_per_image', index=4, - number=5, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - _HARDEXAMPLEMINER_LOSSTYPE, - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1603, - serialized_end=1892, -) - -_LOSS.fields_by_name['localization_loss'].message_type = _LOCALIZATIONLOSS -_LOSS.fields_by_name['classification_loss'].message_type = _CLASSIFICATIONLOSS 
-_LOSS.fields_by_name['hard_example_miner'].message_type = _HARDEXAMPLEMINER -_LOCALIZATIONLOSS.fields_by_name['weighted_l2'].message_type = _WEIGHTEDL2LOCALIZATIONLOSS -_LOCALIZATIONLOSS.fields_by_name['weighted_smooth_l1'].message_type = _WEIGHTEDSMOOTHL1LOCALIZATIONLOSS -_LOCALIZATIONLOSS.fields_by_name['weighted_iou'].message_type = _WEIGHTEDIOULOCALIZATIONLOSS -_LOCALIZATIONLOSS.oneofs_by_name['localization_loss'].fields.append( - _LOCALIZATIONLOSS.fields_by_name['weighted_l2']) -_LOCALIZATIONLOSS.fields_by_name['weighted_l2'].containing_oneof = _LOCALIZATIONLOSS.oneofs_by_name['localization_loss'] -_LOCALIZATIONLOSS.oneofs_by_name['localization_loss'].fields.append( - _LOCALIZATIONLOSS.fields_by_name['weighted_smooth_l1']) -_LOCALIZATIONLOSS.fields_by_name['weighted_smooth_l1'].containing_oneof = _LOCALIZATIONLOSS.oneofs_by_name['localization_loss'] -_LOCALIZATIONLOSS.oneofs_by_name['localization_loss'].fields.append( - _LOCALIZATIONLOSS.fields_by_name['weighted_iou']) -_LOCALIZATIONLOSS.fields_by_name['weighted_iou'].containing_oneof = _LOCALIZATIONLOSS.oneofs_by_name['localization_loss'] -_CLASSIFICATIONLOSS.fields_by_name['weighted_sigmoid'].message_type = _WEIGHTEDSIGMOIDCLASSIFICATIONLOSS -_CLASSIFICATIONLOSS.fields_by_name['weighted_softmax'].message_type = _WEIGHTEDSOFTMAXCLASSIFICATIONLOSS -_CLASSIFICATIONLOSS.fields_by_name['bootstrapped_sigmoid'].message_type = _BOOTSTRAPPEDSIGMOIDCLASSIFICATIONLOSS -_CLASSIFICATIONLOSS.fields_by_name['weighted_sigmoid_focal'].message_type = _SIGMOIDFOCALCLASSIFICATIONLOSS -_CLASSIFICATIONLOSS.oneofs_by_name['classification_loss'].fields.append( - _CLASSIFICATIONLOSS.fields_by_name['weighted_sigmoid']) -_CLASSIFICATIONLOSS.fields_by_name['weighted_sigmoid'].containing_oneof = _CLASSIFICATIONLOSS.oneofs_by_name['classification_loss'] -_CLASSIFICATIONLOSS.oneofs_by_name['classification_loss'].fields.append( - _CLASSIFICATIONLOSS.fields_by_name['weighted_softmax']) 
-_CLASSIFICATIONLOSS.fields_by_name['weighted_softmax'].containing_oneof = _CLASSIFICATIONLOSS.oneofs_by_name['classification_loss'] -_CLASSIFICATIONLOSS.oneofs_by_name['classification_loss'].fields.append( - _CLASSIFICATIONLOSS.fields_by_name['bootstrapped_sigmoid']) -_CLASSIFICATIONLOSS.fields_by_name['bootstrapped_sigmoid'].containing_oneof = _CLASSIFICATIONLOSS.oneofs_by_name['classification_loss'] -_CLASSIFICATIONLOSS.oneofs_by_name['classification_loss'].fields.append( - _CLASSIFICATIONLOSS.fields_by_name['weighted_sigmoid_focal']) -_CLASSIFICATIONLOSS.fields_by_name['weighted_sigmoid_focal'].containing_oneof = _CLASSIFICATIONLOSS.oneofs_by_name['classification_loss'] -_HARDEXAMPLEMINER.fields_by_name['loss_type'].enum_type = _HARDEXAMPLEMINER_LOSSTYPE -_HARDEXAMPLEMINER_LOSSTYPE.containing_type = _HARDEXAMPLEMINER -DESCRIPTOR.message_types_by_name['Loss'] = _LOSS -DESCRIPTOR.message_types_by_name['LocalizationLoss'] = _LOCALIZATIONLOSS -DESCRIPTOR.message_types_by_name['WeightedL2LocalizationLoss'] = _WEIGHTEDL2LOCALIZATIONLOSS -DESCRIPTOR.message_types_by_name['WeightedSmoothL1LocalizationLoss'] = _WEIGHTEDSMOOTHL1LOCALIZATIONLOSS -DESCRIPTOR.message_types_by_name['WeightedIOULocalizationLoss'] = _WEIGHTEDIOULOCALIZATIONLOSS -DESCRIPTOR.message_types_by_name['ClassificationLoss'] = _CLASSIFICATIONLOSS -DESCRIPTOR.message_types_by_name['WeightedSigmoidClassificationLoss'] = _WEIGHTEDSIGMOIDCLASSIFICATIONLOSS -DESCRIPTOR.message_types_by_name['SigmoidFocalClassificationLoss'] = _SIGMOIDFOCALCLASSIFICATIONLOSS -DESCRIPTOR.message_types_by_name['WeightedSoftmaxClassificationLoss'] = _WEIGHTEDSOFTMAXCLASSIFICATIONLOSS -DESCRIPTOR.message_types_by_name['BootstrappedSigmoidClassificationLoss'] = _BOOTSTRAPPEDSIGMOIDCLASSIFICATIONLOSS -DESCRIPTOR.message_types_by_name['HardExampleMiner'] = _HARDEXAMPLEMINER -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -Loss = _reflection.GeneratedProtocolMessageType('Loss', (_message.Message,), dict( - DESCRIPTOR = _LOSS, - 
__module__ = 'object_detection.protos.losses_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.Loss) - )) -_sym_db.RegisterMessage(Loss) - -LocalizationLoss = _reflection.GeneratedProtocolMessageType('LocalizationLoss', (_message.Message,), dict( - DESCRIPTOR = _LOCALIZATIONLOSS, - __module__ = 'object_detection.protos.losses_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.LocalizationLoss) - )) -_sym_db.RegisterMessage(LocalizationLoss) - -WeightedL2LocalizationLoss = _reflection.GeneratedProtocolMessageType('WeightedL2LocalizationLoss', (_message.Message,), dict( - DESCRIPTOR = _WEIGHTEDL2LOCALIZATIONLOSS, - __module__ = 'object_detection.protos.losses_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.WeightedL2LocalizationLoss) - )) -_sym_db.RegisterMessage(WeightedL2LocalizationLoss) - -WeightedSmoothL1LocalizationLoss = _reflection.GeneratedProtocolMessageType('WeightedSmoothL1LocalizationLoss', (_message.Message,), dict( - DESCRIPTOR = _WEIGHTEDSMOOTHL1LOCALIZATIONLOSS, - __module__ = 'object_detection.protos.losses_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.WeightedSmoothL1LocalizationLoss) - )) -_sym_db.RegisterMessage(WeightedSmoothL1LocalizationLoss) - -WeightedIOULocalizationLoss = _reflection.GeneratedProtocolMessageType('WeightedIOULocalizationLoss', (_message.Message,), dict( - DESCRIPTOR = _WEIGHTEDIOULOCALIZATIONLOSS, - __module__ = 'object_detection.protos.losses_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.WeightedIOULocalizationLoss) - )) -_sym_db.RegisterMessage(WeightedIOULocalizationLoss) - -ClassificationLoss = _reflection.GeneratedProtocolMessageType('ClassificationLoss', (_message.Message,), dict( - DESCRIPTOR = _CLASSIFICATIONLOSS, - __module__ = 'object_detection.protos.losses_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.ClassificationLoss) - )) -_sym_db.RegisterMessage(ClassificationLoss) - 
-WeightedSigmoidClassificationLoss = _reflection.GeneratedProtocolMessageType('WeightedSigmoidClassificationLoss', (_message.Message,), dict( - DESCRIPTOR = _WEIGHTEDSIGMOIDCLASSIFICATIONLOSS, - __module__ = 'object_detection.protos.losses_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.WeightedSigmoidClassificationLoss) - )) -_sym_db.RegisterMessage(WeightedSigmoidClassificationLoss) - -SigmoidFocalClassificationLoss = _reflection.GeneratedProtocolMessageType('SigmoidFocalClassificationLoss', (_message.Message,), dict( - DESCRIPTOR = _SIGMOIDFOCALCLASSIFICATIONLOSS, - __module__ = 'object_detection.protos.losses_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.SigmoidFocalClassificationLoss) - )) -_sym_db.RegisterMessage(SigmoidFocalClassificationLoss) - -WeightedSoftmaxClassificationLoss = _reflection.GeneratedProtocolMessageType('WeightedSoftmaxClassificationLoss', (_message.Message,), dict( - DESCRIPTOR = _WEIGHTEDSOFTMAXCLASSIFICATIONLOSS, - __module__ = 'object_detection.protos.losses_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.WeightedSoftmaxClassificationLoss) - )) -_sym_db.RegisterMessage(WeightedSoftmaxClassificationLoss) - -BootstrappedSigmoidClassificationLoss = _reflection.GeneratedProtocolMessageType('BootstrappedSigmoidClassificationLoss', (_message.Message,), dict( - DESCRIPTOR = _BOOTSTRAPPEDSIGMOIDCLASSIFICATIONLOSS, - __module__ = 'object_detection.protos.losses_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.BootstrappedSigmoidClassificationLoss) - )) -_sym_db.RegisterMessage(BootstrappedSigmoidClassificationLoss) - -HardExampleMiner = _reflection.GeneratedProtocolMessageType('HardExampleMiner', (_message.Message,), dict( - DESCRIPTOR = _HARDEXAMPLEMINER, - __module__ = 'object_detection.protos.losses_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.HardExampleMiner) - )) -_sym_db.RegisterMessage(HardExampleMiner) - - -# 
@@protoc_insertion_point(module_scope) diff --git a/object_detection/protos/matcher.proto b/object_detection/protos/matcher.proto deleted file mode 100644 index b47de56c..00000000 --- a/object_detection/protos/matcher.proto +++ /dev/null @@ -1,15 +0,0 @@ -syntax = "proto2"; - -package object_detection.protos; - -import "object_detection/protos/argmax_matcher.proto"; -import "object_detection/protos/bipartite_matcher.proto"; - -// Configuration proto for the matcher to be used in the object detection -// pipeline. See core/matcher.py for details. -message Matcher { - oneof matcher_oneof { - ArgMaxMatcher argmax_matcher = 1; - BipartiteMatcher bipartite_matcher = 2; - } -} diff --git a/object_detection/protos/matcher_pb2.py b/object_detection/protos/matcher_pb2.py deleted file mode 100644 index c422bb81..00000000 --- a/object_detection/protos/matcher_pb2.py +++ /dev/null @@ -1,90 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: object_detection/protos/matcher.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from object_detection.protos import argmax_matcher_pb2 as object__detection_dot_protos_dot_argmax__matcher__pb2 -from object_detection.protos import bipartite_matcher_pb2 as object__detection_dot_protos_dot_bipartite__matcher__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='object_detection/protos/matcher.proto', - package='object_detection.protos', - syntax='proto2', - 
serialized_pb=_b('\n%object_detection/protos/matcher.proto\x12\x17object_detection.protos\x1a,object_detection/protos/argmax_matcher.proto\x1a/object_detection/protos/bipartite_matcher.proto\"\xa4\x01\n\x07Matcher\x12@\n\x0e\x61rgmax_matcher\x18\x01 \x01(\x0b\x32&.object_detection.protos.ArgMaxMatcherH\x00\x12\x46\n\x11\x62ipartite_matcher\x18\x02 \x01(\x0b\x32).object_detection.protos.BipartiteMatcherH\x00\x42\x0f\n\rmatcher_oneof') - , - dependencies=[object__detection_dot_protos_dot_argmax__matcher__pb2.DESCRIPTOR,object__detection_dot_protos_dot_bipartite__matcher__pb2.DESCRIPTOR,]) - - - - -_MATCHER = _descriptor.Descriptor( - name='Matcher', - full_name='object_detection.protos.Matcher', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='argmax_matcher', full_name='object_detection.protos.Matcher.argmax_matcher', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='bipartite_matcher', full_name='object_detection.protos.Matcher.bipartite_matcher', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name='matcher_oneof', full_name='object_detection.protos.Matcher.matcher_oneof', - index=0, containing_type=None, fields=[]), - ], - serialized_start=162, - serialized_end=326, -) - -_MATCHER.fields_by_name['argmax_matcher'].message_type = object__detection_dot_protos_dot_argmax__matcher__pb2._ARGMAXMATCHER 
-_MATCHER.fields_by_name['bipartite_matcher'].message_type = object__detection_dot_protos_dot_bipartite__matcher__pb2._BIPARTITEMATCHER -_MATCHER.oneofs_by_name['matcher_oneof'].fields.append( - _MATCHER.fields_by_name['argmax_matcher']) -_MATCHER.fields_by_name['argmax_matcher'].containing_oneof = _MATCHER.oneofs_by_name['matcher_oneof'] -_MATCHER.oneofs_by_name['matcher_oneof'].fields.append( - _MATCHER.fields_by_name['bipartite_matcher']) -_MATCHER.fields_by_name['bipartite_matcher'].containing_oneof = _MATCHER.oneofs_by_name['matcher_oneof'] -DESCRIPTOR.message_types_by_name['Matcher'] = _MATCHER -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -Matcher = _reflection.GeneratedProtocolMessageType('Matcher', (_message.Message,), dict( - DESCRIPTOR = _MATCHER, - __module__ = 'object_detection.protos.matcher_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.Matcher) - )) -_sym_db.RegisterMessage(Matcher) - - -# @@protoc_insertion_point(module_scope) diff --git a/object_detection/protos/mean_stddev_box_coder.proto b/object_detection/protos/mean_stddev_box_coder.proto deleted file mode 100644 index 597c70cd..00000000 --- a/object_detection/protos/mean_stddev_box_coder.proto +++ /dev/null @@ -1,8 +0,0 @@ -syntax = "proto2"; - -package object_detection.protos; - -// Configuration proto for MeanStddevBoxCoder. See -// box_coders/mean_stddev_box_coder.py for details. -message MeanStddevBoxCoder { -} diff --git a/object_detection/protos/mean_stddev_box_coder_pb2.py b/object_detection/protos/mean_stddev_box_coder_pb2.py deleted file mode 100644 index 184565dd..00000000 --- a/object_detection/protos/mean_stddev_box_coder_pb2.py +++ /dev/null @@ -1,62 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: object_detection/protos/mean_stddev_box_coder.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='object_detection/protos/mean_stddev_box_coder.proto', - package='object_detection.protos', - syntax='proto2', - serialized_pb=_b('\n3object_detection/protos/mean_stddev_box_coder.proto\x12\x17object_detection.protos\"\x14\n\x12MeanStddevBoxCoder') -) - - - - -_MEANSTDDEVBOXCODER = _descriptor.Descriptor( - name='MeanStddevBoxCoder', - full_name='object_detection.protos.MeanStddevBoxCoder', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=80, - serialized_end=100, -) - -DESCRIPTOR.message_types_by_name['MeanStddevBoxCoder'] = _MEANSTDDEVBOXCODER -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -MeanStddevBoxCoder = _reflection.GeneratedProtocolMessageType('MeanStddevBoxCoder', (_message.Message,), dict( - DESCRIPTOR = _MEANSTDDEVBOXCODER, - __module__ = 'object_detection.protos.mean_stddev_box_coder_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.MeanStddevBoxCoder) - )) -_sym_db.RegisterMessage(MeanStddevBoxCoder) - - -# @@protoc_insertion_point(module_scope) diff --git a/object_detection/protos/model.proto b/object_detection/protos/model.proto deleted file mode 100644 index b699c17b..00000000 --- a/object_detection/protos/model.proto +++ /dev/null @@ -1,14 +0,0 @@ -syntax = "proto2"; - -package 
object_detection.protos; - -import "object_detection/protos/faster_rcnn.proto"; -import "object_detection/protos/ssd.proto"; - -// Top level configuration for DetectionModels. -message DetectionModel { - oneof model { - FasterRcnn faster_rcnn = 1; - Ssd ssd = 2; - } -} diff --git a/object_detection/protos/model_pb2.py b/object_detection/protos/model_pb2.py deleted file mode 100644 index 54ec4673..00000000 --- a/object_detection/protos/model_pb2.py +++ /dev/null @@ -1,90 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: object_detection/protos/model.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from object_detection.protos import faster_rcnn_pb2 as object__detection_dot_protos_dot_faster__rcnn__pb2 -from object_detection.protos import ssd_pb2 as object__detection_dot_protos_dot_ssd__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='object_detection/protos/model.proto', - package='object_detection.protos', - syntax='proto2', - serialized_pb=_b('\n#object_detection/protos/model.proto\x12\x17object_detection.protos\x1a)object_detection/protos/faster_rcnn.proto\x1a!object_detection/protos/ssd.proto\"\x82\x01\n\x0e\x44\x65tectionModel\x12:\n\x0b\x66\x61ster_rcnn\x18\x01 \x01(\x0b\x32#.object_detection.protos.FasterRcnnH\x00\x12+\n\x03ssd\x18\x02 \x01(\x0b\x32\x1c.object_detection.protos.SsdH\x00\x42\x07\n\x05model') - , - dependencies=[object__detection_dot_protos_dot_faster__rcnn__pb2.DESCRIPTOR,object__detection_dot_protos_dot_ssd__pb2.DESCRIPTOR,]) - - - - -_DETECTIONMODEL = _descriptor.Descriptor( - name='DetectionModel', - 
full_name='object_detection.protos.DetectionModel', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='faster_rcnn', full_name='object_detection.protos.DetectionModel.faster_rcnn', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='ssd', full_name='object_detection.protos.DetectionModel.ssd', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name='model', full_name='object_detection.protos.DetectionModel.model', - index=0, containing_type=None, fields=[]), - ], - serialized_start=143, - serialized_end=273, -) - -_DETECTIONMODEL.fields_by_name['faster_rcnn'].message_type = object__detection_dot_protos_dot_faster__rcnn__pb2._FASTERRCNN -_DETECTIONMODEL.fields_by_name['ssd'].message_type = object__detection_dot_protos_dot_ssd__pb2._SSD -_DETECTIONMODEL.oneofs_by_name['model'].fields.append( - _DETECTIONMODEL.fields_by_name['faster_rcnn']) -_DETECTIONMODEL.fields_by_name['faster_rcnn'].containing_oneof = _DETECTIONMODEL.oneofs_by_name['model'] -_DETECTIONMODEL.oneofs_by_name['model'].fields.append( - _DETECTIONMODEL.fields_by_name['ssd']) -_DETECTIONMODEL.fields_by_name['ssd'].containing_oneof = _DETECTIONMODEL.oneofs_by_name['model'] -DESCRIPTOR.message_types_by_name['DetectionModel'] = _DETECTIONMODEL -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -DetectionModel = _reflection.GeneratedProtocolMessageType('DetectionModel', 
(_message.Message,), dict( - DESCRIPTOR = _DETECTIONMODEL, - __module__ = 'object_detection.protos.model_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.DetectionModel) - )) -_sym_db.RegisterMessage(DetectionModel) - - -# @@protoc_insertion_point(module_scope) diff --git a/object_detection/protos/optimizer.proto b/object_detection/protos/optimizer.proto deleted file mode 100644 index d3cf45ce..00000000 --- a/object_detection/protos/optimizer.proto +++ /dev/null @@ -1,83 +0,0 @@ -syntax = "proto2"; - -package object_detection.protos; - -// Messages for configuring the optimizing strategy for training object -// detection models. - -// Top level optimizer message. -message Optimizer { - oneof optimizer { - RMSPropOptimizer rms_prop_optimizer = 1; - MomentumOptimizer momentum_optimizer = 2; - AdamOptimizer adam_optimizer = 3; - } - optional bool use_moving_average = 4 [default = true]; - optional float moving_average_decay = 5 [default = 0.9999]; -} - -// Configuration message for the RMSPropOptimizer -// See: https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer -message RMSPropOptimizer { - optional LearningRate learning_rate = 1; - optional float momentum_optimizer_value = 2 [default = 0.9]; - optional float decay = 3 [default = 0.9]; - optional float epsilon = 4 [default = 1.0]; -} - -// Configuration message for the MomentumOptimizer -// See: https://www.tensorflow.org/api_docs/python/tf/train/MomentumOptimizer -message MomentumOptimizer { - optional LearningRate learning_rate = 1; - optional float momentum_optimizer_value = 2 [default = 0.9]; -} - -// Configuration message for the AdamOptimizer -// See: https://www.tensorflow.org/api_docs/python/tf/train/AdamOptimizer -message AdamOptimizer { - optional LearningRate learning_rate = 1; -} - -// Configuration message for optimizer learning rate. 
-message LearningRate { - oneof learning_rate { - ConstantLearningRate constant_learning_rate = 1; - ExponentialDecayLearningRate exponential_decay_learning_rate = 2; - ManualStepLearningRate manual_step_learning_rate = 3; - CosineDecayLearningRate cosine_decay_learning_rate = 4; - } -} - -// Configuration message for a constant learning rate. -message ConstantLearningRate { - optional float learning_rate = 1 [default = 0.002]; -} - -// Configuration message for an exponentially decaying learning rate. -// See https://www.tensorflow.org/versions/master/api_docs/python/train/ \ -// decaying_the_learning_rate#exponential_decay -message ExponentialDecayLearningRate { - optional float initial_learning_rate = 1 [default = 0.002]; - optional uint32 decay_steps = 2 [default = 4000000]; - optional float decay_factor = 3 [default = 0.95]; - optional bool staircase = 4 [default = true]; -} - -// Configuration message for a manually defined learning rate schedule. -message ManualStepLearningRate { - optional float initial_learning_rate = 1 [default = 0.002]; - message LearningRateSchedule { - optional uint32 step = 1; - optional float learning_rate = 2 [default = 0.002]; - } - repeated LearningRateSchedule schedule = 2; -} - -// Configuration message for a cosine decaying learning rate as defined in -// object_detection/utils/learning_schedules.py -message CosineDecayLearningRate { - optional float learning_rate_base = 1 [default = 0.002]; - optional uint32 total_steps = 2 [default = 4000000]; - optional float warmup_learning_rate = 3 [default = 0.0002]; - optional uint32 warmup_steps = 4 [default = 10000]; -} diff --git a/object_detection/protos/optimizer_pb2.py b/object_detection/protos/optimizer_pb2.py deleted file mode 100644 index 9063efa6..00000000 --- a/object_detection/protos/optimizer_pb2.py +++ /dev/null @@ -1,591 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: object_detection/protos/optimizer.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='object_detection/protos/optimizer.proto', - package='object_detection.protos', - syntax='proto2', - serialized_pb=_b('\n\'object_detection/protos/optimizer.proto\x12\x17object_detection.protos\"\xb5\x02\n\tOptimizer\x12G\n\x12rms_prop_optimizer\x18\x01 \x01(\x0b\x32).object_detection.protos.RMSPropOptimizerH\x00\x12H\n\x12momentum_optimizer\x18\x02 \x01(\x0b\x32*.object_detection.protos.MomentumOptimizerH\x00\x12@\n\x0e\x61\x64\x61m_optimizer\x18\x03 \x01(\x0b\x32&.object_detection.protos.AdamOptimizerH\x00\x12 \n\x12use_moving_average\x18\x04 \x01(\x08:\x04true\x12$\n\x14moving_average_decay\x18\x05 \x01(\x02:\x06\x30.9999B\x0b\n\toptimizer\"\x9f\x01\n\x10RMSPropOptimizer\x12<\n\rlearning_rate\x18\x01 \x01(\x0b\x32%.object_detection.protos.LearningRate\x12%\n\x18momentum_optimizer_value\x18\x02 \x01(\x02:\x03\x30.9\x12\x12\n\x05\x64\x65\x63\x61y\x18\x03 \x01(\x02:\x03\x30.9\x12\x12\n\x07\x65psilon\x18\x04 \x01(\x02:\x01\x31\"x\n\x11MomentumOptimizer\x12<\n\rlearning_rate\x18\x01 \x01(\x0b\x32%.object_detection.protos.LearningRate\x12%\n\x18momentum_optimizer_value\x18\x02 \x01(\x02:\x03\x30.9\"M\n\rAdamOptimizer\x12<\n\rlearning_rate\x18\x01 \x01(\x0b\x32%.object_detection.protos.LearningRate\"\x80\x03\n\x0cLearningRate\x12O\n\x16\x63onstant_learning_rate\x18\x01 \x01(\x0b\x32-.object_detection.protos.ConstantLearningRateH\x00\x12`\n\x1f\x65xponential_decay_learning_rate\x18\x02 
\x01(\x0b\x32\x35.object_detection.protos.ExponentialDecayLearningRateH\x00\x12T\n\x19manual_step_learning_rate\x18\x03 \x01(\x0b\x32/.object_detection.protos.ManualStepLearningRateH\x00\x12V\n\x1a\x63osine_decay_learning_rate\x18\x04 \x01(\x0b\x32\x30.object_detection.protos.CosineDecayLearningRateH\x00\x42\x0f\n\rlearning_rate\"4\n\x14\x43onstantLearningRate\x12\x1c\n\rlearning_rate\x18\x01 \x01(\x02:\x05\x30.002\"\x97\x01\n\x1c\x45xponentialDecayLearningRate\x12$\n\x15initial_learning_rate\x18\x01 \x01(\x02:\x05\x30.002\x12\x1c\n\x0b\x64\x65\x63\x61y_steps\x18\x02 \x01(\r:\x07\x34\x30\x30\x30\x30\x30\x30\x12\x1a\n\x0c\x64\x65\x63\x61y_factor\x18\x03 \x01(\x02:\x04\x30.95\x12\x17\n\tstaircase\x18\x04 \x01(\x08:\x04true\"\xda\x01\n\x16ManualStepLearningRate\x12$\n\x15initial_learning_rate\x18\x01 \x01(\x02:\x05\x30.002\x12V\n\x08schedule\x18\x02 \x03(\x0b\x32\x44.object_detection.protos.ManualStepLearningRate.LearningRateSchedule\x1a\x42\n\x14LearningRateSchedule\x12\x0c\n\x04step\x18\x01 \x01(\r\x12\x1c\n\rlearning_rate\x18\x02 \x01(\x02:\x05\x30.002\"\x9d\x01\n\x17\x43osineDecayLearningRate\x12!\n\x12learning_rate_base\x18\x01 \x01(\x02:\x05\x30.002\x12\x1c\n\x0btotal_steps\x18\x02 \x01(\r:\x07\x34\x30\x30\x30\x30\x30\x30\x12$\n\x14warmup_learning_rate\x18\x03 \x01(\x02:\x06\x30.0002\x12\x1b\n\x0cwarmup_steps\x18\x04 \x01(\r:\x05\x31\x30\x30\x30\x30') -) - - - - -_OPTIMIZER = _descriptor.Descriptor( - name='Optimizer', - full_name='object_detection.protos.Optimizer', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='rms_prop_optimizer', full_name='object_detection.protos.Optimizer.rms_prop_optimizer', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='momentum_optimizer', 
full_name='object_detection.protos.Optimizer.momentum_optimizer', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='adam_optimizer', full_name='object_detection.protos.Optimizer.adam_optimizer', index=2, - number=3, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='use_moving_average', full_name='object_detection.protos.Optimizer.use_moving_average', index=3, - number=4, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=True, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='moving_average_decay', full_name='object_detection.protos.Optimizer.moving_average_decay', index=4, - number=5, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.9999), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name='optimizer', full_name='object_detection.protos.Optimizer.optimizer', - index=0, containing_type=None, fields=[]), - ], - serialized_start=69, - serialized_end=378, -) - - -_RMSPROPOPTIMIZER = _descriptor.Descriptor( - name='RMSPropOptimizer', - full_name='object_detection.protos.RMSPropOptimizer', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - 
name='learning_rate', full_name='object_detection.protos.RMSPropOptimizer.learning_rate', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='momentum_optimizer_value', full_name='object_detection.protos.RMSPropOptimizer.momentum_optimizer_value', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.9), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='decay', full_name='object_detection.protos.RMSPropOptimizer.decay', index=2, - number=3, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.9), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='epsilon', full_name='object_detection.protos.RMSPropOptimizer.epsilon', index=3, - number=4, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=381, - serialized_end=540, -) - - -_MOMENTUMOPTIMIZER = _descriptor.Descriptor( - name='MomentumOptimizer', - full_name='object_detection.protos.MomentumOptimizer', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='learning_rate', full_name='object_detection.protos.MomentumOptimizer.learning_rate', index=0, - number=1, type=11, cpp_type=10, label=1, - 
has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='momentum_optimizer_value', full_name='object_detection.protos.MomentumOptimizer.momentum_optimizer_value', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.9), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=542, - serialized_end=662, -) - - -_ADAMOPTIMIZER = _descriptor.Descriptor( - name='AdamOptimizer', - full_name='object_detection.protos.AdamOptimizer', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='learning_rate', full_name='object_detection.protos.AdamOptimizer.learning_rate', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=664, - serialized_end=741, -) - - -_LEARNINGRATE = _descriptor.Descriptor( - name='LearningRate', - full_name='object_detection.protos.LearningRate', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='constant_learning_rate', full_name='object_detection.protos.LearningRate.constant_learning_rate', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, 
enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='exponential_decay_learning_rate', full_name='object_detection.protos.LearningRate.exponential_decay_learning_rate', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='manual_step_learning_rate', full_name='object_detection.protos.LearningRate.manual_step_learning_rate', index=2, - number=3, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='cosine_decay_learning_rate', full_name='object_detection.protos.LearningRate.cosine_decay_learning_rate', index=3, - number=4, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name='learning_rate', full_name='object_detection.protos.LearningRate.learning_rate', - index=0, containing_type=None, fields=[]), - ], - serialized_start=744, - serialized_end=1128, -) - - -_CONSTANTLEARNINGRATE = _descriptor.Descriptor( - name='ConstantLearningRate', - full_name='object_detection.protos.ConstantLearningRate', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='learning_rate', full_name='object_detection.protos.ConstantLearningRate.learning_rate', index=0, - 
number=1, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.002), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1130, - serialized_end=1182, -) - - -_EXPONENTIALDECAYLEARNINGRATE = _descriptor.Descriptor( - name='ExponentialDecayLearningRate', - full_name='object_detection.protos.ExponentialDecayLearningRate', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='initial_learning_rate', full_name='object_detection.protos.ExponentialDecayLearningRate.initial_learning_rate', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.002), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='decay_steps', full_name='object_detection.protos.ExponentialDecayLearningRate.decay_steps', index=1, - number=2, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=4000000, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='decay_factor', full_name='object_detection.protos.ExponentialDecayLearningRate.decay_factor', index=2, - number=3, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.95), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='staircase', full_name='object_detection.protos.ExponentialDecayLearningRate.staircase', index=3, - number=4, type=8, cpp_type=7, label=1, - 
has_default_value=True, default_value=True, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1185, - serialized_end=1336, -) - - -_MANUALSTEPLEARNINGRATE_LEARNINGRATESCHEDULE = _descriptor.Descriptor( - name='LearningRateSchedule', - full_name='object_detection.protos.ManualStepLearningRate.LearningRateSchedule', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='step', full_name='object_detection.protos.ManualStepLearningRate.LearningRateSchedule.step', index=0, - number=1, type=13, cpp_type=3, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='learning_rate', full_name='object_detection.protos.ManualStepLearningRate.LearningRateSchedule.learning_rate', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.002), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1491, - serialized_end=1557, -) - -_MANUALSTEPLEARNINGRATE = _descriptor.Descriptor( - name='ManualStepLearningRate', - full_name='object_detection.protos.ManualStepLearningRate', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='initial_learning_rate', full_name='object_detection.protos.ManualStepLearningRate.initial_learning_rate', index=0, - 
number=1, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.002), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='schedule', full_name='object_detection.protos.ManualStepLearningRate.schedule', index=1, - number=2, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[_MANUALSTEPLEARNINGRATE_LEARNINGRATESCHEDULE, ], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1339, - serialized_end=1557, -) - - -_COSINEDECAYLEARNINGRATE = _descriptor.Descriptor( - name='CosineDecayLearningRate', - full_name='object_detection.protos.CosineDecayLearningRate', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='learning_rate_base', full_name='object_detection.protos.CosineDecayLearningRate.learning_rate_base', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.002), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='total_steps', full_name='object_detection.protos.CosineDecayLearningRate.total_steps', index=1, - number=2, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=4000000, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='warmup_learning_rate', full_name='object_detection.protos.CosineDecayLearningRate.warmup_learning_rate', index=2, - number=3, type=2, cpp_type=6, 
label=1, - has_default_value=True, default_value=float(0.0002), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='warmup_steps', full_name='object_detection.protos.CosineDecayLearningRate.warmup_steps', index=3, - number=4, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=10000, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1560, - serialized_end=1717, -) - -_OPTIMIZER.fields_by_name['rms_prop_optimizer'].message_type = _RMSPROPOPTIMIZER -_OPTIMIZER.fields_by_name['momentum_optimizer'].message_type = _MOMENTUMOPTIMIZER -_OPTIMIZER.fields_by_name['adam_optimizer'].message_type = _ADAMOPTIMIZER -_OPTIMIZER.oneofs_by_name['optimizer'].fields.append( - _OPTIMIZER.fields_by_name['rms_prop_optimizer']) -_OPTIMIZER.fields_by_name['rms_prop_optimizer'].containing_oneof = _OPTIMIZER.oneofs_by_name['optimizer'] -_OPTIMIZER.oneofs_by_name['optimizer'].fields.append( - _OPTIMIZER.fields_by_name['momentum_optimizer']) -_OPTIMIZER.fields_by_name['momentum_optimizer'].containing_oneof = _OPTIMIZER.oneofs_by_name['optimizer'] -_OPTIMIZER.oneofs_by_name['optimizer'].fields.append( - _OPTIMIZER.fields_by_name['adam_optimizer']) -_OPTIMIZER.fields_by_name['adam_optimizer'].containing_oneof = _OPTIMIZER.oneofs_by_name['optimizer'] -_RMSPROPOPTIMIZER.fields_by_name['learning_rate'].message_type = _LEARNINGRATE -_MOMENTUMOPTIMIZER.fields_by_name['learning_rate'].message_type = _LEARNINGRATE -_ADAMOPTIMIZER.fields_by_name['learning_rate'].message_type = _LEARNINGRATE -_LEARNINGRATE.fields_by_name['constant_learning_rate'].message_type = _CONSTANTLEARNINGRATE 
-_LEARNINGRATE.fields_by_name['exponential_decay_learning_rate'].message_type = _EXPONENTIALDECAYLEARNINGRATE -_LEARNINGRATE.fields_by_name['manual_step_learning_rate'].message_type = _MANUALSTEPLEARNINGRATE -_LEARNINGRATE.fields_by_name['cosine_decay_learning_rate'].message_type = _COSINEDECAYLEARNINGRATE -_LEARNINGRATE.oneofs_by_name['learning_rate'].fields.append( - _LEARNINGRATE.fields_by_name['constant_learning_rate']) -_LEARNINGRATE.fields_by_name['constant_learning_rate'].containing_oneof = _LEARNINGRATE.oneofs_by_name['learning_rate'] -_LEARNINGRATE.oneofs_by_name['learning_rate'].fields.append( - _LEARNINGRATE.fields_by_name['exponential_decay_learning_rate']) -_LEARNINGRATE.fields_by_name['exponential_decay_learning_rate'].containing_oneof = _LEARNINGRATE.oneofs_by_name['learning_rate'] -_LEARNINGRATE.oneofs_by_name['learning_rate'].fields.append( - _LEARNINGRATE.fields_by_name['manual_step_learning_rate']) -_LEARNINGRATE.fields_by_name['manual_step_learning_rate'].containing_oneof = _LEARNINGRATE.oneofs_by_name['learning_rate'] -_LEARNINGRATE.oneofs_by_name['learning_rate'].fields.append( - _LEARNINGRATE.fields_by_name['cosine_decay_learning_rate']) -_LEARNINGRATE.fields_by_name['cosine_decay_learning_rate'].containing_oneof = _LEARNINGRATE.oneofs_by_name['learning_rate'] -_MANUALSTEPLEARNINGRATE_LEARNINGRATESCHEDULE.containing_type = _MANUALSTEPLEARNINGRATE -_MANUALSTEPLEARNINGRATE.fields_by_name['schedule'].message_type = _MANUALSTEPLEARNINGRATE_LEARNINGRATESCHEDULE -DESCRIPTOR.message_types_by_name['Optimizer'] = _OPTIMIZER -DESCRIPTOR.message_types_by_name['RMSPropOptimizer'] = _RMSPROPOPTIMIZER -DESCRIPTOR.message_types_by_name['MomentumOptimizer'] = _MOMENTUMOPTIMIZER -DESCRIPTOR.message_types_by_name['AdamOptimizer'] = _ADAMOPTIMIZER -DESCRIPTOR.message_types_by_name['LearningRate'] = _LEARNINGRATE -DESCRIPTOR.message_types_by_name['ConstantLearningRate'] = _CONSTANTLEARNINGRATE -DESCRIPTOR.message_types_by_name['ExponentialDecayLearningRate'] = 
_EXPONENTIALDECAYLEARNINGRATE -DESCRIPTOR.message_types_by_name['ManualStepLearningRate'] = _MANUALSTEPLEARNINGRATE -DESCRIPTOR.message_types_by_name['CosineDecayLearningRate'] = _COSINEDECAYLEARNINGRATE -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -Optimizer = _reflection.GeneratedProtocolMessageType('Optimizer', (_message.Message,), dict( - DESCRIPTOR = _OPTIMIZER, - __module__ = 'object_detection.protos.optimizer_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.Optimizer) - )) -_sym_db.RegisterMessage(Optimizer) - -RMSPropOptimizer = _reflection.GeneratedProtocolMessageType('RMSPropOptimizer', (_message.Message,), dict( - DESCRIPTOR = _RMSPROPOPTIMIZER, - __module__ = 'object_detection.protos.optimizer_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.RMSPropOptimizer) - )) -_sym_db.RegisterMessage(RMSPropOptimizer) - -MomentumOptimizer = _reflection.GeneratedProtocolMessageType('MomentumOptimizer', (_message.Message,), dict( - DESCRIPTOR = _MOMENTUMOPTIMIZER, - __module__ = 'object_detection.protos.optimizer_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.MomentumOptimizer) - )) -_sym_db.RegisterMessage(MomentumOptimizer) - -AdamOptimizer = _reflection.GeneratedProtocolMessageType('AdamOptimizer', (_message.Message,), dict( - DESCRIPTOR = _ADAMOPTIMIZER, - __module__ = 'object_detection.protos.optimizer_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.AdamOptimizer) - )) -_sym_db.RegisterMessage(AdamOptimizer) - -LearningRate = _reflection.GeneratedProtocolMessageType('LearningRate', (_message.Message,), dict( - DESCRIPTOR = _LEARNINGRATE, - __module__ = 'object_detection.protos.optimizer_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.LearningRate) - )) -_sym_db.RegisterMessage(LearningRate) - -ConstantLearningRate = _reflection.GeneratedProtocolMessageType('ConstantLearningRate', (_message.Message,), dict( - DESCRIPTOR = _CONSTANTLEARNINGRATE, - 
__module__ = 'object_detection.protos.optimizer_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.ConstantLearningRate) - )) -_sym_db.RegisterMessage(ConstantLearningRate) - -ExponentialDecayLearningRate = _reflection.GeneratedProtocolMessageType('ExponentialDecayLearningRate', (_message.Message,), dict( - DESCRIPTOR = _EXPONENTIALDECAYLEARNINGRATE, - __module__ = 'object_detection.protos.optimizer_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.ExponentialDecayLearningRate) - )) -_sym_db.RegisterMessage(ExponentialDecayLearningRate) - -ManualStepLearningRate = _reflection.GeneratedProtocolMessageType('ManualStepLearningRate', (_message.Message,), dict( - - LearningRateSchedule = _reflection.GeneratedProtocolMessageType('LearningRateSchedule', (_message.Message,), dict( - DESCRIPTOR = _MANUALSTEPLEARNINGRATE_LEARNINGRATESCHEDULE, - __module__ = 'object_detection.protos.optimizer_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.ManualStepLearningRate.LearningRateSchedule) - )) - , - DESCRIPTOR = _MANUALSTEPLEARNINGRATE, - __module__ = 'object_detection.protos.optimizer_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.ManualStepLearningRate) - )) -_sym_db.RegisterMessage(ManualStepLearningRate) -_sym_db.RegisterMessage(ManualStepLearningRate.LearningRateSchedule) - -CosineDecayLearningRate = _reflection.GeneratedProtocolMessageType('CosineDecayLearningRate', (_message.Message,), dict( - DESCRIPTOR = _COSINEDECAYLEARNINGRATE, - __module__ = 'object_detection.protos.optimizer_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.CosineDecayLearningRate) - )) -_sym_db.RegisterMessage(CosineDecayLearningRate) - - -# @@protoc_insertion_point(module_scope) diff --git a/object_detection/protos/pipeline.proto b/object_detection/protos/pipeline.proto deleted file mode 100644 index 67f4e544..00000000 --- a/object_detection/protos/pipeline.proto +++ /dev/null @@ -1,18 +0,0 @@ 
-syntax = "proto2"; - -package object_detection.protos; - -import "object_detection/protos/eval.proto"; -import "object_detection/protos/input_reader.proto"; -import "object_detection/protos/model.proto"; -import "object_detection/protos/train.proto"; - -// Convenience message for configuring a training and eval pipeline. Allows all -// of the pipeline parameters to be configured from one file. -message TrainEvalPipelineConfig { - optional DetectionModel model = 1; - optional TrainConfig train_config = 2; - optional InputReader train_input_reader = 3; - optional EvalConfig eval_config = 4; - optional InputReader eval_input_reader = 5; -} diff --git a/object_detection/protos/pipeline_pb2.py b/object_detection/protos/pipeline_pb2.py deleted file mode 100644 index d9a7073f..00000000 --- a/object_detection/protos/pipeline_pb2.py +++ /dev/null @@ -1,107 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: object_detection/protos/pipeline.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from object_detection.protos import eval_pb2 as object__detection_dot_protos_dot_eval__pb2 -from object_detection.protos import input_reader_pb2 as object__detection_dot_protos_dot_input__reader__pb2 -from object_detection.protos import model_pb2 as object__detection_dot_protos_dot_model__pb2 -from object_detection.protos import train_pb2 as object__detection_dot_protos_dot_train__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='object_detection/protos/pipeline.proto', - package='object_detection.protos', - syntax='proto2', - 
serialized_pb=_b('\n&object_detection/protos/pipeline.proto\x12\x17object_detection.protos\x1a\"object_detection/protos/eval.proto\x1a*object_detection/protos/input_reader.proto\x1a#object_detection/protos/model.proto\x1a#object_detection/protos/train.proto\"\xca\x02\n\x17TrainEvalPipelineConfig\x12\x36\n\x05model\x18\x01 \x01(\x0b\x32\'.object_detection.protos.DetectionModel\x12:\n\x0ctrain_config\x18\x02 \x01(\x0b\x32$.object_detection.protos.TrainConfig\x12@\n\x12train_input_reader\x18\x03 \x01(\x0b\x32$.object_detection.protos.InputReader\x12\x38\n\x0b\x65val_config\x18\x04 \x01(\x0b\x32#.object_detection.protos.EvalConfig\x12?\n\x11\x65val_input_reader\x18\x05 \x01(\x0b\x32$.object_detection.protos.InputReader') - , - dependencies=[object__detection_dot_protos_dot_eval__pb2.DESCRIPTOR,object__detection_dot_protos_dot_input__reader__pb2.DESCRIPTOR,object__detection_dot_protos_dot_model__pb2.DESCRIPTOR,object__detection_dot_protos_dot_train__pb2.DESCRIPTOR,]) - - - - -_TRAINEVALPIPELINECONFIG = _descriptor.Descriptor( - name='TrainEvalPipelineConfig', - full_name='object_detection.protos.TrainEvalPipelineConfig', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='model', full_name='object_detection.protos.TrainEvalPipelineConfig.model', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='train_config', full_name='object_detection.protos.TrainEvalPipelineConfig.train_config', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='train_input_reader', 
full_name='object_detection.protos.TrainEvalPipelineConfig.train_input_reader', index=2, - number=3, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='eval_config', full_name='object_detection.protos.TrainEvalPipelineConfig.eval_config', index=3, - number=4, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='eval_input_reader', full_name='object_detection.protos.TrainEvalPipelineConfig.eval_input_reader', index=4, - number=5, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=222, - serialized_end=552, -) - -_TRAINEVALPIPELINECONFIG.fields_by_name['model'].message_type = object__detection_dot_protos_dot_model__pb2._DETECTIONMODEL -_TRAINEVALPIPELINECONFIG.fields_by_name['train_config'].message_type = object__detection_dot_protos_dot_train__pb2._TRAINCONFIG -_TRAINEVALPIPELINECONFIG.fields_by_name['train_input_reader'].message_type = object__detection_dot_protos_dot_input__reader__pb2._INPUTREADER -_TRAINEVALPIPELINECONFIG.fields_by_name['eval_config'].message_type = object__detection_dot_protos_dot_eval__pb2._EVALCONFIG -_TRAINEVALPIPELINECONFIG.fields_by_name['eval_input_reader'].message_type = object__detection_dot_protos_dot_input__reader__pb2._INPUTREADER -DESCRIPTOR.message_types_by_name['TrainEvalPipelineConfig'] = 
_TRAINEVALPIPELINECONFIG -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -TrainEvalPipelineConfig = _reflection.GeneratedProtocolMessageType('TrainEvalPipelineConfig', (_message.Message,), dict( - DESCRIPTOR = _TRAINEVALPIPELINECONFIG, - __module__ = 'object_detection.protos.pipeline_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.TrainEvalPipelineConfig) - )) -_sym_db.RegisterMessage(TrainEvalPipelineConfig) - - -# @@protoc_insertion_point(module_scope) diff --git a/object_detection/protos/post_processing.proto b/object_detection/protos/post_processing.proto deleted file mode 100644 index bbd69cde..00000000 --- a/object_detection/protos/post_processing.proto +++ /dev/null @@ -1,46 +0,0 @@ -syntax = "proto2"; - -package object_detection.protos; - -// Configuration proto for non-max-suppression operation on a batch of -// detections. -message BatchNonMaxSuppression { - // Scalar threshold for score (low scoring boxes are removed). - optional float score_threshold = 1 [default = 0.0]; - - // Scalar threshold for IOU (boxes that have high IOU overlap - // with previously selected boxes are removed). - optional float iou_threshold = 2 [default = 0.6]; - - // Maximum number of detections to retain per class. - optional int32 max_detections_per_class = 3 [default = 100]; - - // Maximum number of detections to retain across all classes. - optional int32 max_total_detections = 5 [default = 100]; -} - -// Configuration proto for post-processing predicted boxes and -// scores. -message PostProcessing { - // Non max suppression parameters. - optional BatchNonMaxSuppression batch_non_max_suppression = 1; - - // Enum to specify how to convert the detection scores. - enum ScoreConverter { - // Input scores equals output scores. - IDENTITY = 0; - - // Applies a sigmoid on input scores. - SIGMOID = 1; - - // Applies a softmax on input scores - SOFTMAX = 2; - } - - // Score converter to use. 
- optional ScoreConverter score_converter = 2 [default = IDENTITY]; - // Scale logit (input) value before conversion in post-processing step. - // Typically used for softmax distillation, though can be used to scale for - // other reasons. - optional float logit_scale = 3 [default = 1.0]; -} diff --git a/object_detection/protos/post_processing_pb2.py b/object_detection/protos/post_processing_pb2.py deleted file mode 100644 index 4001956a..00000000 --- a/object_detection/protos/post_processing_pb2.py +++ /dev/null @@ -1,173 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: object_detection/protos/post_processing.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='object_detection/protos/post_processing.proto', - package='object_detection.protos', - syntax='proto2', - serialized_pb=_b('\n-object_detection/protos/post_processing.proto\x12\x17object_detection.protos\"\x9a\x01\n\x16\x42\x61tchNonMaxSuppression\x12\x1a\n\x0fscore_threshold\x18\x01 \x01(\x02:\x01\x30\x12\x1a\n\riou_threshold\x18\x02 \x01(\x02:\x03\x30.6\x12%\n\x18max_detections_per_class\x18\x03 \x01(\x05:\x03\x31\x30\x30\x12!\n\x14max_total_detections\x18\x05 \x01(\x05:\x03\x31\x30\x30\"\x91\x02\n\x0ePostProcessing\x12R\n\x19\x62\x61tch_non_max_suppression\x18\x01 \x01(\x0b\x32/.object_detection.protos.BatchNonMaxSuppression\x12Y\n\x0fscore_converter\x18\x02 \x01(\x0e\x32\x36.object_detection.protos.PostProcessing.ScoreConverter:\x08IDENTITY\x12\x16\n\x0blogit_scale\x18\x03 
\x01(\x02:\x01\x31\"8\n\x0eScoreConverter\x12\x0c\n\x08IDENTITY\x10\x00\x12\x0b\n\x07SIGMOID\x10\x01\x12\x0b\n\x07SOFTMAX\x10\x02') -) - - - -_POSTPROCESSING_SCORECONVERTER = _descriptor.EnumDescriptor( - name='ScoreConverter', - full_name='object_detection.protos.PostProcessing.ScoreConverter', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='IDENTITY', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='SIGMOID', index=1, number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='SOFTMAX', index=2, number=2, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=449, - serialized_end=505, -) -_sym_db.RegisterEnumDescriptor(_POSTPROCESSING_SCORECONVERTER) - - -_BATCHNONMAXSUPPRESSION = _descriptor.Descriptor( - name='BatchNonMaxSuppression', - full_name='object_detection.protos.BatchNonMaxSuppression', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='score_threshold', full_name='object_detection.protos.BatchNonMaxSuppression.score_threshold', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='iou_threshold', full_name='object_detection.protos.BatchNonMaxSuppression.iou_threshold', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.6), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='max_detections_per_class', full_name='object_detection.protos.BatchNonMaxSuppression.max_detections_per_class', index=2, - number=3, type=5, cpp_type=1, label=1, - has_default_value=True, 
default_value=100, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='max_total_detections', full_name='object_detection.protos.BatchNonMaxSuppression.max_total_detections', index=3, - number=5, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=100, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=75, - serialized_end=229, -) - - -_POSTPROCESSING = _descriptor.Descriptor( - name='PostProcessing', - full_name='object_detection.protos.PostProcessing', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='batch_non_max_suppression', full_name='object_detection.protos.PostProcessing.batch_non_max_suppression', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='score_converter', full_name='object_detection.protos.PostProcessing.score_converter', index=1, - number=2, type=14, cpp_type=8, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='logit_scale', full_name='object_detection.protos.PostProcessing.logit_scale', index=2, - number=3, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - 
options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - _POSTPROCESSING_SCORECONVERTER, - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=232, - serialized_end=505, -) - -_POSTPROCESSING.fields_by_name['batch_non_max_suppression'].message_type = _BATCHNONMAXSUPPRESSION -_POSTPROCESSING.fields_by_name['score_converter'].enum_type = _POSTPROCESSING_SCORECONVERTER -_POSTPROCESSING_SCORECONVERTER.containing_type = _POSTPROCESSING -DESCRIPTOR.message_types_by_name['BatchNonMaxSuppression'] = _BATCHNONMAXSUPPRESSION -DESCRIPTOR.message_types_by_name['PostProcessing'] = _POSTPROCESSING -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -BatchNonMaxSuppression = _reflection.GeneratedProtocolMessageType('BatchNonMaxSuppression', (_message.Message,), dict( - DESCRIPTOR = _BATCHNONMAXSUPPRESSION, - __module__ = 'object_detection.protos.post_processing_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.BatchNonMaxSuppression) - )) -_sym_db.RegisterMessage(BatchNonMaxSuppression) - -PostProcessing = _reflection.GeneratedProtocolMessageType('PostProcessing', (_message.Message,), dict( - DESCRIPTOR = _POSTPROCESSING, - __module__ = 'object_detection.protos.post_processing_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.PostProcessing) - )) -_sym_db.RegisterMessage(PostProcessing) - - -# @@protoc_insertion_point(module_scope) diff --git a/object_detection/protos/preprocessor.proto b/object_detection/protos/preprocessor.proto deleted file mode 100644 index fcfb450a..00000000 --- a/object_detection/protos/preprocessor.proto +++ /dev/null @@ -1,405 +0,0 @@ -syntax = "proto2"; - -package object_detection.protos; - -// Message for defining a preprocessing operation on input data. 
-// See: //object_detection/core/preprocessor.py -message PreprocessingStep { - oneof preprocessing_step { - NormalizeImage normalize_image = 1; - RandomHorizontalFlip random_horizontal_flip = 2; - RandomPixelValueScale random_pixel_value_scale = 3; - RandomImageScale random_image_scale = 4; - RandomRGBtoGray random_rgb_to_gray = 5; - RandomAdjustBrightness random_adjust_brightness = 6; - RandomAdjustContrast random_adjust_contrast = 7; - RandomAdjustHue random_adjust_hue = 8; - RandomAdjustSaturation random_adjust_saturation = 9; - RandomDistortColor random_distort_color = 10; - RandomJitterBoxes random_jitter_boxes = 11; - RandomCropImage random_crop_image = 12; - RandomPadImage random_pad_image = 13; - RandomCropPadImage random_crop_pad_image = 14; - RandomCropToAspectRatio random_crop_to_aspect_ratio = 15; - RandomBlackPatches random_black_patches = 16; - RandomResizeMethod random_resize_method = 17; - ScaleBoxesToPixelCoordinates scale_boxes_to_pixel_coordinates = 18; - ResizeImage resize_image = 19; - SubtractChannelMean subtract_channel_mean = 20; - SSDRandomCrop ssd_random_crop = 21; - SSDRandomCropPad ssd_random_crop_pad = 22; - SSDRandomCropFixedAspectRatio ssd_random_crop_fixed_aspect_ratio = 23; - SSDRandomCropPadFixedAspectRatio ssd_random_crop_pad_fixed_aspect_ratio = 24; - RandomVerticalFlip random_vertical_flip = 25; - RandomRotation90 random_rotation90 = 26; - } -} - -// Normalizes pixel values in an image. -// For every channel in the image, moves the pixel values from the range -// [original_minval, original_maxval] to [target_minval, target_maxval]. -message NormalizeImage { - optional float original_minval = 1; - optional float original_maxval = 2; - optional float target_minval = 3 [default=0]; - optional float target_maxval = 4 [default=1]; -} - -// Randomly horizontally flips the image and detections 50% of the time. 
-message RandomHorizontalFlip { - // Specifies a mapping from the original keypoint indices to horizontally - // flipped indices. This is used in the event that keypoints are specified, - // in which case when the image is horizontally flipped the keypoints will - // need to be permuted. E.g. for keypoints representing left_eye, right_eye, - // nose_tip, mouth, left_ear, right_ear (in that order), one might specify - // the keypoint_flip_permutation below: - // keypoint_flip_permutation: 1 - // keypoint_flip_permutation: 0 - // keypoint_flip_permutation: 2 - // keypoint_flip_permutation: 3 - // keypoint_flip_permutation: 5 - // keypoint_flip_permutation: 4 - repeated int32 keypoint_flip_permutation = 1; -} - -// Randomly vertically flips the image and detections 50% of the time. -message RandomVerticalFlip { - // Specifies a mapping from the original keypoint indices to vertically - // flipped indices. This is used in the event that keypoints are specified, - // in which case when the image is vertically flipped the keypoints will - // need to be permuted. E.g. for keypoints representing left_eye, right_eye, - // nose_tip, mouth, left_ear, right_ear (in that order), one might specify - // the keypoint_flip_permutation below: - // keypoint_flip_permutation: 1 - // keypoint_flip_permutation: 0 - // keypoint_flip_permutation: 2 - // keypoint_flip_permutation: 3 - // keypoint_flip_permutation: 5 - // keypoint_flip_permutation: 4 - repeated int32 keypoint_flip_permutation = 1; -} - -// Randomly rotates the image and detections by 90 degrees counter-clockwise -// 50% of the time. -message RandomRotation90 {} - -// Randomly scales the values of all pixels in the image by some constant value -// between [minval, maxval], then clip the value to a range between [0, 1.0]. -message RandomPixelValueScale { - optional float minval = 1 [default=0.9]; - optional float maxval = 2 [default=1.1]; -} - -// Randomly enlarges or shrinks image (keeping aspect ratio). 
-message RandomImageScale { - optional float min_scale_ratio = 1 [default=0.5]; - optional float max_scale_ratio = 2 [default=2.0]; -} - -// Randomly convert entire image to grey scale. -message RandomRGBtoGray { - optional float probability = 1 [default=0.1]; -} - -// Randomly changes image brightness by up to max_delta. Image outputs will be -// saturated between 0 and 1. -message RandomAdjustBrightness { - optional float max_delta=1 [default=0.2]; -} - -// Randomly scales contract by a value between [min_delta, max_delta]. -message RandomAdjustContrast { - optional float min_delta = 1 [default=0.8]; - optional float max_delta = 2 [default=1.25]; -} - -// Randomly alters hue by a value of up to max_delta. -message RandomAdjustHue { - optional float max_delta = 1 [default=0.02]; -} - -// Randomly changes saturation by a value between [min_delta, max_delta]. -message RandomAdjustSaturation { - optional float min_delta = 1 [default=0.8]; - optional float max_delta = 2 [default=1.25]; -} - -// Performs a random color distortion. color_orderings should either be 0 or 1. -message RandomDistortColor { - optional int32 color_ordering = 1; -} - -// Randomly jitters corners of boxes in the image determined by ratio. -// ie. If a box is [100, 200] and ratio is 0.02, the corners can move by [1, 4]. -message RandomJitterBoxes { - optional float ratio = 1 [default=0.05]; -} - -// Randomly crops the image and bounding boxes. -message RandomCropImage { - // Cropped image must cover at least one box by this fraction. - optional float min_object_covered = 1 [default=1.0]; - - // Aspect ratio bounds of cropped image. - optional float min_aspect_ratio = 2 [default=0.75]; - optional float max_aspect_ratio = 3 [default=1.33]; - - // Allowed area ratio of cropped image to original image. - optional float min_area = 4 [default=0.1]; - optional float max_area = 5 [default=1.0]; - - // Minimum overlap threshold of cropped boxes to keep in new image. 
If the - // ratio between a cropped bounding box and the original is less than this - // value, it is removed from the new image. - optional float overlap_thresh = 6 [default=0.3]; - - // Probability of keeping the original image. - optional float random_coef = 7 [default=0.0]; -} - -// Randomly adds padding to the image. -message RandomPadImage { - // Minimum dimensions for padded image. If unset, will use original image - // dimension as a lower bound. - optional float min_image_height = 1; - optional float min_image_width = 2; - - // Maximum dimensions for padded image. If unset, will use double the original - // image dimension as a lower bound. - optional float max_image_height = 3; - optional float max_image_width = 4; - - // Color of the padding. If unset, will pad using average color of the input - // image. - repeated float pad_color = 5; -} - -// Randomly crops an image followed by a random pad. -message RandomCropPadImage { - // Cropping operation must cover at least one box by this fraction. - optional float min_object_covered = 1 [default=1.0]; - - // Aspect ratio bounds of image after cropping operation. - optional float min_aspect_ratio = 2 [default=0.75]; - optional float max_aspect_ratio = 3 [default=1.33]; - - // Allowed area ratio of image after cropping operation. - optional float min_area = 4 [default=0.1]; - optional float max_area = 5 [default=1.0]; - - // Minimum overlap threshold of cropped boxes to keep in new image. If the - // ratio between a cropped bounding box and the original is less than this - // value, it is removed from the new image. - optional float overlap_thresh = 6 [default=0.3]; - - // Probability of keeping the original image during the crop operation. - optional float random_coef = 7 [default=0.0]; - - // Maximum dimensions for padded image. If unset, will use double the original - // image dimension as a lower bound. Both of the following fields should be - // length 2. 
- repeated float min_padded_size_ratio = 8; - repeated float max_padded_size_ratio = 9; - - // Color of the padding. If unset, will pad using average color of the input - // image. - repeated float pad_color = 10; -} - -// Randomly crops an iamge to a given aspect ratio. -message RandomCropToAspectRatio { - // Aspect ratio. - optional float aspect_ratio = 1 [default=1.0]; - - // Minimum overlap threshold of cropped boxes to keep in new image. If the - // ratio between a cropped bounding box and the original is less than this - // value, it is removed from the new image. - optional float overlap_thresh = 2 [default=0.3]; -} - -// Randomly adds black square patches to an image. -message RandomBlackPatches { - // The maximum number of black patches to add. - optional int32 max_black_patches = 1 [default=10]; - - // The probability of a black patch being added to an image. - optional float probability = 2 [default=0.5]; - - // Ratio between the dimension of the black patch to the minimum dimension of - // the image (patch_width = patch_height = min(image_height, image_width)). - optional float size_to_image_ratio = 3 [default=0.1]; -} - -// Randomly resizes the image up to [target_height, target_width]. -message RandomResizeMethod { - optional float target_height = 1; - optional float target_width = 2; -} - -// Scales boxes from normalized coordinates to pixel coordinates. -message ScaleBoxesToPixelCoordinates { -} - -// Resizes images to [new_height, new_width]. -message ResizeImage { - optional int32 new_height = 1; - optional int32 new_width = 2; - enum Method { - AREA=1; - BICUBIC=2; - BILINEAR=3; - NEAREST_NEIGHBOR=4; - } - optional Method method = 3 [default=BILINEAR]; -} - -// Normalizes an image by subtracting a mean from each channel. -message SubtractChannelMean { - // The mean to subtract from each channel. Should be of same dimension of - // channels in the input image. 
- repeated float means = 1; -} - -message SSDRandomCropOperation { - // Cropped image must cover at least this fraction of one original bounding - // box. - optional float min_object_covered = 1; - - // The aspect ratio of the cropped image must be within the range of - // [min_aspect_ratio, max_aspect_ratio]. - optional float min_aspect_ratio = 2; - optional float max_aspect_ratio = 3; - - // The area of the cropped image must be within the range of - // [min_area, max_area]. - optional float min_area = 4; - optional float max_area = 5; - - // Cropped box area ratio must be above this threhold to be kept. - optional float overlap_thresh = 6; - - // Probability a crop operation is skipped. - optional float random_coef = 7; -} - -// Randomly crops a image according to: -// Liu et al., SSD: Single shot multibox detector. -// This preprocessing step defines multiple SSDRandomCropOperations. Only one -// operation (chosen at random) is actually performed on an image. -message SSDRandomCrop { - repeated SSDRandomCropOperation operations = 1; -} - -message SSDRandomCropPadOperation { - // Cropped image must cover at least this fraction of one original bounding - // box. - optional float min_object_covered = 1; - - // The aspect ratio of the cropped image must be within the range of - // [min_aspect_ratio, max_aspect_ratio]. - optional float min_aspect_ratio = 2; - optional float max_aspect_ratio = 3; - - // The area of the cropped image must be within the range of - // [min_area, max_area]. - optional float min_area = 4; - optional float max_area = 5; - - // Cropped box area ratio must be above this threhold to be kept. - optional float overlap_thresh = 6; - - // Probability a crop operation is skipped. - optional float random_coef = 7; - - // Min ratio of padded image height and width to the input image's height and - // width. Two entries per operation. 
- repeated float min_padded_size_ratio = 8; - - // Max ratio of padded image height and width to the input image's height and - // width. Two entries per operation. - repeated float max_padded_size_ratio = 9; - - // Padding color. - optional float pad_color_r = 10; - optional float pad_color_g = 11; - optional float pad_color_b = 12; -} - -// Randomly crops and pads an image according to: -// Liu et al., SSD: Single shot multibox detector. -// This preprocessing step defines multiple SSDRandomCropPadOperations. Only one -// operation (chosen at random) is actually performed on an image. -message SSDRandomCropPad { - repeated SSDRandomCropPadOperation operations = 1; -} - -message SSDRandomCropFixedAspectRatioOperation { - // Cropped image must cover at least this fraction of one original bounding - // box. - optional float min_object_covered = 1; - - // The area of the cropped image must be within the range of - // [min_area, max_area]. - optional float min_area = 4; - optional float max_area = 5; - - // Cropped box area ratio must be above this threhold to be kept. - optional float overlap_thresh = 6; - - // Probability a crop operation is skipped. - optional float random_coef = 7; -} - -// Randomly crops a image to a fixed aspect ratio according to: -// Liu et al., SSD: Single shot multibox detector. -// Multiple SSDRandomCropFixedAspectRatioOperations are defined by this -// preprocessing step. Only one operation (chosen at random) is actually -// performed on an image. -message SSDRandomCropFixedAspectRatio { - repeated SSDRandomCropFixedAspectRatioOperation operations = 1; - - // Aspect ratio to crop to. This value is used for all crop operations. - optional float aspect_ratio = 2 [default=1.0]; -} - -message SSDRandomCropPadFixedAspectRatioOperation { - // Cropped image must cover at least this fraction of one original bounding - // box. 
- optional float min_object_covered = 1; - - // The aspect ratio of the cropped image must be within the range of - // [min_aspect_ratio, max_aspect_ratio]. - optional float min_aspect_ratio = 2; - optional float max_aspect_ratio = 3; - - // The area of the cropped image must be within the range of - // [min_area, max_area]. - optional float min_area = 4; - optional float max_area = 5; - - // Cropped box area ratio must be above this threhold to be kept. - optional float overlap_thresh = 6; - - // Probability a crop operation is skipped. - optional float random_coef = 7; - - // Min ratio of padded image height and width to the input image's height and - // width. Two entries per operation. - repeated float min_padded_size_ratio = 8; - - // Max ratio of padded image height and width to the input image's height and - // width. Two entries per operation. - repeated float max_padded_size_ratio = 9; -} - -// Randomly crops and pads an image to a fixed aspect ratio according to: -// Liu et al., SSD: Single shot multibox detector. -// Multiple SSDRandomCropPadFixedAspectRatioOperations are defined by this -// preprocessing step. Only one operation (chosen at random) is actually -// performed on an image. -message SSDRandomCropPadFixedAspectRatio { - repeated SSDRandomCropPadFixedAspectRatioOperation operations = 1; - - // Aspect ratio to pad to. This value is used for all crop and pad operations. - optional float aspect_ratio = 2 [default=1.0]; -} diff --git a/object_detection/protos/preprocessor_pb2.py b/object_detection/protos/preprocessor_pb2.py deleted file mode 100644 index 40a0d576..00000000 --- a/object_detection/protos/preprocessor_pb2.py +++ /dev/null @@ -1,1985 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: object_detection/protos/preprocessor.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='object_detection/protos/preprocessor.proto', - package='object_detection.protos', - syntax='proto2', - serialized_pb=_b('\n*object_detection/protos/preprocessor.proto\x12\x17object_detection.protos\"\xaf\x10\n\x11PreprocessingStep\x12\x42\n\x0fnormalize_image\x18\x01 \x01(\x0b\x32\'.object_detection.protos.NormalizeImageH\x00\x12O\n\x16random_horizontal_flip\x18\x02 \x01(\x0b\x32-.object_detection.protos.RandomHorizontalFlipH\x00\x12R\n\x18random_pixel_value_scale\x18\x03 \x01(\x0b\x32..object_detection.protos.RandomPixelValueScaleH\x00\x12G\n\x12random_image_scale\x18\x04 \x01(\x0b\x32).object_detection.protos.RandomImageScaleH\x00\x12\x46\n\x12random_rgb_to_gray\x18\x05 \x01(\x0b\x32(.object_detection.protos.RandomRGBtoGrayH\x00\x12S\n\x18random_adjust_brightness\x18\x06 \x01(\x0b\x32/.object_detection.protos.RandomAdjustBrightnessH\x00\x12O\n\x16random_adjust_contrast\x18\x07 \x01(\x0b\x32-.object_detection.protos.RandomAdjustContrastH\x00\x12\x45\n\x11random_adjust_hue\x18\x08 \x01(\x0b\x32(.object_detection.protos.RandomAdjustHueH\x00\x12S\n\x18random_adjust_saturation\x18\t \x01(\x0b\x32/.object_detection.protos.RandomAdjustSaturationH\x00\x12K\n\x14random_distort_color\x18\n \x01(\x0b\x32+.object_detection.protos.RandomDistortColorH\x00\x12I\n\x13random_jitter_boxes\x18\x0b \x01(\x0b\x32*.object_detection.protos.RandomJitterBoxesH\x00\x12\x45\n\x11random_crop_image\x18\x0c 
\x01(\x0b\x32(.object_detection.protos.RandomCropImageH\x00\x12\x43\n\x10random_pad_image\x18\r \x01(\x0b\x32\'.object_detection.protos.RandomPadImageH\x00\x12L\n\x15random_crop_pad_image\x18\x0e \x01(\x0b\x32+.object_detection.protos.RandomCropPadImageH\x00\x12W\n\x1brandom_crop_to_aspect_ratio\x18\x0f \x01(\x0b\x32\x30.object_detection.protos.RandomCropToAspectRatioH\x00\x12K\n\x14random_black_patches\x18\x10 \x01(\x0b\x32+.object_detection.protos.RandomBlackPatchesH\x00\x12K\n\x14random_resize_method\x18\x11 \x01(\x0b\x32+.object_detection.protos.RandomResizeMethodH\x00\x12\x61\n scale_boxes_to_pixel_coordinates\x18\x12 \x01(\x0b\x32\x35.object_detection.protos.ScaleBoxesToPixelCoordinatesH\x00\x12<\n\x0cresize_image\x18\x13 \x01(\x0b\x32$.object_detection.protos.ResizeImageH\x00\x12M\n\x15subtract_channel_mean\x18\x14 \x01(\x0b\x32,.object_detection.protos.SubtractChannelMeanH\x00\x12\x41\n\x0fssd_random_crop\x18\x15 \x01(\x0b\x32&.object_detection.protos.SSDRandomCropH\x00\x12H\n\x13ssd_random_crop_pad\x18\x16 \x01(\x0b\x32).object_detection.protos.SSDRandomCropPadH\x00\x12\x64\n\"ssd_random_crop_fixed_aspect_ratio\x18\x17 \x01(\x0b\x32\x36.object_detection.protos.SSDRandomCropFixedAspectRatioH\x00\x12k\n&ssd_random_crop_pad_fixed_aspect_ratio\x18\x18 \x01(\x0b\x32\x39.object_detection.protos.SSDRandomCropPadFixedAspectRatioH\x00\x12K\n\x14random_vertical_flip\x18\x19 \x01(\x0b\x32+.object_detection.protos.RandomVerticalFlipH\x00\x12\x46\n\x11random_rotation90\x18\x1a \x01(\x0b\x32).object_detection.protos.RandomRotation90H\x00\x42\x14\n\x12preprocessing_step\"v\n\x0eNormalizeImage\x12\x17\n\x0foriginal_minval\x18\x01 \x01(\x02\x12\x17\n\x0foriginal_maxval\x18\x02 \x01(\x02\x12\x18\n\rtarget_minval\x18\x03 \x01(\x02:\x01\x30\x12\x18\n\rtarget_maxval\x18\x04 \x01(\x02:\x01\x31\"9\n\x14RandomHorizontalFlip\x12!\n\x19keypoint_flip_permutation\x18\x01 \x03(\x05\"7\n\x12RandomVerticalFlip\x12!\n\x19keypoint_flip_permutation\x18\x01 
\x03(\x05\"\x12\n\x10RandomRotation90\"A\n\x15RandomPixelValueScale\x12\x13\n\x06minval\x18\x01 \x01(\x02:\x03\x30.9\x12\x13\n\x06maxval\x18\x02 \x01(\x02:\x03\x31.1\"L\n\x10RandomImageScale\x12\x1c\n\x0fmin_scale_ratio\x18\x01 \x01(\x02:\x03\x30.5\x12\x1a\n\x0fmax_scale_ratio\x18\x02 \x01(\x02:\x01\x32\"+\n\x0fRandomRGBtoGray\x12\x18\n\x0bprobability\x18\x01 \x01(\x02:\x03\x30.1\"0\n\x16RandomAdjustBrightness\x12\x16\n\tmax_delta\x18\x01 \x01(\x02:\x03\x30.2\"G\n\x14RandomAdjustContrast\x12\x16\n\tmin_delta\x18\x01 \x01(\x02:\x03\x30.8\x12\x17\n\tmax_delta\x18\x02 \x01(\x02:\x04\x31.25\"*\n\x0fRandomAdjustHue\x12\x17\n\tmax_delta\x18\x01 \x01(\x02:\x04\x30.02\"I\n\x16RandomAdjustSaturation\x12\x16\n\tmin_delta\x18\x01 \x01(\x02:\x03\x30.8\x12\x17\n\tmax_delta\x18\x02 \x01(\x02:\x04\x31.25\",\n\x12RandomDistortColor\x12\x16\n\x0e\x63olor_ordering\x18\x01 \x01(\x05\"(\n\x11RandomJitterBoxes\x12\x13\n\x05ratio\x18\x01 \x01(\x02:\x04\x30.05\"\xd1\x01\n\x0fRandomCropImage\x12\x1d\n\x12min_object_covered\x18\x01 \x01(\x02:\x01\x31\x12\x1e\n\x10min_aspect_ratio\x18\x02 \x01(\x02:\x04\x30.75\x12\x1e\n\x10max_aspect_ratio\x18\x03 \x01(\x02:\x04\x31.33\x12\x15\n\x08min_area\x18\x04 \x01(\x02:\x03\x30.1\x12\x13\n\x08max_area\x18\x05 \x01(\x02:\x01\x31\x12\x1b\n\x0eoverlap_thresh\x18\x06 \x01(\x02:\x03\x30.3\x12\x16\n\x0brandom_coef\x18\x07 \x01(\x02:\x01\x30\"\x89\x01\n\x0eRandomPadImage\x12\x18\n\x10min_image_height\x18\x01 \x01(\x02\x12\x17\n\x0fmin_image_width\x18\x02 \x01(\x02\x12\x18\n\x10max_image_height\x18\x03 \x01(\x02\x12\x17\n\x0fmax_image_width\x18\x04 \x01(\x02\x12\x11\n\tpad_color\x18\x05 \x03(\x02\"\xa5\x02\n\x12RandomCropPadImage\x12\x1d\n\x12min_object_covered\x18\x01 \x01(\x02:\x01\x31\x12\x1e\n\x10min_aspect_ratio\x18\x02 \x01(\x02:\x04\x30.75\x12\x1e\n\x10max_aspect_ratio\x18\x03 \x01(\x02:\x04\x31.33\x12\x15\n\x08min_area\x18\x04 \x01(\x02:\x03\x30.1\x12\x13\n\x08max_area\x18\x05 \x01(\x02:\x01\x31\x12\x1b\n\x0eoverlap_thresh\x18\x06 
\x01(\x02:\x03\x30.3\x12\x16\n\x0brandom_coef\x18\x07 \x01(\x02:\x01\x30\x12\x1d\n\x15min_padded_size_ratio\x18\x08 \x03(\x02\x12\x1d\n\x15max_padded_size_ratio\x18\t \x03(\x02\x12\x11\n\tpad_color\x18\n \x03(\x02\"O\n\x17RandomCropToAspectRatio\x12\x17\n\x0c\x61spect_ratio\x18\x01 \x01(\x02:\x01\x31\x12\x1b\n\x0eoverlap_thresh\x18\x02 \x01(\x02:\x03\x30.3\"o\n\x12RandomBlackPatches\x12\x1d\n\x11max_black_patches\x18\x01 \x01(\x05:\x02\x31\x30\x12\x18\n\x0bprobability\x18\x02 \x01(\x02:\x03\x30.5\x12 \n\x13size_to_image_ratio\x18\x03 \x01(\x02:\x03\x30.1\"A\n\x12RandomResizeMethod\x12\x15\n\rtarget_height\x18\x01 \x01(\x02\x12\x14\n\x0ctarget_width\x18\x02 \x01(\x02\"\x1e\n\x1cScaleBoxesToPixelCoordinates\"\xc0\x01\n\x0bResizeImage\x12\x12\n\nnew_height\x18\x01 \x01(\x05\x12\x11\n\tnew_width\x18\x02 \x01(\x05\x12\x45\n\x06method\x18\x03 \x01(\x0e\x32+.object_detection.protos.ResizeImage.Method:\x08\x42ILINEAR\"C\n\x06Method\x12\x08\n\x04\x41REA\x10\x01\x12\x0b\n\x07\x42ICUBIC\x10\x02\x12\x0c\n\x08\x42ILINEAR\x10\x03\x12\x14\n\x10NEAREST_NEIGHBOR\x10\x04\"$\n\x13SubtractChannelMean\x12\r\n\x05means\x18\x01 \x03(\x02\"\xb9\x01\n\x16SSDRandomCropOperation\x12\x1a\n\x12min_object_covered\x18\x01 \x01(\x02\x12\x18\n\x10min_aspect_ratio\x18\x02 \x01(\x02\x12\x18\n\x10max_aspect_ratio\x18\x03 \x01(\x02\x12\x10\n\x08min_area\x18\x04 \x01(\x02\x12\x10\n\x08max_area\x18\x05 \x01(\x02\x12\x16\n\x0eoverlap_thresh\x18\x06 \x01(\x02\x12\x13\n\x0brandom_coef\x18\x07 \x01(\x02\"T\n\rSSDRandomCrop\x12\x43\n\noperations\x18\x01 \x03(\x0b\x32/.object_detection.protos.SSDRandomCropOperation\"\xb9\x02\n\x19SSDRandomCropPadOperation\x12\x1a\n\x12min_object_covered\x18\x01 \x01(\x02\x12\x18\n\x10min_aspect_ratio\x18\x02 \x01(\x02\x12\x18\n\x10max_aspect_ratio\x18\x03 \x01(\x02\x12\x10\n\x08min_area\x18\x04 \x01(\x02\x12\x10\n\x08max_area\x18\x05 \x01(\x02\x12\x16\n\x0eoverlap_thresh\x18\x06 \x01(\x02\x12\x13\n\x0brandom_coef\x18\x07 \x01(\x02\x12\x1d\n\x15min_padded_size_ratio\x18\x08 
\x03(\x02\x12\x1d\n\x15max_padded_size_ratio\x18\t \x03(\x02\x12\x13\n\x0bpad_color_r\x18\n \x01(\x02\x12\x13\n\x0bpad_color_g\x18\x0b \x01(\x02\x12\x13\n\x0bpad_color_b\x18\x0c \x01(\x02\"Z\n\x10SSDRandomCropPad\x12\x46\n\noperations\x18\x01 \x03(\x0b\x32\x32.object_detection.protos.SSDRandomCropPadOperation\"\x95\x01\n&SSDRandomCropFixedAspectRatioOperation\x12\x1a\n\x12min_object_covered\x18\x01 \x01(\x02\x12\x10\n\x08min_area\x18\x04 \x01(\x02\x12\x10\n\x08max_area\x18\x05 \x01(\x02\x12\x16\n\x0eoverlap_thresh\x18\x06 \x01(\x02\x12\x13\n\x0brandom_coef\x18\x07 \x01(\x02\"\x8d\x01\n\x1dSSDRandomCropFixedAspectRatio\x12S\n\noperations\x18\x01 \x03(\x0b\x32?.object_detection.protos.SSDRandomCropFixedAspectRatioOperation\x12\x17\n\x0c\x61spect_ratio\x18\x02 \x01(\x02:\x01\x31\"\x8a\x02\n)SSDRandomCropPadFixedAspectRatioOperation\x12\x1a\n\x12min_object_covered\x18\x01 \x01(\x02\x12\x18\n\x10min_aspect_ratio\x18\x02 \x01(\x02\x12\x18\n\x10max_aspect_ratio\x18\x03 \x01(\x02\x12\x10\n\x08min_area\x18\x04 \x01(\x02\x12\x10\n\x08max_area\x18\x05 \x01(\x02\x12\x16\n\x0eoverlap_thresh\x18\x06 \x01(\x02\x12\x13\n\x0brandom_coef\x18\x07 \x01(\x02\x12\x1d\n\x15min_padded_size_ratio\x18\x08 \x03(\x02\x12\x1d\n\x15max_padded_size_ratio\x18\t \x03(\x02\"\x93\x01\n SSDRandomCropPadFixedAspectRatio\x12V\n\noperations\x18\x01 \x03(\x0b\x32\x42.object_detection.protos.SSDRandomCropPadFixedAspectRatioOperation\x12\x17\n\x0c\x61spect_ratio\x18\x02 \x01(\x02:\x01\x31') -) - - - -_RESIZEIMAGE_METHOD = _descriptor.EnumDescriptor( - name='Method', - full_name='object_detection.protos.ResizeImage.Method', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='AREA', index=0, number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='BICUBIC', index=1, number=2, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='BILINEAR', index=2, number=3, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - 
name='NEAREST_NEIGHBOR', index=3, number=4, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=4012, - serialized_end=4079, -) -_sym_db.RegisterEnumDescriptor(_RESIZEIMAGE_METHOD) - - -_PREPROCESSINGSTEP = _descriptor.Descriptor( - name='PreprocessingStep', - full_name='object_detection.protos.PreprocessingStep', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='normalize_image', full_name='object_detection.protos.PreprocessingStep.normalize_image', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='random_horizontal_flip', full_name='object_detection.protos.PreprocessingStep.random_horizontal_flip', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='random_pixel_value_scale', full_name='object_detection.protos.PreprocessingStep.random_pixel_value_scale', index=2, - number=3, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='random_image_scale', full_name='object_detection.protos.PreprocessingStep.random_image_scale', index=3, - number=4, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='random_rgb_to_gray', 
full_name='object_detection.protos.PreprocessingStep.random_rgb_to_gray', index=4, - number=5, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='random_adjust_brightness', full_name='object_detection.protos.PreprocessingStep.random_adjust_brightness', index=5, - number=6, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='random_adjust_contrast', full_name='object_detection.protos.PreprocessingStep.random_adjust_contrast', index=6, - number=7, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='random_adjust_hue', full_name='object_detection.protos.PreprocessingStep.random_adjust_hue', index=7, - number=8, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='random_adjust_saturation', full_name='object_detection.protos.PreprocessingStep.random_adjust_saturation', index=8, - number=9, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='random_distort_color', full_name='object_detection.protos.PreprocessingStep.random_distort_color', index=9, - number=10, type=11, 
cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='random_jitter_boxes', full_name='object_detection.protos.PreprocessingStep.random_jitter_boxes', index=10, - number=11, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='random_crop_image', full_name='object_detection.protos.PreprocessingStep.random_crop_image', index=11, - number=12, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='random_pad_image', full_name='object_detection.protos.PreprocessingStep.random_pad_image', index=12, - number=13, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='random_crop_pad_image', full_name='object_detection.protos.PreprocessingStep.random_crop_pad_image', index=13, - number=14, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='random_crop_to_aspect_ratio', full_name='object_detection.protos.PreprocessingStep.random_crop_to_aspect_ratio', index=14, - number=15, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, 
containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='random_black_patches', full_name='object_detection.protos.PreprocessingStep.random_black_patches', index=15, - number=16, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='random_resize_method', full_name='object_detection.protos.PreprocessingStep.random_resize_method', index=16, - number=17, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='scale_boxes_to_pixel_coordinates', full_name='object_detection.protos.PreprocessingStep.scale_boxes_to_pixel_coordinates', index=17, - number=18, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='resize_image', full_name='object_detection.protos.PreprocessingStep.resize_image', index=18, - number=19, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='subtract_channel_mean', full_name='object_detection.protos.PreprocessingStep.subtract_channel_mean', index=19, - number=20, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - 
_descriptor.FieldDescriptor( - name='ssd_random_crop', full_name='object_detection.protos.PreprocessingStep.ssd_random_crop', index=20, - number=21, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='ssd_random_crop_pad', full_name='object_detection.protos.PreprocessingStep.ssd_random_crop_pad', index=21, - number=22, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='ssd_random_crop_fixed_aspect_ratio', full_name='object_detection.protos.PreprocessingStep.ssd_random_crop_fixed_aspect_ratio', index=22, - number=23, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='ssd_random_crop_pad_fixed_aspect_ratio', full_name='object_detection.protos.PreprocessingStep.ssd_random_crop_pad_fixed_aspect_ratio', index=23, - number=24, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='random_vertical_flip', full_name='object_detection.protos.PreprocessingStep.random_vertical_flip', index=24, - number=25, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='random_rotation90', 
full_name='object_detection.protos.PreprocessingStep.random_rotation90', index=25, - number=26, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name='preprocessing_step', full_name='object_detection.protos.PreprocessingStep.preprocessing_step', - index=0, containing_type=None, fields=[]), - ], - serialized_start=72, - serialized_end=2167, -) - - -_NORMALIZEIMAGE = _descriptor.Descriptor( - name='NormalizeImage', - full_name='object_detection.protos.NormalizeImage', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='original_minval', full_name='object_detection.protos.NormalizeImage.original_minval', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='original_maxval', full_name='object_detection.protos.NormalizeImage.original_maxval', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='target_minval', full_name='object_detection.protos.NormalizeImage.target_minval', index=2, - number=3, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - 
_descriptor.FieldDescriptor( - name='target_maxval', full_name='object_detection.protos.NormalizeImage.target_maxval', index=3, - number=4, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=2169, - serialized_end=2287, -) - - -_RANDOMHORIZONTALFLIP = _descriptor.Descriptor( - name='RandomHorizontalFlip', - full_name='object_detection.protos.RandomHorizontalFlip', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='keypoint_flip_permutation', full_name='object_detection.protos.RandomHorizontalFlip.keypoint_flip_permutation', index=0, - number=1, type=5, cpp_type=1, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=2289, - serialized_end=2346, -) - - -_RANDOMVERTICALFLIP = _descriptor.Descriptor( - name='RandomVerticalFlip', - full_name='object_detection.protos.RandomVerticalFlip', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='keypoint_flip_permutation', full_name='object_detection.protos.RandomVerticalFlip.keypoint_flip_permutation', index=0, - number=1, type=5, cpp_type=1, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - 
nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=2348, - serialized_end=2403, -) - - -_RANDOMROTATION90 = _descriptor.Descriptor( - name='RandomRotation90', - full_name='object_detection.protos.RandomRotation90', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=2405, - serialized_end=2423, -) - - -_RANDOMPIXELVALUESCALE = _descriptor.Descriptor( - name='RandomPixelValueScale', - full_name='object_detection.protos.RandomPixelValueScale', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='minval', full_name='object_detection.protos.RandomPixelValueScale.minval', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.9), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='maxval', full_name='object_detection.protos.RandomPixelValueScale.maxval', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1.1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=2425, - serialized_end=2490, -) - - -_RANDOMIMAGESCALE = _descriptor.Descriptor( - name='RandomImageScale', - full_name='object_detection.protos.RandomImageScale', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='min_scale_ratio', 
full_name='object_detection.protos.RandomImageScale.min_scale_ratio', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.5), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='max_scale_ratio', full_name='object_detection.protos.RandomImageScale.max_scale_ratio', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(2), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=2492, - serialized_end=2568, -) - - -_RANDOMRGBTOGRAY = _descriptor.Descriptor( - name='RandomRGBtoGray', - full_name='object_detection.protos.RandomRGBtoGray', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='probability', full_name='object_detection.protos.RandomRGBtoGray.probability', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=2570, - serialized_end=2613, -) - - -_RANDOMADJUSTBRIGHTNESS = _descriptor.Descriptor( - name='RandomAdjustBrightness', - full_name='object_detection.protos.RandomAdjustBrightness', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='max_delta', full_name='object_detection.protos.RandomAdjustBrightness.max_delta', 
index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.2), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=2615, - serialized_end=2663, -) - - -_RANDOMADJUSTCONTRAST = _descriptor.Descriptor( - name='RandomAdjustContrast', - full_name='object_detection.protos.RandomAdjustContrast', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='min_delta', full_name='object_detection.protos.RandomAdjustContrast.min_delta', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.8), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='max_delta', full_name='object_detection.protos.RandomAdjustContrast.max_delta', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1.25), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=2665, - serialized_end=2736, -) - - -_RANDOMADJUSTHUE = _descriptor.Descriptor( - name='RandomAdjustHue', - full_name='object_detection.protos.RandomAdjustHue', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='max_delta', full_name='object_detection.protos.RandomAdjustHue.max_delta', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=True, 
default_value=float(0.02), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=2738, - serialized_end=2780, -) - - -_RANDOMADJUSTSATURATION = _descriptor.Descriptor( - name='RandomAdjustSaturation', - full_name='object_detection.protos.RandomAdjustSaturation', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='min_delta', full_name='object_detection.protos.RandomAdjustSaturation.min_delta', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.8), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='max_delta', full_name='object_detection.protos.RandomAdjustSaturation.max_delta', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1.25), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=2782, - serialized_end=2855, -) - - -_RANDOMDISTORTCOLOR = _descriptor.Descriptor( - name='RandomDistortColor', - full_name='object_detection.protos.RandomDistortColor', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='color_ordering', full_name='object_detection.protos.RandomDistortColor.color_ordering', index=0, - number=1, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, 
containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=2857, - serialized_end=2901, -) - - -_RANDOMJITTERBOXES = _descriptor.Descriptor( - name='RandomJitterBoxes', - full_name='object_detection.protos.RandomJitterBoxes', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='ratio', full_name='object_detection.protos.RandomJitterBoxes.ratio', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.05), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=2903, - serialized_end=2943, -) - - -_RANDOMCROPIMAGE = _descriptor.Descriptor( - name='RandomCropImage', - full_name='object_detection.protos.RandomCropImage', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='min_object_covered', full_name='object_detection.protos.RandomCropImage.min_object_covered', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='min_aspect_ratio', full_name='object_detection.protos.RandomCropImage.min_aspect_ratio', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.75), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, 
file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='max_aspect_ratio', full_name='object_detection.protos.RandomCropImage.max_aspect_ratio', index=2, - number=3, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1.33), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='min_area', full_name='object_detection.protos.RandomCropImage.min_area', index=3, - number=4, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='max_area', full_name='object_detection.protos.RandomCropImage.max_area', index=4, - number=5, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='overlap_thresh', full_name='object_detection.protos.RandomCropImage.overlap_thresh', index=5, - number=6, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.3), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='random_coef', full_name='object_detection.protos.RandomCropImage.random_coef', index=6, - number=7, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=2946, - 
serialized_end=3155, -) - - -_RANDOMPADIMAGE = _descriptor.Descriptor( - name='RandomPadImage', - full_name='object_detection.protos.RandomPadImage', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='min_image_height', full_name='object_detection.protos.RandomPadImage.min_image_height', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='min_image_width', full_name='object_detection.protos.RandomPadImage.min_image_width', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='max_image_height', full_name='object_detection.protos.RandomPadImage.max_image_height', index=2, - number=3, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='max_image_width', full_name='object_detection.protos.RandomPadImage.max_image_width', index=3, - number=4, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='pad_color', full_name='object_detection.protos.RandomPadImage.pad_color', index=4, - number=5, type=2, cpp_type=6, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - 
options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=3158, - serialized_end=3295, -) - - -_RANDOMCROPPADIMAGE = _descriptor.Descriptor( - name='RandomCropPadImage', - full_name='object_detection.protos.RandomCropPadImage', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='min_object_covered', full_name='object_detection.protos.RandomCropPadImage.min_object_covered', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='min_aspect_ratio', full_name='object_detection.protos.RandomCropPadImage.min_aspect_ratio', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.75), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='max_aspect_ratio', full_name='object_detection.protos.RandomCropPadImage.max_aspect_ratio', index=2, - number=3, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1.33), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='min_area', full_name='object_detection.protos.RandomCropPadImage.min_area', index=3, - number=4, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='max_area', 
full_name='object_detection.protos.RandomCropPadImage.max_area', index=4, - number=5, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='overlap_thresh', full_name='object_detection.protos.RandomCropPadImage.overlap_thresh', index=5, - number=6, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.3), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='random_coef', full_name='object_detection.protos.RandomCropPadImage.random_coef', index=6, - number=7, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='min_padded_size_ratio', full_name='object_detection.protos.RandomCropPadImage.min_padded_size_ratio', index=7, - number=8, type=2, cpp_type=6, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='max_padded_size_ratio', full_name='object_detection.protos.RandomCropPadImage.max_padded_size_ratio', index=8, - number=9, type=2, cpp_type=6, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='pad_color', full_name='object_detection.protos.RandomCropPadImage.pad_color', index=9, - number=10, type=2, cpp_type=6, label=3, - has_default_value=False, default_value=[], - 
message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=3298, - serialized_end=3591, -) - - -_RANDOMCROPTOASPECTRATIO = _descriptor.Descriptor( - name='RandomCropToAspectRatio', - full_name='object_detection.protos.RandomCropToAspectRatio', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='aspect_ratio', full_name='object_detection.protos.RandomCropToAspectRatio.aspect_ratio', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='overlap_thresh', full_name='object_detection.protos.RandomCropToAspectRatio.overlap_thresh', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.3), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=3593, - serialized_end=3672, -) - - -_RANDOMBLACKPATCHES = _descriptor.Descriptor( - name='RandomBlackPatches', - full_name='object_detection.protos.RandomBlackPatches', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='max_black_patches', full_name='object_detection.protos.RandomBlackPatches.max_black_patches', index=0, - number=1, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=10, - message_type=None, enum_type=None, 
containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='probability', full_name='object_detection.protos.RandomBlackPatches.probability', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.5), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='size_to_image_ratio', full_name='object_detection.protos.RandomBlackPatches.size_to_image_ratio', index=2, - number=3, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=3674, - serialized_end=3785, -) - - -_RANDOMRESIZEMETHOD = _descriptor.Descriptor( - name='RandomResizeMethod', - full_name='object_detection.protos.RandomResizeMethod', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='target_height', full_name='object_detection.protos.RandomResizeMethod.target_height', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='target_width', full_name='object_detection.protos.RandomResizeMethod.target_width', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - 
extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=3787, - serialized_end=3852, -) - - -_SCALEBOXESTOPIXELCOORDINATES = _descriptor.Descriptor( - name='ScaleBoxesToPixelCoordinates', - full_name='object_detection.protos.ScaleBoxesToPixelCoordinates', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=3854, - serialized_end=3884, -) - - -_RESIZEIMAGE = _descriptor.Descriptor( - name='ResizeImage', - full_name='object_detection.protos.ResizeImage', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='new_height', full_name='object_detection.protos.ResizeImage.new_height', index=0, - number=1, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='new_width', full_name='object_detection.protos.ResizeImage.new_width', index=1, - number=2, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='method', full_name='object_detection.protos.ResizeImage.method', index=2, - number=3, type=14, cpp_type=8, label=1, - has_default_value=True, default_value=3, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - _RESIZEIMAGE_METHOD, - ], - options=None, - is_extendable=False, - 
syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=3887, - serialized_end=4079, -) - - -_SUBTRACTCHANNELMEAN = _descriptor.Descriptor( - name='SubtractChannelMean', - full_name='object_detection.protos.SubtractChannelMean', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='means', full_name='object_detection.protos.SubtractChannelMean.means', index=0, - number=1, type=2, cpp_type=6, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=4081, - serialized_end=4117, -) - - -_SSDRANDOMCROPOPERATION = _descriptor.Descriptor( - name='SSDRandomCropOperation', - full_name='object_detection.protos.SSDRandomCropOperation', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='min_object_covered', full_name='object_detection.protos.SSDRandomCropOperation.min_object_covered', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='min_aspect_ratio', full_name='object_detection.protos.SSDRandomCropOperation.min_aspect_ratio', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='max_aspect_ratio', full_name='object_detection.protos.SSDRandomCropOperation.max_aspect_ratio', index=2, - number=3, 
type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='min_area', full_name='object_detection.protos.SSDRandomCropOperation.min_area', index=3, - number=4, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='max_area', full_name='object_detection.protos.SSDRandomCropOperation.max_area', index=4, - number=5, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='overlap_thresh', full_name='object_detection.protos.SSDRandomCropOperation.overlap_thresh', index=5, - number=6, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='random_coef', full_name='object_detection.protos.SSDRandomCropOperation.random_coef', index=6, - number=7, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=4120, - serialized_end=4305, -) - - -_SSDRANDOMCROP = _descriptor.Descriptor( - name='SSDRandomCrop', - full_name='object_detection.protos.SSDRandomCrop', - 
filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='operations', full_name='object_detection.protos.SSDRandomCrop.operations', index=0, - number=1, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=4307, - serialized_end=4391, -) - - -_SSDRANDOMCROPPADOPERATION = _descriptor.Descriptor( - name='SSDRandomCropPadOperation', - full_name='object_detection.protos.SSDRandomCropPadOperation', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='min_object_covered', full_name='object_detection.protos.SSDRandomCropPadOperation.min_object_covered', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='min_aspect_ratio', full_name='object_detection.protos.SSDRandomCropPadOperation.min_aspect_ratio', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='max_aspect_ratio', full_name='object_detection.protos.SSDRandomCropPadOperation.max_aspect_ratio', index=2, - number=3, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - 
_descriptor.FieldDescriptor( - name='min_area', full_name='object_detection.protos.SSDRandomCropPadOperation.min_area', index=3, - number=4, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='max_area', full_name='object_detection.protos.SSDRandomCropPadOperation.max_area', index=4, - number=5, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='overlap_thresh', full_name='object_detection.protos.SSDRandomCropPadOperation.overlap_thresh', index=5, - number=6, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='random_coef', full_name='object_detection.protos.SSDRandomCropPadOperation.random_coef', index=6, - number=7, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='min_padded_size_ratio', full_name='object_detection.protos.SSDRandomCropPadOperation.min_padded_size_ratio', index=7, - number=8, type=2, cpp_type=6, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='max_padded_size_ratio', full_name='object_detection.protos.SSDRandomCropPadOperation.max_padded_size_ratio', index=8, - 
number=9, type=2, cpp_type=6, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='pad_color_r', full_name='object_detection.protos.SSDRandomCropPadOperation.pad_color_r', index=9, - number=10, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='pad_color_g', full_name='object_detection.protos.SSDRandomCropPadOperation.pad_color_g', index=10, - number=11, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='pad_color_b', full_name='object_detection.protos.SSDRandomCropPadOperation.pad_color_b', index=11, - number=12, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=4394, - serialized_end=4707, -) - - -_SSDRANDOMCROPPAD = _descriptor.Descriptor( - name='SSDRandomCropPad', - full_name='object_detection.protos.SSDRandomCropPad', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='operations', full_name='object_detection.protos.SSDRandomCropPad.operations', index=0, - number=1, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, 
containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=4709, - serialized_end=4799, -) - - -_SSDRANDOMCROPFIXEDASPECTRATIOOPERATION = _descriptor.Descriptor( - name='SSDRandomCropFixedAspectRatioOperation', - full_name='object_detection.protos.SSDRandomCropFixedAspectRatioOperation', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='min_object_covered', full_name='object_detection.protos.SSDRandomCropFixedAspectRatioOperation.min_object_covered', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='min_area', full_name='object_detection.protos.SSDRandomCropFixedAspectRatioOperation.min_area', index=1, - number=4, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='max_area', full_name='object_detection.protos.SSDRandomCropFixedAspectRatioOperation.max_area', index=2, - number=5, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='overlap_thresh', full_name='object_detection.protos.SSDRandomCropFixedAspectRatioOperation.overlap_thresh', index=3, - number=6, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, 
enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='random_coef', full_name='object_detection.protos.SSDRandomCropFixedAspectRatioOperation.random_coef', index=4, - number=7, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=4802, - serialized_end=4951, -) - - -_SSDRANDOMCROPFIXEDASPECTRATIO = _descriptor.Descriptor( - name='SSDRandomCropFixedAspectRatio', - full_name='object_detection.protos.SSDRandomCropFixedAspectRatio', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='operations', full_name='object_detection.protos.SSDRandomCropFixedAspectRatio.operations', index=0, - number=1, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='aspect_ratio', full_name='object_detection.protos.SSDRandomCropFixedAspectRatio.aspect_ratio', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=4954, - serialized_end=5095, -) - - -_SSDRANDOMCROPPADFIXEDASPECTRATIOOPERATION = _descriptor.Descriptor( - 
name='SSDRandomCropPadFixedAspectRatioOperation', - full_name='object_detection.protos.SSDRandomCropPadFixedAspectRatioOperation', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='min_object_covered', full_name='object_detection.protos.SSDRandomCropPadFixedAspectRatioOperation.min_object_covered', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='min_aspect_ratio', full_name='object_detection.protos.SSDRandomCropPadFixedAspectRatioOperation.min_aspect_ratio', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='max_aspect_ratio', full_name='object_detection.protos.SSDRandomCropPadFixedAspectRatioOperation.max_aspect_ratio', index=2, - number=3, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='min_area', full_name='object_detection.protos.SSDRandomCropPadFixedAspectRatioOperation.min_area', index=3, - number=4, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='max_area', full_name='object_detection.protos.SSDRandomCropPadFixedAspectRatioOperation.max_area', index=4, - number=5, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - 
message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='overlap_thresh', full_name='object_detection.protos.SSDRandomCropPadFixedAspectRatioOperation.overlap_thresh', index=5, - number=6, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='random_coef', full_name='object_detection.protos.SSDRandomCropPadFixedAspectRatioOperation.random_coef', index=6, - number=7, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='min_padded_size_ratio', full_name='object_detection.protos.SSDRandomCropPadFixedAspectRatioOperation.min_padded_size_ratio', index=7, - number=8, type=2, cpp_type=6, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='max_padded_size_ratio', full_name='object_detection.protos.SSDRandomCropPadFixedAspectRatioOperation.max_padded_size_ratio', index=8, - number=9, type=2, cpp_type=6, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=5098, - serialized_end=5364, -) - - -_SSDRANDOMCROPPADFIXEDASPECTRATIO = _descriptor.Descriptor( - 
name='SSDRandomCropPadFixedAspectRatio', - full_name='object_detection.protos.SSDRandomCropPadFixedAspectRatio', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='operations', full_name='object_detection.protos.SSDRandomCropPadFixedAspectRatio.operations', index=0, - number=1, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='aspect_ratio', full_name='object_detection.protos.SSDRandomCropPadFixedAspectRatio.aspect_ratio', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=5367, - serialized_end=5514, -) - -_PREPROCESSINGSTEP.fields_by_name['normalize_image'].message_type = _NORMALIZEIMAGE -_PREPROCESSINGSTEP.fields_by_name['random_horizontal_flip'].message_type = _RANDOMHORIZONTALFLIP -_PREPROCESSINGSTEP.fields_by_name['random_pixel_value_scale'].message_type = _RANDOMPIXELVALUESCALE -_PREPROCESSINGSTEP.fields_by_name['random_image_scale'].message_type = _RANDOMIMAGESCALE -_PREPROCESSINGSTEP.fields_by_name['random_rgb_to_gray'].message_type = _RANDOMRGBTOGRAY -_PREPROCESSINGSTEP.fields_by_name['random_adjust_brightness'].message_type = _RANDOMADJUSTBRIGHTNESS -_PREPROCESSINGSTEP.fields_by_name['random_adjust_contrast'].message_type = _RANDOMADJUSTCONTRAST -_PREPROCESSINGSTEP.fields_by_name['random_adjust_hue'].message_type = _RANDOMADJUSTHUE -_PREPROCESSINGSTEP.fields_by_name['random_adjust_saturation'].message_type = _RANDOMADJUSTSATURATION 
-_PREPROCESSINGSTEP.fields_by_name['random_distort_color'].message_type = _RANDOMDISTORTCOLOR -_PREPROCESSINGSTEP.fields_by_name['random_jitter_boxes'].message_type = _RANDOMJITTERBOXES -_PREPROCESSINGSTEP.fields_by_name['random_crop_image'].message_type = _RANDOMCROPIMAGE -_PREPROCESSINGSTEP.fields_by_name['random_pad_image'].message_type = _RANDOMPADIMAGE -_PREPROCESSINGSTEP.fields_by_name['random_crop_pad_image'].message_type = _RANDOMCROPPADIMAGE -_PREPROCESSINGSTEP.fields_by_name['random_crop_to_aspect_ratio'].message_type = _RANDOMCROPTOASPECTRATIO -_PREPROCESSINGSTEP.fields_by_name['random_black_patches'].message_type = _RANDOMBLACKPATCHES -_PREPROCESSINGSTEP.fields_by_name['random_resize_method'].message_type = _RANDOMRESIZEMETHOD -_PREPROCESSINGSTEP.fields_by_name['scale_boxes_to_pixel_coordinates'].message_type = _SCALEBOXESTOPIXELCOORDINATES -_PREPROCESSINGSTEP.fields_by_name['resize_image'].message_type = _RESIZEIMAGE -_PREPROCESSINGSTEP.fields_by_name['subtract_channel_mean'].message_type = _SUBTRACTCHANNELMEAN -_PREPROCESSINGSTEP.fields_by_name['ssd_random_crop'].message_type = _SSDRANDOMCROP -_PREPROCESSINGSTEP.fields_by_name['ssd_random_crop_pad'].message_type = _SSDRANDOMCROPPAD -_PREPROCESSINGSTEP.fields_by_name['ssd_random_crop_fixed_aspect_ratio'].message_type = _SSDRANDOMCROPFIXEDASPECTRATIO -_PREPROCESSINGSTEP.fields_by_name['ssd_random_crop_pad_fixed_aspect_ratio'].message_type = _SSDRANDOMCROPPADFIXEDASPECTRATIO -_PREPROCESSINGSTEP.fields_by_name['random_vertical_flip'].message_type = _RANDOMVERTICALFLIP -_PREPROCESSINGSTEP.fields_by_name['random_rotation90'].message_type = _RANDOMROTATION90 -_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append( - _PREPROCESSINGSTEP.fields_by_name['normalize_image']) -_PREPROCESSINGSTEP.fields_by_name['normalize_image'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'] -_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append( - 
_PREPROCESSINGSTEP.fields_by_name['random_horizontal_flip']) -_PREPROCESSINGSTEP.fields_by_name['random_horizontal_flip'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'] -_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append( - _PREPROCESSINGSTEP.fields_by_name['random_pixel_value_scale']) -_PREPROCESSINGSTEP.fields_by_name['random_pixel_value_scale'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'] -_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append( - _PREPROCESSINGSTEP.fields_by_name['random_image_scale']) -_PREPROCESSINGSTEP.fields_by_name['random_image_scale'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'] -_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append( - _PREPROCESSINGSTEP.fields_by_name['random_rgb_to_gray']) -_PREPROCESSINGSTEP.fields_by_name['random_rgb_to_gray'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'] -_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append( - _PREPROCESSINGSTEP.fields_by_name['random_adjust_brightness']) -_PREPROCESSINGSTEP.fields_by_name['random_adjust_brightness'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'] -_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append( - _PREPROCESSINGSTEP.fields_by_name['random_adjust_contrast']) -_PREPROCESSINGSTEP.fields_by_name['random_adjust_contrast'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'] -_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append( - _PREPROCESSINGSTEP.fields_by_name['random_adjust_hue']) -_PREPROCESSINGSTEP.fields_by_name['random_adjust_hue'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'] -_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append( - _PREPROCESSINGSTEP.fields_by_name['random_adjust_saturation']) 
-_PREPROCESSINGSTEP.fields_by_name['random_adjust_saturation'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'] -_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append( - _PREPROCESSINGSTEP.fields_by_name['random_distort_color']) -_PREPROCESSINGSTEP.fields_by_name['random_distort_color'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'] -_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append( - _PREPROCESSINGSTEP.fields_by_name['random_jitter_boxes']) -_PREPROCESSINGSTEP.fields_by_name['random_jitter_boxes'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'] -_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append( - _PREPROCESSINGSTEP.fields_by_name['random_crop_image']) -_PREPROCESSINGSTEP.fields_by_name['random_crop_image'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'] -_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append( - _PREPROCESSINGSTEP.fields_by_name['random_pad_image']) -_PREPROCESSINGSTEP.fields_by_name['random_pad_image'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'] -_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append( - _PREPROCESSINGSTEP.fields_by_name['random_crop_pad_image']) -_PREPROCESSINGSTEP.fields_by_name['random_crop_pad_image'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'] -_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append( - _PREPROCESSINGSTEP.fields_by_name['random_crop_to_aspect_ratio']) -_PREPROCESSINGSTEP.fields_by_name['random_crop_to_aspect_ratio'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'] -_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append( - _PREPROCESSINGSTEP.fields_by_name['random_black_patches']) -_PREPROCESSINGSTEP.fields_by_name['random_black_patches'].containing_oneof = 
_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'] -_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append( - _PREPROCESSINGSTEP.fields_by_name['random_resize_method']) -_PREPROCESSINGSTEP.fields_by_name['random_resize_method'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'] -_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append( - _PREPROCESSINGSTEP.fields_by_name['scale_boxes_to_pixel_coordinates']) -_PREPROCESSINGSTEP.fields_by_name['scale_boxes_to_pixel_coordinates'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'] -_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append( - _PREPROCESSINGSTEP.fields_by_name['resize_image']) -_PREPROCESSINGSTEP.fields_by_name['resize_image'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'] -_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append( - _PREPROCESSINGSTEP.fields_by_name['subtract_channel_mean']) -_PREPROCESSINGSTEP.fields_by_name['subtract_channel_mean'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'] -_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append( - _PREPROCESSINGSTEP.fields_by_name['ssd_random_crop']) -_PREPROCESSINGSTEP.fields_by_name['ssd_random_crop'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'] -_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append( - _PREPROCESSINGSTEP.fields_by_name['ssd_random_crop_pad']) -_PREPROCESSINGSTEP.fields_by_name['ssd_random_crop_pad'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'] -_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append( - _PREPROCESSINGSTEP.fields_by_name['ssd_random_crop_fixed_aspect_ratio']) -_PREPROCESSINGSTEP.fields_by_name['ssd_random_crop_fixed_aspect_ratio'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'] 
-_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append( - _PREPROCESSINGSTEP.fields_by_name['ssd_random_crop_pad_fixed_aspect_ratio']) -_PREPROCESSINGSTEP.fields_by_name['ssd_random_crop_pad_fixed_aspect_ratio'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'] -_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append( - _PREPROCESSINGSTEP.fields_by_name['random_vertical_flip']) -_PREPROCESSINGSTEP.fields_by_name['random_vertical_flip'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'] -_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append( - _PREPROCESSINGSTEP.fields_by_name['random_rotation90']) -_PREPROCESSINGSTEP.fields_by_name['random_rotation90'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'] -_RESIZEIMAGE.fields_by_name['method'].enum_type = _RESIZEIMAGE_METHOD -_RESIZEIMAGE_METHOD.containing_type = _RESIZEIMAGE -_SSDRANDOMCROP.fields_by_name['operations'].message_type = _SSDRANDOMCROPOPERATION -_SSDRANDOMCROPPAD.fields_by_name['operations'].message_type = _SSDRANDOMCROPPADOPERATION -_SSDRANDOMCROPFIXEDASPECTRATIO.fields_by_name['operations'].message_type = _SSDRANDOMCROPFIXEDASPECTRATIOOPERATION -_SSDRANDOMCROPPADFIXEDASPECTRATIO.fields_by_name['operations'].message_type = _SSDRANDOMCROPPADFIXEDASPECTRATIOOPERATION -DESCRIPTOR.message_types_by_name['PreprocessingStep'] = _PREPROCESSINGSTEP -DESCRIPTOR.message_types_by_name['NormalizeImage'] = _NORMALIZEIMAGE -DESCRIPTOR.message_types_by_name['RandomHorizontalFlip'] = _RANDOMHORIZONTALFLIP -DESCRIPTOR.message_types_by_name['RandomVerticalFlip'] = _RANDOMVERTICALFLIP -DESCRIPTOR.message_types_by_name['RandomRotation90'] = _RANDOMROTATION90 -DESCRIPTOR.message_types_by_name['RandomPixelValueScale'] = _RANDOMPIXELVALUESCALE -DESCRIPTOR.message_types_by_name['RandomImageScale'] = _RANDOMIMAGESCALE -DESCRIPTOR.message_types_by_name['RandomRGBtoGray'] = _RANDOMRGBTOGRAY 
-DESCRIPTOR.message_types_by_name['RandomAdjustBrightness'] = _RANDOMADJUSTBRIGHTNESS -DESCRIPTOR.message_types_by_name['RandomAdjustContrast'] = _RANDOMADJUSTCONTRAST -DESCRIPTOR.message_types_by_name['RandomAdjustHue'] = _RANDOMADJUSTHUE -DESCRIPTOR.message_types_by_name['RandomAdjustSaturation'] = _RANDOMADJUSTSATURATION -DESCRIPTOR.message_types_by_name['RandomDistortColor'] = _RANDOMDISTORTCOLOR -DESCRIPTOR.message_types_by_name['RandomJitterBoxes'] = _RANDOMJITTERBOXES -DESCRIPTOR.message_types_by_name['RandomCropImage'] = _RANDOMCROPIMAGE -DESCRIPTOR.message_types_by_name['RandomPadImage'] = _RANDOMPADIMAGE -DESCRIPTOR.message_types_by_name['RandomCropPadImage'] = _RANDOMCROPPADIMAGE -DESCRIPTOR.message_types_by_name['RandomCropToAspectRatio'] = _RANDOMCROPTOASPECTRATIO -DESCRIPTOR.message_types_by_name['RandomBlackPatches'] = _RANDOMBLACKPATCHES -DESCRIPTOR.message_types_by_name['RandomResizeMethod'] = _RANDOMRESIZEMETHOD -DESCRIPTOR.message_types_by_name['ScaleBoxesToPixelCoordinates'] = _SCALEBOXESTOPIXELCOORDINATES -DESCRIPTOR.message_types_by_name['ResizeImage'] = _RESIZEIMAGE -DESCRIPTOR.message_types_by_name['SubtractChannelMean'] = _SUBTRACTCHANNELMEAN -DESCRIPTOR.message_types_by_name['SSDRandomCropOperation'] = _SSDRANDOMCROPOPERATION -DESCRIPTOR.message_types_by_name['SSDRandomCrop'] = _SSDRANDOMCROP -DESCRIPTOR.message_types_by_name['SSDRandomCropPadOperation'] = _SSDRANDOMCROPPADOPERATION -DESCRIPTOR.message_types_by_name['SSDRandomCropPad'] = _SSDRANDOMCROPPAD -DESCRIPTOR.message_types_by_name['SSDRandomCropFixedAspectRatioOperation'] = _SSDRANDOMCROPFIXEDASPECTRATIOOPERATION -DESCRIPTOR.message_types_by_name['SSDRandomCropFixedAspectRatio'] = _SSDRANDOMCROPFIXEDASPECTRATIO -DESCRIPTOR.message_types_by_name['SSDRandomCropPadFixedAspectRatioOperation'] = _SSDRANDOMCROPPADFIXEDASPECTRATIOOPERATION -DESCRIPTOR.message_types_by_name['SSDRandomCropPadFixedAspectRatio'] = _SSDRANDOMCROPPADFIXEDASPECTRATIO -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - 
-PreprocessingStep = _reflection.GeneratedProtocolMessageType('PreprocessingStep', (_message.Message,), dict( - DESCRIPTOR = _PREPROCESSINGSTEP, - __module__ = 'object_detection.protos.preprocessor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.PreprocessingStep) - )) -_sym_db.RegisterMessage(PreprocessingStep) - -NormalizeImage = _reflection.GeneratedProtocolMessageType('NormalizeImage', (_message.Message,), dict( - DESCRIPTOR = _NORMALIZEIMAGE, - __module__ = 'object_detection.protos.preprocessor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.NormalizeImage) - )) -_sym_db.RegisterMessage(NormalizeImage) - -RandomHorizontalFlip = _reflection.GeneratedProtocolMessageType('RandomHorizontalFlip', (_message.Message,), dict( - DESCRIPTOR = _RANDOMHORIZONTALFLIP, - __module__ = 'object_detection.protos.preprocessor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.RandomHorizontalFlip) - )) -_sym_db.RegisterMessage(RandomHorizontalFlip) - -RandomVerticalFlip = _reflection.GeneratedProtocolMessageType('RandomVerticalFlip', (_message.Message,), dict( - DESCRIPTOR = _RANDOMVERTICALFLIP, - __module__ = 'object_detection.protos.preprocessor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.RandomVerticalFlip) - )) -_sym_db.RegisterMessage(RandomVerticalFlip) - -RandomRotation90 = _reflection.GeneratedProtocolMessageType('RandomRotation90', (_message.Message,), dict( - DESCRIPTOR = _RANDOMROTATION90, - __module__ = 'object_detection.protos.preprocessor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.RandomRotation90) - )) -_sym_db.RegisterMessage(RandomRotation90) - -RandomPixelValueScale = _reflection.GeneratedProtocolMessageType('RandomPixelValueScale', (_message.Message,), dict( - DESCRIPTOR = _RANDOMPIXELVALUESCALE, - __module__ = 'object_detection.protos.preprocessor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.RandomPixelValueScale) - )) 
-_sym_db.RegisterMessage(RandomPixelValueScale) - -RandomImageScale = _reflection.GeneratedProtocolMessageType('RandomImageScale', (_message.Message,), dict( - DESCRIPTOR = _RANDOMIMAGESCALE, - __module__ = 'object_detection.protos.preprocessor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.RandomImageScale) - )) -_sym_db.RegisterMessage(RandomImageScale) - -RandomRGBtoGray = _reflection.GeneratedProtocolMessageType('RandomRGBtoGray', (_message.Message,), dict( - DESCRIPTOR = _RANDOMRGBTOGRAY, - __module__ = 'object_detection.protos.preprocessor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.RandomRGBtoGray) - )) -_sym_db.RegisterMessage(RandomRGBtoGray) - -RandomAdjustBrightness = _reflection.GeneratedProtocolMessageType('RandomAdjustBrightness', (_message.Message,), dict( - DESCRIPTOR = _RANDOMADJUSTBRIGHTNESS, - __module__ = 'object_detection.protos.preprocessor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.RandomAdjustBrightness) - )) -_sym_db.RegisterMessage(RandomAdjustBrightness) - -RandomAdjustContrast = _reflection.GeneratedProtocolMessageType('RandomAdjustContrast', (_message.Message,), dict( - DESCRIPTOR = _RANDOMADJUSTCONTRAST, - __module__ = 'object_detection.protos.preprocessor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.RandomAdjustContrast) - )) -_sym_db.RegisterMessage(RandomAdjustContrast) - -RandomAdjustHue = _reflection.GeneratedProtocolMessageType('RandomAdjustHue', (_message.Message,), dict( - DESCRIPTOR = _RANDOMADJUSTHUE, - __module__ = 'object_detection.protos.preprocessor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.RandomAdjustHue) - )) -_sym_db.RegisterMessage(RandomAdjustHue) - -RandomAdjustSaturation = _reflection.GeneratedProtocolMessageType('RandomAdjustSaturation', (_message.Message,), dict( - DESCRIPTOR = _RANDOMADJUSTSATURATION, - __module__ = 'object_detection.protos.preprocessor_pb2' - # 
@@protoc_insertion_point(class_scope:object_detection.protos.RandomAdjustSaturation) - )) -_sym_db.RegisterMessage(RandomAdjustSaturation) - -RandomDistortColor = _reflection.GeneratedProtocolMessageType('RandomDistortColor', (_message.Message,), dict( - DESCRIPTOR = _RANDOMDISTORTCOLOR, - __module__ = 'object_detection.protos.preprocessor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.RandomDistortColor) - )) -_sym_db.RegisterMessage(RandomDistortColor) - -RandomJitterBoxes = _reflection.GeneratedProtocolMessageType('RandomJitterBoxes', (_message.Message,), dict( - DESCRIPTOR = _RANDOMJITTERBOXES, - __module__ = 'object_detection.protos.preprocessor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.RandomJitterBoxes) - )) -_sym_db.RegisterMessage(RandomJitterBoxes) - -RandomCropImage = _reflection.GeneratedProtocolMessageType('RandomCropImage', (_message.Message,), dict( - DESCRIPTOR = _RANDOMCROPIMAGE, - __module__ = 'object_detection.protos.preprocessor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.RandomCropImage) - )) -_sym_db.RegisterMessage(RandomCropImage) - -RandomPadImage = _reflection.GeneratedProtocolMessageType('RandomPadImage', (_message.Message,), dict( - DESCRIPTOR = _RANDOMPADIMAGE, - __module__ = 'object_detection.protos.preprocessor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.RandomPadImage) - )) -_sym_db.RegisterMessage(RandomPadImage) - -RandomCropPadImage = _reflection.GeneratedProtocolMessageType('RandomCropPadImage', (_message.Message,), dict( - DESCRIPTOR = _RANDOMCROPPADIMAGE, - __module__ = 'object_detection.protos.preprocessor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.RandomCropPadImage) - )) -_sym_db.RegisterMessage(RandomCropPadImage) - -RandomCropToAspectRatio = _reflection.GeneratedProtocolMessageType('RandomCropToAspectRatio', (_message.Message,), dict( - DESCRIPTOR = _RANDOMCROPTOASPECTRATIO, - __module__ = 
'object_detection.protos.preprocessor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.RandomCropToAspectRatio) - )) -_sym_db.RegisterMessage(RandomCropToAspectRatio) - -RandomBlackPatches = _reflection.GeneratedProtocolMessageType('RandomBlackPatches', (_message.Message,), dict( - DESCRIPTOR = _RANDOMBLACKPATCHES, - __module__ = 'object_detection.protos.preprocessor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.RandomBlackPatches) - )) -_sym_db.RegisterMessage(RandomBlackPatches) - -RandomResizeMethod = _reflection.GeneratedProtocolMessageType('RandomResizeMethod', (_message.Message,), dict( - DESCRIPTOR = _RANDOMRESIZEMETHOD, - __module__ = 'object_detection.protos.preprocessor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.RandomResizeMethod) - )) -_sym_db.RegisterMessage(RandomResizeMethod) - -ScaleBoxesToPixelCoordinates = _reflection.GeneratedProtocolMessageType('ScaleBoxesToPixelCoordinates', (_message.Message,), dict( - DESCRIPTOR = _SCALEBOXESTOPIXELCOORDINATES, - __module__ = 'object_detection.protos.preprocessor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.ScaleBoxesToPixelCoordinates) - )) -_sym_db.RegisterMessage(ScaleBoxesToPixelCoordinates) - -ResizeImage = _reflection.GeneratedProtocolMessageType('ResizeImage', (_message.Message,), dict( - DESCRIPTOR = _RESIZEIMAGE, - __module__ = 'object_detection.protos.preprocessor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.ResizeImage) - )) -_sym_db.RegisterMessage(ResizeImage) - -SubtractChannelMean = _reflection.GeneratedProtocolMessageType('SubtractChannelMean', (_message.Message,), dict( - DESCRIPTOR = _SUBTRACTCHANNELMEAN, - __module__ = 'object_detection.protos.preprocessor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.SubtractChannelMean) - )) -_sym_db.RegisterMessage(SubtractChannelMean) - -SSDRandomCropOperation = 
_reflection.GeneratedProtocolMessageType('SSDRandomCropOperation', (_message.Message,), dict( - DESCRIPTOR = _SSDRANDOMCROPOPERATION, - __module__ = 'object_detection.protos.preprocessor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.SSDRandomCropOperation) - )) -_sym_db.RegisterMessage(SSDRandomCropOperation) - -SSDRandomCrop = _reflection.GeneratedProtocolMessageType('SSDRandomCrop', (_message.Message,), dict( - DESCRIPTOR = _SSDRANDOMCROP, - __module__ = 'object_detection.protos.preprocessor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.SSDRandomCrop) - )) -_sym_db.RegisterMessage(SSDRandomCrop) - -SSDRandomCropPadOperation = _reflection.GeneratedProtocolMessageType('SSDRandomCropPadOperation', (_message.Message,), dict( - DESCRIPTOR = _SSDRANDOMCROPPADOPERATION, - __module__ = 'object_detection.protos.preprocessor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.SSDRandomCropPadOperation) - )) -_sym_db.RegisterMessage(SSDRandomCropPadOperation) - -SSDRandomCropPad = _reflection.GeneratedProtocolMessageType('SSDRandomCropPad', (_message.Message,), dict( - DESCRIPTOR = _SSDRANDOMCROPPAD, - __module__ = 'object_detection.protos.preprocessor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.SSDRandomCropPad) - )) -_sym_db.RegisterMessage(SSDRandomCropPad) - -SSDRandomCropFixedAspectRatioOperation = _reflection.GeneratedProtocolMessageType('SSDRandomCropFixedAspectRatioOperation', (_message.Message,), dict( - DESCRIPTOR = _SSDRANDOMCROPFIXEDASPECTRATIOOPERATION, - __module__ = 'object_detection.protos.preprocessor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.SSDRandomCropFixedAspectRatioOperation) - )) -_sym_db.RegisterMessage(SSDRandomCropFixedAspectRatioOperation) - -SSDRandomCropFixedAspectRatio = _reflection.GeneratedProtocolMessageType('SSDRandomCropFixedAspectRatio', (_message.Message,), dict( - DESCRIPTOR = _SSDRANDOMCROPFIXEDASPECTRATIO, - 
__module__ = 'object_detection.protos.preprocessor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.SSDRandomCropFixedAspectRatio) - )) -_sym_db.RegisterMessage(SSDRandomCropFixedAspectRatio) - -SSDRandomCropPadFixedAspectRatioOperation = _reflection.GeneratedProtocolMessageType('SSDRandomCropPadFixedAspectRatioOperation', (_message.Message,), dict( - DESCRIPTOR = _SSDRANDOMCROPPADFIXEDASPECTRATIOOPERATION, - __module__ = 'object_detection.protos.preprocessor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.SSDRandomCropPadFixedAspectRatioOperation) - )) -_sym_db.RegisterMessage(SSDRandomCropPadFixedAspectRatioOperation) - -SSDRandomCropPadFixedAspectRatio = _reflection.GeneratedProtocolMessageType('SSDRandomCropPadFixedAspectRatio', (_message.Message,), dict( - DESCRIPTOR = _SSDRANDOMCROPPADFIXEDASPECTRATIO, - __module__ = 'object_detection.protos.preprocessor_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.SSDRandomCropPadFixedAspectRatio) - )) -_sym_db.RegisterMessage(SSDRandomCropPadFixedAspectRatio) - - -# @@protoc_insertion_point(module_scope) diff --git a/object_detection/protos/region_similarity_calculator.proto b/object_detection/protos/region_similarity_calculator.proto deleted file mode 100644 index e82424e2..00000000 --- a/object_detection/protos/region_similarity_calculator.proto +++ /dev/null @@ -1,25 +0,0 @@ -syntax = "proto2"; - -package object_detection.protos; - -// Configuration proto for region similarity calculators. See -// core/region_similarity_calculator.py for details. -message RegionSimilarityCalculator { - oneof region_similarity { - NegSqDistSimilarity neg_sq_dist_similarity = 1; - IouSimilarity iou_similarity = 2; - IoaSimilarity ioa_similarity = 3; - } -} - -// Configuration for negative squared distance similarity calculator. -message NegSqDistSimilarity { -} - -// Configuration for intersection-over-union (IOU) similarity calculator. 
-message IouSimilarity { -} - -// Configuration for intersection-over-area (IOA) similarity calculator. -message IoaSimilarity { -} diff --git a/object_detection/protos/region_similarity_calculator_pb2.py b/object_detection/protos/region_similarity_calculator_pb2.py deleted file mode 100644 index 9e260d84..00000000 --- a/object_detection/protos/region_similarity_calculator_pb2.py +++ /dev/null @@ -1,194 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: object_detection/protos/region_similarity_calculator.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='object_detection/protos/region_similarity_calculator.proto', - package='object_detection.protos', - syntax='proto2', - serialized_pb=_b('\n:object_detection/protos/region_similarity_calculator.proto\x12\x17object_detection.protos\"\x85\x02\n\x1aRegionSimilarityCalculator\x12N\n\x16neg_sq_dist_similarity\x18\x01 \x01(\x0b\x32,.object_detection.protos.NegSqDistSimilarityH\x00\x12@\n\x0eiou_similarity\x18\x02 \x01(\x0b\x32&.object_detection.protos.IouSimilarityH\x00\x12@\n\x0eioa_similarity\x18\x03 \x01(\x0b\x32&.object_detection.protos.IoaSimilarityH\x00\x42\x13\n\x11region_similarity\"\x15\n\x13NegSqDistSimilarity\"\x0f\n\rIouSimilarity\"\x0f\n\rIoaSimilarity') -) - - - - -_REGIONSIMILARITYCALCULATOR = _descriptor.Descriptor( - name='RegionSimilarityCalculator', - full_name='object_detection.protos.RegionSimilarityCalculator', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - 
name='neg_sq_dist_similarity', full_name='object_detection.protos.RegionSimilarityCalculator.neg_sq_dist_similarity', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='iou_similarity', full_name='object_detection.protos.RegionSimilarityCalculator.iou_similarity', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='ioa_similarity', full_name='object_detection.protos.RegionSimilarityCalculator.ioa_similarity', index=2, - number=3, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name='region_similarity', full_name='object_detection.protos.RegionSimilarityCalculator.region_similarity', - index=0, containing_type=None, fields=[]), - ], - serialized_start=88, - serialized_end=349, -) - - -_NEGSQDISTSIMILARITY = _descriptor.Descriptor( - name='NegSqDistSimilarity', - full_name='object_detection.protos.NegSqDistSimilarity', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=351, - serialized_end=372, -) - - -_IOUSIMILARITY = _descriptor.Descriptor( - name='IouSimilarity', - 
full_name='object_detection.protos.IouSimilarity', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=374, - serialized_end=389, -) - - -_IOASIMILARITY = _descriptor.Descriptor( - name='IoaSimilarity', - full_name='object_detection.protos.IoaSimilarity', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=391, - serialized_end=406, -) - -_REGIONSIMILARITYCALCULATOR.fields_by_name['neg_sq_dist_similarity'].message_type = _NEGSQDISTSIMILARITY -_REGIONSIMILARITYCALCULATOR.fields_by_name['iou_similarity'].message_type = _IOUSIMILARITY -_REGIONSIMILARITYCALCULATOR.fields_by_name['ioa_similarity'].message_type = _IOASIMILARITY -_REGIONSIMILARITYCALCULATOR.oneofs_by_name['region_similarity'].fields.append( - _REGIONSIMILARITYCALCULATOR.fields_by_name['neg_sq_dist_similarity']) -_REGIONSIMILARITYCALCULATOR.fields_by_name['neg_sq_dist_similarity'].containing_oneof = _REGIONSIMILARITYCALCULATOR.oneofs_by_name['region_similarity'] -_REGIONSIMILARITYCALCULATOR.oneofs_by_name['region_similarity'].fields.append( - _REGIONSIMILARITYCALCULATOR.fields_by_name['iou_similarity']) -_REGIONSIMILARITYCALCULATOR.fields_by_name['iou_similarity'].containing_oneof = _REGIONSIMILARITYCALCULATOR.oneofs_by_name['region_similarity'] -_REGIONSIMILARITYCALCULATOR.oneofs_by_name['region_similarity'].fields.append( - _REGIONSIMILARITYCALCULATOR.fields_by_name['ioa_similarity']) -_REGIONSIMILARITYCALCULATOR.fields_by_name['ioa_similarity'].containing_oneof = _REGIONSIMILARITYCALCULATOR.oneofs_by_name['region_similarity'] -DESCRIPTOR.message_types_by_name['RegionSimilarityCalculator'] = 
_REGIONSIMILARITYCALCULATOR -DESCRIPTOR.message_types_by_name['NegSqDistSimilarity'] = _NEGSQDISTSIMILARITY -DESCRIPTOR.message_types_by_name['IouSimilarity'] = _IOUSIMILARITY -DESCRIPTOR.message_types_by_name['IoaSimilarity'] = _IOASIMILARITY -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -RegionSimilarityCalculator = _reflection.GeneratedProtocolMessageType('RegionSimilarityCalculator', (_message.Message,), dict( - DESCRIPTOR = _REGIONSIMILARITYCALCULATOR, - __module__ = 'object_detection.protos.region_similarity_calculator_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.RegionSimilarityCalculator) - )) -_sym_db.RegisterMessage(RegionSimilarityCalculator) - -NegSqDistSimilarity = _reflection.GeneratedProtocolMessageType('NegSqDistSimilarity', (_message.Message,), dict( - DESCRIPTOR = _NEGSQDISTSIMILARITY, - __module__ = 'object_detection.protos.region_similarity_calculator_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.NegSqDistSimilarity) - )) -_sym_db.RegisterMessage(NegSqDistSimilarity) - -IouSimilarity = _reflection.GeneratedProtocolMessageType('IouSimilarity', (_message.Message,), dict( - DESCRIPTOR = _IOUSIMILARITY, - __module__ = 'object_detection.protos.region_similarity_calculator_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.IouSimilarity) - )) -_sym_db.RegisterMessage(IouSimilarity) - -IoaSimilarity = _reflection.GeneratedProtocolMessageType('IoaSimilarity', (_message.Message,), dict( - DESCRIPTOR = _IOASIMILARITY, - __module__ = 'object_detection.protos.region_similarity_calculator_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.IoaSimilarity) - )) -_sym_db.RegisterMessage(IoaSimilarity) - - -# @@protoc_insertion_point(module_scope) diff --git a/object_detection/protos/square_box_coder.proto b/object_detection/protos/square_box_coder.proto deleted file mode 100644 index 41575eb4..00000000 --- a/object_detection/protos/square_box_coder.proto +++ /dev/null @@ -1,14 
+0,0 @@ -syntax = "proto2"; - -package object_detection.protos; - -// Configuration proto for SquareBoxCoder. See -// box_coders/square_box_coder.py for details. -message SquareBoxCoder { - // Scale factor for anchor encoded box center. - optional float y_scale = 1 [default = 10.0]; - optional float x_scale = 2 [default = 10.0]; - - // Scale factor for anchor encoded box length. - optional float length_scale = 3 [default = 5.0]; -} diff --git a/object_detection/protos/square_box_coder_pb2.py b/object_detection/protos/square_box_coder_pb2.py deleted file mode 100644 index a75e3d06..00000000 --- a/object_detection/protos/square_box_coder_pb2.py +++ /dev/null @@ -1,83 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: object_detection/protos/square_box_coder.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='object_detection/protos/square_box_coder.proto', - package='object_detection.protos', - syntax='proto2', - serialized_pb=_b('\n.object_detection/protos/square_box_coder.proto\x12\x17object_detection.protos\"S\n\x0eSquareBoxCoder\x12\x13\n\x07y_scale\x18\x01 \x01(\x02:\x02\x31\x30\x12\x13\n\x07x_scale\x18\x02 \x01(\x02:\x02\x31\x30\x12\x17\n\x0clength_scale\x18\x03 \x01(\x02:\x01\x35') -) - - - - -_SQUAREBOXCODER = _descriptor.Descriptor( - name='SquareBoxCoder', - full_name='object_detection.protos.SquareBoxCoder', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='y_scale', full_name='object_detection.protos.SquareBoxCoder.y_scale', 
index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(10), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='x_scale', full_name='object_detection.protos.SquareBoxCoder.x_scale', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(10), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='length_scale', full_name='object_detection.protos.SquareBoxCoder.length_scale', index=2, - number=3, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(5), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=75, - serialized_end=158, -) - -DESCRIPTOR.message_types_by_name['SquareBoxCoder'] = _SQUAREBOXCODER -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -SquareBoxCoder = _reflection.GeneratedProtocolMessageType('SquareBoxCoder', (_message.Message,), dict( - DESCRIPTOR = _SQUAREBOXCODER, - __module__ = 'object_detection.protos.square_box_coder_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.SquareBoxCoder) - )) -_sym_db.RegisterMessage(SquareBoxCoder) - - -# @@protoc_insertion_point(module_scope) diff --git a/object_detection/protos/ssd.proto b/object_detection/protos/ssd.proto deleted file mode 100644 index 067c2fff..00000000 --- a/object_detection/protos/ssd.proto +++ /dev/null @@ -1,85 +0,0 @@ -syntax = "proto2"; -package object_detection.protos; - -import "object_detection/protos/anchor_generator.proto"; -import 
"object_detection/protos/box_coder.proto"; -import "object_detection/protos/box_predictor.proto"; -import "object_detection/protos/hyperparams.proto"; -import "object_detection/protos/image_resizer.proto"; -import "object_detection/protos/matcher.proto"; -import "object_detection/protos/losses.proto"; -import "object_detection/protos/post_processing.proto"; -import "object_detection/protos/region_similarity_calculator.proto"; - -// Configuration for Single Shot Detection (SSD) models. -message Ssd { - - // Number of classes to predict. - optional int32 num_classes = 1; - - // Image resizer for preprocessing the input image. - optional ImageResizer image_resizer = 2; - - // Feature extractor config. - optional SsdFeatureExtractor feature_extractor = 3; - - // Box coder to encode the boxes. - optional BoxCoder box_coder = 4; - - // Matcher to match groundtruth with anchors. - optional Matcher matcher = 5; - - // Region similarity calculator to compute similarity of boxes. - optional RegionSimilarityCalculator similarity_calculator = 6; - - // Box predictor to attach to the features. - optional BoxPredictor box_predictor = 7; - - // Anchor generator to compute anchors. - optional AnchorGenerator anchor_generator = 8; - - // Post processing to apply on the predictions. - optional PostProcessing post_processing = 9; - - // Whether to normalize the loss by number of groundtruth boxes that match to - // the anchors. - optional bool normalize_loss_by_num_matches = 10 [default=true]; - - // Loss configuration for training. - optional Loss loss = 11; -} - - -message SsdFeatureExtractor { - // Type of ssd feature extractor. - optional string type = 1; - - // The factor to alter the depth of the channels in the feature extractor. - optional float depth_multiplier = 2 [default=1.0]; - - // Minimum number of the channels in the feature extractor. - optional int32 min_depth = 3 [default=16]; - - // Hyperparameters for the feature extractor. 
- optional Hyperparams conv_hyperparams = 4; - - // The nearest multiple to zero-pad the input height and width dimensions to. - // For example, if pad_to_multiple = 2, input dimensions are zero-padded - // until the resulting dimensions are even. - optional int32 pad_to_multiple = 5 [default = 1]; - - // Whether to update batch norm parameters during training or not. - // When training with a relative small batch size (e.g. 1), it is - // desirable to disable batch norm update and use pretrained batch norm - // params. - // - // Note: Some feature extractors are used with canned arg_scopes - // (e.g resnet arg scopes). In these cases training behavior of batch norm - // variables may depend on both values of `batch_norm_trainable` and - // `is_training`. - // - // When canned arg_scopes are used with feature extractors `conv_hyperparams` - // will apply only to the additional layers that are added and are outside the - // canned arg_scope. - optional bool batch_norm_trainable = 6 [default=true]; -} diff --git a/object_detection/protos/ssd_anchor_generator.proto b/object_detection/protos/ssd_anchor_generator.proto deleted file mode 100644 index d969ddf7..00000000 --- a/object_detection/protos/ssd_anchor_generator.proto +++ /dev/null @@ -1,55 +0,0 @@ -syntax = "proto2"; - -package object_detection.protos; - -// Configuration proto for SSD anchor generator described in -// https://arxiv.org/abs/1512.02325. See -// anchor_generators/multiple_grid_anchor_generator.py for details. -message SsdAnchorGenerator { - // Number of grid layers to create anchors for. - optional int32 num_layers = 1 [default = 6]; - - // Scale of anchors corresponding to finest resolution. - optional float min_scale = 2 [default = 0.2]; - - // Scale of anchors corresponding to coarsest resolution - optional float max_scale = 3 [default = 0.95]; - - // Can be used to override min_scale->max_scale, with an explicitly defined - // set of scales. If empty, then min_scale->max_scale is used. 
- repeated float scales = 12; - - // Aspect ratios for anchors at each grid point. - repeated float aspect_ratios = 4; - - // When this aspect ratio is greater than 0, then an additional - // anchor, with an interpolated scale is added with this aspect ratio. - optional float interpolated_scale_aspect_ratio = 13 [default = 1.0]; - - // Whether to use the following aspect ratio and scale combination for the - // layer with the finest resolution : (scale=0.1, aspect_ratio=1.0), - // (scale=min_scale, aspect_ration=2.0), (scale=min_scale, aspect_ratio=0.5). - optional bool reduce_boxes_in_lowest_layer = 5 [default = true]; - - // The base anchor size in height dimension. - optional float base_anchor_height = 6 [default = 1.0]; - - // The base anchor size in width dimension. - optional float base_anchor_width = 7 [default = 1.0]; - - // Anchor stride in height dimension in pixels for each layer. The length of - // this field is expected to be equal to the value of num_layers. - repeated int32 height_stride = 8; - - // Anchor stride in width dimension in pixels for each layer. The length of - // this field is expected to be equal to the value of num_layers. - repeated int32 width_stride = 9; - - // Anchor height offset in pixels for each layer. The length of this field is - // expected to be equal to the value of num_layers. - repeated int32 height_offset = 10; - - // Anchor width offset in pixels for each layer. The length of this field is - // expected to be equal to the value of num_layers. - repeated int32 width_offset = 11; -} diff --git a/object_detection/protos/ssd_anchor_generator_pb2.py b/object_detection/protos/ssd_anchor_generator_pb2.py deleted file mode 100644 index 25fe45a0..00000000 --- a/object_detection/protos/ssd_anchor_generator_pb2.py +++ /dev/null @@ -1,153 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: object_detection/protos/ssd_anchor_generator.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='object_detection/protos/ssd_anchor_generator.proto', - package='object_detection.protos', - syntax='proto2', - serialized_pb=_b('\n2object_detection/protos/ssd_anchor_generator.proto\x12\x17object_detection.protos\"\xf2\x02\n\x12SsdAnchorGenerator\x12\x15\n\nnum_layers\x18\x01 \x01(\x05:\x01\x36\x12\x16\n\tmin_scale\x18\x02 \x01(\x02:\x03\x30.2\x12\x17\n\tmax_scale\x18\x03 \x01(\x02:\x04\x30.95\x12\x0e\n\x06scales\x18\x0c \x03(\x02\x12\x15\n\raspect_ratios\x18\x04 \x03(\x02\x12*\n\x1finterpolated_scale_aspect_ratio\x18\r \x01(\x02:\x01\x31\x12*\n\x1creduce_boxes_in_lowest_layer\x18\x05 \x01(\x08:\x04true\x12\x1d\n\x12\x62\x61se_anchor_height\x18\x06 \x01(\x02:\x01\x31\x12\x1c\n\x11\x62\x61se_anchor_width\x18\x07 \x01(\x02:\x01\x31\x12\x15\n\rheight_stride\x18\x08 \x03(\x05\x12\x14\n\x0cwidth_stride\x18\t \x03(\x05\x12\x15\n\rheight_offset\x18\n \x03(\x05\x12\x14\n\x0cwidth_offset\x18\x0b \x03(\x05') -) - - - - -_SSDANCHORGENERATOR = _descriptor.Descriptor( - name='SsdAnchorGenerator', - full_name='object_detection.protos.SsdAnchorGenerator', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='num_layers', full_name='object_detection.protos.SsdAnchorGenerator.num_layers', index=0, - number=1, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=6, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, 
- options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='min_scale', full_name='object_detection.protos.SsdAnchorGenerator.min_scale', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.2), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='max_scale', full_name='object_detection.protos.SsdAnchorGenerator.max_scale', index=2, - number=3, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.95), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='scales', full_name='object_detection.protos.SsdAnchorGenerator.scales', index=3, - number=12, type=2, cpp_type=6, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='aspect_ratios', full_name='object_detection.protos.SsdAnchorGenerator.aspect_ratios', index=4, - number=4, type=2, cpp_type=6, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='interpolated_scale_aspect_ratio', full_name='object_detection.protos.SsdAnchorGenerator.interpolated_scale_aspect_ratio', index=5, - number=13, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='reduce_boxes_in_lowest_layer', 
full_name='object_detection.protos.SsdAnchorGenerator.reduce_boxes_in_lowest_layer', index=6, - number=5, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=True, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='base_anchor_height', full_name='object_detection.protos.SsdAnchorGenerator.base_anchor_height', index=7, - number=6, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='base_anchor_width', full_name='object_detection.protos.SsdAnchorGenerator.base_anchor_width', index=8, - number=7, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='height_stride', full_name='object_detection.protos.SsdAnchorGenerator.height_stride', index=9, - number=8, type=5, cpp_type=1, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='width_stride', full_name='object_detection.protos.SsdAnchorGenerator.width_stride', index=10, - number=9, type=5, cpp_type=1, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='height_offset', full_name='object_detection.protos.SsdAnchorGenerator.height_offset', index=11, - number=10, type=5, cpp_type=1, label=3, - has_default_value=False, default_value=[], - 
message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='width_offset', full_name='object_detection.protos.SsdAnchorGenerator.width_offset', index=12, - number=11, type=5, cpp_type=1, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=80, - serialized_end=450, -) - -DESCRIPTOR.message_types_by_name['SsdAnchorGenerator'] = _SSDANCHORGENERATOR -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -SsdAnchorGenerator = _reflection.GeneratedProtocolMessageType('SsdAnchorGenerator', (_message.Message,), dict( - DESCRIPTOR = _SSDANCHORGENERATOR, - __module__ = 'object_detection.protos.ssd_anchor_generator_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.SsdAnchorGenerator) - )) -_sym_db.RegisterMessage(SsdAnchorGenerator) - - -# @@protoc_insertion_point(module_scope) diff --git a/object_detection/protos/ssd_pb2.py b/object_detection/protos/ssd_pb2.py deleted file mode 100644 index bc4ec4a1..00000000 --- a/object_detection/protos/ssd_pb2.py +++ /dev/null @@ -1,233 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: object_detection/protos/ssd.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from object_detection.protos import anchor_generator_pb2 as object__detection_dot_protos_dot_anchor__generator__pb2 -from object_detection.protos import box_coder_pb2 as object__detection_dot_protos_dot_box__coder__pb2 -from object_detection.protos import box_predictor_pb2 as object__detection_dot_protos_dot_box__predictor__pb2 -from object_detection.protos import hyperparams_pb2 as object__detection_dot_protos_dot_hyperparams__pb2 -from object_detection.protos import image_resizer_pb2 as object__detection_dot_protos_dot_image__resizer__pb2 -from object_detection.protos import matcher_pb2 as object__detection_dot_protos_dot_matcher__pb2 -from object_detection.protos import losses_pb2 as object__detection_dot_protos_dot_losses__pb2 -from object_detection.protos import post_processing_pb2 as object__detection_dot_protos_dot_post__processing__pb2 -from object_detection.protos import region_similarity_calculator_pb2 as object__detection_dot_protos_dot_region__similarity__calculator__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='object_detection/protos/ssd.proto', - package='object_detection.protos', - syntax='proto2', - 
serialized_pb=_b('\n!object_detection/protos/ssd.proto\x12\x17object_detection.protos\x1a.object_detection/protos/anchor_generator.proto\x1a\'object_detection/protos/box_coder.proto\x1a+object_detection/protos/box_predictor.proto\x1a)object_detection/protos/hyperparams.proto\x1a+object_detection/protos/image_resizer.proto\x1a%object_detection/protos/matcher.proto\x1a$object_detection/protos/losses.proto\x1a-object_detection/protos/post_processing.proto\x1a:object_detection/protos/region_similarity_calculator.proto\"\xfc\x04\n\x03Ssd\x12\x13\n\x0bnum_classes\x18\x01 \x01(\x05\x12<\n\rimage_resizer\x18\x02 \x01(\x0b\x32%.object_detection.protos.ImageResizer\x12G\n\x11\x66\x65\x61ture_extractor\x18\x03 \x01(\x0b\x32,.object_detection.protos.SsdFeatureExtractor\x12\x34\n\tbox_coder\x18\x04 \x01(\x0b\x32!.object_detection.protos.BoxCoder\x12\x31\n\x07matcher\x18\x05 \x01(\x0b\x32 .object_detection.protos.Matcher\x12R\n\x15similarity_calculator\x18\x06 \x01(\x0b\x32\x33.object_detection.protos.RegionSimilarityCalculator\x12<\n\rbox_predictor\x18\x07 \x01(\x0b\x32%.object_detection.protos.BoxPredictor\x12\x42\n\x10\x61nchor_generator\x18\x08 \x01(\x0b\x32(.object_detection.protos.AnchorGenerator\x12@\n\x0fpost_processing\x18\t \x01(\x0b\x32\'.object_detection.protos.PostProcessing\x12+\n\x1dnormalize_loss_by_num_matches\x18\n \x01(\x08:\x04true\x12+\n\x04loss\x18\x0b \x01(\x0b\x32\x1d.object_detection.protos.Loss\"\xd7\x01\n\x13SsdFeatureExtractor\x12\x0c\n\x04type\x18\x01 \x01(\t\x12\x1b\n\x10\x64\x65pth_multiplier\x18\x02 \x01(\x02:\x01\x31\x12\x15\n\tmin_depth\x18\x03 \x01(\x05:\x02\x31\x36\x12>\n\x10\x63onv_hyperparams\x18\x04 \x01(\x0b\x32$.object_detection.protos.Hyperparams\x12\x1a\n\x0fpad_to_multiple\x18\x05 \x01(\x05:\x01\x31\x12\"\n\x14\x62\x61tch_norm_trainable\x18\x06 \x01(\x08:\x04true') - , - 
dependencies=[object__detection_dot_protos_dot_anchor__generator__pb2.DESCRIPTOR,object__detection_dot_protos_dot_box__coder__pb2.DESCRIPTOR,object__detection_dot_protos_dot_box__predictor__pb2.DESCRIPTOR,object__detection_dot_protos_dot_hyperparams__pb2.DESCRIPTOR,object__detection_dot_protos_dot_image__resizer__pb2.DESCRIPTOR,object__detection_dot_protos_dot_matcher__pb2.DESCRIPTOR,object__detection_dot_protos_dot_losses__pb2.DESCRIPTOR,object__detection_dot_protos_dot_post__processing__pb2.DESCRIPTOR,object__detection_dot_protos_dot_region__similarity__calculator__pb2.DESCRIPTOR,]) - - - - -_SSD = _descriptor.Descriptor( - name='Ssd', - full_name='object_detection.protos.Ssd', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='num_classes', full_name='object_detection.protos.Ssd.num_classes', index=0, - number=1, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='image_resizer', full_name='object_detection.protos.Ssd.image_resizer', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='feature_extractor', full_name='object_detection.protos.Ssd.feature_extractor', index=2, - number=3, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='box_coder', full_name='object_detection.protos.Ssd.box_coder', index=3, - number=4, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - 
message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='matcher', full_name='object_detection.protos.Ssd.matcher', index=4, - number=5, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='similarity_calculator', full_name='object_detection.protos.Ssd.similarity_calculator', index=5, - number=6, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='box_predictor', full_name='object_detection.protos.Ssd.box_predictor', index=6, - number=7, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='anchor_generator', full_name='object_detection.protos.Ssd.anchor_generator', index=7, - number=8, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='post_processing', full_name='object_detection.protos.Ssd.post_processing', index=8, - number=9, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='normalize_loss_by_num_matches', 
full_name='object_detection.protos.Ssd.normalize_loss_by_num_matches', index=9, - number=10, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=True, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='loss', full_name='object_detection.protos.Ssd.loss', index=10, - number=11, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=469, - serialized_end=1105, -) - - -_SSDFEATUREEXTRACTOR = _descriptor.Descriptor( - name='SsdFeatureExtractor', - full_name='object_detection.protos.SsdFeatureExtractor', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='type', full_name='object_detection.protos.SsdFeatureExtractor.type', index=0, - number=1, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='depth_multiplier', full_name='object_detection.protos.SsdFeatureExtractor.depth_multiplier', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='min_depth', full_name='object_detection.protos.SsdFeatureExtractor.min_depth', index=2, - number=3, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=16, 
- message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='conv_hyperparams', full_name='object_detection.protos.SsdFeatureExtractor.conv_hyperparams', index=3, - number=4, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='pad_to_multiple', full_name='object_detection.protos.SsdFeatureExtractor.pad_to_multiple', index=4, - number=5, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=1, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='batch_norm_trainable', full_name='object_detection.protos.SsdFeatureExtractor.batch_norm_trainable', index=5, - number=6, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=True, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1108, - serialized_end=1323, -) - -_SSD.fields_by_name['image_resizer'].message_type = object__detection_dot_protos_dot_image__resizer__pb2._IMAGERESIZER -_SSD.fields_by_name['feature_extractor'].message_type = _SSDFEATUREEXTRACTOR -_SSD.fields_by_name['box_coder'].message_type = object__detection_dot_protos_dot_box__coder__pb2._BOXCODER -_SSD.fields_by_name['matcher'].message_type = object__detection_dot_protos_dot_matcher__pb2._MATCHER -_SSD.fields_by_name['similarity_calculator'].message_type = 
object__detection_dot_protos_dot_region__similarity__calculator__pb2._REGIONSIMILARITYCALCULATOR -_SSD.fields_by_name['box_predictor'].message_type = object__detection_dot_protos_dot_box__predictor__pb2._BOXPREDICTOR -_SSD.fields_by_name['anchor_generator'].message_type = object__detection_dot_protos_dot_anchor__generator__pb2._ANCHORGENERATOR -_SSD.fields_by_name['post_processing'].message_type = object__detection_dot_protos_dot_post__processing__pb2._POSTPROCESSING -_SSD.fields_by_name['loss'].message_type = object__detection_dot_protos_dot_losses__pb2._LOSS -_SSDFEATUREEXTRACTOR.fields_by_name['conv_hyperparams'].message_type = object__detection_dot_protos_dot_hyperparams__pb2._HYPERPARAMS -DESCRIPTOR.message_types_by_name['Ssd'] = _SSD -DESCRIPTOR.message_types_by_name['SsdFeatureExtractor'] = _SSDFEATUREEXTRACTOR -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -Ssd = _reflection.GeneratedProtocolMessageType('Ssd', (_message.Message,), dict( - DESCRIPTOR = _SSD, - __module__ = 'object_detection.protos.ssd_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.Ssd) - )) -_sym_db.RegisterMessage(Ssd) - -SsdFeatureExtractor = _reflection.GeneratedProtocolMessageType('SsdFeatureExtractor', (_message.Message,), dict( - DESCRIPTOR = _SSDFEATUREEXTRACTOR, - __module__ = 'object_detection.protos.ssd_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.SsdFeatureExtractor) - )) -_sym_db.RegisterMessage(SsdFeatureExtractor) - - -# @@protoc_insertion_point(module_scope) diff --git a/object_detection/protos/string_int_label_map.proto b/object_detection/protos/string_int_label_map.proto deleted file mode 100644 index 0894183b..00000000 --- a/object_detection/protos/string_int_label_map.proto +++ /dev/null @@ -1,24 +0,0 @@ -// Message to store the mapping from class label strings to class id. Datasets -// use string labels to represent classes while the object detection framework -// works with class ids. 
This message maps them so they can be converted back -// and forth as needed. -syntax = "proto2"; - -package object_detection.protos; - -message StringIntLabelMapItem { - // String name. The most common practice is to set this to a MID or synsets - // id. - optional string name = 1; - - // Integer id that maps to the string name above. Label ids should start from - // 1. - optional int32 id = 2; - - // Human readable string label. - optional string display_name = 3; -}; - -message StringIntLabelMap { - repeated StringIntLabelMapItem item = 1; -}; diff --git a/object_detection/protos/string_int_label_map_pb2.py b/object_detection/protos/string_int_label_map_pb2.py deleted file mode 100644 index 381d5526..00000000 --- a/object_detection/protos/string_int_label_map_pb2.py +++ /dev/null @@ -1,123 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: object_detection/protos/string_int_label_map.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='object_detection/protos/string_int_label_map.proto', - package='object_detection.protos', - syntax='proto2', - serialized_pb=_b('\n2object_detection/protos/string_int_label_map.proto\x12\x17object_detection.protos\"G\n\x15StringIntLabelMapItem\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\n\n\x02id\x18\x02 \x01(\x05\x12\x14\n\x0c\x64isplay_name\x18\x03 \x01(\t\"Q\n\x11StringIntLabelMap\x12<\n\x04item\x18\x01 \x03(\x0b\x32..object_detection.protos.StringIntLabelMapItem') -) - - - - -_STRINGINTLABELMAPITEM = _descriptor.Descriptor( - name='StringIntLabelMapItem', - 
full_name='object_detection.protos.StringIntLabelMapItem', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='name', full_name='object_detection.protos.StringIntLabelMapItem.name', index=0, - number=1, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='id', full_name='object_detection.protos.StringIntLabelMapItem.id', index=1, - number=2, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='display_name', full_name='object_detection.protos.StringIntLabelMapItem.display_name', index=2, - number=3, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=79, - serialized_end=150, -) - - -_STRINGINTLABELMAP = _descriptor.Descriptor( - name='StringIntLabelMap', - full_name='object_detection.protos.StringIntLabelMap', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='item', full_name='object_detection.protos.StringIntLabelMap.item', index=0, - number=1, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - 
nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=152, - serialized_end=233, -) - -_STRINGINTLABELMAP.fields_by_name['item'].message_type = _STRINGINTLABELMAPITEM -DESCRIPTOR.message_types_by_name['StringIntLabelMapItem'] = _STRINGINTLABELMAPITEM -DESCRIPTOR.message_types_by_name['StringIntLabelMap'] = _STRINGINTLABELMAP -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -StringIntLabelMapItem = _reflection.GeneratedProtocolMessageType('StringIntLabelMapItem', (_message.Message,), dict( - DESCRIPTOR = _STRINGINTLABELMAPITEM, - __module__ = 'object_detection.protos.string_int_label_map_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.StringIntLabelMapItem) - )) -_sym_db.RegisterMessage(StringIntLabelMapItem) - -StringIntLabelMap = _reflection.GeneratedProtocolMessageType('StringIntLabelMap', (_message.Message,), dict( - DESCRIPTOR = _STRINGINTLABELMAP, - __module__ = 'object_detection.protos.string_int_label_map_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.StringIntLabelMap) - )) -_sym_db.RegisterMessage(StringIntLabelMap) - - -# @@protoc_insertion_point(module_scope) diff --git a/object_detection/protos/train.proto b/object_detection/protos/train.proto deleted file mode 100644 index ae905c78..00000000 --- a/object_detection/protos/train.proto +++ /dev/null @@ -1,69 +0,0 @@ -syntax = "proto2"; - -package object_detection.protos; - -import "object_detection/protos/optimizer.proto"; -import "object_detection/protos/preprocessor.proto"; - -// Message for configuring DetectionModel training jobs (train.py). -message TrainConfig { - // Input queue batch size. - optional uint32 batch_size = 1 [default=32]; - - // Data augmentation options. - repeated PreprocessingStep data_augmentation_options = 2; - - // Whether to synchronize replicas during training. 
- optional bool sync_replicas = 3 [default=false]; - - // How frequently to keep checkpoints. - optional uint32 keep_checkpoint_every_n_hours = 4 [default=1000]; - - // Optimizer used to train the DetectionModel. - optional Optimizer optimizer = 5; - - // If greater than 0, clips gradients by this value. - optional float gradient_clipping_by_norm = 6 [default=0.0]; - - // Checkpoint to restore variables from. Typically used to load feature - // extractor variables trained outside of object detection. - optional string fine_tune_checkpoint = 7 [default=""]; - - // Specifies if the finetune checkpoint is from an object detection model. - // If from an object detection model, the model being trained should have - // the same parameters with the exception of the num_classes parameter. - // If false, it assumes the checkpoint was a object classification model. - optional bool from_detection_checkpoint = 8 [default=false]; - - // Number of steps to train the DetectionModel for. If 0, will train the model - // indefinitely. - optional uint32 num_steps = 9 [default=0]; - - // Number of training steps between replica startup. - // This flag must be set to 0 if sync_replicas is set to true. - optional float startup_delay_steps = 10 [default=15]; - - // If greater than 0, multiplies the gradient of bias variables by this - // amount. - optional float bias_grad_multiplier = 11 [default=0]; - - // Variables that should not be updated during training. - repeated string freeze_variables = 12; - - // Number of replicas to aggregate before making parameter updates. - optional int32 replicas_to_aggregate = 13 [default=1]; - - // Maximum number of elements to store within a queue. - optional int32 batch_queue_capacity = 14 [default=150]; - - // Number of threads to use for batching. - optional int32 num_batch_queue_threads = 15 [default=8]; - - // Maximum capacity of the queue used to prefetch assembled batches. 
- optional int32 prefetch_queue_capacity = 16 [default=5]; - - // If true, boxes with the same coordinates will be merged together. - // This is useful when each box can have multiple labels. - // Note that only Sigmoid classification losses should be used. - optional bool merge_multiple_label_boxes = 17 [default=false]; -} diff --git a/object_detection/protos/train_pb2.py b/object_detection/protos/train_pb2.py deleted file mode 100644 index d1309252..00000000 --- a/object_detection/protos/train_pb2.py +++ /dev/null @@ -1,186 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: object_detection/protos/train.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from object_detection.protos import optimizer_pb2 as object__detection_dot_protos_dot_optimizer__pb2 -from object_detection.protos import preprocessor_pb2 as object__detection_dot_protos_dot_preprocessor__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='object_detection/protos/train.proto', - package='object_detection.protos', - syntax='proto2', - serialized_pb=_b('\n#object_detection/protos/train.proto\x12\x17object_detection.protos\x1a\'object_detection/protos/optimizer.proto\x1a*object_detection/protos/preprocessor.proto\"\x90\x05\n\x0bTrainConfig\x12\x16\n\nbatch_size\x18\x01 \x01(\r:\x02\x33\x32\x12M\n\x19\x64\x61ta_augmentation_options\x18\x02 \x03(\x0b\x32*.object_detection.protos.PreprocessingStep\x12\x1c\n\rsync_replicas\x18\x03 \x01(\x08:\x05\x66\x61lse\x12+\n\x1dkeep_checkpoint_every_n_hours\x18\x04 \x01(\r:\x04\x31\x30\x30\x30\x12\x35\n\toptimizer\x18\x05 
\x01(\x0b\x32\".object_detection.protos.Optimizer\x12$\n\x19gradient_clipping_by_norm\x18\x06 \x01(\x02:\x01\x30\x12\x1e\n\x14\x66ine_tune_checkpoint\x18\x07 \x01(\t:\x00\x12(\n\x19\x66rom_detection_checkpoint\x18\x08 \x01(\x08:\x05\x66\x61lse\x12\x14\n\tnum_steps\x18\t \x01(\r:\x01\x30\x12\x1f\n\x13startup_delay_steps\x18\n \x01(\x02:\x02\x31\x35\x12\x1f\n\x14\x62ias_grad_multiplier\x18\x0b \x01(\x02:\x01\x30\x12\x18\n\x10\x66reeze_variables\x18\x0c \x03(\t\x12 \n\x15replicas_to_aggregate\x18\r \x01(\x05:\x01\x31\x12!\n\x14\x62\x61tch_queue_capacity\x18\x0e \x01(\x05:\x03\x31\x35\x30\x12\"\n\x17num_batch_queue_threads\x18\x0f \x01(\x05:\x01\x38\x12\"\n\x17prefetch_queue_capacity\x18\x10 \x01(\x05:\x01\x35\x12)\n\x1amerge_multiple_label_boxes\x18\x11 \x01(\x08:\x05\x66\x61lse') - , - dependencies=[object__detection_dot_protos_dot_optimizer__pb2.DESCRIPTOR,object__detection_dot_protos_dot_preprocessor__pb2.DESCRIPTOR,]) - - - - -_TRAINCONFIG = _descriptor.Descriptor( - name='TrainConfig', - full_name='object_detection.protos.TrainConfig', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='batch_size', full_name='object_detection.protos.TrainConfig.batch_size', index=0, - number=1, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=32, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='data_augmentation_options', full_name='object_detection.protos.TrainConfig.data_augmentation_options', index=1, - number=2, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='sync_replicas', full_name='object_detection.protos.TrainConfig.sync_replicas', index=2, - number=3, type=8, 
cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='keep_checkpoint_every_n_hours', full_name='object_detection.protos.TrainConfig.keep_checkpoint_every_n_hours', index=3, - number=4, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=1000, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='optimizer', full_name='object_detection.protos.TrainConfig.optimizer', index=4, - number=5, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='gradient_clipping_by_norm', full_name='object_detection.protos.TrainConfig.gradient_clipping_by_norm', index=5, - number=6, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='fine_tune_checkpoint', full_name='object_detection.protos.TrainConfig.fine_tune_checkpoint', index=6, - number=7, type=9, cpp_type=9, label=1, - has_default_value=True, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='from_detection_checkpoint', full_name='object_detection.protos.TrainConfig.from_detection_checkpoint', index=7, - number=8, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - 
is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='num_steps', full_name='object_detection.protos.TrainConfig.num_steps', index=8, - number=9, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='startup_delay_steps', full_name='object_detection.protos.TrainConfig.startup_delay_steps', index=9, - number=10, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(15), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='bias_grad_multiplier', full_name='object_detection.protos.TrainConfig.bias_grad_multiplier', index=10, - number=11, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='freeze_variables', full_name='object_detection.protos.TrainConfig.freeze_variables', index=11, - number=12, type=9, cpp_type=9, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='replicas_to_aggregate', full_name='object_detection.protos.TrainConfig.replicas_to_aggregate', index=12, - number=13, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=1, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='batch_queue_capacity', 
full_name='object_detection.protos.TrainConfig.batch_queue_capacity', index=13, - number=14, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=150, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='num_batch_queue_threads', full_name='object_detection.protos.TrainConfig.num_batch_queue_threads', index=14, - number=15, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=8, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='prefetch_queue_capacity', full_name='object_detection.protos.TrainConfig.prefetch_queue_capacity', index=15, - number=16, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=5, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='merge_multiple_label_boxes', full_name='object_detection.protos.TrainConfig.merge_multiple_label_boxes', index=16, - number=17, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=150, - serialized_end=806, -) - -_TRAINCONFIG.fields_by_name['data_augmentation_options'].message_type = object__detection_dot_protos_dot_preprocessor__pb2._PREPROCESSINGSTEP -_TRAINCONFIG.fields_by_name['optimizer'].message_type = object__detection_dot_protos_dot_optimizer__pb2._OPTIMIZER -DESCRIPTOR.message_types_by_name['TrainConfig'] = _TRAINCONFIG 
-_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -TrainConfig = _reflection.GeneratedProtocolMessageType('TrainConfig', (_message.Message,), dict( - DESCRIPTOR = _TRAINCONFIG, - __module__ = 'object_detection.protos.train_pb2' - # @@protoc_insertion_point(class_scope:object_detection.protos.TrainConfig) - )) -_sym_db.RegisterMessage(TrainConfig) - - -# @@protoc_insertion_point(module_scope) diff --git a/object_detection/samples/cloud/cloud.yml b/object_detection/samples/cloud/cloud.yml deleted file mode 100644 index 495876a1..00000000 --- a/object_detection/samples/cloud/cloud.yml +++ /dev/null @@ -1,11 +0,0 @@ -trainingInput: - runtimeVersion: "1.0" - scaleTier: CUSTOM - masterType: standard_gpu - workerCount: 5 - workerType: standard_gpu - parameterServerCount: 3 - parameterServerType: standard - - - diff --git a/object_detection/samples/configs/faster_rcnn_inception_resnet_v2_atrous_coco.config b/object_detection/samples/configs/faster_rcnn_inception_resnet_v2_atrous_coco.config deleted file mode 100644 index 8cee5b1a..00000000 --- a/object_detection/samples/configs/faster_rcnn_inception_resnet_v2_atrous_coco.config +++ /dev/null @@ -1,147 +0,0 @@ -# Faster R-CNN with Inception Resnet v2, Atrous version; -# Configured for MSCOCO Dataset. -# Users should configure the fine_tune_checkpoint field in the train config as -# well as the label_map_path and input_path fields in the train_input_reader and -# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that -# should be configured. 
- -model { - faster_rcnn { - num_classes: 90 - image_resizer { - keep_aspect_ratio_resizer { - min_dimension: 600 - max_dimension: 1024 - } - } - feature_extractor { - type: 'faster_rcnn_inception_resnet_v2' - first_stage_features_stride: 8 - } - first_stage_anchor_generator { - grid_anchor_generator { - scales: [0.25, 0.5, 1.0, 2.0] - aspect_ratios: [0.5, 1.0, 2.0] - height_stride: 8 - width_stride: 8 - } - } - first_stage_atrous_rate: 2 - first_stage_box_predictor_conv_hyperparams { - op: CONV - regularizer { - l2_regularizer { - weight: 0.0 - } - } - initializer { - truncated_normal_initializer { - stddev: 0.01 - } - } - } - first_stage_nms_score_threshold: 0.0 - first_stage_nms_iou_threshold: 0.7 - first_stage_max_proposals: 300 - first_stage_localization_loss_weight: 2.0 - first_stage_objectness_loss_weight: 1.0 - initial_crop_size: 17 - maxpool_kernel_size: 1 - maxpool_stride: 1 - second_stage_box_predictor { - mask_rcnn_box_predictor { - use_dropout: false - dropout_keep_probability: 1.0 - fc_hyperparams { - op: FC - regularizer { - l2_regularizer { - weight: 0.0 - } - } - initializer { - variance_scaling_initializer { - factor: 1.0 - uniform: true - mode: FAN_AVG - } - } - } - } - } - second_stage_post_processing { - batch_non_max_suppression { - score_threshold: 0.0 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 100 - } - score_converter: SOFTMAX - } - second_stage_localization_loss_weight: 2.0 - second_stage_classification_loss_weight: 1.0 - } -} - -train_config: { - batch_size: 1 - optimizer { - momentum_optimizer: { - learning_rate: { - manual_step_learning_rate { - initial_learning_rate: 0.0003 - schedule { - step: 0 - learning_rate: .0003 - } - schedule { - step: 900000 - learning_rate: .00003 - } - schedule { - step: 1200000 - learning_rate: .000003 - } - } - } - momentum_optimizer_value: 0.9 - } - use_moving_average: false - } - gradient_clipping_by_norm: 10.0 - fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt" - 
from_detection_checkpoint: true - # Note: The below line limits the training process to 200K steps, which we - # empirically found to be sufficient enough to train the pets dataset. This - # effectively bypasses the learning rate schedule (the learning rate will - # never decay). Remove the below line to train indefinitely. - num_steps: 200000 - data_augmentation_options { - random_horizontal_flip { - } - } -} - -train_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt" -} - -eval_config: { - num_examples: 8000 - # Note: The below line limits the evaluation process to 10 evaluations. - # Remove the below line to evaluate indefinitely. - max_evals: 10 -} - -eval_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt" - shuffle: false - num_readers: 1 - num_epochs: 1 -} diff --git a/object_detection/samples/configs/faster_rcnn_inception_resnet_v2_atrous_pets.config b/object_detection/samples/configs/faster_rcnn_inception_resnet_v2_atrous_pets.config deleted file mode 100644 index b9211464..00000000 --- a/object_detection/samples/configs/faster_rcnn_inception_resnet_v2_atrous_pets.config +++ /dev/null @@ -1,146 +0,0 @@ -# Faster R-CNN with Inception Resnet v2, Atrous version; -# Configured for Oxford-IIIT Pets Dataset. -# Users should configure the fine_tune_checkpoint field in the train config as -# well as the label_map_path and input_path fields in the train_input_reader and -# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that -# should be configured. 
- -model { - faster_rcnn { - num_classes: 37 - image_resizer { - keep_aspect_ratio_resizer { - min_dimension: 600 - max_dimension: 1024 - } - } - feature_extractor { - type: 'faster_rcnn_inception_resnet_v2' - first_stage_features_stride: 8 - } - first_stage_anchor_generator { - grid_anchor_generator { - scales: [0.25, 0.5, 1.0, 2.0] - aspect_ratios: [0.5, 1.0, 2.0] - height_stride: 8 - width_stride: 8 - } - } - first_stage_atrous_rate: 2 - first_stage_box_predictor_conv_hyperparams { - op: CONV - regularizer { - l2_regularizer { - weight: 0.0 - } - } - initializer { - truncated_normal_initializer { - stddev: 0.01 - } - } - } - first_stage_nms_score_threshold: 0.0 - first_stage_nms_iou_threshold: 0.7 - first_stage_max_proposals: 300 - first_stage_localization_loss_weight: 2.0 - first_stage_objectness_loss_weight: 1.0 - initial_crop_size: 17 - maxpool_kernel_size: 1 - maxpool_stride: 1 - second_stage_box_predictor { - mask_rcnn_box_predictor { - use_dropout: false - dropout_keep_probability: 1.0 - fc_hyperparams { - op: FC - regularizer { - l2_regularizer { - weight: 0.0 - } - } - initializer { - variance_scaling_initializer { - factor: 1.0 - uniform: true - mode: FAN_AVG - } - } - } - } - } - second_stage_post_processing { - batch_non_max_suppression { - score_threshold: 0.0 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 100 - } - score_converter: SOFTMAX - } - second_stage_localization_loss_weight: 2.0 - second_stage_classification_loss_weight: 1.0 - } -} - -train_config: { - batch_size: 1 - optimizer { - momentum_optimizer: { - learning_rate: { - manual_step_learning_rate { - initial_learning_rate: 0.0003 - schedule { - step: 0 - learning_rate: .0003 - } - schedule { - step: 900000 - learning_rate: .00003 - } - schedule { - step: 1200000 - learning_rate: .000003 - } - } - } - momentum_optimizer_value: 0.9 - } - use_moving_average: false - } - gradient_clipping_by_norm: 10.0 - fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt" - 
from_detection_checkpoint: true - # Note: The below line limits the training process to 200K steps, which we - # empirically found to be sufficient enough to train the pets dataset. This - # effectively bypasses the learning rate schedule (the learning rate will - # never decay). Remove the below line to train indefinitely. - num_steps: 200000 - data_augmentation_options { - random_horizontal_flip { - } - } -} - -train_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/pet_train.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt" -} - -eval_config: { - num_examples: 2000 - # Note: The below line limits the evaluation process to 10 evaluations. - # Remove the below line to evaluate indefinitely. - max_evals: 10 -} - -eval_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/pet_val.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt" - shuffle: false - num_readers: 1 -} diff --git a/object_detection/samples/configs/faster_rcnn_inception_v2_coco.config b/object_detection/samples/configs/faster_rcnn_inception_v2_coco.config deleted file mode 100644 index 8950aa18..00000000 --- a/object_detection/samples/configs/faster_rcnn_inception_v2_coco.config +++ /dev/null @@ -1,145 +0,0 @@ -# Faster R-CNN with Inception v2, configuration for MSCOCO Dataset. -# Users should configure the fine_tune_checkpoint field in the train config as -# well as the label_map_path and input_path fields in the train_input_reader and -# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that -# should be configured. 
- - -model { - faster_rcnn { - num_classes: 90 - image_resizer { - keep_aspect_ratio_resizer { - min_dimension: 600 - max_dimension: 1024 - } - } - feature_extractor { - type: 'faster_rcnn_inception_v2' - first_stage_features_stride: 16 - } - first_stage_anchor_generator { - grid_anchor_generator { - scales: [0.25, 0.5, 1.0, 2.0] - aspect_ratios: [0.5, 1.0, 2.0] - height_stride: 16 - width_stride: 16 - } - } - first_stage_box_predictor_conv_hyperparams { - op: CONV - regularizer { - l2_regularizer { - weight: 0.0 - } - } - initializer { - truncated_normal_initializer { - stddev: 0.01 - } - } - } - first_stage_nms_score_threshold: 0.0 - first_stage_nms_iou_threshold: 0.7 - first_stage_max_proposals: 300 - first_stage_localization_loss_weight: 2.0 - first_stage_objectness_loss_weight: 1.0 - initial_crop_size: 14 - maxpool_kernel_size: 2 - maxpool_stride: 2 - second_stage_box_predictor { - mask_rcnn_box_predictor { - use_dropout: false - dropout_keep_probability: 1.0 - fc_hyperparams { - op: FC - regularizer { - l2_regularizer { - weight: 0.0 - } - } - initializer { - variance_scaling_initializer { - factor: 1.0 - uniform: true - mode: FAN_AVG - } - } - } - } - } - second_stage_post_processing { - batch_non_max_suppression { - score_threshold: 0.0 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 300 - } - score_converter: SOFTMAX - } - second_stage_localization_loss_weight: 2.0 - second_stage_classification_loss_weight: 1.0 - } -} - -train_config: { - batch_size: 1 - optimizer { - momentum_optimizer: { - learning_rate: { - manual_step_learning_rate { - initial_learning_rate: 0.0002 - schedule { - step: 0 - learning_rate: .0002 - } - schedule { - step: 900000 - learning_rate: .00002 - } - schedule { - step: 1200000 - learning_rate: .000002 - } - } - } - momentum_optimizer_value: 0.9 - } - use_moving_average: false - } - gradient_clipping_by_norm: 10.0 - fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt" - from_detection_checkpoint: 
true - # Note: The below line limits the training process to 200K steps, which we - # empirically found to be sufficient enough to train the COCO dataset. This - # effectively bypasses the learning rate schedule (the learning rate will - # never decay). Remove the below line to train indefinitely. - num_steps: 200000 - data_augmentation_options { - random_horizontal_flip { - } - } -} - -train_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt" -} - -eval_config: { - num_examples: 8000 - # Note: The below line limits the evaluation process to 10 evaluations. - # Remove the below line to evaluate indefinitely. - max_evals: 10 -} - -eval_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt" - shuffle: false - num_readers: 1 -} diff --git a/object_detection/samples/configs/faster_rcnn_inception_v2_pets.config b/object_detection/samples/configs/faster_rcnn_inception_v2_pets.config deleted file mode 100644 index 0c3e8bb2..00000000 --- a/object_detection/samples/configs/faster_rcnn_inception_v2_pets.config +++ /dev/null @@ -1,145 +0,0 @@ -# Faster R-CNN with Inception v2, configured for Oxford-IIIT Pets Dataset. -# Users should configure the fine_tune_checkpoint field in the train config as -# well as the label_map_path and input_path fields in the train_input_reader and -# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that -# should be configured. 
- -model { - faster_rcnn { - num_classes: 37 - image_resizer { - keep_aspect_ratio_resizer { - min_dimension: 600 - max_dimension: 1024 - } - } - feature_extractor { - type: 'faster_rcnn_inception_v2' - first_stage_features_stride: 16 - } - first_stage_anchor_generator { - grid_anchor_generator { - scales: [0.25, 0.5, 1.0, 2.0] - aspect_ratios: [0.5, 1.0, 2.0] - height_stride: 16 - width_stride: 16 - } - } - first_stage_box_predictor_conv_hyperparams { - op: CONV - regularizer { - l2_regularizer { - weight: 0.0 - } - } - initializer { - truncated_normal_initializer { - stddev: 0.01 - } - } - } - first_stage_nms_score_threshold: 0.0 - first_stage_nms_iou_threshold: 0.7 - first_stage_max_proposals: 300 - first_stage_localization_loss_weight: 2.0 - first_stage_objectness_loss_weight: 1.0 - initial_crop_size: 14 - maxpool_kernel_size: 2 - maxpool_stride: 2 - second_stage_box_predictor { - mask_rcnn_box_predictor { - use_dropout: false - dropout_keep_probability: 1.0 - fc_hyperparams { - op: FC - regularizer { - l2_regularizer { - weight: 0.0 - } - } - initializer { - variance_scaling_initializer { - factor: 1.0 - uniform: true - mode: FAN_AVG - } - } - } - } - } - second_stage_post_processing { - batch_non_max_suppression { - score_threshold: 0.0 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 300 - } - score_converter: SOFTMAX - } - second_stage_localization_loss_weight: 2.0 - second_stage_classification_loss_weight: 1.0 - } -} - -train_config: { - batch_size: 1 - optimizer { - momentum_optimizer: { - learning_rate: { - manual_step_learning_rate { - initial_learning_rate: 0.0002 - schedule { - step: 0 - learning_rate: .0002 - } - schedule { - step: 900000 - learning_rate: .00002 - } - schedule { - step: 1200000 - learning_rate: .000002 - } - } - } - momentum_optimizer_value: 0.9 - } - use_moving_average: false - } - gradient_clipping_by_norm: 10.0 - fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt" - from_detection_checkpoint: true 
- # Note: The below line limits the training process to 200K steps, which we - # empirically found to be sufficient enough to train the pets dataset. This - # effectively bypasses the learning rate schedule (the learning rate will - # never decay). Remove the below line to train indefinitely. - num_steps: 200000 - data_augmentation_options { - random_horizontal_flip { - } - } -} - - -train_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/pet_train.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt" -} - -eval_config: { - num_examples: 2000 - # Note: The below line limits the evaluation process to 10 evaluations. - # Remove the below line to evaluate indefinitely. - max_evals: 10 -} - -eval_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/pet_val.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt" - shuffle: false - num_readers: 1 -} diff --git a/object_detection/samples/configs/faster_rcnn_nas_coco.config b/object_detection/samples/configs/faster_rcnn_nas_coco.config deleted file mode 100644 index a32cb033..00000000 --- a/object_detection/samples/configs/faster_rcnn_nas_coco.config +++ /dev/null @@ -1,148 +0,0 @@ -# Faster R-CNN with NASNet-A featurization -# Configured for MSCOCO Dataset. -# Users should configure the fine_tune_checkpoint field in the train config as -# well as the label_map_path and input_path fields in the train_input_reader and -# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that -# should be configured. -model { - faster_rcnn { - num_classes: 90 - image_resizer { - # TODO: Only fixed_shape_resizer is currently supported for NASNet - # featurization. The reason for this is that nasnet.py only supports - # inputs with fully known shapes. We need to update nasnet.py to handle - # shapes not known at compile time. 
- fixed_shape_resizer { - height: 1200 - width: 1200 - } - } - feature_extractor { - type: 'faster_rcnn_nas' - } - first_stage_anchor_generator { - grid_anchor_generator { - scales: [0.25, 0.5, 1.0, 2.0] - aspect_ratios: [0.5, 1.0, 2.0] - height_stride: 16 - width_stride: 16 - } - } - first_stage_box_predictor_conv_hyperparams { - op: CONV - regularizer { - l2_regularizer { - weight: 0.0 - } - } - initializer { - truncated_normal_initializer { - stddev: 0.01 - } - } - } - first_stage_nms_score_threshold: 0.0 - first_stage_nms_iou_threshold: 0.7 - first_stage_max_proposals: 50 - first_stage_localization_loss_weight: 2.0 - first_stage_objectness_loss_weight: 1.0 - initial_crop_size: 17 - maxpool_kernel_size: 1 - maxpool_stride: 1 - second_stage_box_predictor { - mask_rcnn_box_predictor { - use_dropout: false - dropout_keep_probability: 1.0 - fc_hyperparams { - op: FC - regularizer { - l2_regularizer { - weight: 0.0 - } - } - initializer { - variance_scaling_initializer { - factor: 1.0 - uniform: true - mode: FAN_AVG - } - } - } - } - } - second_stage_post_processing { - batch_non_max_suppression { - score_threshold: 0.0 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 100 - } - score_converter: SOFTMAX - } - second_stage_localization_loss_weight: 2.0 - second_stage_classification_loss_weight: 1.0 - } -} - -train_config: { - batch_size: 1 - optimizer { - momentum_optimizer: { - learning_rate: { - manual_step_learning_rate { - initial_learning_rate: 0.0003 - schedule { - step: 0 - learning_rate: .0003 - } - schedule { - step: 900000 - learning_rate: .00003 - } - schedule { - step: 1200000 - learning_rate: .000003 - } - } - } - momentum_optimizer_value: 0.9 - } - use_moving_average: false - } - gradient_clipping_by_norm: 10.0 - fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt" - from_detection_checkpoint: true - # Note: The below line limits the training process to 200K steps, which we - # empirically found to be sufficient enough to 
train the pets dataset. This - # effectively bypasses the learning rate schedule (the learning rate will - # never decay). Remove the below line to train indefinitely. - num_steps: 200000 - data_augmentation_options { - random_horizontal_flip { - } - } -} - -train_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt" -} - -eval_config: { - metrics_set: "pascal_voc_metrics" - num_examples: 8000 - # Note: The below line limits the evaluation process to 10 evaluations. - # Remove the below line to evaluate indefinitely. - max_evals: 10} - -eval_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt" - shuffle: false - num_readers: 1 - num_epochs: 1 -} diff --git a/object_detection/samples/configs/faster_rcnn_resnet101_coco.config b/object_detection/samples/configs/faster_rcnn_resnet101_coco.config deleted file mode 100644 index ed11bb94..00000000 --- a/object_detection/samples/configs/faster_rcnn_resnet101_coco.config +++ /dev/null @@ -1,145 +0,0 @@ -# Faster R-CNN with Resnet-101 (v1) configuration for MSCOCO Dataset. -# Users should configure the fine_tune_checkpoint field in the train config as -# well as the label_map_path and input_path fields in the train_input_reader and -# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that -# should be configured. 
- -model { - faster_rcnn { - num_classes: 90 - image_resizer { - keep_aspect_ratio_resizer { - min_dimension: 600 - max_dimension: 1024 - } - } - feature_extractor { - type: 'faster_rcnn_resnet101' - first_stage_features_stride: 16 - } - first_stage_anchor_generator { - grid_anchor_generator { - scales: [0.25, 0.5, 1.0, 2.0] - aspect_ratios: [0.5, 1.0, 2.0] - height_stride: 16 - width_stride: 16 - } - } - first_stage_box_predictor_conv_hyperparams { - op: CONV - regularizer { - l2_regularizer { - weight: 0.0 - } - } - initializer { - truncated_normal_initializer { - stddev: 0.01 - } - } - } - first_stage_nms_score_threshold: 0.0 - first_stage_nms_iou_threshold: 0.7 - first_stage_max_proposals: 300 - first_stage_localization_loss_weight: 2.0 - first_stage_objectness_loss_weight: 1.0 - initial_crop_size: 14 - maxpool_kernel_size: 2 - maxpool_stride: 2 - second_stage_box_predictor { - mask_rcnn_box_predictor { - use_dropout: false - dropout_keep_probability: 1.0 - fc_hyperparams { - op: FC - regularizer { - l2_regularizer { - weight: 0.0 - } - } - initializer { - variance_scaling_initializer { - factor: 1.0 - uniform: true - mode: FAN_AVG - } - } - } - } - } - second_stage_post_processing { - batch_non_max_suppression { - score_threshold: 0.0 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 300 - } - score_converter: SOFTMAX - } - second_stage_localization_loss_weight: 2.0 - second_stage_classification_loss_weight: 1.0 - } -} - -train_config: { - batch_size: 1 - optimizer { - momentum_optimizer: { - learning_rate: { - manual_step_learning_rate { - initial_learning_rate: 0.0003 - schedule { - step: 0 - learning_rate: .0003 - } - schedule { - step: 900000 - learning_rate: .00003 - } - schedule { - step: 1200000 - learning_rate: .000003 - } - } - } - momentum_optimizer_value: 0.9 - } - use_moving_average: false - } - gradient_clipping_by_norm: 10.0 - fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt" - from_detection_checkpoint: true - # 
Note: The below line limits the training process to 200K steps, which we - # empirically found to be sufficient enough to train the pets dataset. This - # effectively bypasses the learning rate schedule (the learning rate will - # never decay). Remove the below line to train indefinitely. - num_steps: 200000 - data_augmentation_options { - random_horizontal_flip { - } - } -} - -train_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt" -} - -eval_config: { - num_examples: 8000 - # Note: The below line limits the evaluation process to 10 evaluations. - # Remove the below line to evaluate indefinitely. - max_evals: 10 -} - -eval_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt" - shuffle: false - num_readers: 1 - num_epochs: 1 -} diff --git a/object_detection/samples/configs/faster_rcnn_resnet101_kitti.config b/object_detection/samples/configs/faster_rcnn_resnet101_kitti.config deleted file mode 100644 index 196d047c..00000000 --- a/object_detection/samples/configs/faster_rcnn_resnet101_kitti.config +++ /dev/null @@ -1,143 +0,0 @@ -# Faster R-CNN with Resnet-101 (v1) -# Trained on KITTI dataset (cars and pedestrian), initialized from COCO -# detection checkpoint. -# Users should configure the fine_tune_checkpoint field in the train config as -# well as the label_map_path and input_path fields in the train_input_reader and -# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that -# should be configured. 
- -model { - faster_rcnn { - num_classes: 2 - image_resizer { - keep_aspect_ratio_resizer { - # Raw KITTI images have a resolution of 1242x375, if we wish to resize - # them to have a height of 600 then their width should be - # 1242/(375/600)=1987.2 - min_dimension: 600 - max_dimension: 1987 - } - } - feature_extractor { - type: 'faster_rcnn_resnet101' - first_stage_features_stride: 16 - } - first_stage_anchor_generator { - grid_anchor_generator { - scales: [0.25, 0.5, 1.0, 2.0] - aspect_ratios: [0.5, 1.0, 2.0] - height_stride: 16 - width_stride: 16 - } - } - first_stage_box_predictor_conv_hyperparams { - op: CONV - regularizer { - l2_regularizer { - weight: 0.0 - } - } - initializer { - truncated_normal_initializer { - stddev: 0.01 - } - } - } - first_stage_nms_score_threshold: 0.0 - first_stage_nms_iou_threshold: 0.7 - first_stage_max_proposals: 300 - first_stage_localization_loss_weight: 2.0 - first_stage_objectness_loss_weight: 1.0 - initial_crop_size: 14 - maxpool_kernel_size: 2 - maxpool_stride: 2 - second_stage_box_predictor { - mask_rcnn_box_predictor { - use_dropout: false - dropout_keep_probability: 1.0 - fc_hyperparams { - op: FC - regularizer { - l2_regularizer { - weight: 0.0 - } - } - initializer { - variance_scaling_initializer { - factor: 1.0 - uniform: true - mode: FAN_AVG - } - } - } - } - } - second_stage_post_processing { - batch_non_max_suppression { - score_threshold: 0.0 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 300 - } - score_converter: SOFTMAX - } - second_stage_localization_loss_weight: 2.0 - second_stage_classification_loss_weight: 1.0 - } -} - -train_config: { - batch_size: 1 - optimizer { - momentum_optimizer: { - learning_rate: { - manual_step_learning_rate { - initial_learning_rate: 0.0001 - schedule { - step: 0 - learning_rate: .0001 - } - schedule { - step: 500000 - learning_rate: .00001 - } - schedule { - step: 700000 - learning_rate: .000001 - } - } - } - momentum_optimizer_value: 0.9 - } - 
use_moving_average: false - } - gradient_clipping_by_norm: 10.0 - fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt" - from_detection_checkpoint: true - num_steps: 800000 - data_augmentation_options { - random_horizontal_flip { - } - } -} - -train_input_reader: { - label_map_path: "PATH_TO_BE_CONFIGURED/kitti_label_map.pbtxt" - tf_record_input_reader: { - input_path: "PATH_TO_BE_CONFIGURED/kitti_train.tfrecord" - } -} - -eval_config: { - metrics_set: "coco_metrics" - use_moving_averages: false - num_examples: 500 -} - -eval_input_reader: { - label_map_path: "PATH_TO_BE_CONFIGURED/kitti_label_map.pbtxt" - tf_record_input_reader: { - input_path: "PATH_TO_BE_CONFIGURED/kitti_val.tfrecord" - } -} - diff --git a/object_detection/samples/configs/faster_rcnn_resnet101_pets.config b/object_detection/samples/configs/faster_rcnn_resnet101_pets.config deleted file mode 100644 index 0a61d641..00000000 --- a/object_detection/samples/configs/faster_rcnn_resnet101_pets.config +++ /dev/null @@ -1,144 +0,0 @@ -# Faster R-CNN with Resnet-101 (v1) configured for the Oxford-IIIT Pet Dataset. -# Users should configure the fine_tune_checkpoint field in the train config as -# well as the label_map_path and input_path fields in the train_input_reader and -# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that -# should be configured. 
- -model { - faster_rcnn { - num_classes: 37 - image_resizer { - keep_aspect_ratio_resizer { - min_dimension: 600 - max_dimension: 1024 - } - } - feature_extractor { - type: 'faster_rcnn_resnet101' - first_stage_features_stride: 16 - } - first_stage_anchor_generator { - grid_anchor_generator { - scales: [0.25, 0.5, 1.0, 2.0] - aspect_ratios: [0.5, 1.0, 2.0] - height_stride: 16 - width_stride: 16 - } - } - first_stage_box_predictor_conv_hyperparams { - op: CONV - regularizer { - l2_regularizer { - weight: 0.0 - } - } - initializer { - truncated_normal_initializer { - stddev: 0.01 - } - } - } - first_stage_nms_score_threshold: 0.0 - first_stage_nms_iou_threshold: 0.7 - first_stage_max_proposals: 300 - first_stage_localization_loss_weight: 2.0 - first_stage_objectness_loss_weight: 1.0 - initial_crop_size: 14 - maxpool_kernel_size: 2 - maxpool_stride: 2 - second_stage_box_predictor { - mask_rcnn_box_predictor { - use_dropout: false - dropout_keep_probability: 1.0 - fc_hyperparams { - op: FC - regularizer { - l2_regularizer { - weight: 0.0 - } - } - initializer { - variance_scaling_initializer { - factor: 1.0 - uniform: true - mode: FAN_AVG - } - } - } - } - } - second_stage_post_processing { - batch_non_max_suppression { - score_threshold: 0.0 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 300 - } - score_converter: SOFTMAX - } - second_stage_localization_loss_weight: 2.0 - second_stage_classification_loss_weight: 1.0 - } -} - -train_config: { - batch_size: 1 - optimizer { - momentum_optimizer: { - learning_rate: { - manual_step_learning_rate { - initial_learning_rate: 0.0003 - schedule { - step: 0 - learning_rate: .0003 - } - schedule { - step: 900000 - learning_rate: .00003 - } - schedule { - step: 1200000 - learning_rate: .000003 - } - } - } - momentum_optimizer_value: 0.9 - } - use_moving_average: false - } - gradient_clipping_by_norm: 10.0 - fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt" - from_detection_checkpoint: true - # 
Note: The below line limits the training process to 200K steps, which we - # empirically found to be sufficient enough to train the pets dataset. This - # effectively bypasses the learning rate schedule (the learning rate will - # never decay). Remove the below line to train indefinitely. - num_steps: 200000 - data_augmentation_options { - random_horizontal_flip { - } - } -} - -train_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/pet_train.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt" -} - -eval_config: { - num_examples: 2000 - # Note: The below line limits the evaluation process to 10 evaluations. - # Remove the below line to evaluate indefinitely. - max_evals: 10 -} - -eval_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/pet_val.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt" - shuffle: false - num_readers: 1 -} diff --git a/object_detection/samples/configs/faster_rcnn_resnet101_voc07.config b/object_detection/samples/configs/faster_rcnn_resnet101_voc07.config deleted file mode 100644 index e2362241..00000000 --- a/object_detection/samples/configs/faster_rcnn_resnet101_voc07.config +++ /dev/null @@ -1,137 +0,0 @@ -# Faster R-CNN with Resnet-101 (v1), configured for Pascal VOC Dataset. -# Users should configure the fine_tune_checkpoint field in the train config as -# well as the label_map_path and input_path fields in the train_input_reader and -# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that -# should be configured. 
- -model { - faster_rcnn { - num_classes: 20 - image_resizer { - keep_aspect_ratio_resizer { - min_dimension: 600 - max_dimension: 1024 - } - } - feature_extractor { - type: 'faster_rcnn_resnet101' - first_stage_features_stride: 16 - } - first_stage_anchor_generator { - grid_anchor_generator { - scales: [0.25, 0.5, 1.0, 2.0] - aspect_ratios: [0.5, 1.0, 2.0] - height_stride: 16 - width_stride: 16 - } - } - first_stage_box_predictor_conv_hyperparams { - op: CONV - regularizer { - l2_regularizer { - weight: 0.0 - } - } - initializer { - truncated_normal_initializer { - stddev: 0.01 - } - } - } - first_stage_nms_score_threshold: 0.0 - first_stage_nms_iou_threshold: 0.7 - first_stage_max_proposals: 300 - first_stage_localization_loss_weight: 2.0 - first_stage_objectness_loss_weight: 1.0 - initial_crop_size: 14 - maxpool_kernel_size: 2 - maxpool_stride: 2 - second_stage_box_predictor { - mask_rcnn_box_predictor { - use_dropout: false - dropout_keep_probability: 1.0 - fc_hyperparams { - op: FC - regularizer { - l2_regularizer { - weight: 0.0 - } - } - initializer { - variance_scaling_initializer { - factor: 1.0 - uniform: true - mode: FAN_AVG - } - } - } - } - } - second_stage_post_processing { - batch_non_max_suppression { - score_threshold: 0.0 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 300 - } - score_converter: SOFTMAX - } - second_stage_localization_loss_weight: 2.0 - second_stage_classification_loss_weight: 1.0 - } -} - -train_config: { - batch_size: 1 - optimizer { - momentum_optimizer: { - learning_rate: { - manual_step_learning_rate { - initial_learning_rate: 0.0001 - schedule { - step: 0 - learning_rate: .0001 - } - schedule { - step: 500000 - learning_rate: .00001 - } - schedule { - step: 700000 - learning_rate: .000001 - } - } - } - momentum_optimizer_value: 0.9 - } - use_moving_average: false - } - gradient_clipping_by_norm: 10.0 - fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt" - from_detection_checkpoint: true - 
num_steps: 800000 - data_augmentation_options { - random_horizontal_flip { - } - } -} - -train_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/pascal_train.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/pascal_label_map.pbtxt" -} - -eval_config: { - num_examples: 4952 -} - -eval_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/pascal_val.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/pascal_label_map.pbtxt" - shuffle: false - num_readers: 1 -} diff --git a/object_detection/samples/configs/faster_rcnn_resnet152_coco.config b/object_detection/samples/configs/faster_rcnn_resnet152_coco.config deleted file mode 100644 index d537b08f..00000000 --- a/object_detection/samples/configs/faster_rcnn_resnet152_coco.config +++ /dev/null @@ -1,145 +0,0 @@ -# Faster R-CNN with Resnet-152 (v1), configuration for MSCOCO Dataset. -# Users should configure the fine_tune_checkpoint field in the train config as -# well as the label_map_path and input_path fields in the train_input_reader and -# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that -# should be configured. 
- -model { - faster_rcnn { - num_classes: 90 - image_resizer { - keep_aspect_ratio_resizer { - min_dimension: 600 - max_dimension: 1024 - } - } - feature_extractor { - type: 'faster_rcnn_resnet152' - first_stage_features_stride: 16 - } - first_stage_anchor_generator { - grid_anchor_generator { - scales: [0.25, 0.5, 1.0, 2.0] - aspect_ratios: [0.5, 1.0, 2.0] - height_stride: 16 - width_stride: 16 - } - } - first_stage_box_predictor_conv_hyperparams { - op: CONV - regularizer { - l2_regularizer { - weight: 0.0 - } - } - initializer { - truncated_normal_initializer { - stddev: 0.01 - } - } - } - first_stage_nms_score_threshold: 0.0 - first_stage_nms_iou_threshold: 0.7 - first_stage_max_proposals: 300 - first_stage_localization_loss_weight: 2.0 - first_stage_objectness_loss_weight: 1.0 - initial_crop_size: 14 - maxpool_kernel_size: 2 - maxpool_stride: 2 - second_stage_box_predictor { - mask_rcnn_box_predictor { - use_dropout: false - dropout_keep_probability: 1.0 - fc_hyperparams { - op: FC - regularizer { - l2_regularizer { - weight: 0.0 - } - } - initializer { - variance_scaling_initializer { - factor: 1.0 - uniform: true - mode: FAN_AVG - } - } - } - } - } - second_stage_post_processing { - batch_non_max_suppression { - score_threshold: 0.0 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 300 - } - score_converter: SOFTMAX - } - second_stage_localization_loss_weight: 2.0 - second_stage_classification_loss_weight: 1.0 - } -} - -train_config: { - batch_size: 1 - optimizer { - momentum_optimizer: { - learning_rate: { - manual_step_learning_rate { - initial_learning_rate: 0.0003 - schedule { - step: 0 - learning_rate: .0003 - } - schedule { - step: 900000 - learning_rate: .00003 - } - schedule { - step: 1200000 - learning_rate: .000003 - } - } - } - momentum_optimizer_value: 0.9 - } - use_moving_average: false - } - gradient_clipping_by_norm: 10.0 - fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt" - from_detection_checkpoint: true - # 
Note: The below line limits the training process to 200K steps, which we - # empirically found to be sufficient enough to train the pets dataset. This - # effectively bypasses the learning rate schedule (the learning rate will - # never decay). Remove the below line to train indefinitely. - num_steps: 200000 - data_augmentation_options { - random_horizontal_flip { - } - } -} - -train_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt" -} - -eval_config: { - num_examples: 8000 - # Note: The below line limits the evaluation process to 10 evaluations. - # Remove the below line to evaluate indefinitely. - max_evals: 10 -} - -eval_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt" - shuffle: false - num_readers: 1 - num_epochs: 1 -} diff --git a/object_detection/samples/configs/faster_rcnn_resnet152_pets.config b/object_detection/samples/configs/faster_rcnn_resnet152_pets.config deleted file mode 100644 index bc6f1aa7..00000000 --- a/object_detection/samples/configs/faster_rcnn_resnet152_pets.config +++ /dev/null @@ -1,144 +0,0 @@ -# Faster R-CNN with Resnet-152 (v1), configured for Oxford-IIIT Pets Dataset. -# Users should configure the fine_tune_checkpoint field in the train config as -# well as the label_map_path and input_path fields in the train_input_reader and -# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that -# should be configured. 
- -model { - faster_rcnn { - num_classes: 37 - image_resizer { - keep_aspect_ratio_resizer { - min_dimension: 600 - max_dimension: 1024 - } - } - feature_extractor { - type: 'faster_rcnn_resnet152' - first_stage_features_stride: 16 - } - first_stage_anchor_generator { - grid_anchor_generator { - scales: [0.25, 0.5, 1.0, 2.0] - aspect_ratios: [0.5, 1.0, 2.0] - height_stride: 16 - width_stride: 16 - } - } - first_stage_box_predictor_conv_hyperparams { - op: CONV - regularizer { - l2_regularizer { - weight: 0.0 - } - } - initializer { - truncated_normal_initializer { - stddev: 0.01 - } - } - } - first_stage_nms_score_threshold: 0.0 - first_stage_nms_iou_threshold: 0.7 - first_stage_max_proposals: 300 - first_stage_localization_loss_weight: 2.0 - first_stage_objectness_loss_weight: 1.0 - initial_crop_size: 14 - maxpool_kernel_size: 2 - maxpool_stride: 2 - second_stage_box_predictor { - mask_rcnn_box_predictor { - use_dropout: false - dropout_keep_probability: 1.0 - fc_hyperparams { - op: FC - regularizer { - l2_regularizer { - weight: 0.0 - } - } - initializer { - variance_scaling_initializer { - factor: 1.0 - uniform: true - mode: FAN_AVG - } - } - } - } - } - second_stage_post_processing { - batch_non_max_suppression { - score_threshold: 0.0 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 300 - } - score_converter: SOFTMAX - } - second_stage_localization_loss_weight: 2.0 - second_stage_classification_loss_weight: 1.0 - } -} - -train_config: { - batch_size: 1 - optimizer { - momentum_optimizer: { - learning_rate: { - manual_step_learning_rate { - initial_learning_rate: 0.0003 - schedule { - step: 0 - learning_rate: .0003 - } - schedule { - step: 900000 - learning_rate: .00003 - } - schedule { - step: 1200000 - learning_rate: .000003 - } - } - } - momentum_optimizer_value: 0.9 - } - use_moving_average: false - } - gradient_clipping_by_norm: 10.0 - fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt" - from_detection_checkpoint: true - # 
Note: The below line limits the training process to 200K steps, which we - # empirically found to be sufficient enough to train the pets dataset. This - # effectively bypasses the learning rate schedule (the learning rate will - # never decay). Remove the below line to train indefinitely. - num_steps: 200000 - data_augmentation_options { - random_horizontal_flip { - } - } -} - -train_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/pet_train.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt" -} - -eval_config: { - num_examples: 2000 - # Note: The below line limits the evaluation process to 10 evaluations. - # Remove the below line to evaluate indefinitely. - max_evals: 10 -} - -eval_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/pet_val.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt" - shuffle: false - num_readers: 1 -} diff --git a/object_detection/samples/configs/faster_rcnn_resnet50_coco.config b/object_detection/samples/configs/faster_rcnn_resnet50_coco.config deleted file mode 100644 index e3257860..00000000 --- a/object_detection/samples/configs/faster_rcnn_resnet50_coco.config +++ /dev/null @@ -1,145 +0,0 @@ -# Faster R-CNN with Resnet-50 (v1), configuration for MSCOCO Dataset. -# Users should configure the fine_tune_checkpoint field in the train config as -# well as the label_map_path and input_path fields in the train_input_reader and -# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that -# should be configured. 
- -model { - faster_rcnn { - num_classes: 90 - image_resizer { - keep_aspect_ratio_resizer { - min_dimension: 600 - max_dimension: 1024 - } - } - feature_extractor { - type: 'faster_rcnn_resnet50' - first_stage_features_stride: 16 - } - first_stage_anchor_generator { - grid_anchor_generator { - scales: [0.25, 0.5, 1.0, 2.0] - aspect_ratios: [0.5, 1.0, 2.0] - height_stride: 16 - width_stride: 16 - } - } - first_stage_box_predictor_conv_hyperparams { - op: CONV - regularizer { - l2_regularizer { - weight: 0.0 - } - } - initializer { - truncated_normal_initializer { - stddev: 0.01 - } - } - } - first_stage_nms_score_threshold: 0.0 - first_stage_nms_iou_threshold: 0.7 - first_stage_max_proposals: 300 - first_stage_localization_loss_weight: 2.0 - first_stage_objectness_loss_weight: 1.0 - initial_crop_size: 14 - maxpool_kernel_size: 2 - maxpool_stride: 2 - second_stage_box_predictor { - mask_rcnn_box_predictor { - use_dropout: false - dropout_keep_probability: 1.0 - fc_hyperparams { - op: FC - regularizer { - l2_regularizer { - weight: 0.0 - } - } - initializer { - variance_scaling_initializer { - factor: 1.0 - uniform: true - mode: FAN_AVG - } - } - } - } - } - second_stage_post_processing { - batch_non_max_suppression { - score_threshold: 0.0 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 300 - } - score_converter: SOFTMAX - } - second_stage_localization_loss_weight: 2.0 - second_stage_classification_loss_weight: 1.0 - } -} - -train_config: { - batch_size: 1 - optimizer { - momentum_optimizer: { - learning_rate: { - manual_step_learning_rate { - initial_learning_rate: 0.0003 - schedule { - step: 0 - learning_rate: .0003 - } - schedule { - step: 900000 - learning_rate: .00003 - } - schedule { - step: 1200000 - learning_rate: .000003 - } - } - } - momentum_optimizer_value: 0.9 - } - use_moving_average: false - } - gradient_clipping_by_norm: 10.0 - fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt" - from_detection_checkpoint: true - # 
Note: The below line limits the training process to 200K steps, which we - # empirically found to be sufficient enough to train the pets dataset. This - # effectively bypasses the learning rate schedule (the learning rate will - # never decay). Remove the below line to train indefinitely. - num_steps: 200000 - data_augmentation_options { - random_horizontal_flip { - } - } -} - -train_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt" -} - -eval_config: { - num_examples: 8000 - # Note: The below line limits the evaluation process to 10 evaluations. - # Remove the below line to evaluate indefinitely. - max_evals: 10 -} - -eval_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt" - shuffle: false - num_readers: 1 - num_epochs: 1 -} diff --git a/object_detection/samples/configs/faster_rcnn_resnet50_pets.config b/object_detection/samples/configs/faster_rcnn_resnet50_pets.config deleted file mode 100644 index 042ee13d..00000000 --- a/object_detection/samples/configs/faster_rcnn_resnet50_pets.config +++ /dev/null @@ -1,144 +0,0 @@ -# Faster R-CNN with Resnet-50 (v1), configured for Oxford-IIIT Pets Dataset. -# Users should configure the fine_tune_checkpoint field in the train config as -# well as the label_map_path and input_path fields in the train_input_reader and -# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that -# should be configured. 
- -model { - faster_rcnn { - num_classes: 37 - image_resizer { - keep_aspect_ratio_resizer { - min_dimension: 600 - max_dimension: 1024 - } - } - feature_extractor { - type: 'faster_rcnn_resnet50' - first_stage_features_stride: 16 - } - first_stage_anchor_generator { - grid_anchor_generator { - scales: [0.25, 0.5, 1.0, 2.0] - aspect_ratios: [0.5, 1.0, 2.0] - height_stride: 16 - width_stride: 16 - } - } - first_stage_box_predictor_conv_hyperparams { - op: CONV - regularizer { - l2_regularizer { - weight: 0.0 - } - } - initializer { - truncated_normal_initializer { - stddev: 0.01 - } - } - } - first_stage_nms_score_threshold: 0.0 - first_stage_nms_iou_threshold: 0.7 - first_stage_max_proposals: 300 - first_stage_localization_loss_weight: 2.0 - first_stage_objectness_loss_weight: 1.0 - initial_crop_size: 14 - maxpool_kernel_size: 2 - maxpool_stride: 2 - second_stage_box_predictor { - mask_rcnn_box_predictor { - use_dropout: false - dropout_keep_probability: 1.0 - fc_hyperparams { - op: FC - regularizer { - l2_regularizer { - weight: 0.0 - } - } - initializer { - variance_scaling_initializer { - factor: 1.0 - uniform: true - mode: FAN_AVG - } - } - } - } - } - second_stage_post_processing { - batch_non_max_suppression { - score_threshold: 0.0 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 300 - } - score_converter: SOFTMAX - } - second_stage_localization_loss_weight: 2.0 - second_stage_classification_loss_weight: 1.0 - } -} - -train_config: { - batch_size: 1 - optimizer { - momentum_optimizer: { - learning_rate: { - manual_step_learning_rate { - initial_learning_rate: 0.0003 - schedule { - step: 0 - learning_rate: .0003 - } - schedule { - step: 900000 - learning_rate: .00003 - } - schedule { - step: 1200000 - learning_rate: .000003 - } - } - } - momentum_optimizer_value: 0.9 - } - use_moving_average: false - } - gradient_clipping_by_norm: 10.0 - fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt" - from_detection_checkpoint: true - # 
Note: The below line limits the training process to 200K steps, which we - # empirically found to be sufficient enough to train the pets dataset. This - # effectively bypasses the learning rate schedule (the learning rate will - # never decay). Remove the below line to train indefinitely. - num_steps: 200000 - data_augmentation_options { - random_horizontal_flip { - } - } -} - -train_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/pet_train.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt" -} - -eval_config: { - num_examples: 2000 - # Note: The below line limits the evaluation process to 10 evaluations. - # Remove the below line to evaluate indefinitely. - max_evals: 10 -} - -eval_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/pet_val.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt" - shuffle: false - num_readers: 1 -} diff --git a/object_detection/samples/configs/rfcn_resnet101_coco.config b/object_detection/samples/configs/rfcn_resnet101_coco.config deleted file mode 100644 index 6c383fa7..00000000 --- a/object_detection/samples/configs/rfcn_resnet101_coco.config +++ /dev/null @@ -1,142 +0,0 @@ -# R-FCN with Resnet-101 (v1), configuration for MSCOCO Dataset. -# Users should configure the fine_tune_checkpoint field in the train config as -# well as the label_map_path and input_path fields in the train_input_reader and -# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that -# should be configured. 
- -model { - faster_rcnn { - num_classes: 90 - image_resizer { - keep_aspect_ratio_resizer { - min_dimension: 600 - max_dimension: 1024 - } - } - feature_extractor { - type: 'faster_rcnn_resnet101' - first_stage_features_stride: 16 - } - first_stage_anchor_generator { - grid_anchor_generator { - scales: [0.25, 0.5, 1.0, 2.0] - aspect_ratios: [0.5, 1.0, 2.0] - height_stride: 16 - width_stride: 16 - } - } - first_stage_box_predictor_conv_hyperparams { - op: CONV - regularizer { - l2_regularizer { - weight: 0.0 - } - } - initializer { - truncated_normal_initializer { - stddev: 0.01 - } - } - } - first_stage_nms_score_threshold: 0.0 - first_stage_nms_iou_threshold: 0.7 - first_stage_max_proposals: 300 - first_stage_localization_loss_weight: 2.0 - first_stage_objectness_loss_weight: 1.0 - second_stage_box_predictor { - rfcn_box_predictor { - conv_hyperparams { - op: CONV - regularizer { - l2_regularizer { - weight: 0.0 - } - } - initializer { - truncated_normal_initializer { - stddev: 0.01 - } - } - } - crop_height: 18 - crop_width: 18 - num_spatial_bins_height: 3 - num_spatial_bins_width: 3 - } - } - second_stage_post_processing { - batch_non_max_suppression { - score_threshold: 0.0 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 300 - } - score_converter: SOFTMAX - } - second_stage_localization_loss_weight: 2.0 - second_stage_classification_loss_weight: 1.0 - } -} - -train_config: { - batch_size: 1 - optimizer { - momentum_optimizer: { - learning_rate: { - manual_step_learning_rate { - initial_learning_rate: 0.0003 - schedule { - step: 0 - learning_rate: .0003 - } - schedule { - step: 900000 - learning_rate: .00003 - } - schedule { - step: 1200000 - learning_rate: .000003 - } - } - } - momentum_optimizer_value: 0.9 - } - use_moving_average: false - } - gradient_clipping_by_norm: 10.0 - fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt" - from_detection_checkpoint: true - # Note: The below line limits the training process to 200K 
steps, which we - # empirically found to be sufficient enough to train the pets dataset. This - # effectively bypasses the learning rate schedule (the learning rate will - # never decay). Remove the below line to train indefinitely. - num_steps: 200000 - data_augmentation_options { - random_horizontal_flip { - } - } -} - -train_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt" -} - -eval_config: { - num_examples: 8000 - # Note: The below line limits the evaluation process to 10 evaluations. - # Remove the below line to evaluate indefinitely. - max_evals: 10 -} - -eval_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt" - shuffle: false - num_readers: 1 - num_epochs: 1 -} diff --git a/object_detection/samples/configs/rfcn_resnet101_pets.config b/object_detection/samples/configs/rfcn_resnet101_pets.config deleted file mode 100644 index 28f3d091..00000000 --- a/object_detection/samples/configs/rfcn_resnet101_pets.config +++ /dev/null @@ -1,141 +0,0 @@ -# R-FCN with Resnet-101 (v1), configured for Oxford-IIIT Pets Dataset. -# Users should configure the fine_tune_checkpoint field in the train config as -# well as the label_map_path and input_path fields in the train_input_reader and -# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that -# should be configured. 
- -model { - faster_rcnn { - num_classes: 37 - image_resizer { - keep_aspect_ratio_resizer { - min_dimension: 600 - max_dimension: 1024 - } - } - feature_extractor { - type: 'faster_rcnn_resnet101' - first_stage_features_stride: 16 - } - first_stage_anchor_generator { - grid_anchor_generator { - scales: [0.25, 0.5, 1.0, 2.0] - aspect_ratios: [0.5, 1.0, 2.0] - height_stride: 16 - width_stride: 16 - } - } - first_stage_box_predictor_conv_hyperparams { - op: CONV - regularizer { - l2_regularizer { - weight: 0.0 - } - } - initializer { - truncated_normal_initializer { - stddev: 0.01 - } - } - } - first_stage_nms_score_threshold: 0.0 - first_stage_nms_iou_threshold: 0.7 - first_stage_max_proposals: 300 - first_stage_localization_loss_weight: 2.0 - first_stage_objectness_loss_weight: 1.0 - second_stage_box_predictor { - rfcn_box_predictor { - conv_hyperparams { - op: CONV - regularizer { - l2_regularizer { - weight: 0.0 - } - } - initializer { - truncated_normal_initializer { - stddev: 0.01 - } - } - } - crop_height: 18 - crop_width: 18 - num_spatial_bins_height: 3 - num_spatial_bins_width: 3 - } - } - second_stage_post_processing { - batch_non_max_suppression { - score_threshold: 0.0 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 300 - } - score_converter: SOFTMAX - } - second_stage_localization_loss_weight: 2.0 - second_stage_classification_loss_weight: 1.0 - } -} - -train_config: { - batch_size: 1 - optimizer { - momentum_optimizer: { - learning_rate: { - manual_step_learning_rate { - initial_learning_rate: 0.0003 - schedule { - step: 0 - learning_rate: .0003 - } - schedule { - step: 900000 - learning_rate: .00003 - } - schedule { - step: 1200000 - learning_rate: .000003 - } - } - } - momentum_optimizer_value: 0.9 - } - use_moving_average: false - } - gradient_clipping_by_norm: 10.0 - fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt" - from_detection_checkpoint: true - # Note: The below line limits the training process to 200K 
steps, which we - # empirically found to be sufficient enough to train the pets dataset. This - # effectively bypasses the learning rate schedule (the learning rate will - # never decay). Remove the below line to train indefinitely. - num_steps: 200000 - data_augmentation_options { - random_horizontal_flip { - } - } -} - -train_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/pet_train.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt" -} - -eval_config: { - num_examples: 2000 - # Note: The below line limits the evaluation process to 10 evaluations. - # Remove the below line to evaluate indefinitely. - max_evals: 10 -} - -eval_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/pet_val.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt" - shuffle: false - num_readers: 1 -} diff --git a/object_detection/samples/configs/ssd_inception_v2_coco.config b/object_detection/samples/configs/ssd_inception_v2_coco.config deleted file mode 100644 index 62e7e4f2..00000000 --- a/object_detection/samples/configs/ssd_inception_v2_coco.config +++ /dev/null @@ -1,191 +0,0 @@ -# SSD with Inception v2 configuration for MSCOCO Dataset. -# Users should configure the fine_tune_checkpoint field in the train config as -# well as the label_map_path and input_path fields in the train_input_reader and -# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that -# should be configured. 
- -model { - ssd { - num_classes: 90 - box_coder { - faster_rcnn_box_coder { - y_scale: 10.0 - x_scale: 10.0 - height_scale: 5.0 - width_scale: 5.0 - } - } - matcher { - argmax_matcher { - matched_threshold: 0.5 - unmatched_threshold: 0.5 - ignore_thresholds: false - negatives_lower_than_unmatched: true - force_match_for_each_row: true - } - } - similarity_calculator { - iou_similarity { - } - } - anchor_generator { - ssd_anchor_generator { - num_layers: 6 - min_scale: 0.2 - max_scale: 0.95 - aspect_ratios: 1.0 - aspect_ratios: 2.0 - aspect_ratios: 0.5 - aspect_ratios: 3.0 - aspect_ratios: 0.3333 - reduce_boxes_in_lowest_layer: true - } - } - image_resizer { - fixed_shape_resizer { - height: 300 - width: 300 - } - } - box_predictor { - convolutional_box_predictor { - min_depth: 0 - max_depth: 0 - num_layers_before_predictor: 0 - use_dropout: false - dropout_keep_probability: 0.8 - kernel_size: 3 - box_code_size: 4 - apply_sigmoid_to_scores: false - conv_hyperparams { - activation: RELU_6, - regularizer { - l2_regularizer { - weight: 0.00004 - } - } - initializer { - truncated_normal_initializer { - stddev: 0.03 - mean: 0.0 - } - } - } - } - } - feature_extractor { - type: 'ssd_inception_v2' - min_depth: 16 - depth_multiplier: 1.0 - conv_hyperparams { - activation: RELU_6, - regularizer { - l2_regularizer { - weight: 0.00004 - } - } - initializer { - truncated_normal_initializer { - stddev: 0.03 - mean: 0.0 - } - } - batch_norm { - train: true, - scale: true, - center: true, - decay: 0.9997, - epsilon: 0.001, - } - } - } - loss { - classification_loss { - weighted_sigmoid { - anchorwise_output: true - } - } - localization_loss { - weighted_smooth_l1 { - anchorwise_output: true - } - } - hard_example_miner { - num_hard_examples: 3000 - iou_threshold: 0.99 - loss_type: CLASSIFICATION - max_negatives_per_positive: 3 - min_negatives_per_image: 0 - } - classification_weight: 1.0 - localization_weight: 1.0 - } - normalize_loss_by_num_matches: true - post_processing { - 
batch_non_max_suppression { - score_threshold: 1e-8 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 100 - } - score_converter: SIGMOID - } - } -} - -train_config: { - batch_size: 24 - optimizer { - rms_prop_optimizer: { - learning_rate: { - exponential_decay_learning_rate { - initial_learning_rate: 0.004 - decay_steps: 800720 - decay_factor: 0.95 - } - } - momentum_optimizer_value: 0.9 - decay: 0.9 - epsilon: 1.0 - } - } - fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt" - from_detection_checkpoint: true - # Note: The below line limits the training process to 200K steps, which we - # empirically found to be sufficient enough to train the pets dataset. This - # effectively bypasses the learning rate schedule (the learning rate will - # never decay). Remove the below line to train indefinitely. - num_steps: 200000 - data_augmentation_options { - random_horizontal_flip { - } - } - data_augmentation_options { - ssd_random_crop { - } - } -} - -train_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt" -} - -eval_config: { - num_examples: 8000 - # Note: The below line limits the evaluation process to 10 evaluations. - # Remove the below line to evaluate indefinitely. - max_evals: 10 -} - -eval_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt" - shuffle: false - num_readers: 1 - num_epochs: 1 -} diff --git a/object_detection/samples/configs/ssd_inception_v2_pets.config b/object_detection/samples/configs/ssd_inception_v2_pets.config deleted file mode 100644 index 41b15880..00000000 --- a/object_detection/samples/configs/ssd_inception_v2_pets.config +++ /dev/null @@ -1,190 +0,0 @@ -# SSD with Inception v2 configured for Oxford-IIIT Pets Dataset. 
-# Users should configure the fine_tune_checkpoint field in the train config as -# well as the label_map_path and input_path fields in the train_input_reader and -# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that -# should be configured. - -model { - ssd { - num_classes: 37 - box_coder { - faster_rcnn_box_coder { - y_scale: 10.0 - x_scale: 10.0 - height_scale: 5.0 - width_scale: 5.0 - } - } - matcher { - argmax_matcher { - matched_threshold: 0.5 - unmatched_threshold: 0.5 - ignore_thresholds: false - negatives_lower_than_unmatched: true - force_match_for_each_row: true - } - } - similarity_calculator { - iou_similarity { - } - } - anchor_generator { - ssd_anchor_generator { - num_layers: 6 - min_scale: 0.2 - max_scale: 0.95 - aspect_ratios: 1.0 - aspect_ratios: 2.0 - aspect_ratios: 0.5 - aspect_ratios: 3.0 - aspect_ratios: 0.3333 - reduce_boxes_in_lowest_layer: true - } - } - image_resizer { - fixed_shape_resizer { - height: 300 - width: 300 - } - } - box_predictor { - convolutional_box_predictor { - min_depth: 0 - max_depth: 0 - num_layers_before_predictor: 0 - use_dropout: false - dropout_keep_probability: 0.8 - kernel_size: 3 - box_code_size: 4 - apply_sigmoid_to_scores: false - conv_hyperparams { - activation: RELU_6, - regularizer { - l2_regularizer { - weight: 0.00004 - } - } - initializer { - truncated_normal_initializer { - stddev: 0.03 - mean: 0.0 - } - } - } - } - } - feature_extractor { - type: 'ssd_inception_v2' - min_depth: 16 - depth_multiplier: 1.0 - conv_hyperparams { - activation: RELU_6, - regularizer { - l2_regularizer { - weight: 0.00004 - } - } - initializer { - truncated_normal_initializer { - stddev: 0.03 - mean: 0.0 - } - } - batch_norm { - train: true, - scale: true, - center: true, - decay: 0.9997, - epsilon: 0.001, - } - } - } - loss { - classification_loss { - weighted_sigmoid { - anchorwise_output: true - } - } - localization_loss { - weighted_smooth_l1 { - anchorwise_output: true - } - } - 
hard_example_miner { - num_hard_examples: 3000 - iou_threshold: 0.99 - loss_type: CLASSIFICATION - max_negatives_per_positive: 3 - min_negatives_per_image: 0 - } - classification_weight: 1.0 - localization_weight: 1.0 - } - normalize_loss_by_num_matches: true - post_processing { - batch_non_max_suppression { - score_threshold: 1e-8 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 100 - } - score_converter: SIGMOID - } - } -} - -train_config: { - batch_size: 24 - optimizer { - rms_prop_optimizer: { - learning_rate: { - exponential_decay_learning_rate { - initial_learning_rate: 0.004 - decay_steps: 800720 - decay_factor: 0.95 - } - } - momentum_optimizer_value: 0.9 - decay: 0.9 - epsilon: 1.0 - } - } - fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt" - from_detection_checkpoint: true - # Note: The below line limits the training process to 200K steps, which we - # empirically found to be sufficient enough to train the pets dataset. This - # effectively bypasses the learning rate schedule (the learning rate will - # never decay). Remove the below line to train indefinitely. - num_steps: 200000 - data_augmentation_options { - random_horizontal_flip { - } - } - data_augmentation_options { - ssd_random_crop { - } - } -} - -train_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/pet_train.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt" -} - -eval_config: { - num_examples: 2000 - # Note: The below line limits the evaluation process to 10 evaluations. - # Remove the below line to evaluate indefinitely. 
- max_evals: 10 -} - -eval_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/pet_val.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt" - shuffle: false - num_readers: 1 -} diff --git a/object_detection/samples/configs/ssd_mobilenet_v1_coco.config b/object_detection/samples/configs/ssd_mobilenet_v1_coco.config deleted file mode 100644 index d46a5432..00000000 --- a/object_detection/samples/configs/ssd_mobilenet_v1_coco.config +++ /dev/null @@ -1,197 +0,0 @@ -# SSD with Mobilenet v1 configuration for MSCOCO Dataset. -# Users should configure the fine_tune_checkpoint field in the train config as -# well as the label_map_path and input_path fields in the train_input_reader and -# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that -# should be configured. - -model { - ssd { - num_classes: 90 - box_coder { - faster_rcnn_box_coder { - y_scale: 10.0 - x_scale: 10.0 - height_scale: 5.0 - width_scale: 5.0 - } - } - matcher { - argmax_matcher { - matched_threshold: 0.5 - unmatched_threshold: 0.5 - ignore_thresholds: false - negatives_lower_than_unmatched: true - force_match_for_each_row: true - } - } - similarity_calculator { - iou_similarity { - } - } - anchor_generator { - ssd_anchor_generator { - num_layers: 6 - min_scale: 0.2 - max_scale: 0.95 - aspect_ratios: 1.0 - aspect_ratios: 2.0 - aspect_ratios: 0.5 - aspect_ratios: 3.0 - aspect_ratios: 0.3333 - } - } - image_resizer { - fixed_shape_resizer { - height: 300 - width: 300 - } - } - box_predictor { - convolutional_box_predictor { - min_depth: 0 - max_depth: 0 - num_layers_before_predictor: 0 - use_dropout: false - dropout_keep_probability: 0.8 - kernel_size: 1 - box_code_size: 4 - apply_sigmoid_to_scores: false - conv_hyperparams { - activation: RELU_6, - regularizer { - l2_regularizer { - weight: 0.00004 - } - } - initializer { - truncated_normal_initializer { - stddev: 0.03 - mean: 0.0 - } - } - batch_norm { - train: true, - scale: true, 
- center: true, - decay: 0.9997, - epsilon: 0.001, - } - } - } - } - feature_extractor { - type: 'ssd_mobilenet_v1' - min_depth: 16 - depth_multiplier: 1.0 - conv_hyperparams { - activation: RELU_6, - regularizer { - l2_regularizer { - weight: 0.00004 - } - } - initializer { - truncated_normal_initializer { - stddev: 0.03 - mean: 0.0 - } - } - batch_norm { - train: true, - scale: true, - center: true, - decay: 0.9997, - epsilon: 0.001, - } - } - } - loss { - classification_loss { - weighted_sigmoid { - anchorwise_output: true - } - } - localization_loss { - weighted_smooth_l1 { - anchorwise_output: true - } - } - hard_example_miner { - num_hard_examples: 3000 - iou_threshold: 0.99 - loss_type: CLASSIFICATION - max_negatives_per_positive: 3 - min_negatives_per_image: 0 - } - classification_weight: 1.0 - localization_weight: 1.0 - } - normalize_loss_by_num_matches: true - post_processing { - batch_non_max_suppression { - score_threshold: 1e-8 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 100 - } - score_converter: SIGMOID - } - } -} - -train_config: { - batch_size: 24 - optimizer { - rms_prop_optimizer: { - learning_rate: { - exponential_decay_learning_rate { - initial_learning_rate: 0.004 - decay_steps: 800720 - decay_factor: 0.95 - } - } - momentum_optimizer_value: 0.9 - decay: 0.9 - epsilon: 1.0 - } - } - fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt" - from_detection_checkpoint: true - # Note: The below line limits the training process to 200K steps, which we - # empirically found to be sufficient enough to train the pets dataset. This - # effectively bypasses the learning rate schedule (the learning rate will - # never decay). Remove the below line to train indefinitely. 
- num_steps: 200000 - data_augmentation_options { - random_horizontal_flip { - } - } - data_augmentation_options { - ssd_random_crop { - } - } -} - -train_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt" -} - -eval_config: { - num_examples: 8000 - # Note: The below line limits the evaluation process to 10 evaluations. - # Remove the below line to evaluate indefinitely. - max_evals: 10 -} - -eval_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt" - shuffle: false - num_readers: 1 - num_epochs: 1 -} diff --git a/object_detection/samples/configs/ssd_mobilenet_v1_pets.config b/object_detection/samples/configs/ssd_mobilenet_v1_pets.config deleted file mode 100644 index a6741357..00000000 --- a/object_detection/samples/configs/ssd_mobilenet_v1_pets.config +++ /dev/null @@ -1,196 +0,0 @@ -# SSD with Mobilenet v1, configured for Oxford-IIIT Pets Dataset. -# Users should configure the fine_tune_checkpoint field in the train config as -# well as the label_map_path and input_path fields in the train_input_reader and -# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that -# should be configured. 
- -model { - ssd { - num_classes: 37 - box_coder { - faster_rcnn_box_coder { - y_scale: 10.0 - x_scale: 10.0 - height_scale: 5.0 - width_scale: 5.0 - } - } - matcher { - argmax_matcher { - matched_threshold: 0.5 - unmatched_threshold: 0.5 - ignore_thresholds: false - negatives_lower_than_unmatched: true - force_match_for_each_row: true - } - } - similarity_calculator { - iou_similarity { - } - } - anchor_generator { - ssd_anchor_generator { - num_layers: 6 - min_scale: 0.2 - max_scale: 0.95 - aspect_ratios: 1.0 - aspect_ratios: 2.0 - aspect_ratios: 0.5 - aspect_ratios: 3.0 - aspect_ratios: 0.3333 - } - } - image_resizer { - fixed_shape_resizer { - height: 300 - width: 300 - } - } - box_predictor { - convolutional_box_predictor { - min_depth: 0 - max_depth: 0 - num_layers_before_predictor: 0 - use_dropout: false - dropout_keep_probability: 0.8 - kernel_size: 1 - box_code_size: 4 - apply_sigmoid_to_scores: false - conv_hyperparams { - activation: RELU_6, - regularizer { - l2_regularizer { - weight: 0.00004 - } - } - initializer { - truncated_normal_initializer { - stddev: 0.03 - mean: 0.0 - } - } - batch_norm { - train: true, - scale: true, - center: true, - decay: 0.9997, - epsilon: 0.001, - } - } - } - } - feature_extractor { - type: 'ssd_mobilenet_v1' - min_depth: 16 - depth_multiplier: 1.0 - conv_hyperparams { - activation: RELU_6, - regularizer { - l2_regularizer { - weight: 0.00004 - } - } - initializer { - truncated_normal_initializer { - stddev: 0.03 - mean: 0.0 - } - } - batch_norm { - train: true, - scale: true, - center: true, - decay: 0.9997, - epsilon: 0.001, - } - } - } - loss { - classification_loss { - weighted_sigmoid { - anchorwise_output: true - } - } - localization_loss { - weighted_smooth_l1 { - anchorwise_output: true - } - } - hard_example_miner { - num_hard_examples: 3000 - iou_threshold: 0.99 - loss_type: CLASSIFICATION - max_negatives_per_positive: 3 - min_negatives_per_image: 0 - } - classification_weight: 1.0 - localization_weight: 1.0 - } 
- normalize_loss_by_num_matches: true - post_processing { - batch_non_max_suppression { - score_threshold: 1e-8 - iou_threshold: 0.6 - max_detections_per_class: 100 - max_total_detections: 100 - } - score_converter: SIGMOID - } - } -} - -train_config: { - batch_size: 24 - optimizer { - rms_prop_optimizer: { - learning_rate: { - exponential_decay_learning_rate { - initial_learning_rate: 0.004 - decay_steps: 800720 - decay_factor: 0.95 - } - } - momentum_optimizer_value: 0.9 - decay: 0.9 - epsilon: 1.0 - } - } - fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt" - from_detection_checkpoint: true - # Note: The below line limits the training process to 200K steps, which we - # empirically found to be sufficient enough to train the pets dataset. This - # effectively bypasses the learning rate schedule (the learning rate will - # never decay). Remove the below line to train indefinitely. - num_steps: 200000 - data_augmentation_options { - random_horizontal_flip { - } - } - data_augmentation_options { - ssd_random_crop { - } - } -} - -train_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/pet_train.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt" -} - -eval_config: { - num_examples: 2000 - # Note: The below line limits the evaluation process to 10 evaluations. - # Remove the below line to evaluate indefinitely. 
- max_evals: 10 -} - -eval_input_reader: { - tf_record_input_reader { - input_path: "PATH_TO_BE_CONFIGURED/pet_val.record" - } - label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt" - shuffle: false - num_readers: 1 -} diff --git a/object_detection/test_images/image1.jpg b/object_detection/test_images/image1.jpg deleted file mode 100644 index 8b20d8af..00000000 Binary files a/object_detection/test_images/image1.jpg and /dev/null differ diff --git a/object_detection/test_images/image2.jpg b/object_detection/test_images/image2.jpg deleted file mode 100644 index 9eb325ac..00000000 Binary files a/object_detection/test_images/image2.jpg and /dev/null differ diff --git a/object_detection/test_images/image_info.txt b/object_detection/test_images/image_info.txt deleted file mode 100644 index 6f805cbc..00000000 --- a/object_detection/test_images/image_info.txt +++ /dev/null @@ -1,6 +0,0 @@ - -Image provenance: -image1.jpg: https://commons.wikimedia.org/wiki/File:Baegle_dwa.jpg -image2.jpg: Michael Miley, - https://www.flickr.com/photos/mike_miley/4678754542/in/photolist-88rQHL-88oBVp-88oC2B-88rS6J-88rSqm-88oBLv-88oBC4 - diff --git a/object_detection/train.py b/object_detection/train.py deleted file mode 100644 index faab1acc..00000000 --- a/object_detection/train.py +++ /dev/null @@ -1,163 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -r"""Training executable for detection models. - -This executable is used to train DetectionModels. There are two ways of -configuring the training job: - -1) A single pipeline_pb2.TrainEvalPipelineConfig configuration file -can be specified by --pipeline_config_path. - -Example usage: - ./train \ - --logtostderr \ - --train_dir=path/to/train_dir \ - --pipeline_config_path=pipeline_config.pbtxt - -2) Three configuration files can be provided: a model_pb2.DetectionModel -configuration file to define what type of DetectionModel is being trained, an -input_reader_pb2.InputReader file to specify what training data will be used and -a train_pb2.TrainConfig file to configure training parameters. - -Example usage: - ./train \ - --logtostderr \ - --train_dir=path/to/train_dir \ - --model_config_path=model_config.pbtxt \ - --train_config_path=train_config.pbtxt \ - --input_config_path=train_input_config.pbtxt -""" - -import functools -import json -import os -import tensorflow as tf - -from object_detection import trainer -from object_detection.builders import input_reader_builder -from object_detection.builders import model_builder -from object_detection.utils import config_util - -tf.logging.set_verbosity(tf.logging.INFO) - -flags = tf.app.flags -flags.DEFINE_string('master', '', 'Name of the TensorFlow master to use.') -flags.DEFINE_integer('task', 0, 'task id') -flags.DEFINE_integer('num_clones', 1, 'Number of clones to deploy per worker.') -flags.DEFINE_boolean('clone_on_cpu', False, - 'Force clones to be deployed on CPU. Note that even if ' - 'set to False (allowing ops to run on gpu), some ops may ' - 'still be run on the CPU if they have no GPU kernel.') -flags.DEFINE_integer('worker_replicas', 1, 'Number of worker+trainer ' - 'replicas.') -flags.DEFINE_integer('ps_tasks', 0, - 'Number of parameter server tasks. 
If None, does not use ' - 'a parameter server.') -flags.DEFINE_string('train_dir', '', - 'Directory to save the checkpoints and training summaries.') - -flags.DEFINE_string('pipeline_config_path', '', - 'Path to a pipeline_pb2.TrainEvalPipelineConfig config ' - 'file. If provided, other configs are ignored') - -flags.DEFINE_string('train_config_path', '', - 'Path to a train_pb2.TrainConfig config file.') -flags.DEFINE_string('input_config_path', '', - 'Path to an input_reader_pb2.InputReader config file.') -flags.DEFINE_string('model_config_path', '', - 'Path to a model_pb2.DetectionModel config file.') - -FLAGS = flags.FLAGS - - -def main(_): - assert FLAGS.train_dir, '`train_dir` is missing.' - if FLAGS.task == 0: tf.gfile.MakeDirs(FLAGS.train_dir) - if FLAGS.pipeline_config_path: - configs = config_util.get_configs_from_pipeline_file( - FLAGS.pipeline_config_path) - if FLAGS.task == 0: - tf.gfile.Copy(FLAGS.pipeline_config_path, - os.path.join(FLAGS.train_dir, 'pipeline.config'), - overwrite=True) - else: - configs = config_util.get_configs_from_multiple_files( - model_config_path=FLAGS.model_config_path, - train_config_path=FLAGS.train_config_path, - train_input_config_path=FLAGS.input_config_path) - if FLAGS.task == 0: - for name, config in [('model.config', FLAGS.model_config_path), - ('train.config', FLAGS.train_config_path), - ('input.config', FLAGS.input_config_path)]: - tf.gfile.Copy(config, os.path.join(FLAGS.train_dir, name), - overwrite=True) - - model_config = configs['model'] - train_config = configs['train_config'] - input_config = configs['train_input_config'] - - model_fn = functools.partial( - model_builder.build, - model_config=model_config, - is_training=True) - - create_input_dict_fn = functools.partial( - input_reader_builder.build, input_config) - - env = json.loads(os.environ.get('TF_CONFIG', '{}')) - cluster_data = env.get('cluster', None) - cluster = tf.train.ClusterSpec(cluster_data) if cluster_data else None - task_data = 
env.get('task', None) or {'type': 'master', 'index': 0} - task_info = type('TaskSpec', (object,), task_data) - - # Parameters for a single worker. - ps_tasks = 0 - worker_replicas = 1 - worker_job_name = 'lonely_worker' - task = 0 - is_chief = True - master = '' - - if cluster_data and 'worker' in cluster_data: - # Number of total worker replicas include "worker"s and the "master". - worker_replicas = len(cluster_data['worker']) + 1 - if cluster_data and 'ps' in cluster_data: - ps_tasks = len(cluster_data['ps']) - - if worker_replicas > 1 and ps_tasks < 1: - raise ValueError('At least 1 ps task is needed for distributed training.') - - if worker_replicas >= 1 and ps_tasks > 0: - # Set up distributed training. - server = tf.train.Server(tf.train.ClusterSpec(cluster), protocol='grpc', - job_name=task_info.type, - task_index=task_info.index) - if task_info.type == 'ps': - server.join() - return - - worker_job_name = '%s/task:%d' % (task_info.type, task_info.index) - task = task_info.index - is_chief = (task_info.type == 'master') - master = server.target - - trainer.train(create_input_dict_fn, model_fn, train_config, master, task, - FLAGS.num_clones, worker_replicas, FLAGS.clone_on_cpu, ps_tasks, - worker_job_name, is_chief, FLAGS.train_dir) - - -if __name__ == '__main__': - tf.app.run() diff --git a/object_detection/trainer.py b/object_detection/trainer.py deleted file mode 100644 index ea91777b..00000000 --- a/object_detection/trainer.py +++ /dev/null @@ -1,332 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Detection model trainer. - -This file provides a generic training method that can be used to train a -DetectionModel. -""" - -import functools - -import tensorflow as tf - -from object_detection.builders import optimizer_builder -from object_detection.builders import preprocessor_builder -from object_detection.core import batcher -from object_detection.core import preprocessor -from object_detection.core import standard_fields as fields -from object_detection.utils import ops as util_ops -from object_detection.utils import variables_helper -from deployment import model_deploy - -slim = tf.contrib.slim - - -def create_input_queue(batch_size_per_clone, create_tensor_dict_fn, - batch_queue_capacity, num_batch_queue_threads, - prefetch_queue_capacity, data_augmentation_options): - """Sets up reader, prefetcher and returns input queue. - - Args: - batch_size_per_clone: batch size to use per clone. - create_tensor_dict_fn: function to create tensor dictionary. - batch_queue_capacity: maximum number of elements to store within a queue. - num_batch_queue_threads: number of threads to use for batching. - prefetch_queue_capacity: maximum capacity of the queue used to prefetch - assembled batches. - data_augmentation_options: a list of tuples, where each tuple contains a - data augmentation function and a dictionary containing arguments and their - values (see preprocessor.py). 
- - Returns: - input queue: a batcher.BatchQueue object holding enqueued tensor_dicts - (which hold images, boxes and targets). To get a batch of tensor_dicts, - call input_queue.Dequeue(). - """ - tensor_dict = create_tensor_dict_fn() - - tensor_dict[fields.InputDataFields.image] = tf.expand_dims( - tensor_dict[fields.InputDataFields.image], 0) - - images = tensor_dict[fields.InputDataFields.image] - float_images = tf.to_float(images) - tensor_dict[fields.InputDataFields.image] = float_images - - include_instance_masks = (fields.InputDataFields.groundtruth_instance_masks - in tensor_dict) - include_keypoints = (fields.InputDataFields.groundtruth_keypoints - in tensor_dict) - if data_augmentation_options: - tensor_dict = preprocessor.preprocess( - tensor_dict, data_augmentation_options, - func_arg_map=preprocessor.get_default_func_arg_map( - include_instance_masks=include_instance_masks, - include_keypoints=include_keypoints)) - - input_queue = batcher.BatchQueue( - tensor_dict, - batch_size=batch_size_per_clone, - batch_queue_capacity=batch_queue_capacity, - num_batch_queue_threads=num_batch_queue_threads, - prefetch_queue_capacity=prefetch_queue_capacity) - return input_queue - - -def get_inputs(input_queue, num_classes, merge_multiple_label_boxes=False): - """Dequeues batch and constructs inputs to object detection model. - - Args: - input_queue: BatchQueue object holding enqueued tensor_dicts. - num_classes: Number of classes. - merge_multiple_label_boxes: Whether to merge boxes with multiple labels - or not. Defaults to false. Merged boxes are represented with a single - box and a k-hot encoding of the multiple labels associated with the - boxes. - - Returns: - images: a list of 3-D float tensor of images. - image_keys: a list of string keys for the images. - locations_list: a list of tensors of shape [num_boxes, 4] - containing the corners of the groundtruth boxes. - classes_list: a list of padded one-hot tensors containing target classes. 
- masks_list: a list of 3-D float tensors of shape [num_boxes, image_height, - image_width] containing instance masks for objects if present in the - input_queue. Else returns None. - keypoints_list: a list of 3-D float tensors of shape [num_boxes, - num_keypoints, 2] containing keypoints for objects if present in the - input queue. Else returns None. - """ - read_data_list = input_queue.dequeue() - label_id_offset = 1 - def extract_images_and_targets(read_data): - """Extract images and targets from the input dict.""" - image = read_data[fields.InputDataFields.image] - key = '' - if fields.InputDataFields.source_id in read_data: - key = read_data[fields.InputDataFields.source_id] - location_gt = read_data[fields.InputDataFields.groundtruth_boxes] - classes_gt = tf.cast(read_data[fields.InputDataFields.groundtruth_classes], - tf.int32) - classes_gt -= label_id_offset - if merge_multiple_label_boxes: - location_gt, classes_gt, _ = util_ops.merge_boxes_with_multiple_labels( - location_gt, classes_gt, num_classes) - else: - classes_gt = util_ops.padded_one_hot_encoding( - indices=classes_gt, depth=num_classes, left_pad=0) - masks_gt = read_data.get(fields.InputDataFields.groundtruth_instance_masks) - keypoints_gt = read_data.get(fields.InputDataFields.groundtruth_keypoints) - if (merge_multiple_label_boxes and ( - masks_gt is not None or keypoints_gt is not None)): - raise NotImplementedError('Multi-label support is only for boxes.') - return image, key, location_gt, classes_gt, masks_gt, keypoints_gt - - return zip(*map(extract_images_and_targets, read_data_list)) - - -def _create_losses(input_queue, create_model_fn, train_config): - """Creates loss function for a DetectionModel. - - Args: - input_queue: BatchQueue object holding enqueued tensor_dicts. - create_model_fn: A function to create the DetectionModel. - train_config: a train_pb2.TrainConfig protobuf. 
- """ - detection_model = create_model_fn() - (images, _, groundtruth_boxes_list, groundtruth_classes_list, - groundtruth_masks_list, groundtruth_keypoints_list) = get_inputs( - input_queue, - detection_model.num_classes, - train_config.merge_multiple_label_boxes) - images = [detection_model.preprocess(image) for image in images] - images = tf.concat(images, 0) - if any(mask is None for mask in groundtruth_masks_list): - groundtruth_masks_list = None - if any(keypoints is None for keypoints in groundtruth_keypoints_list): - groundtruth_keypoints_list = None - - detection_model.provide_groundtruth(groundtruth_boxes_list, - groundtruth_classes_list, - groundtruth_masks_list, - groundtruth_keypoints_list) - prediction_dict = detection_model.predict(images) - - losses_dict = detection_model.loss(prediction_dict) - for loss_tensor in losses_dict.values(): - tf.losses.add_loss(loss_tensor) - - -def train(create_tensor_dict_fn, create_model_fn, train_config, master, task, - num_clones, worker_replicas, clone_on_cpu, ps_tasks, worker_job_name, - is_chief, train_dir): - """Training function for detection models. - - Args: - create_tensor_dict_fn: a function to create a tensor input dictionary. - create_model_fn: a function that creates a DetectionModel and generates - losses. - train_config: a train_pb2.TrainConfig protobuf. - master: BNS name of the TensorFlow master to use. - task: The task id of this training instance. - num_clones: The number of clones to run per machine. - worker_replicas: The number of work replicas to train with. - clone_on_cpu: True if clones should be forced to run on CPU. - ps_tasks: Number of parameter server tasks. - worker_job_name: Name of the worker job. - is_chief: Whether this replica is the chief replica. - train_dir: Directory to write checkpoints and training summaries to. 
- """ - - detection_model = create_model_fn() - data_augmentation_options = [ - preprocessor_builder.build(step) - for step in train_config.data_augmentation_options] - - with tf.Graph().as_default(): - # Build a configuration specifying multi-GPU and multi-replicas. - deploy_config = model_deploy.DeploymentConfig( - num_clones=num_clones, - clone_on_cpu=clone_on_cpu, - replica_id=task, - num_replicas=worker_replicas, - num_ps_tasks=ps_tasks, - worker_job_name=worker_job_name) - - # Place the global step on the device storing the variables. - with tf.device(deploy_config.variables_device()): - global_step = slim.create_global_step() - - with tf.device(deploy_config.inputs_device()): - input_queue = create_input_queue( - train_config.batch_size // num_clones, create_tensor_dict_fn, - train_config.batch_queue_capacity, - train_config.num_batch_queue_threads, - train_config.prefetch_queue_capacity, data_augmentation_options) - - # Gather initial summaries. - # TODO(rathodv): See if summaries can be added/extracted from global tf - # collections so that they don't have to be passed around. - summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES)) - global_summaries = set([]) - - model_fn = functools.partial(_create_losses, - create_model_fn=create_model_fn, - train_config=train_config) - clones = model_deploy.create_clones(deploy_config, model_fn, [input_queue]) - first_clone_scope = clones[0].scope - - # Gather update_ops from the first clone. These contain, for example, - # the updates for the batch_norm variables created by model_fn. 
- update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope) - - with tf.device(deploy_config.optimizer_device()): - training_optimizer = optimizer_builder.build(train_config.optimizer, - global_summaries) - - sync_optimizer = None - if train_config.sync_replicas: - training_optimizer = tf.SyncReplicasOptimizer( - training_optimizer, - replicas_to_aggregate=train_config.replicas_to_aggregate, - total_num_replicas=train_config.worker_replicas) - sync_optimizer = training_optimizer - - # Create ops required to initialize the model from a given checkpoint. - init_fn = None - if train_config.fine_tune_checkpoint: - var_map = detection_model.restore_map( - from_detection_checkpoint=train_config.from_detection_checkpoint) - available_var_map = (variables_helper. - get_variables_available_in_checkpoint( - var_map, train_config.fine_tune_checkpoint)) - init_saver = tf.train.Saver(available_var_map) - def initializer_fn(sess): - init_saver.restore(sess, train_config.fine_tune_checkpoint) - init_fn = initializer_fn - - with tf.device(deploy_config.optimizer_device()): - total_loss, grads_and_vars = model_deploy.optimize_clones( - clones, training_optimizer, regularization_losses=None) - total_loss = tf.check_numerics(total_loss, 'LossTensor is inf or nan.') - - # Optionally multiply bias gradients by train_config.bias_grad_multiplier. - if train_config.bias_grad_multiplier: - biases_regex_list = ['.*/biases'] - grads_and_vars = variables_helper.multiply_gradients_matching_regex( - grads_and_vars, - biases_regex_list, - multiplier=train_config.bias_grad_multiplier) - - # Optionally freeze some layers by setting their gradients to be zero. 
- if train_config.freeze_variables: - grads_and_vars = variables_helper.freeze_gradients_matching_regex( - grads_and_vars, train_config.freeze_variables) - - # Optionally clip gradients - if train_config.gradient_clipping_by_norm > 0: - with tf.name_scope('clip_grads'): - grads_and_vars = slim.learning.clip_gradient_norms( - grads_and_vars, train_config.gradient_clipping_by_norm) - - # Create gradient updates. - grad_updates = training_optimizer.apply_gradients(grads_and_vars, - global_step=global_step) - update_ops.append(grad_updates) - - update_op = tf.group(*update_ops) - with tf.control_dependencies([update_op]): - train_tensor = tf.identity(total_loss, name='train_op') - - # Add summaries. - for model_var in slim.get_model_variables(): - global_summaries.add(tf.summary.histogram(model_var.op.name, model_var)) - for loss_tensor in tf.losses.get_losses(): - global_summaries.add(tf.summary.scalar(loss_tensor.op.name, loss_tensor)) - global_summaries.add( - tf.summary.scalar('TotalLoss', tf.losses.get_total_loss())) - - # Add the summaries from the first clone. These contain the summaries - # created by model_fn and either optimize_clones() or _gather_clone_loss(). - summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES, - first_clone_scope)) - summaries |= global_summaries - - # Merge all summaries together. - summary_op = tf.summary.merge(list(summaries), name='summary_op') - - # Soft placement allows placing on CPU ops without GPU implementation. - session_config = tf.ConfigProto(allow_soft_placement=True, - log_device_placement=False) - - # Save checkpoints regularly. 
- keep_checkpoint_every_n_hours = train_config.keep_checkpoint_every_n_hours - saver = tf.train.Saver( - keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours) - - slim.learning.train( - train_tensor, - logdir=train_dir, - master=master, - is_chief=is_chief, - session_config=session_config, - startup_delay_steps=train_config.startup_delay_steps, - init_fn=init_fn, - summary_op=summary_op, - number_of_steps=( - train_config.num_steps if train_config.num_steps else None), - save_summaries_secs=120, - sync_optimizer=sync_optimizer, - saver=saver) diff --git a/object_detection/trainer_test.py b/object_detection/trainer_test.py deleted file mode 100644 index caa8c1eb..00000000 --- a/object_detection/trainer_test.py +++ /dev/null @@ -1,204 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.trainer.""" - -import tensorflow as tf - -from google.protobuf import text_format - -from object_detection import trainer -from object_detection.core import losses -from object_detection.core import model -from object_detection.core import standard_fields as fields -from object_detection.protos import train_pb2 - - -NUMBER_OF_CLASSES = 2 - - -def get_input_function(): - """A function to get test inputs. 
Returns an image with one box.""" - image = tf.random_uniform([32, 32, 3], dtype=tf.float32) - key = tf.constant('image_000000') - class_label = tf.random_uniform( - [1], minval=0, maxval=NUMBER_OF_CLASSES, dtype=tf.int32) - box_label = tf.random_uniform( - [1, 4], minval=0.4, maxval=0.6, dtype=tf.float32) - - return { - fields.InputDataFields.image: image, - fields.InputDataFields.key: key, - fields.InputDataFields.groundtruth_classes: class_label, - fields.InputDataFields.groundtruth_boxes: box_label - } - - -class FakeDetectionModel(model.DetectionModel): - """A simple (and poor) DetectionModel for use in test.""" - - def __init__(self): - super(FakeDetectionModel, self).__init__(num_classes=NUMBER_OF_CLASSES) - self._classification_loss = losses.WeightedSigmoidClassificationLoss( - anchorwise_output=True) - self._localization_loss = losses.WeightedSmoothL1LocalizationLoss( - anchorwise_output=True) - - def preprocess(self, inputs): - """Input preprocessing, resizes images to 28x28. - - Args: - inputs: a [batch, height_in, width_in, channels] float32 tensor - representing a batch of images with values between 0 and 255.0. - - Returns: - preprocessed_inputs: a [batch, 28, 28, channels] float32 tensor. - """ - return tf.image.resize_images(inputs, [28, 28]) - - def predict(self, preprocessed_inputs): - """Prediction tensors from inputs tensor. - - Args: - preprocessed_inputs: a [batch, 28, 28, channels] float32 tensor. - - Returns: - prediction_dict: a dictionary holding prediction tensors to be - passed to the Loss or Postprocess functions. 
- """ - flattened_inputs = tf.contrib.layers.flatten(preprocessed_inputs) - class_prediction = tf.contrib.layers.fully_connected( - flattened_inputs, self._num_classes) - box_prediction = tf.contrib.layers.fully_connected(flattened_inputs, 4) - - return { - 'class_predictions_with_background': tf.reshape( - class_prediction, [-1, 1, self._num_classes]), - 'box_encodings': tf.reshape(box_prediction, [-1, 1, 4]) - } - - def postprocess(self, prediction_dict, **params): - """Convert predicted output tensors to final detections. Unused. - - Args: - prediction_dict: a dictionary holding prediction tensors. - **params: Additional keyword arguments for specific implementations of - DetectionModel. - - Returns: - detections: a dictionary with empty fields. - """ - return { - 'detection_boxes': None, - 'detection_scores': None, - 'detection_classes': None, - 'num_detections': None - } - - def loss(self, prediction_dict): - """Compute scalar loss tensors with respect to provided groundtruth. - - Calling this function requires that groundtruth tensors have been - provided via the provide_groundtruth function. - - Args: - prediction_dict: a dictionary holding predicted tensors - - Returns: - a dictionary mapping strings (loss names) to scalar tensors representing - loss values. 
- """ - batch_reg_targets = tf.stack( - self.groundtruth_lists(fields.BoxListFields.boxes)) - batch_cls_targets = tf.stack( - self.groundtruth_lists(fields.BoxListFields.classes)) - weights = tf.constant( - 1.0, dtype=tf.float32, - shape=[len(self.groundtruth_lists(fields.BoxListFields.boxes)), 1]) - - location_losses = self._localization_loss( - prediction_dict['box_encodings'], batch_reg_targets, - weights=weights) - cls_losses = self._classification_loss( - prediction_dict['class_predictions_with_background'], batch_cls_targets, - weights=weights) - - loss_dict = { - 'localization_loss': tf.reduce_sum(location_losses), - 'classification_loss': tf.reduce_sum(cls_losses), - } - return loss_dict - - def restore_map(self, from_detection_checkpoint=True): - """Returns a map of variables to load from a foreign checkpoint. - - Args: - from_detection_checkpoint: whether to restore from a full detection - checkpoint (with compatible variable names) or to restore from a - classification checkpoint for initialization prior to training. - - Returns: - A dict mapping variable names to variables. 
- """ - return {var.op.name: var for var in tf.global_variables()} - - -class TrainerTest(tf.test.TestCase): - - def test_configure_trainer_and_train_two_steps(self): - train_config_text_proto = """ - optimizer { - adam_optimizer { - learning_rate { - constant_learning_rate { - learning_rate: 0.01 - } - } - } - } - data_augmentation_options { - random_adjust_brightness { - max_delta: 0.2 - } - } - data_augmentation_options { - random_adjust_contrast { - min_delta: 0.7 - max_delta: 1.1 - } - } - num_steps: 2 - """ - train_config = train_pb2.TrainConfig() - text_format.Merge(train_config_text_proto, train_config) - - train_dir = self.get_temp_dir() - - trainer.train(create_tensor_dict_fn=get_input_function, - create_model_fn=FakeDetectionModel, - train_config=train_config, - master='', - task=0, - num_clones=1, - worker_replicas=1, - clone_on_cpu=True, - ps_tasks=0, - worker_job_name='worker', - is_chief=True, - train_dir=train_dir) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/utils/BUILD b/object_detection/utils/BUILD deleted file mode 100644 index 7e511c95..00000000 --- a/object_detection/utils/BUILD +++ /dev/null @@ -1,324 +0,0 @@ -# Tensorflow Object Detection API: Utility functions. 
- -package( - default_visibility = ["//visibility:public"], -) - -licenses(["notice"]) - -# Apache 2.0 - -py_library( - name = "category_util", - srcs = ["category_util.py"], - deps = ["//tensorflow"], -) - -py_library( - name = "config_util", - srcs = ["config_util.py"], - deps = [ - "//tensorflow", - "//tensorflow_models/object_detection/protos:eval_py_pb2", - "//tensorflow_models/object_detection/protos:input_reader_py_pb2", - "//tensorflow_models/object_detection/protos:model_py_pb2", - "//tensorflow_models/object_detection/protos:pipeline_py_pb2", - "//tensorflow_models/object_detection/protos:train_py_pb2", - ], -) - -py_library( - name = "dataset_util", - srcs = ["dataset_util.py"], - deps = [ - "//tensorflow", - ], -) - -py_library( - name = "label_map_util", - srcs = ["label_map_util.py"], - deps = [ - "//third_party/py/google/protobuf", - "//tensorflow", - "//tensorflow_models/object_detection/protos:string_int_label_map_py_pb2", - ], -) - -py_library( - name = "learning_schedules", - srcs = ["learning_schedules.py"], - deps = [ - "//tensorflow", - ], -) - -py_library( - name = "metrics", - srcs = ["metrics.py"], - deps = ["//third_party/py/numpy"], -) - -py_library( - name = "np_box_list", - srcs = ["np_box_list.py"], - deps = ["//tensorflow"], -) - -py_library( - name = "np_box_list_ops", - srcs = ["np_box_list_ops.py"], - deps = [ - ":np_box_list", - ":np_box_ops", - "//tensorflow", - ], -) - -py_library( - name = "np_box_ops", - srcs = ["np_box_ops.py"], - deps = ["//tensorflow"], -) - -py_library( - name = "object_detection_evaluation", - srcs = ["object_detection_evaluation.py"], - deps = [ - ":label_map_util", - ":metrics", - ":per_image_evaluation", - "//tensorflow", - "//tensorflow_models/object_detection/core:standard_fields", - ], -) - -py_library( - name = "ops", - srcs = ["ops.py"], - deps = [ - ":static_shape", - "//tensorflow", - "//tensorflow_models/object_detection/core:box_list", - 
"//tensorflow_models/object_detection/core:box_list_ops", - "//tensorflow_models/object_detection/core:standard_fields", - ], -) - -py_library( - name = "per_image_evaluation", - srcs = ["per_image_evaluation.py"], - deps = [ - ":np_box_list", - ":np_box_list_ops", - "//tensorflow", - ], -) - -py_library( - name = "shape_utils", - srcs = ["shape_utils.py"], - deps = ["//tensorflow"], -) - -py_library( - name = "static_shape", - srcs = ["static_shape.py"], - deps = [], -) - -py_library( - name = "test_utils", - srcs = ["test_utils.py"], - deps = [ - "//tensorflow", - "//tensorflow_models/object_detection/core:anchor_generator", - "//tensorflow_models/object_detection/core:box_coder", - "//tensorflow_models/object_detection/core:box_list", - "//tensorflow_models/object_detection/core:box_predictor", - "//tensorflow_models/object_detection/core:matcher", - "//tensorflow_models/object_detection/utils:shape_utils", - ], -) - -py_library( - name = "variables_helper", - srcs = ["variables_helper.py"], - deps = [ - "//tensorflow", - ], -) - -py_library( - name = "visualization_utils", - srcs = ["visualization_utils.py"], - deps = [ - "//third_party/py/PIL:pil", - "//third_party/py/matplotlib", - "//third_party/py/six", - "//tensorflow", - ], -) - -py_test( - name = "category_util_test", - srcs = ["category_util_test.py"], - deps = [ - ":category_util", - "//tensorflow", - ], -) - -py_test( - name = "config_util_test", - srcs = ["config_util_test.py"], - deps = [ - ":config_util", - "//tensorflow:tensorflow_google", - "//tensorflow_models/object_detection/protos:input_reader_py_pb2", - "//tensorflow_models/object_detection/protos:model_py_pb2", - "//tensorflow_models/object_detection/protos:pipeline_py_pb2", - "//tensorflow_models/object_detection/protos:train_py_pb2", - ], -) - -py_test( - name = "dataset_util_test", - srcs = ["dataset_util_test.py"], - deps = [ - ":dataset_util", - "//tensorflow", - ], -) - -py_test( - name = "label_map_util_test", - srcs = 
["label_map_util_test.py"], - deps = [ - ":label_map_util", - "//tensorflow", - ], -) - -py_test( - name = "learning_schedules_test", - srcs = ["learning_schedules_test.py"], - deps = [ - ":learning_schedules", - "//tensorflow", - ], -) - -py_test( - name = "metrics_test", - srcs = ["metrics_test.py"], - deps = [ - ":metrics", - "//tensorflow", - ], -) - -py_test( - name = "np_box_list_test", - srcs = ["np_box_list_test.py"], - deps = [ - ":np_box_list", - "//tensorflow", - ], -) - -py_test( - name = "np_box_list_ops_test", - srcs = ["np_box_list_ops_test.py"], - deps = [ - ":np_box_list", - ":np_box_list_ops", - "//tensorflow", - ], -) - -py_test( - name = "np_box_ops_test", - srcs = ["np_box_ops_test.py"], - deps = [ - ":np_box_ops", - "//tensorflow", - ], -) - -py_test( - name = "object_detection_evaluation_test", - srcs = ["object_detection_evaluation_test.py"], - deps = [ - ":object_detection_evaluation", - "//tensorflow", - "//tensorflow_models/object_detection/core:standard_fields", - ], -) - -py_test( - name = "ops_test", - srcs = ["ops_test.py"], - deps = [ - ":ops", - "//tensorflow", - "//tensorflow_models/object_detection/core:standard_fields", - ], -) - -py_test( - name = "per_image_evaluation_test", - srcs = ["per_image_evaluation_test.py"], - deps = [ - ":per_image_evaluation", - "//tensorflow", - ], -) - -py_test( - name = "shape_utils_test", - srcs = ["shape_utils_test.py"], - deps = [ - ":shape_utils", - "//tensorflow", - ], -) - -py_test( - name = "static_shape_test", - srcs = ["static_shape_test.py"], - deps = [ - ":static_shape", - "//tensorflow", - ], -) - -py_test( - name = "test_utils_test", - srcs = ["test_utils_test.py"], - deps = [ - ":test_utils", - "//tensorflow", - ], -) - -py_test( - name = "variables_helper_test", - srcs = ["variables_helper_test.py"], - deps = [ - ":variables_helper", - "//tensorflow", - ], -) - -py_test( - name = "visualization_utils_test", - srcs = ["visualization_utils_test.py"], - data = [ - 
"//tensorflow_models/object_detection/test_images:image1.jpg", - ], - deps = [ - ":visualization_utils", - "//third_party/py/PIL:pil", - ], -) diff --git a/object_detection/utils/__init__.py b/object_detection/utils/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/object_detection/utils/__pycache__/__init__.cpython-35.pyc b/object_detection/utils/__pycache__/__init__.cpython-35.pyc deleted file mode 100644 index d87697a2..00000000 Binary files a/object_detection/utils/__pycache__/__init__.cpython-35.pyc and /dev/null differ diff --git a/object_detection/utils/__pycache__/dataset_util.cpython-35.pyc b/object_detection/utils/__pycache__/dataset_util.cpython-35.pyc deleted file mode 100644 index 9d680ab0..00000000 Binary files a/object_detection/utils/__pycache__/dataset_util.cpython-35.pyc and /dev/null differ diff --git a/object_detection/utils/__pycache__/label_map_util.cpython-35.pyc b/object_detection/utils/__pycache__/label_map_util.cpython-35.pyc deleted file mode 100644 index a26f73b2..00000000 Binary files a/object_detection/utils/__pycache__/label_map_util.cpython-35.pyc and /dev/null differ diff --git a/object_detection/utils/__pycache__/ops.cpython-35.pyc b/object_detection/utils/__pycache__/ops.cpython-35.pyc deleted file mode 100644 index 66cbd75d..00000000 Binary files a/object_detection/utils/__pycache__/ops.cpython-35.pyc and /dev/null differ diff --git a/object_detection/utils/__pycache__/shape_utils.cpython-35.pyc b/object_detection/utils/__pycache__/shape_utils.cpython-35.pyc deleted file mode 100644 index 0b99fd7f..00000000 Binary files a/object_detection/utils/__pycache__/shape_utils.cpython-35.pyc and /dev/null differ diff --git a/object_detection/utils/__pycache__/static_shape.cpython-35.pyc b/object_detection/utils/__pycache__/static_shape.cpython-35.pyc deleted file mode 100644 index 2e303314..00000000 Binary files a/object_detection/utils/__pycache__/static_shape.cpython-35.pyc and /dev/null differ diff --git 
a/object_detection/utils/__pycache__/visualization_utils.cpython-35.pyc b/object_detection/utils/__pycache__/visualization_utils.cpython-35.pyc deleted file mode 100644 index c7fd5b5a..00000000 Binary files a/object_detection/utils/__pycache__/visualization_utils.cpython-35.pyc and /dev/null differ diff --git a/object_detection/utils/category_util.py b/object_detection/utils/category_util.py deleted file mode 100644 index fdd9c1c1..00000000 --- a/object_detection/utils/category_util.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Functions for importing/exporting Object Detection categories.""" -import csv - -import tensorflow as tf - - -def load_categories_from_csv_file(csv_path): - """Loads categories from a csv file. - - The CSV file should have one comma delimited numeric category id and string - category name pair per line. For example: - - 0,"cat" - 1,"dog" - 2,"bird" - ... - - Args: - csv_path: Path to the csv file to be parsed into categories. - Returns: - categories: A list of dictionaries representing all possible categories. - The categories will contain an integer 'id' field and a string - 'name' field. - Raises: - ValueError: If the csv file is incorrectly formatted. 
- """ - categories = [] - - with tf.gfile.Open(csv_path, 'r') as csvfile: - reader = csv.reader(csvfile, delimiter=',', quotechar='"') - for row in reader: - if not row: - continue - - if len(row) != 2: - raise ValueError('Expected 2 fields per row in csv: %s' % ','.join(row)) - - category_id = int(row[0]) - category_name = row[1] - categories.append({'id': category_id, 'name': category_name}) - - return categories - - -def save_categories_to_csv_file(categories, csv_path): - """Saves categories to a csv file. - - Args: - categories: A list of dictionaries representing categories to save to file. - Each category must contain an 'id' and 'name' field. - csv_path: Path to the csv file to be parsed into categories. - """ - categories.sort(key=lambda x: x['id']) - with tf.gfile.Open(csv_path, 'w') as csvfile: - writer = csv.writer(csvfile, delimiter=',', quotechar='"') - for category in categories: - writer.writerow([category['id'], category['name']]) diff --git a/object_detection/utils/category_util_test.py b/object_detection/utils/category_util_test.py deleted file mode 100644 index 9c99079e..00000000 --- a/object_detection/utils/category_util_test.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for object_detection.utils.category_util.""" -import os - -import tensorflow as tf - -from object_detection.utils import category_util - - -class EvalUtilTest(tf.test.TestCase): - - def test_load_categories_from_csv_file(self): - csv_data = """ - 0,"cat" - 1,"dog" - 2,"bird" - """.strip(' ') - csv_path = os.path.join(self.get_temp_dir(), 'test.csv') - with tf.gfile.Open(csv_path, 'wb') as f: - f.write(csv_data) - - categories = category_util.load_categories_from_csv_file(csv_path) - self.assertTrue({'id': 0, 'name': 'cat'} in categories) - self.assertTrue({'id': 1, 'name': 'dog'} in categories) - self.assertTrue({'id': 2, 'name': 'bird'} in categories) - - def test_save_categories_to_csv_file(self): - categories = [ - {'id': 0, 'name': 'cat'}, - {'id': 1, 'name': 'dog'}, - {'id': 2, 'name': 'bird'}, - ] - csv_path = os.path.join(self.get_temp_dir(), 'test.csv') - category_util.save_categories_to_csv_file(categories, csv_path) - saved_categories = category_util.load_categories_from_csv_file(csv_path) - self.assertEqual(saved_categories, categories) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/utils/config_util.py b/object_detection/utils/config_util.py deleted file mode 100644 index 1bf30089..00000000 --- a/object_detection/utils/config_util.py +++ /dev/null @@ -1,452 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Functions for reading and updating configuration files.""" - -import tensorflow as tf - -from google.protobuf import text_format - -from object_detection.protos import eval_pb2 -from object_detection.protos import input_reader_pb2 -from object_detection.protos import model_pb2 -from object_detection.protos import pipeline_pb2 -from object_detection.protos import train_pb2 - - -def get_configs_from_pipeline_file(pipeline_config_path): - """Reads configuration from a pipeline_pb2.TrainEvalPipelineConfig. - - Args: - pipeline_config_path: Path to pipeline_pb2.TrainEvalPipelineConfig text - proto. - - Returns: - Dictionary of configuration objects. Keys are `model`, `train_config`, - `train_input_config`, `eval_config`, `eval_input_config`. Value are the - corresponding config objects. - """ - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - with tf.gfile.GFile(pipeline_config_path, "r") as f: - proto_str = f.read() - text_format.Merge(proto_str, pipeline_config) - - configs = {} - configs["model"] = pipeline_config.model - configs["train_config"] = pipeline_config.train_config - configs["train_input_config"] = pipeline_config.train_input_reader - configs["eval_config"] = pipeline_config.eval_config - configs["eval_input_config"] = pipeline_config.eval_input_reader - - return configs - - -def create_pipeline_proto_from_configs(configs): - """Creates a pipeline_pb2.TrainEvalPipelineConfig from configs dictionary. - - This function nearly performs the inverse operation of - get_configs_from_pipeline_file(). Instead of returning a file path, it returns - a `TrainEvalPipelineConfig` object. - - Args: - configs: Dictionary of configs. See get_configs_from_pipeline_file(). - - Returns: - A fully populated pipeline_pb2.TrainEvalPipelineConfig. 
- """ - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - pipeline_config.model.CopyFrom(configs["model"]) - pipeline_config.train_config.CopyFrom(configs["train_config"]) - pipeline_config.train_input_reader.CopyFrom(configs["train_input_config"]) - pipeline_config.eval_config.CopyFrom(configs["eval_config"]) - pipeline_config.eval_input_reader.CopyFrom(configs["eval_input_config"]) - return pipeline_config - - -def get_configs_from_multiple_files(model_config_path="", - train_config_path="", - train_input_config_path="", - eval_config_path="", - eval_input_config_path=""): - """Reads training configuration from multiple config files. - - Args: - model_config_path: Path to model_pb2.DetectionModel. - train_config_path: Path to train_pb2.TrainConfig. - train_input_config_path: Path to input_reader_pb2.InputReader. - eval_config_path: Path to eval_pb2.EvalConfig. - eval_input_config_path: Path to input_reader_pb2.InputReader. - - Returns: - Dictionary of configuration objects. Keys are `model`, `train_config`, - `train_input_config`, `eval_config`, `eval_input_config`. Key/Values are - returned only for valid (non-empty) strings. 
- """ - configs = {} - if model_config_path: - model_config = model_pb2.DetectionModel() - with tf.gfile.GFile(model_config_path, "r") as f: - text_format.Merge(f.read(), model_config) - configs["model"] = model_config - - if train_config_path: - train_config = train_pb2.TrainConfig() - with tf.gfile.GFile(train_config_path, "r") as f: - text_format.Merge(f.read(), train_config) - configs["train_config"] = train_config - - if train_input_config_path: - train_input_config = input_reader_pb2.InputReader() - with tf.gfile.GFile(train_input_config_path, "r") as f: - text_format.Merge(f.read(), train_input_config) - configs["train_input_config"] = train_input_config - - if eval_config_path: - eval_config = eval_pb2.EvalConfig() - with tf.gfile.GFile(eval_config_path, "r") as f: - text_format.Merge(f.read(), eval_config) - configs["eval_config"] = eval_config - - if eval_input_config_path: - eval_input_config = input_reader_pb2.InputReader() - with tf.gfile.GFile(eval_input_config_path, "r") as f: - text_format.Merge(f.read(), eval_input_config) - configs["eval_input_config"] = eval_input_config - - return configs - - -def get_number_of_classes(model_config): - """Returns the number of classes for a detection model. - - Args: - model_config: A model_pb2.DetectionModel. - - Returns: - Number of classes. - - Raises: - ValueError: If the model type is not recognized. - """ - meta_architecture = model_config.WhichOneof("model") - if meta_architecture == "faster_rcnn": - return model_config.faster_rcnn.num_classes - if meta_architecture == "ssd": - return model_config.ssd.num_classes - - raise ValueError("Expected the model to be one of 'faster_rcnn' or 'ssd'.") - - -def get_optimizer_type(train_config): - """Returns the optimizer type for training. - - Args: - train_config: A train_pb2.TrainConfig. 
- - Returns: - The type of the optimizer - """ - return train_config.optimizer.WhichOneof("optimizer") - - -def get_learning_rate_type(optimizer_config): - """Returns the learning rate type for training. - - Args: - optimizer_config: An optimizer_pb2.Optimizer. - - Returns: - The type of the learning rate. - """ - return optimizer_config.learning_rate.WhichOneof("learning_rate") - - -def merge_external_params_with_configs(configs, hparams=None, **kwargs): - """Updates `configs` dictionary based on supplied parameters. - - This utility is for modifying specific fields in the object detection configs. - Say that one would like to experiment with different learning rates, momentum - values, or batch sizes. Rather than creating a new config text file for each - experiment, one can use a single base config file, and update particular - values. - - Args: - configs: Dictionary of configuration objects. See outputs from - get_configs_from_pipeline_file() or get_configs_from_multiple_files(). - hparams: A `HParams`. - **kwargs: Extra keyword arguments that are treated the same way as - attribute/value pairs in `hparams`. Note that hyperparameters with the - same names will override keyword arguments. - - Returns: - `configs` dictionary. - """ - - if hparams: - kwargs.update(hparams.values()) - for key, value in kwargs.items(): - if key == "learning_rate": - _update_initial_learning_rate(configs, value) - tf.logging.info("Overwriting learning rate: %f", value) - if key == "batch_size": - _update_batch_size(configs, value) - tf.logging.info("Overwriting batch size: %d", value) - if key == "momentum_optimizer_value": - _update_momentum_optimizer_value(configs, value) - tf.logging.info("Overwriting momentum optimizer value: %f", value) - if key == "classification_localization_weight_ratio": - # Localization weight is fixed to 1.0. 
- _update_classification_localization_weight_ratio(configs, value) - if key == "focal_loss_gamma": - _update_focal_loss_gamma(configs, value) - if key == "focal_loss_alpha": - _update_focal_loss_alpha(configs, value) - if key == "train_steps": - _update_train_steps(configs, value) - tf.logging.info("Overwriting train steps: %d", value) - if key == "eval_steps": - _update_eval_steps(configs, value) - tf.logging.info("Overwriting eval steps: %d", value) - if key == "train_input_path": - _update_input_path(configs["train_input_config"], value) - tf.logging.info("Overwriting train input path: %s", value) - if key == "eval_input_path": - _update_input_path(configs["eval_input_config"], value) - tf.logging.info("Overwriting eval input path: %s", value) - if key == "label_map_path": - if value: - _update_label_map_path(configs, value) - tf.logging.info("Overwriting label map path: %s", value) - return configs - - -def _update_initial_learning_rate(configs, learning_rate): - """Updates `configs` to reflect the new initial learning rate. - - The configs dictionary is updated in place, and hence not returned. - - Args: - configs: Dictionary of configuration objects. See outputs from - get_configs_from_pipeline_file() or get_configs_from_multiple_files(). - learning_rate: Initial learning rate for optimizer. - - Raises: - TypeError: if optimizer type is not supported, or if learning rate type is - not supported. - """ - - optimizer_type = get_optimizer_type(configs["train_config"]) - if optimizer_type == "rms_prop_optimizer": - optimizer_config = configs["train_config"].optimizer.rms_prop_optimizer - elif optimizer_type == "momentum_optimizer": - optimizer_config = configs["train_config"].optimizer.momentum_optimizer - elif optimizer_type == "adam_optimizer": - optimizer_config = configs["train_config"].optimizer.adam_optimizer - else: - raise TypeError("Optimizer %s is not supported." 
% optimizer_type) - - learning_rate_type = get_learning_rate_type(optimizer_config) - if learning_rate_type == "constant_learning_rate": - constant_lr = optimizer_config.learning_rate.constant_learning_rate - constant_lr.learning_rate = learning_rate - elif learning_rate_type == "exponential_decay_learning_rate": - exponential_lr = ( - optimizer_config.learning_rate.exponential_decay_learning_rate) - exponential_lr.initial_learning_rate = learning_rate - elif learning_rate_type == "manual_step_learning_rate": - manual_lr = optimizer_config.learning_rate.manual_step_learning_rate - original_learning_rate = manual_lr.initial_learning_rate - learning_rate_scaling = float(learning_rate) / original_learning_rate - manual_lr.initial_learning_rate = learning_rate - for schedule in manual_lr.schedule: - schedule.learning_rate *= learning_rate_scaling - else: - raise TypeError("Learning rate %s is not supported." % learning_rate_type) - - -def _update_batch_size(configs, batch_size): - """Updates `configs` to reflect the new training batch size. - - The configs dictionary is updated in place, and hence not returned. - - Args: - configs: Dictionary of configuration objects. See outputs from - get_configs_from_pipeline_file() or get_configs_from_multiple_files(). - batch_size: Batch size to use for training (Ideally a power of 2). Inputs - are rounded, and capped to be 1 or greater. - """ - configs["train_config"].batch_size = max(1, int(round(batch_size))) - - -def _update_momentum_optimizer_value(configs, momentum): - """Updates `configs` to reflect the new momentum value. - - Momentum is only supported for RMSPropOptimizer and MomentumOptimizer. For any - other optimizer, no changes take place. The configs dictionary is updated in - place, and hence not returned. - - Args: - configs: Dictionary of configuration objects. See outputs from - get_configs_from_pipeline_file() or get_configs_from_multiple_files(). - momentum: New momentum value. 
Values are clipped at 0.0 and 1.0. - - Raises: - TypeError: If the optimizer type is not `rms_prop_optimizer` or - `momentum_optimizer`. - """ - optimizer_type = get_optimizer_type(configs["train_config"]) - if optimizer_type == "rms_prop_optimizer": - optimizer_config = configs["train_config"].optimizer.rms_prop_optimizer - elif optimizer_type == "momentum_optimizer": - optimizer_config = configs["train_config"].optimizer.momentum_optimizer - else: - raise TypeError("Optimizer type must be one of `rms_prop_optimizer` or " - "`momentum_optimizer`.") - - optimizer_config.momentum_optimizer_value = min(max(0.0, momentum), 1.0) - - -def _update_classification_localization_weight_ratio(configs, ratio): - """Updates the classification/localization weight loss ratio. - - Detection models usually define a loss weight for both classification and - objectness. This function updates the weights such that the ratio between - classification weight to localization weight is the ratio provided. - Arbitrarily, localization weight is set to 1.0. - - Note that in the case of Faster R-CNN, this same ratio is applied to the first - stage objectness loss weight relative to localization loss weight. - - The configs dictionary is updated in place, and hence not returned. - - Args: - configs: Dictionary of configuration objects. See outputs from - get_configs_from_pipeline_file() or get_configs_from_multiple_files(). - ratio: Desired ratio of classification (and/or objectness) loss weight to - localization loss weight. 
- """ - meta_architecture = configs["model"].WhichOneof("model") - if meta_architecture == "faster_rcnn": - model = configs["model"].faster_rcnn - model.first_stage_localization_loss_weight = 1.0 - model.first_stage_objectness_loss_weight = ratio - model.second_stage_localization_loss_weight = 1.0 - model.second_stage_classification_loss_weight = ratio - if meta_architecture == "ssd": - model = configs["model"].ssd - model.loss.localization_weight = 1.0 - model.loss.classification_weight = ratio - - -def _get_classification_loss(model_config): - """Returns the classification loss for a model.""" - meta_architecture = model_config.WhichOneof("model") - if meta_architecture == "faster_rcnn": - model = model_config.faster_rcnn - classification_loss = model.second_stage_classification_loss - if meta_architecture == "ssd": - model = model_config.ssd - classification_loss = model.loss.classification_loss - else: - raise TypeError("Did not recognize the model architecture.") - return classification_loss - - -def _update_focal_loss_gamma(configs, gamma): - """Updates the gamma value for a sigmoid focal loss. - - The configs dictionary is updated in place, and hence not returned. - - Args: - configs: Dictionary of configuration objects. See outputs from - get_configs_from_pipeline_file() or get_configs_from_multiple_files(). - gamma: Exponent term in focal loss. - - Raises: - TypeError: If the classification loss is not `weighted_sigmoid_focal`. - """ - classification_loss = _get_classification_loss(configs["model"]) - classification_loss_type = classification_loss.WhichOneof( - "classification_loss") - if classification_loss_type != "weighted_sigmoid_focal": - raise TypeError("Classification loss must be `weighted_sigmoid_focal`.") - classification_loss.weighted_sigmoid_focal.gamma = gamma - - -def _update_focal_loss_alpha(configs, alpha): - """Updates the alpha value for a sigmoid focal loss. - - The configs dictionary is updated in place, and hence not returned. 
- - Args: - configs: Dictionary of configuration objects. See outputs from - get_configs_from_pipeline_file() or get_configs_from_multiple_files(). - alpha: Class weight multiplier for sigmoid loss. - - Raises: - TypeError: If the classification loss is not `weighted_sigmoid_focal`. - """ - classification_loss = _get_classification_loss(configs["model"]) - classification_loss_type = classification_loss.WhichOneof( - "classification_loss") - if classification_loss_type != "weighted_sigmoid_focal": - raise TypeError("Classification loss must be `weighted_sigmoid_focal`.") - classification_loss.weighted_sigmoid_focal.alpha = alpha - - -def _update_train_steps(configs, train_steps): - """Updates `configs` to reflect new number of training steps.""" - configs["train_config"].num_steps = int(train_steps) - - -def _update_eval_steps(configs, eval_steps): - """Updates `configs` to reflect new number of eval steps per evaluation.""" - configs["eval_config"].num_examples = int(eval_steps) - - -def _update_input_path(input_config, input_path): - """Updates input configuration to reflect a new input path. - - The input_config object is updated in place, and hence not returned. - - Args: - input_config: A input_reader_pb2.InputReader. - input_path: A path to data or list of paths. - - Raises: - TypeError: if input reader type is not `tf_record_input_reader`. - """ - input_reader_type = input_config.WhichOneof("input_reader") - if input_reader_type == "tf_record_input_reader": - input_config.tf_record_input_reader.ClearField("input_path") - if isinstance(input_path, list): - input_config.tf_record_input_reader.input_path.extend(input_path) - else: - input_config.tf_record_input_reader.input_path.append(input_path) - else: - raise TypeError("Input reader type must be `tf_record_input_reader`.") - - -def _update_label_map_path(configs, label_map_path): - """Updates the label map path for both train and eval input readers. 
- - The configs dictionary is updated in place, and hence not returned. - - Args: - configs: Dictionary of configuration objects. See outputs from - get_configs_from_pipeline_file() or get_configs_from_multiple_files(). - label_map_path: New path to `StringIntLabelMap` pbtxt file. - """ - configs["train_input_config"].label_map_path = label_map_path - configs["eval_input_config"].label_map_path = label_map_path diff --git a/object_detection/utils/config_util_test.py b/object_detection/utils/config_util_test.py deleted file mode 100644 index 075509e8..00000000 --- a/object_detection/utils/config_util_test.py +++ /dev/null @@ -1,401 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for object_detection.utils.config_util.""" - -import os - -import google3 -import tensorflow.google as tf - -from google.protobuf import text_format - -from object_detection.protos import eval_pb2 -from object_detection.protos import input_reader_pb2 -from object_detection.protos import model_pb2 -from object_detection.protos import pipeline_pb2 -from object_detection.protos import train_pb2 -from object_detection.utils import config_util - - -def _write_config(config, config_path): - """Writes a config object to disk.""" - config_text = text_format.MessageToString(config) - with tf.gfile.Open(config_path, "wb") as f: - f.write(config_text) - - -def _update_optimizer_with_constant_learning_rate(optimizer, learning_rate): - """Adds a new constant learning rate.""" - constant_lr = optimizer.learning_rate.constant_learning_rate - constant_lr.learning_rate = learning_rate - - -def _update_optimizer_with_exponential_decay_learning_rate( - optimizer, learning_rate): - """Adds a new exponential decay learning rate.""" - exponential_lr = optimizer.learning_rate.exponential_decay_learning_rate - exponential_lr.initial_learning_rate = learning_rate - - -def _update_optimizer_with_manual_step_learning_rate( - optimizer, initial_learning_rate, learning_rate_scaling): - """Adds a learning rate schedule.""" - manual_lr = optimizer.learning_rate.manual_step_learning_rate - manual_lr.initial_learning_rate = initial_learning_rate - for i in range(3): - schedule = manual_lr.schedule.add() - schedule.learning_rate = initial_learning_rate * learning_rate_scaling**i - - -class ConfigUtilTest(tf.test.TestCase): - - def test_get_configs_from_pipeline_file(self): - """Test that proto configs can be read from pipeline config file.""" - pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config") - - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - 
pipeline_config.model.faster_rcnn.num_classes = 10 - pipeline_config.train_config.batch_size = 32 - pipeline_config.train_input_reader.label_map_path = "path/to/label_map" - pipeline_config.eval_config.num_examples = 20 - pipeline_config.eval_input_reader.queue_capacity = 100 - - _write_config(pipeline_config, pipeline_config_path) - - configs = config_util.get_configs_from_pipeline_file(pipeline_config_path) - self.assertProtoEquals(pipeline_config.model, configs["model"]) - self.assertProtoEquals(pipeline_config.train_config, - configs["train_config"]) - self.assertProtoEquals(pipeline_config.train_input_reader, - configs["train_input_config"]) - self.assertProtoEquals(pipeline_config.eval_config, - configs["eval_config"]) - self.assertProtoEquals(pipeline_config.eval_input_reader, - configs["eval_input_config"]) - - def test_create_pipeline_proto_from_configs(self): - """Tests that proto can be reconstructed from configs dictionary.""" - pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config") - - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - pipeline_config.model.faster_rcnn.num_classes = 10 - pipeline_config.train_config.batch_size = 32 - pipeline_config.train_input_reader.label_map_path = "path/to/label_map" - pipeline_config.eval_config.num_examples = 20 - pipeline_config.eval_input_reader.queue_capacity = 100 - _write_config(pipeline_config, pipeline_config_path) - - configs = config_util.get_configs_from_pipeline_file(pipeline_config_path) - pipeline_config_reconstructed = ( - config_util.create_pipeline_proto_from_configs(configs)) - self.assertEqual(pipeline_config, pipeline_config_reconstructed) - - def test_get_configs_from_multiple_files(self): - """Tests that proto configs can be read from multiple files.""" - temp_dir = self.get_temp_dir() - - # Write model config file. 
- model_config_path = os.path.join(temp_dir, "model.config") - model = model_pb2.DetectionModel() - model.faster_rcnn.num_classes = 10 - _write_config(model, model_config_path) - - # Write train config file. - train_config_path = os.path.join(temp_dir, "train.config") - train_config = train_config = train_pb2.TrainConfig() - train_config.batch_size = 32 - _write_config(train_config, train_config_path) - - # Write train input config file. - train_input_config_path = os.path.join(temp_dir, "train_input.config") - train_input_config = input_reader_pb2.InputReader() - train_input_config.label_map_path = "path/to/label_map" - _write_config(train_input_config, train_input_config_path) - - # Write eval config file. - eval_config_path = os.path.join(temp_dir, "eval.config") - eval_config = eval_pb2.EvalConfig() - eval_config.num_examples = 20 - _write_config(eval_config, eval_config_path) - - # Write eval input config file. - eval_input_config_path = os.path.join(temp_dir, "eval_input.config") - eval_input_config = input_reader_pb2.InputReader() - eval_input_config.label_map_path = "path/to/another/label_map" - _write_config(eval_input_config, eval_input_config_path) - - configs = config_util.get_configs_from_multiple_files( - model_config_path=model_config_path, - train_config_path=train_config_path, - train_input_config_path=train_input_config_path, - eval_config_path=eval_config_path, - eval_input_config_path=eval_input_config_path) - self.assertProtoEquals(model, configs["model"]) - self.assertProtoEquals(train_config, configs["train_config"]) - self.assertProtoEquals(train_input_config, - configs["train_input_config"]) - self.assertProtoEquals(eval_config, configs["eval_config"]) - self.assertProtoEquals(eval_input_config, - configs["eval_input_config"]) - - def _assertOptimizerWithNewLearningRate(self, optimizer_name): - """Asserts successful updating of all learning rate schemes.""" - original_learning_rate = 0.7 - learning_rate_scaling = 0.1 - hparams = 
tf.HParams(learning_rate=0.15) - pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config") - - # Constant learning rate. - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - optimizer = getattr(pipeline_config.train_config.optimizer, optimizer_name) - _update_optimizer_with_constant_learning_rate(optimizer, - original_learning_rate) - _write_config(pipeline_config, pipeline_config_path) - - configs = config_util.get_configs_from_pipeline_file(pipeline_config_path) - configs = config_util.merge_external_params_with_configs(configs, hparams) - optimizer = getattr(configs["train_config"].optimizer, optimizer_name) - constant_lr = optimizer.learning_rate.constant_learning_rate - self.assertAlmostEqual(hparams.learning_rate, constant_lr.learning_rate) - - # Exponential decay learning rate. - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - optimizer = getattr(pipeline_config.train_config.optimizer, optimizer_name) - _update_optimizer_with_exponential_decay_learning_rate( - optimizer, original_learning_rate) - _write_config(pipeline_config, pipeline_config_path) - - configs = config_util.get_configs_from_pipeline_file(pipeline_config_path) - configs = config_util.merge_external_params_with_configs(configs, hparams) - optimizer = getattr(configs["train_config"].optimizer, optimizer_name) - exponential_lr = optimizer.learning_rate.exponential_decay_learning_rate - self.assertAlmostEqual(hparams.learning_rate, - exponential_lr.initial_learning_rate) - - # Manual step learning rate. 
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - optimizer = getattr(pipeline_config.train_config.optimizer, optimizer_name) - _update_optimizer_with_manual_step_learning_rate( - optimizer, original_learning_rate, learning_rate_scaling) - _write_config(pipeline_config, pipeline_config_path) - - configs = config_util.get_configs_from_pipeline_file(pipeline_config_path) - configs = config_util.merge_external_params_with_configs(configs, hparams) - optimizer = getattr(configs["train_config"].optimizer, optimizer_name) - manual_lr = optimizer.learning_rate.manual_step_learning_rate - self.assertAlmostEqual(hparams.learning_rate, - manual_lr.initial_learning_rate) - for i, schedule in enumerate(manual_lr.schedule): - self.assertAlmostEqual(hparams.learning_rate * learning_rate_scaling**i, - schedule.learning_rate) - - def testRMSPropWithNewLearingRate(self): - """Tests new learning rates for RMSProp Optimizer.""" - self._assertOptimizerWithNewLearningRate("rms_prop_optimizer") - - def testMomentumOptimizerWithNewLearningRate(self): - """Tests new learning rates for Momentum Optimizer.""" - self._assertOptimizerWithNewLearningRate("momentum_optimizer") - - def testAdamOptimizerWithNewLearningRate(self): - """Tests new learning rates for Adam Optimizer.""" - self._assertOptimizerWithNewLearningRate("adam_optimizer") - - def testNewBatchSize(self): - """Tests that batch size is updated appropriately.""" - original_batch_size = 2 - hparams = tf.HParams(batch_size=16) - pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config") - - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - pipeline_config.train_config.batch_size = original_batch_size - _write_config(pipeline_config, pipeline_config_path) - - configs = config_util.get_configs_from_pipeline_file(pipeline_config_path) - configs = config_util.merge_external_params_with_configs(configs, hparams) - new_batch_size = configs["train_config"].batch_size - self.assertEqual(16, 
new_batch_size) - - def testNewBatchSizeWithClipping(self): - """Tests that batch size is clipped to 1 from below.""" - original_batch_size = 2 - hparams = tf.HParams(batch_size=0.5) - pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config") - - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - pipeline_config.train_config.batch_size = original_batch_size - _write_config(pipeline_config, pipeline_config_path) - - configs = config_util.get_configs_from_pipeline_file(pipeline_config_path) - configs = config_util.merge_external_params_with_configs(configs, hparams) - new_batch_size = configs["train_config"].batch_size - self.assertEqual(1, new_batch_size) # Clipped to 1.0. - - def testNewMomentumOptimizerValue(self): - """Tests that new momentum value is updated appropriately.""" - original_momentum_value = 0.4 - hparams = tf.HParams(momentum_optimizer_value=1.1) - pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config") - - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - optimizer_config = pipeline_config.train_config.optimizer.rms_prop_optimizer - optimizer_config.momentum_optimizer_value = original_momentum_value - _write_config(pipeline_config, pipeline_config_path) - - configs = config_util.get_configs_from_pipeline_file(pipeline_config_path) - configs = config_util.merge_external_params_with_configs(configs, hparams) - optimizer_config = configs["train_config"].optimizer.rms_prop_optimizer - new_momentum_value = optimizer_config.momentum_optimizer_value - self.assertAlmostEqual(1.0, new_momentum_value) # Clipped to 1.0. 
- - def testNewClassificationLocalizationWeightRatio(self): - """Tests that the loss weight ratio is updated appropriately.""" - original_localization_weight = 0.1 - original_classification_weight = 0.2 - new_weight_ratio = 5.0 - hparams = tf.HParams( - classification_localization_weight_ratio=new_weight_ratio) - pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config") - - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - pipeline_config.model.ssd.loss.localization_weight = ( - original_localization_weight) - pipeline_config.model.ssd.loss.classification_weight = ( - original_classification_weight) - _write_config(pipeline_config, pipeline_config_path) - - configs = config_util.get_configs_from_pipeline_file(pipeline_config_path) - configs = config_util.merge_external_params_with_configs(configs, hparams) - loss = configs["model"].ssd.loss - self.assertAlmostEqual(1.0, loss.localization_weight) - self.assertAlmostEqual(new_weight_ratio, loss.classification_weight) - - def testNewFocalLossParameters(self): - """Tests that the loss weight ratio is updated appropriately.""" - original_alpha = 1.0 - original_gamma = 1.0 - new_alpha = 0.3 - new_gamma = 2.0 - hparams = tf.HParams(focal_loss_alpha=new_alpha, focal_loss_gamma=new_gamma) - pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config") - - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - classification_loss = pipeline_config.model.ssd.loss.classification_loss - classification_loss.weighted_sigmoid_focal.alpha = original_alpha - classification_loss.weighted_sigmoid_focal.gamma = original_gamma - _write_config(pipeline_config, pipeline_config_path) - - configs = config_util.get_configs_from_pipeline_file(pipeline_config_path) - configs = config_util.merge_external_params_with_configs(configs, hparams) - classification_loss = configs["model"].ssd.loss.classification_loss - self.assertAlmostEqual(new_alpha, - classification_loss.weighted_sigmoid_focal.alpha) - 
self.assertAlmostEqual(new_gamma, - classification_loss.weighted_sigmoid_focal.gamma) - - def testMergingKeywordArguments(self): - """Tests that keyword arguments get merged as do hyperparameters.""" - original_num_train_steps = 100 - original_num_eval_steps = 5 - desired_num_train_steps = 10 - desired_num_eval_steps = 1 - pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config") - - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - pipeline_config.train_config.num_steps = original_num_train_steps - pipeline_config.eval_config.num_examples = original_num_eval_steps - _write_config(pipeline_config, pipeline_config_path) - - configs = config_util.get_configs_from_pipeline_file(pipeline_config_path) - configs = config_util.merge_external_params_with_configs( - configs, - train_steps=desired_num_train_steps, - eval_steps=desired_num_eval_steps) - train_steps = configs["train_config"].num_steps - eval_steps = configs["eval_config"].num_examples - self.assertEqual(desired_num_train_steps, train_steps) - self.assertEqual(desired_num_eval_steps, eval_steps) - - def testGetNumberOfClasses(self): - """Tests that number of classes can be retrieved.""" - pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config") - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - pipeline_config.model.faster_rcnn.num_classes = 20 - _write_config(pipeline_config, pipeline_config_path) - - configs = config_util.get_configs_from_pipeline_file(pipeline_config_path) - number_of_classes = config_util.get_number_of_classes(configs["model"]) - self.assertEqual(20, number_of_classes) - - def testNewTrainInputPath(self): - """Tests that train input path can be overwritten with single file.""" - original_train_path = ["path/to/data"] - new_train_path = "another/path/to/data" - pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config") - - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - reader_config = 
pipeline_config.train_input_reader.tf_record_input_reader - reader_config.input_path.extend(original_train_path) - _write_config(pipeline_config, pipeline_config_path) - - configs = config_util.get_configs_from_pipeline_file(pipeline_config_path) - configs = config_util.merge_external_params_with_configs( - configs, train_input_path=new_train_path) - reader_config = configs["train_input_config"].tf_record_input_reader - final_path = reader_config.input_path - self.assertEqual([new_train_path], final_path) - - def testNewTrainInputPathList(self): - """Tests that train input path can be overwritten with multiple files.""" - original_train_path = ["path/to/data"] - new_train_path = ["another/path/to/data", "yet/another/path/to/data"] - pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config") - - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - reader_config = pipeline_config.train_input_reader.tf_record_input_reader - reader_config.input_path.extend(original_train_path) - _write_config(pipeline_config, pipeline_config_path) - - configs = config_util.get_configs_from_pipeline_file(pipeline_config_path) - configs = config_util.merge_external_params_with_configs( - configs, train_input_path=new_train_path) - reader_config = configs["train_input_config"].tf_record_input_reader - final_path = reader_config.input_path - self.assertEqual(new_train_path, final_path) - - def testNewLabelMapPath(self): - """Tests that label map path can be overwritten in input readers.""" - original_label_map_path = "path/to/original/label_map" - new_label_map_path = "path//to/new/label_map" - pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config") - - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - train_input_reader = pipeline_config.train_input_reader - train_input_reader.label_map_path = original_label_map_path - eval_input_reader = pipeline_config.eval_input_reader - eval_input_reader.label_map_path = original_label_map_path - 
_write_config(pipeline_config, pipeline_config_path) - - configs = config_util.get_configs_from_pipeline_file(pipeline_config_path) - configs = config_util.merge_external_params_with_configs( - configs, label_map_path=new_label_map_path) - self.assertEqual(new_label_map_path, - configs["train_input_config"].label_map_path) - self.assertEqual(new_label_map_path, - configs["eval_input_config"].label_map_path) - - -if __name__ == "__main__": - tf.test.main() diff --git a/object_detection/utils/dataset_util.py b/object_detection/utils/dataset_util.py deleted file mode 100644 index 014a9118..00000000 --- a/object_detection/utils/dataset_util.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Utility functions for creating TFRecord data sets.""" - -import tensorflow as tf - - -def int64_feature(value): - return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) - - -def int64_list_feature(value): - return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) - - -def bytes_feature(value): - return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) - - -def bytes_list_feature(value): - return tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) - - -def float_list_feature(value): - return tf.train.Feature(float_list=tf.train.FloatList(value=value)) - - -def read_examples_list(path): - """Read list of training or validation examples. - - The file is assumed to contain a single example per line where the first - token in the line is an identifier that allows us to find the image and - annotation xml for that example. - - For example, the line: - xyz 3 - would allow us to find files xyz.jpg and xyz.xml (the 3 would be ignored). - - Args: - path: absolute path to examples list file. - - Returns: - list of example identifiers (strings). - """ - with tf.gfile.GFile(path) as fid: - lines = fid.readlines() - return [line.strip().split(' ')[0] for line in lines] - - -def recursive_parse_xml_to_dict(xml): - """Recursively parses XML contents to python dict. - - We assume that `object` tags are the only ones that can appear - multiple times at the same level of a tree. - - Args: - xml: xml tree obtained by parsing XML file contents using lxml.etree - - Returns: - Python dictionary holding XML contents. 
- """ - if not xml: - return {xml.tag: xml.text} - result = {} - for child in xml: - child_result = recursive_parse_xml_to_dict(child) - if child.tag != 'object': - result[child.tag] = child_result[child.tag] - else: - if child.tag not in result: - result[child.tag] = [] - result[child.tag].append(child_result[child.tag]) - return {xml.tag: result} diff --git a/object_detection/utils/dataset_util_test.py b/object_detection/utils/dataset_util_test.py deleted file mode 100644 index 99cfb2cd..00000000 --- a/object_detection/utils/dataset_util_test.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for object_detection.utils.dataset_util.""" - -import os -import tensorflow as tf - -from object_detection.utils import dataset_util - - -class DatasetUtilTest(tf.test.TestCase): - - def test_read_examples_list(self): - example_list_data = """example1 1\nexample2 2""" - example_list_path = os.path.join(self.get_temp_dir(), 'examples.txt') - with tf.gfile.Open(example_list_path, 'wb') as f: - f.write(example_list_data) - - examples = dataset_util.read_examples_list(example_list_path) - self.assertListEqual(['example1', 'example2'], examples) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/utils/label_map_util.py b/object_detection/utils/label_map_util.py deleted file mode 100644 index bf7bae63..00000000 --- a/object_detection/utils/label_map_util.py +++ /dev/null @@ -1,166 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Label map utility functions.""" - -import logging - -import tensorflow as tf -from google.protobuf import text_format -from object_detection.protos import string_int_label_map_pb2 - - -def _validate_label_map(label_map): - """Checks if a label map is valid. - - Args: - label_map: StringIntLabelMap to validate. - - Raises: - ValueError: if label map is invalid. 
- """ - for item in label_map.item: - if item.id < 1: - raise ValueError('Label map ids should be >= 1.') - - -def create_category_index(categories): - """Creates dictionary of COCO compatible categories keyed by category id. - - Args: - categories: a list of dicts, each of which has the following keys: - 'id': (required) an integer id uniquely identifying this category. - 'name': (required) string representing category name - e.g., 'cat', 'dog', 'pizza'. - - Returns: - category_index: a dict containing the same entries as categories, but keyed - by the 'id' field of each category. - """ - category_index = {} - for cat in categories: - category_index[cat['id']] = cat - return category_index - - -def convert_label_map_to_categories(label_map, - max_num_classes, - use_display_name=True): - """Loads label map proto and returns categories list compatible with eval. - - This function loads a label map and returns a list of dicts, each of which - has the following keys: - 'id': (required) an integer id uniquely identifying this category. - 'name': (required) string representing category name - e.g., 'cat', 'dog', 'pizza'. - We only allow class into the list if its id-label_id_offset is - between 0 (inclusive) and max_num_classes (exclusive). - If there are several items mapping to the same id in the label map, - we will only keep the first one in the categories list. - - Args: - label_map: a StringIntLabelMapProto or None. If None, a default categories - list is created with max_num_classes categories. - max_num_classes: maximum number of (consecutive) label indices to include. - use_display_name: (boolean) choose whether to load 'display_name' field - as category name. If False or if the display_name field does not exist, - uses 'name' field as category names instead. - Returns: - categories: a list of dictionaries representing all possible categories. 
- """ - categories = [] - list_of_ids_already_added = [] - if not label_map: - label_id_offset = 1 - for class_id in range(max_num_classes): - categories.append({ - 'id': class_id + label_id_offset, - 'name': 'category_{}'.format(class_id + label_id_offset) - }) - return categories - for item in label_map.item: - if not 0 < item.id <= max_num_classes: - logging.info('Ignore item %d since it falls outside of requested ' - 'label range.', item.id) - continue - if use_display_name and item.HasField('display_name'): - name = item.display_name - else: - name = item.name - if item.id not in list_of_ids_already_added: - list_of_ids_already_added.append(item.id) - categories.append({'id': item.id, 'name': name}) - return categories - - -def load_labelmap(path): - """Loads label map proto. - - Args: - path: path to StringIntLabelMap proto text file. - Returns: - a StringIntLabelMapProto - """ - with tf.gfile.GFile(path, 'r') as fid: - label_map_string = fid.read() - label_map = string_int_label_map_pb2.StringIntLabelMap() - try: - text_format.Merge(label_map_string, label_map) - except text_format.ParseError: - label_map.ParseFromString(label_map_string) - _validate_label_map(label_map) - return label_map - - -def get_label_map_dict(label_map_path, use_display_name=False): - """Reads a label map and returns a dictionary of label names to id. - - Args: - label_map_path: path to label_map. - use_display_name: whether to use the label map items' display names as keys. - - Returns: - A dictionary mapping label names to id. - """ - label_map = load_labelmap(label_map_path) - label_map_dict = {} - for item in label_map.item: - if use_display_name: - label_map_dict[item.display_name] = item.id - else: - label_map_dict[item.name] = item.id - return label_map_dict - - -def create_category_index_from_labelmap(label_map_path): - """Reads a label map and returns a category index. - - Args: - label_map_path: Path to `StringIntLabelMap` proto text file. 
- - Returns: - A category index, which is a dictionary that maps integer ids to dicts - containing categories, e.g. - {1: {'id': 1, 'name': 'dog'}, 2: {'id': 2, 'name': 'cat'}, ...} - """ - label_map = load_labelmap(label_map_path) - max_num_classes = max(item.id for item in label_map.item) - categories = convert_label_map_to_categories(label_map, max_num_classes) - return create_category_index(categories) - - -def create_class_agnostic_category_index(): - """Creates a category index with a single `object` class.""" - return {1: {'id': 1, 'name': 'object'}} diff --git a/object_detection/utils/label_map_util_test.py b/object_detection/utils/label_map_util_test.py deleted file mode 100644 index 8671754c..00000000 --- a/object_detection/utils/label_map_util_test.py +++ /dev/null @@ -1,217 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for object_detection.utils.label_map_util.""" - -import os -import tensorflow as tf - -from google.protobuf import text_format -from object_detection.protos import string_int_label_map_pb2 -from object_detection.utils import label_map_util - - -class LabelMapUtilTest(tf.test.TestCase): - - def _generate_label_map(self, num_classes): - label_map_proto = string_int_label_map_pb2.StringIntLabelMap() - for i in range(1, num_classes + 1): - item = label_map_proto.item.add() - item.id = i - item.name = 'label_' + str(i) - item.display_name = str(i) - return label_map_proto - - def test_get_label_map_dict(self): - label_map_string = """ - item { - id:2 - name:'cat' - } - item { - id:1 - name:'dog' - } - """ - label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt') - with tf.gfile.Open(label_map_path, 'wb') as f: - f.write(label_map_string) - - label_map_dict = label_map_util.get_label_map_dict(label_map_path) - self.assertEqual(label_map_dict['dog'], 1) - self.assertEqual(label_map_dict['cat'], 2) - - def test_get_label_map_dict_display(self): - label_map_string = """ - item { - id:2 - display_name:'cat' - } - item { - id:1 - display_name:'dog' - } - """ - label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt') - with tf.gfile.Open(label_map_path, 'wb') as f: - f.write(label_map_string) - - label_map_dict = label_map_util.get_label_map_dict( - label_map_path, use_display_name=True) - self.assertEqual(label_map_dict['dog'], 1) - self.assertEqual(label_map_dict['cat'], 2) - - def test_load_bad_label_map(self): - label_map_string = """ - item { - id:0 - name:'class that should not be indexed at zero' - } - item { - id:2 - name:'cat' - } - item { - id:1 - name:'dog' - } - """ - label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt') - with tf.gfile.Open(label_map_path, 'wb') as f: - f.write(label_map_string) - - with 
self.assertRaises(ValueError): - label_map_util.load_labelmap(label_map_path) - - def test_keep_categories_with_unique_id(self): - label_map_proto = string_int_label_map_pb2.StringIntLabelMap() - label_map_string = """ - item { - id:2 - name:'cat' - } - item { - id:1 - name:'child' - } - item { - id:1 - name:'person' - } - item { - id:1 - name:'n00007846' - } - """ - text_format.Merge(label_map_string, label_map_proto) - categories = label_map_util.convert_label_map_to_categories( - label_map_proto, max_num_classes=3) - self.assertListEqual([{ - 'id': 2, - 'name': u'cat' - }, { - 'id': 1, - 'name': u'child' - }], categories) - - def test_convert_label_map_to_categories_no_label_map(self): - categories = label_map_util.convert_label_map_to_categories( - None, max_num_classes=3) - expected_categories_list = [{ - 'name': u'category_1', - 'id': 1 - }, { - 'name': u'category_2', - 'id': 2 - }, { - 'name': u'category_3', - 'id': 3 - }] - self.assertListEqual(expected_categories_list, categories) - - def test_convert_label_map_to_coco_categories(self): - label_map_proto = self._generate_label_map(num_classes=4) - categories = label_map_util.convert_label_map_to_categories( - label_map_proto, max_num_classes=3) - expected_categories_list = [{ - 'name': u'1', - 'id': 1 - }, { - 'name': u'2', - 'id': 2 - }, { - 'name': u'3', - 'id': 3 - }] - self.assertListEqual(expected_categories_list, categories) - - def test_convert_label_map_to_coco_categories_with_few_classes(self): - label_map_proto = self._generate_label_map(num_classes=4) - cat_no_offset = label_map_util.convert_label_map_to_categories( - label_map_proto, max_num_classes=2) - expected_categories_list = [{ - 'name': u'1', - 'id': 1 - }, { - 'name': u'2', - 'id': 2 - }] - self.assertListEqual(expected_categories_list, cat_no_offset) - - def test_create_category_index(self): - categories = [{'name': u'1', 'id': 1}, {'name': u'2', 'id': 2}] - category_index = label_map_util.create_category_index(categories) - 
self.assertDictEqual({ - 1: { - 'name': u'1', - 'id': 1 - }, - 2: { - 'name': u'2', - 'id': 2 - } - }, category_index) - - def test_create_category_index_from_labelmap(self): - label_map_string = """ - item { - id:2 - name:'cat' - } - item { - id:1 - name:'dog' - } - """ - label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt') - with tf.gfile.Open(label_map_path, 'wb') as f: - f.write(label_map_string) - - category_index = label_map_util.create_category_index_from_labelmap( - label_map_path) - self.assertDictEqual({ - 1: { - 'name': u'dog', - 'id': 1 - }, - 2: { - 'name': u'cat', - 'id': 2 - } - }, category_index) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/utils/learning_schedules.py b/object_detection/utils/learning_schedules.py deleted file mode 100644 index 14583527..00000000 --- a/object_detection/utils/learning_schedules.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Library of common learning rate schedules.""" - -import numpy as np -import tensorflow as tf - - -def exponential_decay_with_burnin(global_step, - learning_rate_base, - learning_rate_decay_steps, - learning_rate_decay_factor, - burnin_learning_rate=0.0, - burnin_steps=0): - """Exponential decay schedule with burn-in period. 
- - In this schedule, learning rate is fixed at burnin_learning_rate - for a fixed period, before transitioning to a regular exponential - decay schedule. - - Args: - global_step: int tensor representing global step. - learning_rate_base: base learning rate. - learning_rate_decay_steps: steps to take between decaying the learning rate. - Note that this includes the number of burn-in steps. - learning_rate_decay_factor: multiplicative factor by which to decay - learning rate. - burnin_learning_rate: initial learning rate during burn-in period. If - 0.0 (which is the default), then the burn-in learning rate is simply - set to learning_rate_base. - burnin_steps: number of steps to use burnin learning rate. - - Returns: - a (scalar) float tensor representing learning rate - """ - if burnin_learning_rate == 0: - burnin_learning_rate = learning_rate_base - post_burnin_learning_rate = tf.train.exponential_decay( - learning_rate_base, - global_step, - learning_rate_decay_steps, - learning_rate_decay_factor, - staircase=True) - return tf.cond( - tf.less(global_step, burnin_steps), - lambda: tf.convert_to_tensor(burnin_learning_rate), - lambda: post_burnin_learning_rate) - - -def cosine_decay_with_warmup(global_step, - learning_rate_base, - total_steps, - warmup_learning_rate=0.0, - warmup_steps=0): - """Cosine decay schedule with warm up period. - - Cosine annealing learning rate as described in: - Loshchilov and Hutter, SGDR: Stochastic Gradient Descent with Warm Restarts. - ICLR 2017. https://arxiv.org/abs/1608.03983 - In this schedule, the learning rate grows linearly from warmup_learning_rate - to learning_rate_base for warmup_steps, then transitions to a cosine decay - schedule. - - Args: - global_step: int64 (scalar) tensor representing global step. - learning_rate_base: base learning rate. - total_steps: total number of training steps. - warmup_learning_rate: initial learning rate for warm up. - warmup_steps: number of warmup steps. 
- - Returns: - a (scalar) float tensor representing learning rate. - - Raises: - ValueError: if warmup_learning_rate is larger than learning_rate_base, - or if warmup_steps is larger than total_steps. - """ - if learning_rate_base < warmup_learning_rate: - raise ValueError('learning_rate_base must be larger ' - 'or equal to warmup_learning_rate.') - if total_steps < warmup_steps: - raise ValueError('total_steps must be larger or equal to ' - 'warmup_steps.') - learning_rate = 0.5 * learning_rate_base * ( - 1 + tf.cos(np.pi * tf.cast( - global_step - warmup_steps, tf.float32 - ) / float(total_steps - warmup_steps))) - if warmup_steps > 0: - slope = (learning_rate_base - warmup_learning_rate) / warmup_steps - pre_cosine_learning_rate = slope * tf.cast( - global_step, tf.float32) + warmup_learning_rate - learning_rate = tf.cond( - tf.less(global_step, warmup_steps), lambda: pre_cosine_learning_rate, - lambda: learning_rate) - return learning_rate - - -def manual_stepping(global_step, boundaries, rates): - """Manually stepped learning rate schedule. - - This function provides fine grained control over learning rates. One must - specify a sequence of learning rates as well as a set of integer steps - at which the current learning rate must transition to the next. For example, - if boundaries = [5, 10] and rates = [.1, .01, .001], then the learning - rate returned by this function is .1 for global_step=0,...,4, .01 for - global_step=5...9, and .001 for global_step=10 and onward. - - Args: - global_step: int64 (scalar) tensor representing global step. - boundaries: a list of global steps at which to switch learning - rates. This list is assumed to consist of increasing positive integers. - rates: a list of (float) learning rates corresponding to intervals between - the boundaries. The length of this list must be exactly - len(boundaries) + 1. 
- - Returns: - a (scalar) float tensor representing learning rate - Raises: - ValueError: if one of the following checks fails: - 1. boundaries is a strictly increasing list of positive integers - 2. len(rates) == len(boundaries) + 1 - """ - if any([b < 0 for b in boundaries]) or any( - [not isinstance(b, int) for b in boundaries]): - raise ValueError('boundaries must be a list of positive integers') - if any([bnext <= b for bnext, b in zip(boundaries[1:], boundaries[:-1])]): - raise ValueError('Entries in boundaries must be strictly increasing.') - if any([not isinstance(r, float) for r in rates]): - raise ValueError('Learning rates must be floats') - if len(rates) != len(boundaries) + 1: - raise ValueError('Number of provided learning rates must exceed ' - 'number of boundary points by exactly 1.') - step_boundaries = tf.constant(boundaries, tf.int64) - learning_rates = tf.constant(rates, tf.float32) - unreached_boundaries = tf.reshape( - tf.where(tf.greater(step_boundaries, global_step)), [-1]) - unreached_boundaries = tf.concat([unreached_boundaries, [len(boundaries)]], 0) - index = tf.reshape(tf.reduce_min(unreached_boundaries), [1]) - return tf.reshape(tf.slice(learning_rates, index, [1]), []) diff --git a/object_detection/utils/learning_schedules_test.py b/object_detection/utils/learning_schedules_test.py deleted file mode 100644 index 8fdc8915..00000000 --- a/object_detection/utils/learning_schedules_test.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.utils.learning_schedules.""" -import tensorflow as tf - -from object_detection.utils import learning_schedules - - -class LearningSchedulesTest(tf.test.TestCase): - - def testExponentialDecayWithBurnin(self): - global_step = tf.placeholder(tf.int32, []) - learning_rate_base = 1.0 - learning_rate_decay_steps = 3 - learning_rate_decay_factor = .1 - burnin_learning_rate = .5 - burnin_steps = 2 - exp_rates = [.5, .5, 1, .1, .1, .1, .01, .01] - learning_rate = learning_schedules.exponential_decay_with_burnin( - global_step, learning_rate_base, learning_rate_decay_steps, - learning_rate_decay_factor, burnin_learning_rate, burnin_steps) - with self.test_session() as sess: - output_rates = [] - for input_global_step in range(8): - output_rate = sess.run(learning_rate, - feed_dict={global_step: input_global_step}) - output_rates.append(output_rate) - self.assertAllClose(output_rates, exp_rates) - - def testCosineDecayWithWarmup(self): - global_step = tf.placeholder(tf.int32, []) - learning_rate_base = 1.0 - total_steps = 100 - warmup_learning_rate = 0.1 - warmup_steps = 9 - input_global_steps = [0, 4, 8, 9, 100] - exp_rates = [0.1, 0.5, 0.9, 1.0, 0] - learning_rate = learning_schedules.cosine_decay_with_warmup( - global_step, learning_rate_base, total_steps, - warmup_learning_rate, warmup_steps) - with self.test_session() as sess: - output_rates = [] - for input_global_step in input_global_steps: - output_rate = sess.run(learning_rate, - feed_dict={global_step: input_global_step}) - output_rates.append(output_rate) - self.assertAllClose(output_rates, exp_rates) - - def testManualStepping(self): - global_step = tf.placeholder(tf.int64, []) - boundaries = [2, 3, 7] - rates = [1.0, 2.0, 3.0, 4.0] - exp_rates = [1.0, 1.0, 2.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0] 
- learning_rate = learning_schedules.manual_stepping(global_step, boundaries, - rates) - with self.test_session() as sess: - output_rates = [] - for input_global_step in range(10): - output_rate = sess.run(learning_rate, - feed_dict={global_step: input_global_step}) - output_rates.append(output_rate) - self.assertAllClose(output_rates, exp_rates) - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/utils/metrics.py b/object_detection/utils/metrics.py deleted file mode 100644 index 719f1549..00000000 --- a/object_detection/utils/metrics.py +++ /dev/null @@ -1,144 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Functions for computing metrics like precision, recall, CorLoc and etc.""" -from __future__ import division - -import numpy as np - - -def compute_precision_recall(scores, labels, num_gt): - """Compute precision and recall. - - Args: - scores: A float numpy array representing detection score - labels: A boolean numpy array representing true/false positive labels - num_gt: Number of ground truth instances - - Raises: - ValueError: if the input is not of the correct format - - Returns: - precision: Fraction of positive instances over detected ones. This value is - None if no ground truth labels are present. - recall: Fraction of detected positive instance over all positive instances. 
- This value is None if no ground truth labels are present. - - """ - if not isinstance( - labels, np.ndarray) or labels.dtype != np.bool or len(labels.shape) != 1: - raise ValueError("labels must be single dimension bool numpy array") - - if not isinstance( - scores, np.ndarray) or len(scores.shape) != 1: - raise ValueError("scores must be single dimension numpy array") - - if num_gt < np.sum(labels): - raise ValueError("Number of true positives must be smaller than num_gt.") - - if len(scores) != len(labels): - raise ValueError("scores and labels must be of the same size.") - - if num_gt == 0: - return None, None - - sorted_indices = np.argsort(scores) - sorted_indices = sorted_indices[::-1] - labels = labels.astype(int) - true_positive_labels = labels[sorted_indices] - false_positive_labels = 1 - true_positive_labels - cum_true_positives = np.cumsum(true_positive_labels) - cum_false_positives = np.cumsum(false_positive_labels) - precision = cum_true_positives.astype(float) / ( - cum_true_positives + cum_false_positives) - recall = cum_true_positives.astype(float) / num_gt - return precision, recall - - -def compute_average_precision(precision, recall): - """Compute Average Precision according to the definition in VOCdevkit. - - Precision is modified to ensure that it does not decrease as recall - decrease. - - Args: - precision: A float [N, 1] numpy array of precisions - recall: A float [N, 1] numpy array of recalls - - Raises: - ValueError: if the input is not of the correct format - - Returns: - average_precison: The area under the precision recall curve. NaN if - precision and recall are None. 
- - """ - if precision is None: - if recall is not None: - raise ValueError("If precision is None, recall must also be None") - return np.NAN - - if not isinstance(precision, np.ndarray) or not isinstance(recall, - np.ndarray): - raise ValueError("precision and recall must be numpy array") - if precision.dtype != np.float or recall.dtype != np.float: - raise ValueError("input must be float numpy array.") - if len(precision) != len(recall): - raise ValueError("precision and recall must be of the same size.") - if not precision.size: - return 0.0 - if np.amin(precision) < 0 or np.amax(precision) > 1: - raise ValueError("Precision must be in the range of [0, 1].") - if np.amin(recall) < 0 or np.amax(recall) > 1: - raise ValueError("recall must be in the range of [0, 1].") - if not all(recall[i] <= recall[i + 1] for i in range(len(recall) - 1)): - raise ValueError("recall must be a non-decreasing array") - - recall = np.concatenate([[0], recall, [1]]) - precision = np.concatenate([[0], precision, [0]]) - - # Preprocess precision to be a non-decreasing array - for i in range(len(precision) - 2, -1, -1): - precision[i] = np.maximum(precision[i], precision[i + 1]) - - indices = np.where(recall[1:] != recall[:-1])[0] + 1 - average_precision = np.sum( - (recall[indices] - recall[indices - 1]) * precision[indices]) - return average_precision - - -def compute_cor_loc(num_gt_imgs_per_class, - num_images_correctly_detected_per_class): - """Compute CorLoc according to the definition in the following paper. - - https://www.robots.ox.ac.uk/~vgg/rg/papers/deselaers-eccv10.pdf - - Returns nans if there are no ground truth images for a class. 
- - Args: - num_gt_imgs_per_class: 1D array, representing number of images containing - at least one object instance of a particular class - num_images_correctly_detected_per_class: 1D array, representing number of - images that are correctly detected at least one object instance of a - particular class - - Returns: - corloc_per_class: A float numpy array represents the corloc score of each - class - """ - return np.where( - num_gt_imgs_per_class == 0, - np.nan, - num_images_correctly_detected_per_class / num_gt_imgs_per_class) diff --git a/object_detection/utils/metrics_test.py b/object_detection/utils/metrics_test.py deleted file mode 100644 index a2064bbf..00000000 --- a/object_detection/utils/metrics_test.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for object_detection.metrics.""" - -import numpy as np -import tensorflow as tf - -from object_detection.utils import metrics - - -class MetricsTest(tf.test.TestCase): - - def test_compute_cor_loc(self): - num_gt_imgs_per_class = np.array([100, 1, 5, 1, 1], dtype=int) - num_images_correctly_detected_per_class = np.array([10, 0, 1, 0, 0], - dtype=int) - corloc = metrics.compute_cor_loc(num_gt_imgs_per_class, - num_images_correctly_detected_per_class) - expected_corloc = np.array([0.1, 0, 0.2, 0, 0], dtype=float) - self.assertTrue(np.allclose(corloc, expected_corloc)) - - def test_compute_cor_loc_nans(self): - num_gt_imgs_per_class = np.array([100, 0, 0, 1, 1], dtype=int) - num_images_correctly_detected_per_class = np.array([10, 0, 1, 0, 0], - dtype=int) - corloc = metrics.compute_cor_loc(num_gt_imgs_per_class, - num_images_correctly_detected_per_class) - expected_corloc = np.array([0.1, np.nan, np.nan, 0, 0], dtype=float) - self.assertAllClose(corloc, expected_corloc) - - def test_compute_precision_recall(self): - num_gt = 10 - scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float) - labels = np.array([0, 1, 1, 0, 0, 1], dtype=bool) - accumulated_tp_count = np.array([0, 1, 1, 2, 2, 3], dtype=float) - expected_precision = accumulated_tp_count / np.array([1, 2, 3, 4, 5, 6]) - expected_recall = accumulated_tp_count / num_gt - precision, recall = metrics.compute_precision_recall(scores, labels, num_gt) - self.assertAllClose(precision, expected_precision) - self.assertAllClose(recall, expected_recall) - - def test_compute_average_precision(self): - precision = np.array([0.8, 0.76, 0.9, 0.65, 0.7, 0.5, 0.55, 0], dtype=float) - recall = np.array([0.3, 0.3, 0.4, 0.4, 0.45, 0.45, 0.5, 0.5], dtype=float) - processed_precision = np.array([0.9, 0.9, 0.9, 0.7, 0.7, 0.55, 0.55, 0], - dtype=float) - recall_interval = np.array([0.3, 0, 0.1, 0, 0.05, 0, 0.05, 0], dtype=float) - 
expected_mean_ap = np.sum(recall_interval * processed_precision) - mean_ap = metrics.compute_average_precision(precision, recall) - self.assertAlmostEqual(expected_mean_ap, mean_ap) - - def test_compute_precision_recall_and_ap_no_groundtruth(self): - num_gt = 0 - scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float) - labels = np.array([0, 0, 0, 0, 0, 0], dtype=bool) - expected_precision = None - expected_recall = None - precision, recall = metrics.compute_precision_recall(scores, labels, num_gt) - self.assertEquals(precision, expected_precision) - self.assertEquals(recall, expected_recall) - ap = metrics.compute_average_precision(precision, recall) - self.assertTrue(np.isnan(ap)) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/utils/np_box_list.py b/object_detection/utils/np_box_list.py deleted file mode 100644 index 70263ca3..00000000 --- a/object_detection/utils/np_box_list.py +++ /dev/null @@ -1,133 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Numpy BoxList classes and functions.""" - -import numpy as np - - -class BoxList(object): - """Box collection. - - BoxList represents a list of bounding boxes as numpy array, where each - bounding box is represented as a row of 4 numbers, - [y_min, x_min, y_max, x_max]. 
It is assumed that all bounding boxes within a - given list correspond to a single image. - - Optionally, users can add additional related fields (such as - objectness/classification scores). - """ - - def __init__(self, data): - """Constructs box collection. - - Args: - data: a numpy array of shape [N, 4] representing box coordinates - - Raises: - ValueError: if bbox data is not a numpy array - ValueError: if invalid dimensions for bbox data - """ - if not isinstance(data, np.ndarray): - raise ValueError('data must be a numpy array.') - if len(data.shape) != 2 or data.shape[1] != 4: - raise ValueError('Invalid dimensions for box data.') - if data.dtype != np.float32 and data.dtype != np.float64: - raise ValueError('Invalid data type for box data: float is required.') - if not self._is_valid_boxes(data): - raise ValueError('Invalid box data. data must be a numpy array of ' - 'N*[y_min, x_min, y_max, x_max]') - self.data = {'boxes': data} - - def num_boxes(self): - """Return number of boxes held in collections.""" - return self.data['boxes'].shape[0] - - def get_extra_fields(self): - """Return all non-box fields.""" - return [k for k in self.data.keys() if k != 'boxes'] - - def has_field(self, field): - return field in self.data - - def add_field(self, field, field_data): - """Add data to a specified field. - - Args: - field: a string parameter used to speficy a related field to be accessed. - field_data: a numpy array of [N, ...] representing the data associated - with the field. - Raises: - ValueError: if the field is already exist or the dimension of the field - data does not matches the number of boxes. - """ - if self.has_field(field): - raise ValueError('Field ' + field + 'already exists') - if len(field_data.shape) < 1 or field_data.shape[0] != self.num_boxes(): - raise ValueError('Invalid dimensions for field data') - self.data[field] = field_data - - def get(self): - """Convenience function for accesssing box coordinates. 
- - Returns: - a numpy array of shape [N, 4] representing box corners - """ - return self.get_field('boxes') - - def get_field(self, field): - """Accesses data associated with the specified field in the box collection. - - Args: - field: a string parameter used to speficy a related field to be accessed. - - Returns: - a numpy 1-d array representing data of an associated field - - Raises: - ValueError: if invalid field - """ - if not self.has_field(field): - raise ValueError('field {} does not exist'.format(field)) - return self.data[field] - - def get_coordinates(self): - """Get corner coordinates of boxes. - - Returns: - a list of 4 1-d numpy arrays [y_min, x_min, y_max, x_max] - """ - box_coordinates = self.get() - y_min = box_coordinates[:, 0] - x_min = box_coordinates[:, 1] - y_max = box_coordinates[:, 2] - x_max = box_coordinates[:, 3] - return [y_min, x_min, y_max, x_max] - - def _is_valid_boxes(self, data): - """Check whether data fullfills the format of N*[ymin, xmin, ymax, xmin]. - - Args: - data: a numpy array of shape [N, 4] representing box coordinates - - Returns: - a boolean indicating whether all ymax of boxes are equal or greater than - ymin, and all xmax of boxes are equal or greater than xmin. - """ - if data.shape[0] > 0: - for i in range(data.shape[0]): - if data[i, 0] > data[i, 2] or data[i, 1] > data[i, 3]: - return False - return True diff --git a/object_detection/utils/np_box_list_ops.py b/object_detection/utils/np_box_list_ops.py deleted file mode 100644 index cb9fee85..00000000 --- a/object_detection/utils/np_box_list_ops.py +++ /dev/null @@ -1,555 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Bounding Box List operations for Numpy BoxLists. - -Example box operations that are supported: - * Areas: compute bounding box areas - * IOU: pairwise intersection-over-union scores -""" - -import numpy as np - -from object_detection.utils import np_box_list -from object_detection.utils import np_box_ops - - -class SortOrder(object): - """Enum class for sort order. - - Attributes: - ascend: ascend order. - descend: descend order. - """ - ASCEND = 1 - DESCEND = 2 - - -def area(boxlist): - """Computes area of boxes. - - Args: - boxlist: BoxList holding N boxes - - Returns: - a numpy array with shape [N*1] representing box areas - """ - y_min, x_min, y_max, x_max = boxlist.get_coordinates() - return (y_max - y_min) * (x_max - x_min) - - -def intersection(boxlist1, boxlist2): - """Compute pairwise intersection areas between boxes. - - Args: - boxlist1: BoxList holding N boxes - boxlist2: BoxList holding M boxes - - Returns: - a numpy array with shape [N*M] representing pairwise intersection area - """ - return np_box_ops.intersection(boxlist1.get(), boxlist2.get()) - - -def iou(boxlist1, boxlist2): - """Computes pairwise intersection-over-union between box collections. - - Args: - boxlist1: BoxList holding N boxes - boxlist2: BoxList holding M boxes - - Returns: - a numpy array with shape [N, M] representing pairwise iou scores. 
- """ - return np_box_ops.iou(boxlist1.get(), boxlist2.get()) - - -def ioa(boxlist1, boxlist2): - """Computes pairwise intersection-over-area between box collections. - - Intersection-over-area (ioa) between two boxes box1 and box2 is defined as - their intersection area over box2's area. Note that ioa is not symmetric, - that is, IOA(box1, box2) != IOA(box2, box1). - - Args: - boxlist1: BoxList holding N boxes - boxlist2: BoxList holding M boxes - - Returns: - a numpy array with shape [N, M] representing pairwise ioa scores. - """ - return np_box_ops.ioa(boxlist1.get(), boxlist2.get()) - - -def gather(boxlist, indices, fields=None): - """Gather boxes from BoxList according to indices and return new BoxList. - - By default, Gather returns boxes corresponding to the input index list, as - well as all additional fields stored in the boxlist (indexing into the - first dimension). However one can optionally only gather from a - subset of fields. - - Args: - boxlist: BoxList holding N boxes - indices: a 1-d numpy array of type int_ - fields: (optional) list of fields to also gather from. If None (default), - all fields are gathered from. Pass an empty fields list to only gather - the box coordinates. - - Returns: - subboxlist: a BoxList corresponding to the subset of the input BoxList - specified by indices - - Raises: - ValueError: if specified field is not contained in boxlist or if the - indices are not of type int_ - """ - if indices.size: - if np.amax(indices) >= boxlist.num_boxes() or np.amin(indices) < 0: - raise ValueError('indices are out of valid range.') - subboxlist = np_box_list.BoxList(boxlist.get()[indices, :]) - if fields is None: - fields = boxlist.get_extra_fields() - for field in fields: - extra_field_data = boxlist.get_field(field) - subboxlist.add_field(field, extra_field_data[indices, ...]) - return subboxlist - - -def sort_by_field(boxlist, field, order=SortOrder.DESCEND): - """Sort boxes and associated fields according to a scalar field. 
- - A common use case is reordering the boxes according to descending scores. - - Args: - boxlist: BoxList holding N boxes. - field: A BoxList field for sorting and reordering the BoxList. - order: (Optional) 'descend' or 'ascend'. Default is descend. - - Returns: - sorted_boxlist: A sorted BoxList with the field in the specified order. - - Raises: - ValueError: if specified field does not exist or is not of single dimension. - ValueError: if the order is not either descend or ascend. - """ - if not boxlist.has_field(field): - raise ValueError('Field ' + field + ' does not exist') - if len(boxlist.get_field(field).shape) != 1: - raise ValueError('Field ' + field + 'should be single dimension.') - if order != SortOrder.DESCEND and order != SortOrder.ASCEND: - raise ValueError('Invalid sort order') - - field_to_sort = boxlist.get_field(field) - sorted_indices = np.argsort(field_to_sort) - if order == SortOrder.DESCEND: - sorted_indices = sorted_indices[::-1] - return gather(boxlist, sorted_indices) - - -def non_max_suppression(boxlist, - max_output_size=10000, - iou_threshold=1.0, - score_threshold=-10.0): - """Non maximum suppression. - - This op greedily selects a subset of detection bounding boxes, pruning - away boxes that have high IOU (intersection over union) overlap (> thresh) - with already selected boxes. In each iteration, the detected bounding box with - highest score in the available pool is selected. - - Args: - boxlist: BoxList holding N boxes. Must contain a 'scores' field - representing detection scores. All scores belong to the same class. - max_output_size: maximum number of retained boxes - iou_threshold: intersection over union threshold. - score_threshold: minimum score threshold. Remove the boxes with scores - less than this value. Default value is set to -10. A very - low threshold to pass pretty much all the boxes, unless - the user sets a different score threshold. 
- - Returns: - a BoxList holding M boxes where M <= max_output_size - Raises: - ValueError: if 'scores' field does not exist - ValueError: if threshold is not in [0, 1] - ValueError: if max_output_size < 0 - """ - if not boxlist.has_field('scores'): - raise ValueError('Field scores does not exist') - if iou_threshold < 0. or iou_threshold > 1.0: - raise ValueError('IOU threshold must be in [0, 1]') - if max_output_size < 0: - raise ValueError('max_output_size must be bigger than 0.') - - boxlist = filter_scores_greater_than(boxlist, score_threshold) - if boxlist.num_boxes() == 0: - return boxlist - - boxlist = sort_by_field(boxlist, 'scores') - - # Prevent further computation if NMS is disabled. - if iou_threshold == 1.0: - if boxlist.num_boxes() > max_output_size: - selected_indices = np.arange(max_output_size) - return gather(boxlist, selected_indices) - else: - return boxlist - - boxes = boxlist.get() - num_boxes = boxlist.num_boxes() - # is_index_valid is True only for all remaining valid boxes, - is_index_valid = np.full(num_boxes, 1, dtype=bool) - selected_indices = [] - num_output = 0 - for i in xrange(num_boxes): - if num_output < max_output_size: - if is_index_valid[i]: - num_output += 1 - selected_indices.append(i) - is_index_valid[i] = False - valid_indices = np.where(is_index_valid)[0] - if valid_indices.size == 0: - break - - intersect_over_union = np_box_ops.iou( - np.expand_dims(boxes[i, :], axis=0), boxes[valid_indices, :]) - intersect_over_union = np.squeeze(intersect_over_union, axis=0) - is_index_valid[valid_indices] = np.logical_and( - is_index_valid[valid_indices], - intersect_over_union <= iou_threshold) - return gather(boxlist, np.array(selected_indices)) - - -def multi_class_non_max_suppression(boxlist, score_thresh, iou_thresh, - max_output_size): - """Multi-class version of non maximum suppression. 
- - This op greedily selects a subset of detection bounding boxes, pruning - away boxes that have high IOU (intersection over union) overlap (> thresh) - with already selected boxes. It operates independently for each class for - which scores are provided (via the scores field of the input box_list), - pruning boxes with score less than a provided threshold prior to - applying NMS. - - Args: - boxlist: BoxList holding N boxes. Must contain a 'scores' field - representing detection scores. This scores field is a tensor that can - be 1 dimensional (in the case of a single class) or 2-dimensional, which - which case we assume that it takes the shape [num_boxes, num_classes]. - We further assume that this rank is known statically and that - scores.shape[1] is also known (i.e., the number of classes is fixed - and known at graph construction time). - score_thresh: scalar threshold for score (low scoring boxes are removed). - iou_thresh: scalar threshold for IOU (boxes that that high IOU overlap - with previously selected boxes are removed). - max_output_size: maximum number of retained boxes per class. - - Returns: - a BoxList holding M boxes with a rank-1 scores field representing - corresponding scores for each box with scores sorted in decreasing order - and a rank-1 classes field representing a class label for each box. - Raises: - ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have - a valid scores field. 
- """ - if not 0 <= iou_thresh <= 1.0: - raise ValueError('thresh must be between 0 and 1') - if not isinstance(boxlist, np_box_list.BoxList): - raise ValueError('boxlist must be a BoxList') - if not boxlist.has_field('scores'): - raise ValueError('input boxlist must have \'scores\' field') - scores = boxlist.get_field('scores') - if len(scores.shape) == 1: - scores = np.reshape(scores, [-1, 1]) - elif len(scores.shape) == 2: - if scores.shape[1] is None: - raise ValueError('scores field must have statically defined second ' - 'dimension') - else: - raise ValueError('scores field must be of rank 1 or 2') - num_boxes = boxlist.num_boxes() - num_scores = scores.shape[0] - num_classes = scores.shape[1] - - if num_boxes != num_scores: - raise ValueError('Incorrect scores field length: actual vs expected.') - - selected_boxes_list = [] - for class_idx in range(num_classes): - boxlist_and_class_scores = np_box_list.BoxList(boxlist.get()) - class_scores = np.reshape(scores[0:num_scores, class_idx], [-1]) - boxlist_and_class_scores.add_field('scores', class_scores) - boxlist_filt = filter_scores_greater_than(boxlist_and_class_scores, - score_thresh) - nms_result = non_max_suppression(boxlist_filt, - max_output_size=max_output_size, - iou_threshold=iou_thresh, - score_threshold=score_thresh) - nms_result.add_field( - 'classes', np.zeros_like(nms_result.get_field('scores')) + class_idx) - selected_boxes_list.append(nms_result) - selected_boxes = concatenate(selected_boxes_list) - sorted_boxes = sort_by_field(selected_boxes, 'scores') - return sorted_boxes - - -def scale(boxlist, y_scale, x_scale): - """Scale box coordinates in x and y dimensions. 
- - Args: - boxlist: BoxList holding N boxes - y_scale: float - x_scale: float - - Returns: - boxlist: BoxList holding N boxes - """ - y_min, x_min, y_max, x_max = np.array_split(boxlist.get(), 4, axis=1) - y_min = y_scale * y_min - y_max = y_scale * y_max - x_min = x_scale * x_min - x_max = x_scale * x_max - scaled_boxlist = np_box_list.BoxList(np.hstack([y_min, x_min, y_max, x_max])) - - fields = boxlist.get_extra_fields() - for field in fields: - extra_field_data = boxlist.get_field(field) - scaled_boxlist.add_field(field, extra_field_data) - - return scaled_boxlist - - -def clip_to_window(boxlist, window): - """Clip bounding boxes to a window. - - This op clips input bounding boxes (represented by bounding box - corners) to a window, optionally filtering out boxes that do not - overlap at all with the window. - - Args: - boxlist: BoxList holding M_in boxes - window: a numpy array of shape [4] representing the - [y_min, x_min, y_max, x_max] window to which the op - should clip boxes. - - Returns: - a BoxList holding M_out boxes where M_out <= M_in - """ - y_min, x_min, y_max, x_max = np.array_split(boxlist.get(), 4, axis=1) - win_y_min = window[0] - win_x_min = window[1] - win_y_max = window[2] - win_x_max = window[3] - y_min_clipped = np.fmax(np.fmin(y_min, win_y_max), win_y_min) - y_max_clipped = np.fmax(np.fmin(y_max, win_y_max), win_y_min) - x_min_clipped = np.fmax(np.fmin(x_min, win_x_max), win_x_min) - x_max_clipped = np.fmax(np.fmin(x_max, win_x_max), win_x_min) - clipped = np_box_list.BoxList( - np.hstack([y_min_clipped, x_min_clipped, y_max_clipped, x_max_clipped])) - clipped = _copy_extra_fields(clipped, boxlist) - areas = area(clipped) - nonzero_area_indices = np.reshape(np.nonzero(np.greater(areas, 0.0)), - [-1]).astype(np.int32) - return gather(clipped, nonzero_area_indices) - - -def prune_non_overlapping_boxes(boxlist1, boxlist2, minoverlap=0.0): - """Prunes the boxes in boxlist1 that overlap less than thresh with boxlist2. 
- - For each box in boxlist1, we want its IOA to be more than minoverlap with - at least one of the boxes in boxlist2. If it does not, we remove it. - - Args: - boxlist1: BoxList holding N boxes. - boxlist2: BoxList holding M boxes. - minoverlap: Minimum required overlap between boxes, to count them as - overlapping. - - Returns: - A pruned boxlist with size [N', 4]. - """ - intersection_over_area = ioa(boxlist2, boxlist1) # [M, N] tensor - intersection_over_area = np.amax(intersection_over_area, axis=0) # [N] tensor - keep_bool = np.greater_equal(intersection_over_area, np.array(minoverlap)) - keep_inds = np.nonzero(keep_bool)[0] - new_boxlist1 = gather(boxlist1, keep_inds) - return new_boxlist1 - - -def prune_outside_window(boxlist, window): - """Prunes bounding boxes that fall outside a given window. - - This function prunes bounding boxes that even partially fall outside the given - window. See also ClipToWindow which only prunes bounding boxes that fall - completely outside the window, and clips any bounding boxes that partially - overflow. - - Args: - boxlist: a BoxList holding M_in boxes. - window: a numpy array of size 4, representing [ymin, xmin, ymax, xmax] - of the window. - - Returns: - pruned_corners: a tensor with shape [M_out, 4] where M_out <= M_in. - valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes - in the input tensor. - """ - - y_min, x_min, y_max, x_max = np.array_split(boxlist.get(), 4, axis=1) - win_y_min = window[0] - win_x_min = window[1] - win_y_max = window[2] - win_x_max = window[3] - coordinate_violations = np.hstack([np.less(y_min, win_y_min), - np.less(x_min, win_x_min), - np.greater(y_max, win_y_max), - np.greater(x_max, win_x_max)]) - valid_indices = np.reshape( - np.where(np.logical_not(np.max(coordinate_violations, axis=1))), [-1]) - return gather(boxlist, valid_indices), valid_indices - - -def concatenate(boxlists, fields=None): - """Concatenate list of BoxLists. 
- - This op concatenates a list of input BoxLists into a larger BoxList. It also - handles concatenation of BoxList fields as long as the field tensor shapes - are equal except for the first dimension. - - Args: - boxlists: list of BoxList objects - fields: optional list of fields to also concatenate. By default, all - fields from the first BoxList in the list are included in the - concatenation. - - Returns: - a BoxList with number of boxes equal to - sum([boxlist.num_boxes() for boxlist in BoxList]) - Raises: - ValueError: if boxlists is invalid (i.e., is not a list, is empty, or - contains non BoxList objects), or if requested fields are not contained in - all boxlists - """ - if not isinstance(boxlists, list): - raise ValueError('boxlists should be a list') - if not boxlists: - raise ValueError('boxlists should have nonzero length') - for boxlist in boxlists: - if not isinstance(boxlist, np_box_list.BoxList): - raise ValueError('all elements of boxlists should be BoxList objects') - concatenated = np_box_list.BoxList( - np.vstack([boxlist.get() for boxlist in boxlists])) - if fields is None: - fields = boxlists[0].get_extra_fields() - for field in fields: - first_field_shape = boxlists[0].get_field(field).shape - first_field_shape = first_field_shape[1:] - for boxlist in boxlists: - if not boxlist.has_field(field): - raise ValueError('boxlist must contain all requested fields') - field_shape = boxlist.get_field(field).shape - field_shape = field_shape[1:] - if field_shape != first_field_shape: - raise ValueError('field %s must have same shape for all boxlists ' - 'except for the 0th dimension.' % field) - concatenated_field = np.concatenate( - [boxlist.get_field(field) for boxlist in boxlists], axis=0) - concatenated.add_field(field, concatenated_field) - return concatenated - - -def filter_scores_greater_than(boxlist, thresh): - """Filter to keep only boxes with score exceeding a given threshold. 
- - This op keeps the collection of boxes whose corresponding scores are - greater than the input threshold. - - Args: - boxlist: BoxList holding N boxes. Must contain a 'scores' field - representing detection scores. - thresh: scalar threshold - - Returns: - a BoxList holding M boxes where M <= N - - Raises: - ValueError: if boxlist not a BoxList object or if it does not - have a scores field - """ - if not isinstance(boxlist, np_box_list.BoxList): - raise ValueError('boxlist must be a BoxList') - if not boxlist.has_field('scores'): - raise ValueError('input boxlist must have \'scores\' field') - scores = boxlist.get_field('scores') - if len(scores.shape) > 2: - raise ValueError('Scores should have rank 1 or 2') - if len(scores.shape) == 2 and scores.shape[1] != 1: - raise ValueError('Scores should have rank 1 or have shape ' - 'consistent with [None, 1]') - high_score_indices = np.reshape(np.where(np.greater(scores, thresh)), - [-1]).astype(np.int32) - return gather(boxlist, high_score_indices) - - -def change_coordinate_frame(boxlist, window): - """Change coordinate frame of the boxlist to be relative to window's frame. - - Given a window of the form [ymin, xmin, ymax, xmax], - changes bounding box coordinates from boxlist to be relative to this window - (e.g., the min corner maps to (0,0) and the max corner maps to (1,1)). - - An example use case is data augmentation: where we are given groundtruth - boxes (boxlist) and would like to randomly crop the image to some - window (window). In this case we need to change the coordinate frame of - each groundtruth box to be relative to this new window. - - Args: - boxlist: A BoxList object holding N boxes. - window: a size 4 1-D numpy array. - - Returns: - Returns a BoxList object with N boxes. 
- """ - win_height = window[2] - window[0] - win_width = window[3] - window[1] - boxlist_new = scale( - np_box_list.BoxList(boxlist.get() - - [window[0], window[1], window[0], window[1]]), - 1.0 / win_height, 1.0 / win_width) - _copy_extra_fields(boxlist_new, boxlist) - - return boxlist_new - - -def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from): - """Copies the extra fields of boxlist_to_copy_from to boxlist_to_copy_to. - - Args: - boxlist_to_copy_to: BoxList to which extra fields are copied. - boxlist_to_copy_from: BoxList from which fields are copied. - - Returns: - boxlist_to_copy_to with extra fields. - """ - for field in boxlist_to_copy_from.get_extra_fields(): - boxlist_to_copy_to.add_field(field, boxlist_to_copy_from.get_field(field)) - return boxlist_to_copy_to - - -def _update_valid_indices_by_removing_high_iou_boxes( - selected_indices, is_index_valid, intersect_over_union, threshold): - max_iou = np.max(intersect_over_union[:, selected_indices], axis=1) - return np.logical_and(is_index_valid, max_iou <= threshold) diff --git a/object_detection/utils/np_box_list_ops_test.py b/object_detection/utils/np_box_list_ops_test.py deleted file mode 100644 index 24a2cc8c..00000000 --- a/object_detection/utils/np_box_list_ops_test.py +++ /dev/null @@ -1,414 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for object_detection.utils.np_box_list_ops.""" - -import numpy as np -import tensorflow as tf - -from object_detection.utils import np_box_list -from object_detection.utils import np_box_list_ops - - -class AreaRelatedTest(tf.test.TestCase): - - def setUp(self): - boxes1 = np.array([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]], - dtype=float) - boxes2 = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], - [0.0, 0.0, 20.0, 20.0]], - dtype=float) - self.boxlist1 = np_box_list.BoxList(boxes1) - self.boxlist2 = np_box_list.BoxList(boxes2) - - def test_area(self): - areas = np_box_list_ops.area(self.boxlist1) - expected_areas = np.array([6.0, 5.0], dtype=float) - self.assertAllClose(expected_areas, areas) - - def test_intersection(self): - intersection = np_box_list_ops.intersection(self.boxlist1, self.boxlist2) - expected_intersection = np.array([[2.0, 0.0, 6.0], [1.0, 0.0, 5.0]], - dtype=float) - self.assertAllClose(intersection, expected_intersection) - - def test_iou(self): - iou = np_box_list_ops.iou(self.boxlist1, self.boxlist2) - expected_iou = np.array([[2.0 / 16.0, 0.0, 6.0 / 400.0], - [1.0 / 16.0, 0.0, 5.0 / 400.0]], - dtype=float) - self.assertAllClose(iou, expected_iou) - - def test_ioa(self): - boxlist1 = np_box_list.BoxList( - np.array( - [[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]], dtype= - np.float32)) - boxlist2 = np_box_list.BoxList( - np.array( - [[0.5, 0.25, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]], dtype=np.float32)) - ioa21 = np_box_list_ops.ioa(boxlist2, boxlist1) - expected_ioa21 = np.array([[0.5, 0.0], - [1.0, 1.0]], - dtype=np.float32) - self.assertAllClose(ioa21, expected_ioa21) - - def test_scale(self): - boxlist = np_box_list.BoxList( - np.array( - [[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]], dtype= - np.float32)) - boxlist_scaled = np_box_list_ops.scale(boxlist, 2.0, 3.0) - expected_boxlist_scaled = np_box_list.BoxList( - np.array( - 
[[0.5, 0.75, 1.5, 2.25], [0.0, 0.0, 1.0, 2.25]], dtype=np.float32)) - self.assertAllClose(expected_boxlist_scaled.get(), boxlist_scaled.get()) - - def test_clip_to_window(self): - boxlist = np_box_list.BoxList( - np.array( - [[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75], - [-0.2, -0.3, 0.7, 1.5]], - dtype=np.float32)) - boxlist_clipped = np_box_list_ops.clip_to_window(boxlist, - [0.0, 0.0, 1.0, 1.0]) - expected_boxlist_clipped = np_box_list.BoxList( - np.array( - [[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75], - [0.0, 0.0, 0.7, 1.0]], - dtype=np.float32)) - self.assertAllClose(expected_boxlist_clipped.get(), boxlist_clipped.get()) - - def test_prune_outside_window(self): - boxlist = np_box_list.BoxList( - np.array( - [[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75], - [-0.2, -0.3, 0.7, 1.5]], - dtype=np.float32)) - boxlist_pruned, _ = np_box_list_ops.prune_outside_window( - boxlist, [0.0, 0.0, 1.0, 1.0]) - expected_boxlist_pruned = np_box_list.BoxList( - np.array( - [[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]], dtype= - np.float32)) - self.assertAllClose(expected_boxlist_pruned.get(), boxlist_pruned.get()) - - def test_concatenate(self): - boxlist1 = np_box_list.BoxList( - np.array( - [[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]], dtype= - np.float32)) - boxlist2 = np_box_list.BoxList( - np.array( - [[0.5, 0.25, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]], dtype=np.float32)) - boxlists = [boxlist1, boxlist2] - boxlist_concatenated = np_box_list_ops.concatenate(boxlists) - boxlist_concatenated_expected = np_box_list.BoxList( - np.array( - [[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75], - [0.5, 0.25, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]], - dtype=np.float32)) - self.assertAllClose(boxlist_concatenated_expected.get(), - boxlist_concatenated.get()) - - def test_change_coordinate_frame(self): - boxlist = np_box_list.BoxList( - np.array( - [[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]], dtype= - np.float32)) - boxlist_coord = np_box_list_ops.change_coordinate_frame( - 
boxlist, np.array([0, 0, 0.5, 0.5], dtype=np.float32)) - expected_boxlist_coord = np_box_list.BoxList( - np.array([[0.5, 0.5, 1.5, 1.5], [0, 0, 1.0, 1.5]], dtype=np.float32)) - self.assertAllClose(boxlist_coord.get(), expected_boxlist_coord.get()) - - def test_filter_scores_greater_than(self): - boxlist = np_box_list.BoxList( - np.array( - [[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]], dtype= - np.float32)) - boxlist.add_field('scores', np.array([0.8, 0.2], np.float32)) - boxlist_greater = np_box_list_ops.filter_scores_greater_than(boxlist, 0.5) - - expected_boxlist_greater = np_box_list.BoxList( - np.array([[0.25, 0.25, 0.75, 0.75]], dtype=np.float32)) - - self.assertAllClose(boxlist_greater.get(), expected_boxlist_greater.get()) - - -class GatherOpsTest(tf.test.TestCase): - - def setUp(self): - boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], - [0.0, 0.0, 20.0, 20.0]], - dtype=float) - self.boxlist = np_box_list.BoxList(boxes) - self.boxlist.add_field('scores', np.array([0.5, 0.7, 0.9], dtype=float)) - self.boxlist.add_field('labels', - np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0], - [0, 0, 0, 0, 1]], - dtype=int)) - - def test_gather_with_out_of_range_indices(self): - indices = np.array([3, 1], dtype=int) - boxlist = self.boxlist - with self.assertRaises(ValueError): - np_box_list_ops.gather(boxlist, indices) - - def test_gather_with_invalid_multidimensional_indices(self): - indices = np.array([[0, 1], [1, 2]], dtype=int) - boxlist = self.boxlist - with self.assertRaises(ValueError): - np_box_list_ops.gather(boxlist, indices) - - def test_gather_without_fields_specified(self): - indices = np.array([2, 0, 1], dtype=int) - boxlist = self.boxlist - subboxlist = np_box_list_ops.gather(boxlist, indices) - - expected_scores = np.array([0.9, 0.5, 0.7], dtype=float) - self.assertAllClose(expected_scores, subboxlist.get_field('scores')) - - expected_boxes = np.array([[0.0, 0.0, 20.0, 20.0], [3.0, 4.0, 6.0, 8.0], - [14.0, 14.0, 15.0, 15.0]], - 
dtype=float) - self.assertAllClose(expected_boxes, subboxlist.get()) - - expected_labels = np.array([[0, 0, 0, 0, 1], [0, 0, 0, 1, 0], - [0, 1, 0, 0, 0]], - dtype=int) - self.assertAllClose(expected_labels, subboxlist.get_field('labels')) - - def test_gather_with_invalid_field_specified(self): - indices = np.array([2, 0, 1], dtype=int) - boxlist = self.boxlist - - with self.assertRaises(ValueError): - np_box_list_ops.gather(boxlist, indices, 'labels') - - with self.assertRaises(ValueError): - np_box_list_ops.gather(boxlist, indices, ['objectness']) - - def test_gather_with_fields_specified(self): - indices = np.array([2, 0, 1], dtype=int) - boxlist = self.boxlist - subboxlist = np_box_list_ops.gather(boxlist, indices, ['labels']) - - self.assertFalse(subboxlist.has_field('scores')) - - expected_boxes = np.array([[0.0, 0.0, 20.0, 20.0], [3.0, 4.0, 6.0, 8.0], - [14.0, 14.0, 15.0, 15.0]], - dtype=float) - self.assertAllClose(expected_boxes, subboxlist.get()) - - expected_labels = np.array([[0, 0, 0, 0, 1], [0, 0, 0, 1, 0], - [0, 1, 0, 0, 0]], - dtype=int) - self.assertAllClose(expected_labels, subboxlist.get_field('labels')) - - -class SortByFieldTest(tf.test.TestCase): - - def setUp(self): - boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], - [0.0, 0.0, 20.0, 20.0]], - dtype=float) - self.boxlist = np_box_list.BoxList(boxes) - self.boxlist.add_field('scores', np.array([0.5, 0.9, 0.4], dtype=float)) - self.boxlist.add_field('labels', - np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0], - [0, 0, 0, 0, 1]], - dtype=int)) - - def test_with_invalid_field(self): - with self.assertRaises(ValueError): - np_box_list_ops.sort_by_field(self.boxlist, 'objectness') - with self.assertRaises(ValueError): - np_box_list_ops.sort_by_field(self.boxlist, 'labels') - - def test_with_invalid_sorting_order(self): - with self.assertRaises(ValueError): - np_box_list_ops.sort_by_field(self.boxlist, 'scores', 'Descending') - - def test_with_descending_sorting(self): - sorted_boxlist = 
np_box_list_ops.sort_by_field(self.boxlist, 'scores') - - expected_boxes = np.array([[14.0, 14.0, 15.0, 15.0], [3.0, 4.0, 6.0, 8.0], - [0.0, 0.0, 20.0, 20.0]], - dtype=float) - self.assertAllClose(expected_boxes, sorted_boxlist.get()) - - expected_scores = np.array([0.9, 0.5, 0.4], dtype=float) - self.assertAllClose(expected_scores, sorted_boxlist.get_field('scores')) - - def test_with_ascending_sorting(self): - sorted_boxlist = np_box_list_ops.sort_by_field( - self.boxlist, 'scores', np_box_list_ops.SortOrder.ASCEND) - - expected_boxes = np.array([[0.0, 0.0, 20.0, 20.0], - [3.0, 4.0, 6.0, 8.0], - [14.0, 14.0, 15.0, 15.0],], - dtype=float) - self.assertAllClose(expected_boxes, sorted_boxlist.get()) - - expected_scores = np.array([0.4, 0.5, 0.9], dtype=float) - self.assertAllClose(expected_scores, sorted_boxlist.get_field('scores')) - - -class NonMaximumSuppressionTest(tf.test.TestCase): - - def setUp(self): - self._boxes = np.array([[0, 0, 1, 1], - [0, 0.1, 1, 1.1], - [0, -0.1, 1, 0.9], - [0, 10, 1, 11], - [0, 10.1, 1, 11.1], - [0, 100, 1, 101]], - dtype=float) - self._boxlist = np_box_list.BoxList(self._boxes) - - def test_with_no_scores_field(self): - boxlist = np_box_list.BoxList(self._boxes) - max_output_size = 3 - iou_threshold = 0.5 - - with self.assertRaises(ValueError): - np_box_list_ops.non_max_suppression( - boxlist, max_output_size, iou_threshold) - - def test_nms_disabled_max_output_size_equals_three(self): - boxlist = np_box_list.BoxList(self._boxes) - boxlist.add_field('scores', - np.array([.9, .75, .6, .95, .2, .3], dtype=float)) - max_output_size = 3 - iou_threshold = 1. 
# No NMS - - expected_boxes = np.array([[0, 10, 1, 11], [0, 0, 1, 1], [0, 0.1, 1, 1.1]], - dtype=float) - nms_boxlist = np_box_list_ops.non_max_suppression( - boxlist, max_output_size, iou_threshold) - self.assertAllClose(nms_boxlist.get(), expected_boxes) - - def test_select_from_three_clusters(self): - boxlist = np_box_list.BoxList(self._boxes) - boxlist.add_field('scores', - np.array([.9, .75, .6, .95, .2, .3], dtype=float)) - max_output_size = 3 - iou_threshold = 0.5 - - expected_boxes = np.array([[0, 10, 1, 11], [0, 0, 1, 1], [0, 100, 1, 101]], - dtype=float) - nms_boxlist = np_box_list_ops.non_max_suppression( - boxlist, max_output_size, iou_threshold) - self.assertAllClose(nms_boxlist.get(), expected_boxes) - - def test_select_at_most_two_from_three_clusters(self): - boxlist = np_box_list.BoxList(self._boxes) - boxlist.add_field('scores', - np.array([.9, .75, .6, .95, .5, .3], dtype=float)) - max_output_size = 2 - iou_threshold = 0.5 - - expected_boxes = np.array([[0, 10, 1, 11], [0, 0, 1, 1]], dtype=float) - nms_boxlist = np_box_list_ops.non_max_suppression( - boxlist, max_output_size, iou_threshold) - self.assertAllClose(nms_boxlist.get(), expected_boxes) - - def test_select_at_most_thirty_from_three_clusters(self): - boxlist = np_box_list.BoxList(self._boxes) - boxlist.add_field('scores', - np.array([.9, .75, .6, .95, .5, .3], dtype=float)) - max_output_size = 30 - iou_threshold = 0.5 - - expected_boxes = np.array([[0, 10, 1, 11], [0, 0, 1, 1], [0, 100, 1, 101]], - dtype=float) - nms_boxlist = np_box_list_ops.non_max_suppression( - boxlist, max_output_size, iou_threshold) - self.assertAllClose(nms_boxlist.get(), expected_boxes) - - def test_select_from_ten_indentical_boxes(self): - boxes = np.array(10 * [[0, 0, 1, 1]], dtype=float) - boxlist = np_box_list.BoxList(boxes) - boxlist.add_field('scores', np.array(10 * [0.8])) - iou_threshold = .5 - max_output_size = 3 - expected_boxes = np.array([[0, 0, 1, 1]], dtype=float) - nms_boxlist = 
np_box_list_ops.non_max_suppression( - boxlist, max_output_size, iou_threshold) - self.assertAllClose(nms_boxlist.get(), expected_boxes) - - def test_different_iou_threshold(self): - boxes = np.array([[0, 0, 20, 100], [0, 0, 20, 80], [200, 200, 210, 300], - [200, 200, 210, 250]], - dtype=float) - boxlist = np_box_list.BoxList(boxes) - boxlist.add_field('scores', np.array([0.9, 0.8, 0.7, 0.6])) - max_output_size = 4 - - iou_threshold = .4 - expected_boxes = np.array([[0, 0, 20, 100], - [200, 200, 210, 300],], - dtype=float) - nms_boxlist = np_box_list_ops.non_max_suppression( - boxlist, max_output_size, iou_threshold) - self.assertAllClose(nms_boxlist.get(), expected_boxes) - - iou_threshold = .5 - expected_boxes = np.array([[0, 0, 20, 100], [200, 200, 210, 300], - [200, 200, 210, 250]], - dtype=float) - nms_boxlist = np_box_list_ops.non_max_suppression( - boxlist, max_output_size, iou_threshold) - self.assertAllClose(nms_boxlist.get(), expected_boxes) - - iou_threshold = .8 - expected_boxes = np.array([[0, 0, 20, 100], [0, 0, 20, 80], - [200, 200, 210, 300], [200, 200, 210, 250]], - dtype=float) - nms_boxlist = np_box_list_ops.non_max_suppression( - boxlist, max_output_size, iou_threshold) - self.assertAllClose(nms_boxlist.get(), expected_boxes) - - def test_multiclass_nms(self): - boxlist = np_box_list.BoxList( - np.array( - [[0.2, 0.4, 0.8, 0.8], [0.4, 0.2, 0.8, 0.8], [0.6, 0.0, 1.0, 1.0]], - dtype=np.float32)) - scores = np.array([[-0.2, 0.1, 0.5, -0.4, 0.3], - [0.7, -0.7, 0.6, 0.2, -0.9], - [0.4, 0.34, -0.9, 0.2, 0.31]], - dtype=np.float32) - boxlist.add_field('scores', scores) - boxlist_clean = np_box_list_ops.multi_class_non_max_suppression( - boxlist, score_thresh=0.25, iou_thresh=0.1, max_output_size=3) - - scores_clean = boxlist_clean.get_field('scores') - classes_clean = boxlist_clean.get_field('classes') - boxes = boxlist_clean.get() - expected_scores = np.array([0.7, 0.6, 0.34, 0.31]) - expected_classes = np.array([0, 2, 1, 4]) - expected_boxes = 
np.array([[0.4, 0.2, 0.8, 0.8], - [0.4, 0.2, 0.8, 0.8], - [0.6, 0.0, 1.0, 1.0], - [0.6, 0.0, 1.0, 1.0]], - dtype=np.float32) - self.assertAllClose(scores_clean, expected_scores) - self.assertAllClose(classes_clean, expected_classes) - self.assertAllClose(boxes, expected_boxes) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/utils/np_box_list_test.py b/object_detection/utils/np_box_list_test.py deleted file mode 100644 index bb0ee5d2..00000000 --- a/object_detection/utils/np_box_list_test.py +++ /dev/null @@ -1,135 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for object_detection.utils.np_box_list_test.""" - -import numpy as np -import tensorflow as tf - -from object_detection.utils import np_box_list - - -class BoxListTest(tf.test.TestCase): - - def test_invalid_box_data(self): - with self.assertRaises(ValueError): - np_box_list.BoxList([0, 0, 1, 1]) - - with self.assertRaises(ValueError): - np_box_list.BoxList(np.array([[0, 0, 1, 1]], dtype=int)) - - with self.assertRaises(ValueError): - np_box_list.BoxList(np.array([0, 1, 1, 3, 4], dtype=float)) - - with self.assertRaises(ValueError): - np_box_list.BoxList(np.array([[0, 1, 1, 3], [3, 1, 1, 5]], dtype=float)) - - def test_has_field_with_existed_field(self): - boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], - [0.0, 0.0, 20.0, 20.0]], - dtype=float) - boxlist = np_box_list.BoxList(boxes) - self.assertTrue(boxlist.has_field('boxes')) - - def test_has_field_with_nonexisted_field(self): - boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], - [0.0, 0.0, 20.0, 20.0]], - dtype=float) - boxlist = np_box_list.BoxList(boxes) - self.assertFalse(boxlist.has_field('scores')) - - def test_get_field_with_existed_field(self): - boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], - [0.0, 0.0, 20.0, 20.0]], - dtype=float) - boxlist = np_box_list.BoxList(boxes) - self.assertTrue(np.allclose(boxlist.get_field('boxes'), boxes)) - - def test_get_field_with_nonexited_field(self): - boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], - [0.0, 0.0, 20.0, 20.0]], - dtype=float) - boxlist = np_box_list.BoxList(boxes) - with self.assertRaises(ValueError): - boxlist.get_field('scores') - - -class AddExtraFieldTest(tf.test.TestCase): - - def setUp(self): - boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], - [0.0, 0.0, 20.0, 20.0]], - dtype=float) - self.boxlist = np_box_list.BoxList(boxes) - - def 
test_add_already_existed_field(self): - with self.assertRaises(ValueError): - self.boxlist.add_field('boxes', np.array([[0, 0, 0, 1, 0]], dtype=float)) - - def test_add_invalid_field_data(self): - with self.assertRaises(ValueError): - self.boxlist.add_field('scores', np.array([0.5, 0.7], dtype=float)) - with self.assertRaises(ValueError): - self.boxlist.add_field('scores', - np.array([0.5, 0.7, 0.9, 0.1], dtype=float)) - - def test_add_single_dimensional_field_data(self): - boxlist = self.boxlist - scores = np.array([0.5, 0.7, 0.9], dtype=float) - boxlist.add_field('scores', scores) - self.assertTrue(np.allclose(scores, self.boxlist.get_field('scores'))) - - def test_add_multi_dimensional_field_data(self): - boxlist = self.boxlist - labels = np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0], [0, 0, 0, 0, 1]], - dtype=int) - boxlist.add_field('labels', labels) - self.assertTrue(np.allclose(labels, self.boxlist.get_field('labels'))) - - def test_get_extra_fields(self): - boxlist = self.boxlist - self.assertSameElements(boxlist.get_extra_fields(), []) - - scores = np.array([0.5, 0.7, 0.9], dtype=float) - boxlist.add_field('scores', scores) - self.assertSameElements(boxlist.get_extra_fields(), ['scores']) - - labels = np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0], [0, 0, 0, 0, 1]], - dtype=int) - boxlist.add_field('labels', labels) - self.assertSameElements(boxlist.get_extra_fields(), ['scores', 'labels']) - - def test_get_coordinates(self): - y_min, x_min, y_max, x_max = self.boxlist.get_coordinates() - - expected_y_min = np.array([3.0, 14.0, 0.0], dtype=float) - expected_x_min = np.array([4.0, 14.0, 0.0], dtype=float) - expected_y_max = np.array([6.0, 15.0, 20.0], dtype=float) - expected_x_max = np.array([8.0, 15.0, 20.0], dtype=float) - - self.assertTrue(np.allclose(y_min, expected_y_min)) - self.assertTrue(np.allclose(x_min, expected_x_min)) - self.assertTrue(np.allclose(y_max, expected_y_max)) - self.assertTrue(np.allclose(x_max, expected_x_max)) - - def 
test_num_boxes(self): - boxes = np.array([[0., 0., 100., 100.], [10., 30., 50., 70.]], dtype=float) - boxlist = np_box_list.BoxList(boxes) - expected_num_boxes = 2 - self.assertEquals(boxlist.num_boxes(), expected_num_boxes) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/utils/np_box_ops.py b/object_detection/utils/np_box_ops.py deleted file mode 100644 index b4b46a75..00000000 --- a/object_detection/utils/np_box_ops.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Operations for [N, 4] numpy arrays representing bounding boxes. - -Example box operations that are supported: - * Areas: compute bounding box areas - * IOU: pairwise intersection-over-union scores -""" -import numpy as np - - -def area(boxes): - """Computes area of boxes. - - Args: - boxes: Numpy array with shape [N, 4] holding N boxes - - Returns: - a numpy array with shape [N*1] representing box areas - """ - return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) - - -def intersection(boxes1, boxes2): - """Compute pairwise intersection areas between boxes. 
- - Args: - boxes1: a numpy array with shape [N, 4] holding N boxes - boxes2: a numpy array with shape [M, 4] holding M boxes - - Returns: - a numpy array with shape [N*M] representing pairwise intersection area - """ - [y_min1, x_min1, y_max1, x_max1] = np.split(boxes1, 4, axis=1) - [y_min2, x_min2, y_max2, x_max2] = np.split(boxes2, 4, axis=1) - - all_pairs_min_ymax = np.minimum(y_max1, np.transpose(y_max2)) - all_pairs_max_ymin = np.maximum(y_min1, np.transpose(y_min2)) - intersect_heights = np.maximum( - np.zeros(all_pairs_max_ymin.shape), - all_pairs_min_ymax - all_pairs_max_ymin) - all_pairs_min_xmax = np.minimum(x_max1, np.transpose(x_max2)) - all_pairs_max_xmin = np.maximum(x_min1, np.transpose(x_min2)) - intersect_widths = np.maximum( - np.zeros(all_pairs_max_xmin.shape), - all_pairs_min_xmax - all_pairs_max_xmin) - return intersect_heights * intersect_widths - - -def iou(boxes1, boxes2): - """Computes pairwise intersection-over-union between box collections. - - Args: - boxes1: a numpy array with shape [N, 4] holding N boxes. - boxes2: a numpy array with shape [M, 4] holding N boxes. - - Returns: - a numpy array with shape [N, M] representing pairwise iou scores. - """ - intersect = intersection(boxes1, boxes2) - area1 = area(boxes1) - area2 = area(boxes2) - union = np.expand_dims(area1, axis=1) + np.expand_dims( - area2, axis=0) - intersect - return intersect / union - - -def ioa(boxes1, boxes2): - """Computes pairwise intersection-over-area between box collections. - - Intersection-over-area (ioa) between two boxes box1 and box2 is defined as - their intersection area over box2's area. Note that ioa is not symmetric, - that is, IOA(box1, box2) != IOA(box2, box1). - - Args: - boxes1: a numpy array with shape [N, 4] holding N boxes. - boxes2: a numpy array with shape [M, 4] holding N boxes. - - Returns: - a numpy array with shape [N, M] representing pairwise ioa scores. 
- """ - intersect = intersection(boxes1, boxes2) - areas = np.expand_dims(area(boxes2), axis=0) - return intersect / areas diff --git a/object_detection/utils/np_box_ops_test.py b/object_detection/utils/np_box_ops_test.py deleted file mode 100644 index 730f3d20..00000000 --- a/object_detection/utils/np_box_ops_test.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.np_box_ops.""" - -import numpy as np -import tensorflow as tf - -from object_detection.utils import np_box_ops - - -class BoxOpsTests(tf.test.TestCase): - - def setUp(self): - boxes1 = np.array([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]], - dtype=float) - boxes2 = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], - [0.0, 0.0, 20.0, 20.0]], - dtype=float) - self.boxes1 = boxes1 - self.boxes2 = boxes2 - - def testArea(self): - areas = np_box_ops.area(self.boxes1) - expected_areas = np.array([6.0, 5.0], dtype=float) - self.assertAllClose(expected_areas, areas) - - def testIntersection(self): - intersection = np_box_ops.intersection(self.boxes1, self.boxes2) - expected_intersection = np.array([[2.0, 0.0, 6.0], [1.0, 0.0, 5.0]], - dtype=float) - self.assertAllClose(intersection, expected_intersection) - - def testIOU(self): - iou = np_box_ops.iou(self.boxes1, self.boxes2) - expected_iou = 
np.array([[2.0 / 16.0, 0.0, 6.0 / 400.0], - [1.0 / 16.0, 0.0, 5.0 / 400.0]], - dtype=float) - self.assertAllClose(iou, expected_iou) - - def testIOA(self): - boxes1 = np.array([[0.25, 0.25, 0.75, 0.75], - [0.0, 0.0, 0.5, 0.75]], - dtype=np.float32) - boxes2 = np.array([[0.5, 0.25, 1.0, 1.0], - [0.0, 0.0, 1.0, 1.0]], - dtype=np.float32) - ioa21 = np_box_ops.ioa(boxes2, boxes1) - expected_ioa21 = np.array([[0.5, 0.0], - [1.0, 1.0]], - dtype=np.float32) - self.assertAllClose(ioa21, expected_ioa21) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/utils/object_detection_evaluation.py b/object_detection/utils/object_detection_evaluation.py deleted file mode 100644 index 5db1557d..00000000 --- a/object_detection/utils/object_detection_evaluation.py +++ /dev/null @@ -1,616 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""object_detection_evaluation module. - -ObjectDetectionEvaluation is a class which manages ground truth information of a -object detection dataset, and computes frequently used detection metrics such as -Precision, Recall, CorLoc of the provided detection results. -It supports the following operations: -1) Add ground truth information of images sequentially. -2) Add detection result of images sequentially. -3) Evaluate detection metrics on already inserted detection results. 
-4) Write evaluation result into a pickle file for future processing or - visualization. - -Note: This module operates on numpy boxes and box lists. -""" - -from abc import ABCMeta -from abc import abstractmethod -import collections -import logging -import numpy as np - -from object_detection.core import standard_fields -from object_detection.utils import label_map_util -from object_detection.utils import metrics -from object_detection.utils import per_image_evaluation - - -class DetectionEvaluator(object): - """Interface for object detection evalution classes. - - Example usage of the Evaluator: - ------------------------------ - evaluator = DetectionEvaluator(categories) - - # Detections and groundtruth for image 1. - evaluator.add_single_groundtruth_image_info(...) - evaluator.add_single_detected_image_info(...) - - # Detections and groundtruth for image 2. - evaluator.add_single_groundtruth_image_info(...) - evaluator.add_single_detected_image_info(...) - - metrics_dict = evaluator.evaluate() - """ - __metaclass__ = ABCMeta - - def __init__(self, categories): - """Constructor. - - Args: - categories: A list of dicts, each of which has the following keys - - 'id': (required) an integer id uniquely identifying this category. - 'name': (required) string representing category name e.g., 'cat', 'dog'. - """ - self._categories = categories - - @abstractmethod - def add_single_ground_truth_image_info(self, image_id, groundtruth_dict): - """Adds groundtruth for a single image to be used for evaluation. - - Args: - image_id: A unique string/integer identifier for the image. - groundtruth_dict: A dictionary of groundtruth numpy arrays required - for evaluations. - """ - pass - - @abstractmethod - def add_single_detected_image_info(self, image_id, detections_dict): - """Adds detections for a single image to be used for evaluation. - - Args: - image_id: A unique string/integer identifier for the image. 
- detections_dict: A dictionary of detection numpy arrays required - for evaluation. - """ - pass - - @abstractmethod - def evaluate(self): - """Evaluates detections and returns a dictionary of metrics.""" - pass - - @abstractmethod - def clear(self): - """Clears the state to prepare for a fresh evaluation.""" - pass - - -class ObjectDetectionEvaluator(DetectionEvaluator): - """A class to evaluate detections.""" - - def __init__(self, - categories, - matching_iou_threshold=0.5, - evaluate_corlocs=False, - metric_prefix=None, - use_weighted_mean_ap=False): - """Constructor. - - Args: - categories: A list of dicts, each of which has the following keys - - 'id': (required) an integer id uniquely identifying this category. - 'name': (required) string representing category name e.g., 'cat', 'dog'. - matching_iou_threshold: IOU threshold to use for matching groundtruth - boxes to detection boxes. - evaluate_corlocs: (optional) boolean which determines if corloc scores - are to be returned or not. - metric_prefix: (optional) string prefix for metric name; if None, no - prefix is used. - use_weighted_mean_ap: (optional) boolean which determines if the mean - average precision is computed directly from the scores and tp_fp_labels - of all classes. 
- """ - super(ObjectDetectionEvaluator, self).__init__(categories) - self._num_classes = max([cat['id'] for cat in categories]) - self._matching_iou_threshold = matching_iou_threshold - self._use_weighted_mean_ap = use_weighted_mean_ap - self._label_id_offset = 1 - self._evaluation = ObjectDetectionEvaluation( - self._num_classes, - matching_iou_threshold=self._matching_iou_threshold, - use_weighted_mean_ap=self._use_weighted_mean_ap, - label_id_offset=self._label_id_offset) - self._image_ids = set([]) - self._evaluate_corlocs = evaluate_corlocs - self._metric_prefix = (metric_prefix + '/') if metric_prefix else '' - - def add_single_ground_truth_image_info(self, image_id, groundtruth_dict): - """Adds groundtruth for a single image to be used for evaluation. - - Args: - image_id: A unique string/integer identifier for the image. - groundtruth_dict: A dictionary containing - - standard_fields.InputDataFields.groundtruth_boxes: float32 numpy array - of shape [num_boxes, 4] containing `num_boxes` groundtruth boxes of - the format [ymin, xmin, ymax, xmax] in absolute image coordinates. - standard_fields.InputDataFields.groundtruth_classes: integer numpy array - of shape [num_boxes] containing 1-indexed groundtruth classes for the - boxes. - standard_fields.InputDataFields.groundtruth_difficult: Optional length - M numpy boolean array denoting whether a ground truth box is a - difficult instance or not. This field is optional to support the case - that no boxes are difficult. - - Raises: - ValueError: On adding groundtruth for an image more than once. 
- """ - if image_id in self._image_ids: - raise ValueError('Image with id {} already added.'.format(image_id)) - - groundtruth_classes = groundtruth_dict[ - standard_fields.InputDataFields.groundtruth_classes] - groundtruth_classes -= self._label_id_offset - # If the key is not present in the groundtruth_dict or the array is empty - # (unless there are no annotations for the groundtruth on this image) - # use values from the dictionary or insert None otherwise. - if (standard_fields.InputDataFields.groundtruth_difficult in - groundtruth_dict.keys() and - (groundtruth_dict[standard_fields.InputDataFields.groundtruth_difficult] - .size or not groundtruth_classes.size)): - groundtruth_difficult = groundtruth_dict[ - standard_fields.InputDataFields.groundtruth_difficult] - else: - groundtruth_difficult = None - if not len(self._image_ids) % 1000: - logging.warn( - 'image %s does not have groundtruth difficult flag specified', - image_id) - self._evaluation.add_single_ground_truth_image_info( - image_id, - groundtruth_dict[standard_fields.InputDataFields.groundtruth_boxes], - groundtruth_classes, - groundtruth_is_difficult_list=groundtruth_difficult) - self._image_ids.update([image_id]) - - def add_single_detected_image_info(self, image_id, detections_dict): - """Adds detections for a single image to be used for evaluation. - - Args: - image_id: A unique string/integer identifier for the image. - detections_dict: A dictionary containing - - standard_fields.DetectionResultFields.detection_boxes: float32 numpy - array of shape [num_boxes, 4] containing `num_boxes` detection boxes - of the format [ymin, xmin, ymax, xmax] in absolute image coordinates. - standard_fields.DetectionResultFields.detection_scores: float32 numpy - array of shape [num_boxes] containing detection scores for the boxes. - standard_fields.DetectionResultFields.detection_classes: integer numpy - array of shape [num_boxes] containing 1-indexed detection classes for - the boxes. 
- """ - detection_classes = detections_dict[ - standard_fields.DetectionResultFields.detection_classes] - detection_classes -= self._label_id_offset - self._evaluation.add_single_detected_image_info( - image_id, - detections_dict[standard_fields.DetectionResultFields.detection_boxes], - detections_dict[standard_fields.DetectionResultFields.detection_scores], - detection_classes) - - def evaluate(self): - """Compute evaluation result. - - Returns: - A dictionary of metrics with the following fields - - - 1. summary_metrics: - 'Precision/mAP@IOU': mean average precision at - the specified IOU threshold. - - 2. per_category_ap: category specific results with keys of the form - 'PerformanceByCategory/mAP@IOU/category'. - """ - (per_class_ap, mean_ap, _, _, per_class_corloc, mean_corloc) = ( - self._evaluation.evaluate()) - pascal_metrics = { - self._metric_prefix + - 'Precision/mAP@{}IOU'.format(self._matching_iou_threshold): - mean_ap - } - if self._evaluate_corlocs: - pascal_metrics[self._metric_prefix + 'Precision/meanCorLoc@{}IOU'.format( - self._matching_iou_threshold)] = mean_corloc - category_index = label_map_util.create_category_index(self._categories) - for idx in range(per_class_ap.size): - if idx + self._label_id_offset in category_index: - display_name = ( - self._metric_prefix + 'PerformanceByCategory/AP@{}IOU/{}'.format( - self._matching_iou_threshold, - category_index[idx + self._label_id_offset]['name'])) - pascal_metrics[display_name] = per_class_ap[idx] - - # Optionally add CorLoc metrics.classes - if self._evaluate_corlocs: - display_name = ( - self._metric_prefix + 'PerformanceByCategory/CorLoc@{}IOU/{}' - .format(self._matching_iou_threshold, - category_index[idx + self._label_id_offset]['name'])) - pascal_metrics[display_name] = per_class_corloc[idx] - - return pascal_metrics - - def clear(self): - """Clears the state to prepare for a fresh evaluation.""" - self._evaluation = ObjectDetectionEvaluation( - self._num_classes, - 
matching_iou_threshold=self._matching_iou_threshold, - use_weighted_mean_ap=self._use_weighted_mean_ap, - label_id_offset=self._label_id_offset) - self._image_ids.clear() - - -class PascalDetectionEvaluator(ObjectDetectionEvaluator): - """A class to evaluate detections using PASCAL metrics.""" - - def __init__(self, categories, matching_iou_threshold=0.5): - super(PascalDetectionEvaluator, self).__init__( - categories, - matching_iou_threshold=matching_iou_threshold, - evaluate_corlocs=False, - metric_prefix='PASCAL', - use_weighted_mean_ap=False) - - -class WeightedPascalDetectionEvaluator(ObjectDetectionEvaluator): - """A class to evaluate detections using weighted PASCAL metrics. - - Weighted PASCAL metrics computes the mean average precision as the average - precision given the scores and tp_fp_labels of all classes. In comparison, - PASCAL metrics computes the mean average precision as the mean of the - per-class average precisions. - - This definition is very similar to the mean of the per-class average - precisions weighted by class frequency. However, they are typically not the - same as the average precision is not a linear function of the scores and - tp_fp_labels. - """ - - def __init__(self, categories, matching_iou_threshold=0.5): - super(WeightedPascalDetectionEvaluator, self).__init__( - categories, - matching_iou_threshold=matching_iou_threshold, - evaluate_corlocs=False, - metric_prefix='WeightedPASCAL', - use_weighted_mean_ap=True) - - -class OpenImagesDetectionEvaluator(ObjectDetectionEvaluator): - """A class to evaluate detections using Open Images V2 metrics. - - Open Images V2 introduce group_of type of bounding boxes and this metric - handles those boxes appropriately. - """ - - def __init__(self, - categories, - matching_iou_threshold=0.5, - evaluate_corlocs=False): - """Constructor. - - Args: - categories: A list of dicts, each of which has the following keys - - 'id': (required) an integer id uniquely identifying this category. 
- 'name': (required) string representing category name e.g., 'cat', 'dog'. - matching_iou_threshold: IOU threshold to use for matching groundtruth - boxes to detection boxes. - evaluate_corlocs: if True, additionally evaluates and returns CorLoc. - """ - super(OpenImagesDetectionEvaluator, self).__init__( - categories, - matching_iou_threshold, - evaluate_corlocs, - metric_prefix='OpenImagesV2') - - def add_single_ground_truth_image_info(self, image_id, groundtruth_dict): - """Adds groundtruth for a single image to be used for evaluation. - - Args: - image_id: A unique string/integer identifier for the image. - groundtruth_dict: A dictionary containing - - standard_fields.InputDataFields.groundtruth_boxes: float32 numpy array - of shape [num_boxes, 4] containing `num_boxes` groundtruth boxes of - the format [ymin, xmin, ymax, xmax] in absolute image coordinates. - standard_fields.InputDataFields.groundtruth_classes: integer numpy array - of shape [num_boxes] containing 1-indexed groundtruth classes for the - boxes. - standard_fields.InputDataFields.groundtruth_group_of: Optional length - M numpy boolean array denoting whether a groundtruth box contains a - group of instances. - - Raises: - ValueError: On adding groundtruth for an image more than once. - """ - if image_id in self._image_ids: - raise ValueError('Image with id {} already added.'.format(image_id)) - - groundtruth_classes = groundtruth_dict[ - standard_fields.InputDataFields.groundtruth_classes] - groundtruth_classes -= self._label_id_offset - # If the key is not present in the groundtruth_dict or the array is empty - # (unless there are no annotations for the groundtruth on this image) - # use values from the dictionary or insert None otherwise. 
- if (standard_fields.InputDataFields.groundtruth_group_of in - groundtruth_dict.keys() and - (groundtruth_dict[standard_fields.InputDataFields.groundtruth_group_of] - .size or not groundtruth_classes.size)): - groundtruth_group_of = groundtruth_dict[ - standard_fields.InputDataFields.groundtruth_group_of] - else: - groundtruth_group_of = None - if not len(self._image_ids) % 1000: - logging.warn( - 'image %s does not have groundtruth group_of flag specified', - image_id) - self._evaluation.add_single_ground_truth_image_info( - image_id, - groundtruth_dict[standard_fields.InputDataFields.groundtruth_boxes], - groundtruth_classes, - groundtruth_is_difficult_list=None, - groundtruth_is_group_of_list=groundtruth_group_of) - self._image_ids.update([image_id]) - - -ObjectDetectionEvalMetrics = collections.namedtuple( - 'ObjectDetectionEvalMetrics', [ - 'average_precisions', 'mean_ap', 'precisions', 'recalls', 'corlocs', - 'mean_corloc' - ]) - - -class ObjectDetectionEvaluation(object): - """Internal implementation of Pascal object detection metrics.""" - - def __init__(self, - num_groundtruth_classes, - matching_iou_threshold=0.5, - nms_iou_threshold=1.0, - nms_max_output_boxes=10000, - use_weighted_mean_ap=False, - label_id_offset=0): - self.per_image_eval = per_image_evaluation.PerImageEvaluation( - num_groundtruth_classes, matching_iou_threshold, nms_iou_threshold, - nms_max_output_boxes) - self.num_class = num_groundtruth_classes - self.label_id_offset = label_id_offset - - self.groundtruth_boxes = {} - self.groundtruth_class_labels = {} - self.groundtruth_is_difficult_list = {} - self.groundtruth_is_group_of_list = {} - self.num_gt_instances_per_class = np.zeros(self.num_class, dtype=int) - self.num_gt_imgs_per_class = np.zeros(self.num_class, dtype=int) - - self.detection_keys = set() - self.scores_per_class = [[] for _ in range(self.num_class)] - self.tp_fp_labels_per_class = [[] for _ in range(self.num_class)] - self.num_images_correctly_detected_per_class = 
np.zeros(self.num_class) - self.average_precision_per_class = np.empty(self.num_class, dtype=float) - self.average_precision_per_class.fill(np.nan) - self.precisions_per_class = [] - self.recalls_per_class = [] - self.corloc_per_class = np.ones(self.num_class, dtype=float) - - self.use_weighted_mean_ap = use_weighted_mean_ap - - def clear_detections(self): - self.detection_keys = {} - self.scores_per_class = [[] for _ in range(self.num_class)] - self.tp_fp_labels_per_class = [[] for _ in range(self.num_class)] - self.num_images_correctly_detected_per_class = np.zeros(self.num_class) - self.average_precision_per_class = np.zeros(self.num_class, dtype=float) - self.precisions_per_class = [] - self.recalls_per_class = [] - self.corloc_per_class = np.ones(self.num_class, dtype=float) - - def add_single_ground_truth_image_info(self, - image_key, - groundtruth_boxes, - groundtruth_class_labels, - groundtruth_is_difficult_list=None, - groundtruth_is_group_of_list=None): - """Adds groundtruth for a single image to be used for evaluation. - - Args: - image_key: A unique string/integer identifier for the image. - groundtruth_boxes: float32 numpy array of shape [num_boxes, 4] - containing `num_boxes` groundtruth boxes of the format - [ymin, xmin, ymax, xmax] in absolute image coordinates. - groundtruth_class_labels: integer numpy array of shape [num_boxes] - containing 0-indexed groundtruth classes for the boxes. - groundtruth_is_difficult_list: A length M numpy boolean array denoting - whether a ground truth box is a difficult instance or not. To support - the case that no boxes are difficult, it is by default set as None. - groundtruth_is_group_of_list: A length M numpy boolean array denoting - whether a ground truth box is a group-of box or not. To support - the case that no boxes are groups-of, it is by default set as None. 
- """ - if image_key in self.groundtruth_boxes: - logging.warn( - 'image %s has already been added to the ground truth database.', - image_key) - return - - self.groundtruth_boxes[image_key] = groundtruth_boxes - self.groundtruth_class_labels[image_key] = groundtruth_class_labels - if groundtruth_is_difficult_list is None: - num_boxes = groundtruth_boxes.shape[0] - groundtruth_is_difficult_list = np.zeros(num_boxes, dtype=bool) - self.groundtruth_is_difficult_list[ - image_key] = groundtruth_is_difficult_list.astype(dtype=bool) - if groundtruth_is_group_of_list is None: - num_boxes = groundtruth_boxes.shape[0] - groundtruth_is_group_of_list = np.zeros(num_boxes, dtype=bool) - self.groundtruth_is_group_of_list[ - image_key] = groundtruth_is_group_of_list.astype(dtype=bool) - - self._update_ground_truth_statistics( - groundtruth_class_labels, - groundtruth_is_difficult_list.astype(dtype=bool), - groundtruth_is_group_of_list.astype(dtype=bool)) - - def add_single_detected_image_info(self, image_key, detected_boxes, - detected_scores, detected_class_labels): - """Adds detections for a single image to be used for evaluation. - - Args: - image_key: A unique string/integer identifier for the image. - detected_boxes: float32 numpy array of shape [num_boxes, 4] - containing `num_boxes` detection boxes of the format - [ymin, xmin, ymax, xmax] in absolute image coordinates. - detected_scores: float32 numpy array of shape [num_boxes] containing - detection scores for the boxes. - detected_class_labels: integer numpy array of shape [num_boxes] containing - 0-indexed detection classes for the boxes. - - Raises: - ValueError: if the number of boxes, scores and class labels differ in - length. - """ - if (len(detected_boxes) != len(detected_scores) or - len(detected_boxes) != len(detected_class_labels)): - raise ValueError('detected_boxes, detected_scores and ' - 'detected_class_labels should all have same lengths. 
Got' - '[%d, %d, %d]' % len(detected_boxes), - len(detected_scores), len(detected_class_labels)) - - if image_key in self.detection_keys: - logging.warn( - 'image %s has already been added to the detection result database', - image_key) - return - - self.detection_keys.add(image_key) - if image_key in self.groundtruth_boxes: - groundtruth_boxes = self.groundtruth_boxes[image_key] - groundtruth_class_labels = self.groundtruth_class_labels[image_key] - groundtruth_is_difficult_list = self.groundtruth_is_difficult_list[ - image_key] - groundtruth_is_group_of_list = self.groundtruth_is_group_of_list[ - image_key] - else: - groundtruth_boxes = np.empty(shape=[0, 4], dtype=float) - groundtruth_class_labels = np.array([], dtype=int) - groundtruth_is_difficult_list = np.array([], dtype=bool) - groundtruth_is_group_of_list = np.array([], dtype=bool) - scores, tp_fp_labels, is_class_correctly_detected_in_image = ( - self.per_image_eval.compute_object_detection_metrics( - detected_boxes, detected_scores, detected_class_labels, - groundtruth_boxes, groundtruth_class_labels, - groundtruth_is_difficult_list, groundtruth_is_group_of_list)) - - for i in range(self.num_class): - if scores[i].shape[0] > 0: - self.scores_per_class[i].append(scores[i]) - self.tp_fp_labels_per_class[i].append(tp_fp_labels[i]) - (self.num_images_correctly_detected_per_class - ) += is_class_correctly_detected_in_image - - def _update_ground_truth_statistics(self, groundtruth_class_labels, - groundtruth_is_difficult_list, - groundtruth_is_group_of_list): - """Update grouth truth statitistics. - - 1. Difficult boxes are ignored when counting the number of ground truth - instances as done in Pascal VOC devkit. - 2. Difficult boxes are treated as normal boxes when computing CorLoc related - statitistics. 
- - Args: - groundtruth_class_labels: An integer numpy array of length M, - representing M class labels of object instances in ground truth - groundtruth_is_difficult_list: A boolean numpy array of length M denoting - whether a ground truth box is a difficult instance or not - groundtruth_is_group_of_list: A boolean numpy array of length M denoting - whether a ground truth box is a group-of box or not - """ - for class_index in range(self.num_class): - num_gt_instances = np.sum(groundtruth_class_labels[ - ~groundtruth_is_difficult_list - & ~groundtruth_is_group_of_list] == class_index) - self.num_gt_instances_per_class[class_index] += num_gt_instances - if np.any(groundtruth_class_labels == class_index): - self.num_gt_imgs_per_class[class_index] += 1 - - def evaluate(self): - """Compute evaluation result. - - Returns: - A named tuple with the following fields - - average_precision: float numpy array of average precision for - each class. - mean_ap: mean average precision of all classes, float scalar - precisions: List of precisions, each precision is a float numpy - array - recalls: List of recalls, each recall is a float numpy array - corloc: numpy float array - mean_corloc: Mean CorLoc score for each class, float scalar - """ - if (self.num_gt_instances_per_class == 0).any(): - logging.warn( - 'The following classes have no ground truth examples: %s', - np.squeeze(np.argwhere(self.num_gt_instances_per_class == 0)) + - self.label_id_offset) - - if self.use_weighted_mean_ap: - all_scores = np.array([], dtype=float) - all_tp_fp_labels = np.array([], dtype=bool) - - for class_index in range(self.num_class): - if self.num_gt_instances_per_class[class_index] == 0: - continue - if not self.scores_per_class[class_index]: - scores = np.array([], dtype=float) - tp_fp_labels = np.array([], dtype=bool) - else: - scores = np.concatenate(self.scores_per_class[class_index]) - tp_fp_labels = np.concatenate(self.tp_fp_labels_per_class[class_index]) - if self.use_weighted_mean_ap: 
- all_scores = np.append(all_scores, scores) - all_tp_fp_labels = np.append(all_tp_fp_labels, tp_fp_labels) - precision, recall = metrics.compute_precision_recall( - scores, tp_fp_labels, self.num_gt_instances_per_class[class_index]) - self.precisions_per_class.append(precision) - self.recalls_per_class.append(recall) - average_precision = metrics.compute_average_precision(precision, recall) - self.average_precision_per_class[class_index] = average_precision - - self.corloc_per_class = metrics.compute_cor_loc( - self.num_gt_imgs_per_class, - self.num_images_correctly_detected_per_class) - - if self.use_weighted_mean_ap: - num_gt_instances = np.sum(self.num_gt_instances_per_class) - precision, recall = metrics.compute_precision_recall( - all_scores, all_tp_fp_labels, num_gt_instances) - mean_ap = metrics.compute_average_precision(precision, recall) - else: - mean_ap = np.nanmean(self.average_precision_per_class) - mean_corloc = np.nanmean(self.corloc_per_class) - return ObjectDetectionEvalMetrics( - self.average_precision_per_class, mean_ap, self.precisions_per_class, - self.recalls_per_class, self.corloc_per_class, mean_corloc) diff --git a/object_detection/utils/object_detection_evaluation_test.py b/object_detection/utils/object_detection_evaluation_test.py deleted file mode 100644 index fcaf80a2..00000000 --- a/object_detection/utils/object_detection_evaluation_test.py +++ /dev/null @@ -1,436 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.utils.object_detection_evaluation.""" - -import numpy as np -import tensorflow as tf - -from object_detection.core import standard_fields -from object_detection.utils import object_detection_evaluation - - -class OpenImagesV2EvaluationTest(tf.test.TestCase): - - def test_returns_correct_metric_values(self): - categories = [{ - 'id': 1, - 'name': 'cat' - }, { - 'id': 2, - 'name': 'dog' - }, { - 'id': 3, - 'name': 'elephant' - }] - - oiv2_evaluator = object_detection_evaluation.OpenImagesDetectionEvaluator( - categories) - image_key1 = 'img1' - groundtruth_boxes1 = np.array( - [[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]], dtype=float) - groundtruth_class_labels1 = np.array([1, 3, 1], dtype=int) - oiv2_evaluator.add_single_ground_truth_image_info(image_key1, { - standard_fields.InputDataFields.groundtruth_boxes: - groundtruth_boxes1, - standard_fields.InputDataFields.groundtruth_classes: - groundtruth_class_labels1, - standard_fields.InputDataFields.groundtruth_group_of: - np.array([], dtype=bool) - }) - image_key2 = 'img2' - groundtruth_boxes2 = np.array( - [[10, 10, 11, 11], [500, 500, 510, 510], [10, 10, 12, 12]], dtype=float) - groundtruth_class_labels2 = np.array([1, 1, 3], dtype=int) - groundtruth_is_group_of_list2 = np.array([False, True, False], dtype=bool) - oiv2_evaluator.add_single_ground_truth_image_info(image_key2, { - standard_fields.InputDataFields.groundtruth_boxes: - groundtruth_boxes2, - standard_fields.InputDataFields.groundtruth_classes: - groundtruth_class_labels2, - standard_fields.InputDataFields.groundtruth_group_of: - groundtruth_is_group_of_list2 - }) - image_key3 = 'img3' - groundtruth_boxes3 = np.array([[0, 0, 1, 1]], dtype=float) - groundtruth_class_labels3 = np.array([2], dtype=int) - 
oiv2_evaluator.add_single_ground_truth_image_info(image_key3, { - standard_fields.InputDataFields.groundtruth_boxes: - groundtruth_boxes3, - standard_fields.InputDataFields.groundtruth_classes: - groundtruth_class_labels3 - }) - # Add detections - image_key = 'img2' - detected_boxes = np.array( - [[10, 10, 11, 11], [100, 100, 120, 120], [100, 100, 220, 220]], - dtype=float) - detected_class_labels = np.array([1, 1, 3], dtype=int) - detected_scores = np.array([0.7, 0.8, 0.9], dtype=float) - oiv2_evaluator.add_single_detected_image_info(image_key, { - standard_fields.DetectionResultFields.detection_boxes: - detected_boxes, - standard_fields.DetectionResultFields.detection_scores: - detected_scores, - standard_fields.DetectionResultFields.detection_classes: - detected_class_labels - }) - metrics = oiv2_evaluator.evaluate() - self.assertAlmostEqual( - metrics['OpenImagesV2/PerformanceByCategory/AP@0.5IOU/dog'], 0.0) - self.assertAlmostEqual( - metrics['OpenImagesV2/PerformanceByCategory/AP@0.5IOU/elephant'], 0.0) - self.assertAlmostEqual( - metrics['OpenImagesV2/PerformanceByCategory/AP@0.5IOU/cat'], 0.16666666) - self.assertAlmostEqual(metrics['OpenImagesV2/Precision/mAP@0.5IOU'], - 0.05555555) - oiv2_evaluator.clear() - self.assertFalse(oiv2_evaluator._image_ids) - - -class PascalEvaluationTest(tf.test.TestCase): - - def test_returns_correct_metric_values(self): - categories = [{'id': 1, 'name': 'cat'}, - {'id': 2, 'name': 'dog'}, - {'id': 3, 'name': 'elephant'}] - # Add groundtruth - pascal_evaluator = object_detection_evaluation.PascalDetectionEvaluator( - categories) - image_key1 = 'img1' - groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]], - dtype=float) - groundtruth_class_labels1 = np.array([1, 3, 1], dtype=int) - pascal_evaluator.add_single_ground_truth_image_info( - image_key1, - {standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes1, - standard_fields.InputDataFields.groundtruth_classes: - groundtruth_class_labels1, - 
standard_fields.InputDataFields.groundtruth_difficult: - np.array([], dtype=bool)}) - image_key2 = 'img2' - groundtruth_boxes2 = np.array([[10, 10, 11, 11], [500, 500, 510, 510], - [10, 10, 12, 12]], dtype=float) - groundtruth_class_labels2 = np.array([1, 1, 3], dtype=int) - groundtruth_is_difficult_list2 = np.array([False, True, False], dtype=bool) - pascal_evaluator.add_single_ground_truth_image_info( - image_key2, - {standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes2, - standard_fields.InputDataFields.groundtruth_classes: - groundtruth_class_labels2, - standard_fields.InputDataFields.groundtruth_difficult: - groundtruth_is_difficult_list2}) - image_key3 = 'img3' - groundtruth_boxes3 = np.array([[0, 0, 1, 1]], dtype=float) - groundtruth_class_labels3 = np.array([2], dtype=int) - pascal_evaluator.add_single_ground_truth_image_info( - image_key3, - {standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes3, - standard_fields.InputDataFields.groundtruth_classes: - groundtruth_class_labels3}) - - # Add detections - image_key = 'img2' - detected_boxes = np.array( - [[10, 10, 11, 11], [100, 100, 120, 120], [100, 100, 220, 220]], - dtype=float) - detected_class_labels = np.array([1, 1, 3], dtype=int) - detected_scores = np.array([0.7, 0.8, 0.9], dtype=float) - pascal_evaluator.add_single_detected_image_info( - image_key, - {standard_fields.DetectionResultFields.detection_boxes: detected_boxes, - standard_fields.DetectionResultFields.detection_scores: - detected_scores, - standard_fields.DetectionResultFields.detection_classes: - detected_class_labels}) - - metrics = pascal_evaluator.evaluate() - self.assertAlmostEqual( - metrics['PASCAL/PerformanceByCategory/AP@0.5IOU/dog'], 0.0) - self.assertAlmostEqual( - metrics['PASCAL/PerformanceByCategory/AP@0.5IOU/elephant'], 0.0) - self.assertAlmostEqual( - metrics['PASCAL/PerformanceByCategory/AP@0.5IOU/cat'], 0.16666666) - self.assertAlmostEqual(metrics['PASCAL/Precision/mAP@0.5IOU'], 0.05555555) 
- pascal_evaluator.clear() - self.assertFalse(pascal_evaluator._image_ids) - - def test_value_error_on_duplicate_images(self): - categories = [{'id': 1, 'name': 'cat'}, - {'id': 2, 'name': 'dog'}, - {'id': 3, 'name': 'elephant'}] - # Add groundtruth - pascal_evaluator = object_detection_evaluation.PascalDetectionEvaluator( - categories) - image_key1 = 'img1' - groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]], - dtype=float) - groundtruth_class_labels1 = np.array([1, 3, 1], dtype=int) - pascal_evaluator.add_single_ground_truth_image_info( - image_key1, - {standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes1, - standard_fields.InputDataFields.groundtruth_classes: - groundtruth_class_labels1}) - with self.assertRaises(ValueError): - pascal_evaluator.add_single_ground_truth_image_info( - image_key1, - {standard_fields.InputDataFields.groundtruth_boxes: - groundtruth_boxes1, - standard_fields.InputDataFields.groundtruth_classes: - groundtruth_class_labels1}) - - -class WeightedPascalEvaluationTest(tf.test.TestCase): - - def setUp(self): - self.categories = [{'id': 1, 'name': 'cat'}, - {'id': 2, 'name': 'dog'}, - {'id': 3, 'name': 'elephant'}] - - def create_and_add_common_ground_truth(self): - # Add groundtruth - self.wp_eval = ( - object_detection_evaluation.WeightedPascalDetectionEvaluator( - self.categories)) - - image_key1 = 'img1' - groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]], - dtype=float) - groundtruth_class_labels1 = np.array([1, 3, 1], dtype=int) - self.wp_eval.add_single_ground_truth_image_info( - image_key1, - {standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes1, - standard_fields.InputDataFields.groundtruth_classes: - groundtruth_class_labels1}) - # add 'img2' separately - image_key3 = 'img3' - groundtruth_boxes3 = np.array([[0, 0, 1, 1]], dtype=float) - groundtruth_class_labels3 = np.array([2], dtype=int) - self.wp_eval.add_single_ground_truth_image_info( - image_key3, 
- {standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes3, - standard_fields.InputDataFields.groundtruth_classes: - groundtruth_class_labels3}) - - def add_common_detected(self): - image_key = 'img2' - detected_boxes = np.array( - [[10, 10, 11, 11], [100, 100, 120, 120], [100, 100, 220, 220]], - dtype=float) - detected_class_labels = np.array([1, 1, 3], dtype=int) - detected_scores = np.array([0.7, 0.8, 0.9], dtype=float) - self.wp_eval.add_single_detected_image_info( - image_key, - {standard_fields.DetectionResultFields.detection_boxes: detected_boxes, - standard_fields.DetectionResultFields.detection_scores: - detected_scores, - standard_fields.DetectionResultFields.detection_classes: - detected_class_labels}) - - def test_returns_correct_metric_values(self): - self.create_and_add_common_ground_truth() - image_key2 = 'img2' - groundtruth_boxes2 = np.array([[10, 10, 11, 11], [500, 500, 510, 510], - [10, 10, 12, 12]], dtype=float) - groundtruth_class_labels2 = np.array([1, 1, 3], dtype=int) - self.wp_eval.add_single_ground_truth_image_info( - image_key2, - {standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes2, - standard_fields.InputDataFields.groundtruth_classes: - groundtruth_class_labels2 - }) - self.add_common_detected() - - metrics = self.wp_eval.evaluate() - self.assertAlmostEqual( - metrics[self.wp_eval._metric_prefix + - 'PerformanceByCategory/AP@0.5IOU/dog'], 0.0) - self.assertAlmostEqual( - metrics[self.wp_eval._metric_prefix + - 'PerformanceByCategory/AP@0.5IOU/elephant'], 0.0) - self.assertAlmostEqual( - metrics[self.wp_eval._metric_prefix + - 'PerformanceByCategory/AP@0.5IOU/cat'], 0.5 / 4) - self.assertAlmostEqual(metrics[self.wp_eval._metric_prefix + - 'Precision/mAP@0.5IOU'], - 1. 
/ (4 + 1 + 2) / 3) - self.wp_eval.clear() - self.assertFalse(self.wp_eval._image_ids) - - def test_returns_correct_metric_values_with_difficult_list(self): - self.create_and_add_common_ground_truth() - image_key2 = 'img2' - groundtruth_boxes2 = np.array([[10, 10, 11, 11], [500, 500, 510, 510], - [10, 10, 12, 12]], dtype=float) - groundtruth_class_labels2 = np.array([1, 1, 3], dtype=int) - groundtruth_is_difficult_list2 = np.array([False, True, False], dtype=bool) - self.wp_eval.add_single_ground_truth_image_info( - image_key2, - {standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes2, - standard_fields.InputDataFields.groundtruth_classes: - groundtruth_class_labels2, - standard_fields.InputDataFields.groundtruth_difficult: - groundtruth_is_difficult_list2 - }) - self.add_common_detected() - - metrics = self.wp_eval.evaluate() - self.assertAlmostEqual( - metrics[self.wp_eval._metric_prefix + - 'PerformanceByCategory/AP@0.5IOU/dog'], 0.0) - self.assertAlmostEqual( - metrics[self.wp_eval._metric_prefix + - 'PerformanceByCategory/AP@0.5IOU/elephant'], 0.0) - self.assertAlmostEqual( - metrics[self.wp_eval._metric_prefix + - 'PerformanceByCategory/AP@0.5IOU/cat'], 0.5 / 3) - self.assertAlmostEqual(metrics[self.wp_eval._metric_prefix + - 'Precision/mAP@0.5IOU'], - 1. 
/ (3 + 1 + 2) / 3) - self.wp_eval.clear() - self.assertFalse(self.wp_eval._image_ids) - - def test_value_error_on_duplicate_images(self): - # Add groundtruth - self.wp_eval = ( - object_detection_evaluation.WeightedPascalDetectionEvaluator( - self.categories)) - image_key1 = 'img1' - groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]], - dtype=float) - groundtruth_class_labels1 = np.array([1, 3, 1], dtype=int) - self.wp_eval.add_single_ground_truth_image_info( - image_key1, - {standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes1, - standard_fields.InputDataFields.groundtruth_classes: - groundtruth_class_labels1}) - with self.assertRaises(ValueError): - self.wp_eval.add_single_ground_truth_image_info( - image_key1, - {standard_fields.InputDataFields.groundtruth_boxes: - groundtruth_boxes1, - standard_fields.InputDataFields.groundtruth_classes: - groundtruth_class_labels1}) - - -class ObjectDetectionEvaluationTest(tf.test.TestCase): - - def setUp(self): - num_groundtruth_classes = 3 - self.od_eval = object_detection_evaluation.ObjectDetectionEvaluation( - num_groundtruth_classes) - - image_key1 = 'img1' - groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]], - dtype=float) - groundtruth_class_labels1 = np.array([0, 2, 0], dtype=int) - self.od_eval.add_single_ground_truth_image_info( - image_key1, groundtruth_boxes1, groundtruth_class_labels1) - image_key2 = 'img2' - groundtruth_boxes2 = np.array([[10, 10, 11, 11], [500, 500, 510, 510], - [10, 10, 12, 12]], dtype=float) - groundtruth_class_labels2 = np.array([0, 0, 2], dtype=int) - groundtruth_is_difficult_list2 = np.array([False, True, False], dtype=bool) - groundtruth_is_group_of_list2 = np.array([False, False, True], dtype=bool) - self.od_eval.add_single_ground_truth_image_info( - image_key2, groundtruth_boxes2, groundtruth_class_labels2, - groundtruth_is_difficult_list2, groundtruth_is_group_of_list2) - - image_key3 = 'img3' - groundtruth_boxes3 = 
np.array([[0, 0, 1, 1]], dtype=float) - groundtruth_class_labels3 = np.array([1], dtype=int) - self.od_eval.add_single_ground_truth_image_info( - image_key3, groundtruth_boxes3, groundtruth_class_labels3) - - image_key = 'img2' - detected_boxes = np.array( - [[10, 10, 11, 11], [100, 100, 120, 120], [100, 100, 220, 220]], - dtype=float) - detected_class_labels = np.array([0, 0, 2], dtype=int) - detected_scores = np.array([0.7, 0.8, 0.9], dtype=float) - self.od_eval.add_single_detected_image_info( - image_key, detected_boxes, detected_scores, detected_class_labels) - - def test_add_single_ground_truth_image_info(self): - expected_num_gt_instances_per_class = np.array([3, 1, 1], dtype=int) - expected_num_gt_imgs_per_class = np.array([2, 1, 2], dtype=int) - self.assertTrue(np.array_equal(expected_num_gt_instances_per_class, - self.od_eval.num_gt_instances_per_class)) - self.assertTrue(np.array_equal(expected_num_gt_imgs_per_class, - self.od_eval.num_gt_imgs_per_class)) - groundtruth_boxes2 = np.array([[10, 10, 11, 11], [500, 500, 510, 510], - [10, 10, 12, 12]], dtype=float) - self.assertTrue(np.allclose(self.od_eval.groundtruth_boxes['img2'], - groundtruth_boxes2)) - groundtruth_is_difficult_list2 = np.array([False, True, False], dtype=bool) - self.assertTrue(np.allclose( - self.od_eval.groundtruth_is_difficult_list['img2'], - groundtruth_is_difficult_list2)) - groundtruth_is_group_of_list2 = np.array([False, False, True], dtype=bool) - self.assertTrue( - np.allclose(self.od_eval.groundtruth_is_group_of_list['img2'], - groundtruth_is_group_of_list2)) - - groundtruth_class_labels1 = np.array([0, 2, 0], dtype=int) - self.assertTrue(np.array_equal(self.od_eval.groundtruth_class_labels[ - 'img1'], groundtruth_class_labels1)) - - def test_add_single_detected_image_info(self): - expected_scores_per_class = [[np.array([0.8, 0.7], dtype=float)], [], - [np.array([0.9], dtype=float)]] - expected_tp_fp_labels_per_class = [[np.array([0, 1], dtype=bool)], [], - [np.array([0], 
dtype=bool)]] - expected_num_images_correctly_detected_per_class = np.array([0, 0, 0], - dtype=int) - for i in range(self.od_eval.num_class): - for j in range(len(expected_scores_per_class[i])): - self.assertTrue(np.allclose(expected_scores_per_class[i][j], - self.od_eval.scores_per_class[i][j])) - self.assertTrue(np.array_equal(expected_tp_fp_labels_per_class[i][ - j], self.od_eval.tp_fp_labels_per_class[i][j])) - self.assertTrue(np.array_equal( - expected_num_images_correctly_detected_per_class, - self.od_eval.num_images_correctly_detected_per_class)) - - def test_evaluate(self): - (average_precision_per_class, mean_ap, precisions_per_class, - recalls_per_class, corloc_per_class, - mean_corloc) = self.od_eval.evaluate() - expected_precisions_per_class = [np.array([0, 0.5], dtype=float), - np.array([], dtype=float), - np.array([0], dtype=float)] - expected_recalls_per_class = [ - np.array([0, 1. / 3.], dtype=float), np.array([], dtype=float), - np.array([0], dtype=float) - ] - expected_average_precision_per_class = np.array([1. / 6., 0, 0], - dtype=float) - expected_corloc_per_class = np.array([0, np.divide(0, 0), 0], dtype=float) - expected_mean_ap = 1. / 18 - expected_mean_corloc = 0.0 - for i in range(self.od_eval.num_class): - self.assertTrue(np.allclose(expected_precisions_per_class[i], - precisions_per_class[i])) - self.assertTrue(np.allclose(expected_recalls_per_class[i], - recalls_per_class[i])) - self.assertTrue(np.allclose(expected_average_precision_per_class, - average_precision_per_class)) - self.assertTrue(np.allclose(expected_corloc_per_class, corloc_per_class)) - self.assertAlmostEqual(expected_mean_ap, mean_ap) - self.assertAlmostEqual(expected_mean_corloc, mean_corloc) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/utils/ops.py b/object_detection/utils/ops.py deleted file mode 100644 index 03325cfd..00000000 --- a/object_detection/utils/ops.py +++ /dev/null @@ -1,741 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. 
All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""A module for helper tensorflow ops.""" -import math -import numpy as np -import six - -import tensorflow as tf - -from object_detection.core import box_list -from object_detection.core import box_list_ops -from object_detection.core import standard_fields as fields -from object_detection.utils import static_shape - - -def expanded_shape(orig_shape, start_dim, num_dims): - """Inserts multiple ones into a shape vector. - - Inserts an all-1 vector of length num_dims at position start_dim into a shape. - Can be combined with tf.reshape to generalize tf.expand_dims. - - Args: - orig_shape: the shape into which the all-1 vector is added (int32 vector) - start_dim: insertion position (int scalar) - num_dims: length of the inserted all-1 vector (int scalar) - Returns: - An int32 vector of length tf.size(orig_shape) + num_dims. - """ - with tf.name_scope('ExpandedShape'): - start_dim = tf.expand_dims(start_dim, 0) # scalar to rank-1 - before = tf.slice(orig_shape, [0], start_dim) - add_shape = tf.ones(tf.reshape(num_dims, [1]), dtype=tf.int32) - after = tf.slice(orig_shape, start_dim, [-1]) - new_shape = tf.concat([before, add_shape, after], 0) - return new_shape - - -def normalized_to_image_coordinates(normalized_boxes, image_shape, - parallel_iterations=32): - """Converts a batch of boxes from normal to image coordinates. 
- - Args: - normalized_boxes: a float32 tensor of shape [None, num_boxes, 4] in - normalized coordinates. - image_shape: a float32 tensor of shape [4] containing the image shape. - parallel_iterations: parallelism for the map_fn op. - - Returns: - absolute_boxes: a float32 tensor of shape [None, num_boxes, 4] containg the - boxes in image coordinates. - """ - def _to_absolute_coordinates(normalized_boxes): - return box_list_ops.to_absolute_coordinates( - box_list.BoxList(normalized_boxes), - image_shape[1], image_shape[2], check_range=False).get() - - absolute_boxes = tf.map_fn( - _to_absolute_coordinates, - elems=(normalized_boxes), - dtype=tf.float32, - parallel_iterations=parallel_iterations, - back_prop=True) - return absolute_boxes - - -def meshgrid(x, y): - """Tiles the contents of x and y into a pair of grids. - - Multidimensional analog of numpy.meshgrid, giving the same behavior if x and y - are vectors. Generally, this will give: - - xgrid(i1, ..., i_m, j_1, ..., j_n) = x(j_1, ..., j_n) - ygrid(i1, ..., i_m, j_1, ..., j_n) = y(i_1, ..., i_m) - - Keep in mind that the order of the arguments and outputs is reverse relative - to the order of the indices they go into, done for compatibility with numpy. - The output tensors have the same shapes. Specifically: - - xgrid.get_shape() = y.get_shape().concatenate(x.get_shape()) - ygrid.get_shape() = y.get_shape().concatenate(x.get_shape()) - - Args: - x: A tensor of arbitrary shape and rank. xgrid will contain these values - varying in its last dimensions. - y: A tensor of arbitrary shape and rank. ygrid will contain these values - varying in its first dimensions. - Returns: - A tuple of tensors (xgrid, ygrid). 
- """ - with tf.name_scope('Meshgrid'): - x = tf.convert_to_tensor(x) - y = tf.convert_to_tensor(y) - x_exp_shape = expanded_shape(tf.shape(x), 0, tf.rank(y)) - y_exp_shape = expanded_shape(tf.shape(y), tf.rank(y), tf.rank(x)) - - xgrid = tf.tile(tf.reshape(x, x_exp_shape), y_exp_shape) - ygrid = tf.tile(tf.reshape(y, y_exp_shape), x_exp_shape) - new_shape = y.get_shape().concatenate(x.get_shape()) - xgrid.set_shape(new_shape) - ygrid.set_shape(new_shape) - - return xgrid, ygrid - - -def pad_to_multiple(tensor, multiple): - """Returns the tensor zero padded to the specified multiple. - - Appends 0s to the end of the first and second dimension (height and width) of - the tensor until both dimensions are a multiple of the input argument - 'multiple'. E.g. given an input tensor of shape [1, 3, 5, 1] and an input - multiple of 4, PadToMultiple will append 0s so that the resulting tensor will - be of shape [1, 4, 8, 1]. - - Args: - tensor: rank 4 float32 tensor, where - tensor -> [batch_size, height, width, channels]. - multiple: the multiple to pad to. - - Returns: - padded_tensor: the tensor zero padded to the specified multiple. 
- """ - tensor_shape = tensor.get_shape() - batch_size = static_shape.get_batch_size(tensor_shape) - tensor_height = static_shape.get_height(tensor_shape) - tensor_width = static_shape.get_width(tensor_shape) - tensor_depth = static_shape.get_depth(tensor_shape) - - if batch_size is None: - batch_size = tf.shape(tensor)[0] - - if tensor_height is None: - tensor_height = tf.shape(tensor)[1] - padded_tensor_height = tf.to_int32( - tf.ceil(tf.to_float(tensor_height) / tf.to_float(multiple))) * multiple - else: - padded_tensor_height = int( - math.ceil(float(tensor_height) / multiple) * multiple) - - if tensor_width is None: - tensor_width = tf.shape(tensor)[2] - padded_tensor_width = tf.to_int32( - tf.ceil(tf.to_float(tensor_width) / tf.to_float(multiple))) * multiple - else: - padded_tensor_width = int( - math.ceil(float(tensor_width) / multiple) * multiple) - - if (padded_tensor_height == tensor_height and - padded_tensor_width == tensor_width): - return tensor - - if tensor_depth is None: - tensor_depth = tf.shape(tensor)[3] - - # Use tf.concat instead of tf.pad to preserve static shape - height_pad = tf.zeros([ - batch_size, padded_tensor_height - tensor_height, tensor_width, - tensor_depth - ]) - padded_tensor = tf.concat([tensor, height_pad], 1) - width_pad = tf.zeros([ - batch_size, padded_tensor_height, padded_tensor_width - tensor_width, - tensor_depth - ]) - padded_tensor = tf.concat([padded_tensor, width_pad], 2) - - return padded_tensor - - -def padded_one_hot_encoding(indices, depth, left_pad): - """Returns a zero padded one-hot tensor. - - This function converts a sparse representation of indices (e.g., [4]) to a - zero padded one-hot representation (e.g., [0, 0, 0, 0, 1] with depth = 4 and - left_pad = 1). If `indices` is empty, the result will simply be a tensor of - shape (0, depth + left_pad). If depth = 0, then this function just returns - `None`. - - Args: - indices: an integer tensor of shape [num_indices]. 
- depth: depth for the one-hot tensor (integer). - left_pad: number of zeros to left pad the one-hot tensor with (integer). - - Returns: - padded_onehot: a tensor with shape (num_indices, depth + left_pad). Returns - `None` if the depth is zero. - - Raises: - ValueError: if `indices` does not have rank 1 or if `left_pad` or `depth are - either negative or non-integers. - - TODO: add runtime checks for depth and indices. - """ - if depth < 0 or not isinstance(depth, (int, long) if six.PY2 else int): - raise ValueError('`depth` must be a non-negative integer.') - if left_pad < 0 or not isinstance(left_pad, (int, long) if six.PY2 else int): - raise ValueError('`left_pad` must be a non-negative integer.') - if depth == 0: - return None - if len(indices.get_shape().as_list()) != 1: - raise ValueError('`indices` must have rank 1') - - def one_hot_and_pad(): - one_hot = tf.cast(tf.one_hot(tf.cast(indices, tf.int64), depth, - on_value=1, off_value=0), tf.float32) - return tf.pad(one_hot, [[0, 0], [left_pad, 0]], mode='CONSTANT') - result = tf.cond(tf.greater(tf.size(indices), 0), one_hot_and_pad, - lambda: tf.zeros((depth + left_pad, 0))) - return tf.reshape(result, [-1, depth + left_pad]) - - -def dense_to_sparse_boxes(dense_locations, dense_num_boxes, num_classes): - """Converts bounding boxes from dense to sparse form. - - Args: - dense_locations: a [max_num_boxes, 4] tensor in which only the first k rows - are valid bounding box location coordinates, where k is the sum of - elements in dense_num_boxes. - dense_num_boxes: a [max_num_classes] tensor indicating the counts of - various bounding box classes e.g. [1, 0, 0, 2] means that the first - bounding box is of class 0 and the second and third bounding boxes are - of class 3. The sum of elements in this tensor is the number of valid - bounding boxes. - num_classes: number of classes - - Returns: - box_locations: a [num_boxes, 4] tensor containing only valid bounding - boxes (i.e. 
the first num_boxes rows of dense_locations) - box_classes: a [num_boxes] tensor containing the classes of each bounding - box (e.g. dense_num_boxes = [1, 0, 0, 2] => box_classes = [0, 3, 3] - """ - - num_valid_boxes = tf.reduce_sum(dense_num_boxes) - box_locations = tf.slice(dense_locations, - tf.constant([0, 0]), tf.stack([num_valid_boxes, 4])) - tiled_classes = [tf.tile([i], tf.expand_dims(dense_num_boxes[i], 0)) - for i in range(num_classes)] - box_classes = tf.concat(tiled_classes, 0) - box_locations.set_shape([None, 4]) - return box_locations, box_classes - - -def indices_to_dense_vector(indices, - size, - indices_value=1., - default_value=0, - dtype=tf.float32): - """Creates dense vector with indices set to specific value and rest to zeros. - - This function exists because it is unclear if it is safe to use - tf.sparse_to_dense(indices, [size], 1, validate_indices=False) - with indices which are not ordered. - This function accepts a dynamic size (e.g. tf.shape(tensor)[0]) - - Args: - indices: 1d Tensor with integer indices which are to be set to - indices_values. - size: scalar with size (integer) of output Tensor. - indices_value: values of elements specified by indices in the output vector - default_value: values of other elements in the output vector. - dtype: data type. - - Returns: - dense 1D Tensor of shape [size] with indices set to indices_values and the - rest set to default_value. - """ - size = tf.to_int32(size) - zeros = tf.ones([size], dtype=dtype) * default_value - values = tf.ones_like(indices, dtype=dtype) * indices_value - - return tf.dynamic_stitch([tf.range(size), tf.to_int32(indices)], - [zeros, values]) - - -def retain_groundtruth(tensor_dict, valid_indices): - """Retains groundtruth by valid indices. 
- - Args: - tensor_dict: a dictionary of following groundtruth tensors - - fields.InputDataFields.groundtruth_boxes - fields.InputDataFields.groundtruth_instance_masks - fields.InputDataFields.groundtruth_classes - fields.InputDataFields.groundtruth_is_crowd - fields.InputDataFields.groundtruth_area - fields.InputDataFields.groundtruth_label_types - fields.InputDataFields.groundtruth_difficult - valid_indices: a tensor with valid indices for the box-level groundtruth. - - Returns: - a dictionary of tensors containing only the groundtruth for valid_indices. - - Raises: - ValueError: If the shape of valid_indices is invalid. - ValueError: field fields.InputDataFields.groundtruth_boxes is - not present in tensor_dict. - """ - input_shape = valid_indices.get_shape().as_list() - if not (len(input_shape) == 1 or - (len(input_shape) == 2 and input_shape[1] == 1)): - raise ValueError('The shape of valid_indices is invalid.') - valid_indices = tf.reshape(valid_indices, [-1]) - valid_dict = {} - if fields.InputDataFields.groundtruth_boxes in tensor_dict: - # Prevents reshape failure when num_boxes is 0. - num_boxes = tf.maximum(tf.shape( - tensor_dict[fields.InputDataFields.groundtruth_boxes])[0], 1) - for key in tensor_dict: - if key in [fields.InputDataFields.groundtruth_boxes, - fields.InputDataFields.groundtruth_classes, - fields.InputDataFields.groundtruth_instance_masks]: - valid_dict[key] = tf.gather(tensor_dict[key], valid_indices) - # Input decoder returns empty tensor when these fields are not provided. - # Needs to reshape into [num_boxes, -1] for tf.gather() to work. - elif key in [fields.InputDataFields.groundtruth_is_crowd, - fields.InputDataFields.groundtruth_area, - fields.InputDataFields.groundtruth_difficult, - fields.InputDataFields.groundtruth_label_types]: - valid_dict[key] = tf.reshape( - tf.gather(tf.reshape(tensor_dict[key], [num_boxes, -1]), - valid_indices), [-1]) - # Fields that are not associated with boxes. 
- else: - valid_dict[key] = tensor_dict[key] - else: - raise ValueError('%s not present in input tensor dict.' % ( - fields.InputDataFields.groundtruth_boxes)) - return valid_dict - - -def retain_groundtruth_with_positive_classes(tensor_dict): - """Retains only groundtruth with positive class ids. - - Args: - tensor_dict: a dictionary of following groundtruth tensors - - fields.InputDataFields.groundtruth_boxes - fields.InputDataFields.groundtruth_classes - fields.InputDataFields.groundtruth_is_crowd - fields.InputDataFields.groundtruth_area - fields.InputDataFields.groundtruth_label_types - fields.InputDataFields.groundtruth_difficult - - Returns: - a dictionary of tensors containing only the groundtruth with positive - classes. - - Raises: - ValueError: If groundtruth_classes tensor is not in tensor_dict. - """ - if fields.InputDataFields.groundtruth_classes not in tensor_dict: - raise ValueError('`groundtruth classes` not in tensor_dict.') - keep_indices = tf.where(tf.greater( - tensor_dict[fields.InputDataFields.groundtruth_classes], 0)) - return retain_groundtruth(tensor_dict, keep_indices) - - -def replace_nan_groundtruth_label_scores_with_ones(label_scores): - """Replaces nan label scores with 1.0. - - Args: - label_scores: a tensor containing object annoation label scores. - - Returns: - a tensor where NaN label scores have been replaced by ones. - """ - return tf.where( - tf.is_nan(label_scores), tf.ones(tf.shape(label_scores)), label_scores) - - -def filter_groundtruth_with_crowd_boxes(tensor_dict): - """Filters out groundtruth with boxes corresponding to crowd. - - Args: - tensor_dict: a dictionary of following groundtruth tensors - - fields.InputDataFields.groundtruth_boxes - fields.InputDataFields.groundtruth_classes - fields.InputDataFields.groundtruth_is_crowd - fields.InputDataFields.groundtruth_area - fields.InputDataFields.groundtruth_label_types - - Returns: - a dictionary of tensors containing only the groundtruth that have bounding - boxes. 
- """ - if fields.InputDataFields.groundtruth_is_crowd in tensor_dict: - is_crowd = tensor_dict[fields.InputDataFields.groundtruth_is_crowd] - is_not_crowd = tf.logical_not(is_crowd) - is_not_crowd_indices = tf.where(is_not_crowd) - tensor_dict = retain_groundtruth(tensor_dict, is_not_crowd_indices) - return tensor_dict - - -def filter_groundtruth_with_nan_box_coordinates(tensor_dict): - """Filters out groundtruth with no bounding boxes. - - Args: - tensor_dict: a dictionary of following groundtruth tensors - - fields.InputDataFields.groundtruth_boxes - fields.InputDataFields.groundtruth_instance_masks - fields.InputDataFields.groundtruth_classes - fields.InputDataFields.groundtruth_is_crowd - fields.InputDataFields.groundtruth_area - fields.InputDataFields.groundtruth_label_types - - Returns: - a dictionary of tensors containing only the groundtruth that have bounding - boxes. - """ - groundtruth_boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] - nan_indicator_vector = tf.greater(tf.reduce_sum(tf.to_int32( - tf.is_nan(groundtruth_boxes)), reduction_indices=[1]), 0) - valid_indicator_vector = tf.logical_not(nan_indicator_vector) - valid_indices = tf.where(valid_indicator_vector) - - return retain_groundtruth(tensor_dict, valid_indices) - - -def normalize_to_target(inputs, - target_norm_value, - dim, - epsilon=1e-7, - trainable=True, - scope='NormalizeToTarget', - summarize=True): - """L2 normalizes the inputs across the specified dimension to a target norm. - - This op implements the L2 Normalization layer introduced in - Liu, Wei, et al. "SSD: Single Shot MultiBox Detector." - and Liu, Wei, Andrew Rabinovich, and Alexander C. Berg. - "Parsenet: Looking wider to see better." and is useful for bringing - activations from multiple layers in a convnet to a standard scale. - - Note that the rank of `inputs` must be known and the dimension to which - normalization is to be applied should be statically defined. 
- - TODO: Add option to scale by L2 norm of the entire input. - - Args: - inputs: A `Tensor` of arbitrary size. - target_norm_value: A float value that specifies an initial target norm or - a list of floats (whose length must be equal to the depth along the - dimension to be normalized) specifying a per-dimension multiplier - after normalization. - dim: The dimension along which the input is normalized. - epsilon: A small value to add to the inputs to avoid dividing by zero. - trainable: Whether the norm is trainable or not - scope: Optional scope for variable_scope. - summarize: Whether or not to add a tensorflow summary for the op. - - Returns: - The input tensor normalized to the specified target norm. - - Raises: - ValueError: If dim is smaller than the number of dimensions in 'inputs'. - ValueError: If target_norm_value is not a float or a list of floats with - length equal to the depth along the dimension to be normalized. - """ - with tf.variable_scope(scope, 'NormalizeToTarget', [inputs]): - if not inputs.get_shape(): - raise ValueError('The input rank must be known.') - input_shape = inputs.get_shape().as_list() - input_rank = len(input_shape) - if dim < 0 or dim >= input_rank: - raise ValueError( - 'dim must be non-negative but smaller than the input rank.') - if not input_shape[dim]: - raise ValueError('input shape should be statically defined along ' - 'the specified dimension.') - depth = input_shape[dim] - if not (isinstance(target_norm_value, float) or - (isinstance(target_norm_value, list) and - len(target_norm_value) == depth) and - all([isinstance(val, float) for val in target_norm_value])): - raise ValueError('target_norm_value must be a float or a list of floats ' - 'with length equal to the depth along the dimension to ' - 'be normalized.') - if isinstance(target_norm_value, float): - initial_norm = depth * [target_norm_value] - else: - initial_norm = target_norm_value - target_norm = tf.contrib.framework.model_variable( - name='weights', 
dtype=tf.float32, - initializer=tf.constant(initial_norm, dtype=tf.float32), - trainable=trainable) - if summarize: - mean = tf.reduce_mean(target_norm) - mean = tf.Print(mean, ['NormalizeToTarget:', mean]) - tf.summary.scalar(tf.get_variable_scope().name, mean) - lengths = epsilon + tf.sqrt(tf.reduce_sum(tf.square(inputs), dim, True)) - mult_shape = input_rank*[1] - mult_shape[dim] = depth - return tf.reshape(target_norm, mult_shape) * tf.truediv(inputs, lengths) - - -def position_sensitive_crop_regions(image, - boxes, - box_ind, - crop_size, - num_spatial_bins, - global_pool, - extrapolation_value=None): - """Position-sensitive crop and pool rectangular regions from a feature grid. - - The output crops are split into `spatial_bins_y` vertical bins - and `spatial_bins_x` horizontal bins. For each intersection of a vertical - and a horizontal bin the output values are gathered by performing - `tf.image.crop_and_resize` (bilinear resampling) on a a separate subset of - channels of the image. This reduces `depth` by a factor of - `(spatial_bins_y * spatial_bins_x)`. - - When global_pool is True, this function implements a differentiable version - of position-sensitive RoI pooling used in - [R-FCN detection system](https://arxiv.org/abs/1605.06409). - - When global_pool is False, this function implements a differentiable version - of position-sensitive assembling operation used in - [instance FCN](https://arxiv.org/abs/1603.08678). - - Args: - image: A `Tensor`. Must be one of the following types: `uint8`, `int8`, - `int16`, `int32`, `int64`, `half`, `float32`, `float64`. - A 4-D tensor of shape `[batch, image_height, image_width, depth]`. - Both `image_height` and `image_width` need to be positive. - boxes: A `Tensor` of type `float32`. - A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor - specifies the coordinates of a box in the `box_ind[i]` image and is - specified in normalized coordinates `[y1, x1, y2, x2]`. 
A normalized - coordinate value of `y` is mapped to the image coordinate at - `y * (image_height - 1)`, so as the `[0, 1]` interval of normalized image - height is mapped to `[0, image_height - 1] in image height coordinates. - We do allow y1 > y2, in which case the sampled crop is an up-down flipped - version of the original image. The width dimension is treated similarly. - Normalized coordinates outside the `[0, 1]` range are allowed, in which - case we use `extrapolation_value` to extrapolate the input image values. - box_ind: A `Tensor` of type `int32`. - A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`. - The value of `box_ind[i]` specifies the image that the `i`-th box refers - to. - crop_size: A list of two integers `[crop_height, crop_width]`. All - cropped image patches are resized to this size. The aspect ratio of the - image content is not preserved. Both `crop_height` and `crop_width` need - to be positive. - num_spatial_bins: A list of two integers `[spatial_bins_y, spatial_bins_x]`. - Represents the number of position-sensitive bins in y and x directions. - Both values should be >= 1. `crop_height` should be divisible by - `spatial_bins_y`, and similarly for width. - The number of image channels should be divisible by - (spatial_bins_y * spatial_bins_x). - Suggested value from R-FCN paper: [3, 3]. - global_pool: A boolean variable. - If True, we perform average global pooling on the features assembled from - the position-sensitive score maps. - If False, we keep the position-pooled features without global pooling - over the spatial coordinates. - Note that using global_pool=True is equivalent to but more efficient than - running the function with global_pool=False and then performing global - average pooling. - extrapolation_value: An optional `float`. Defaults to `0`. - Value used for extrapolation, when applicable. 
- Returns: - position_sensitive_features: A 4-D tensor of shape - `[num_boxes, K, K, crop_channels]`, - where `crop_channels = depth / (spatial_bins_y * spatial_bins_x)`, - where K = 1 when global_pool is True (Average-pooled cropped regions), - and K = crop_size when global_pool is False. - Raises: - ValueError: Raised in four situations: - `num_spatial_bins` is not >= 1; - `num_spatial_bins` does not divide `crop_size`; - `(spatial_bins_y*spatial_bins_x)` does not divide `depth`; - `bin_crop_size` is not square when global_pool=False due to the - constraint in function space_to_depth. - """ - total_bins = 1 - bin_crop_size = [] - - for (num_bins, crop_dim) in zip(num_spatial_bins, crop_size): - if num_bins < 1: - raise ValueError('num_spatial_bins should be >= 1') - - if crop_dim % num_bins != 0: - raise ValueError('crop_size should be divisible by num_spatial_bins') - - total_bins *= num_bins - bin_crop_size.append(crop_dim // num_bins) - - if not global_pool and bin_crop_size[0] != bin_crop_size[1]: - raise ValueError('Only support square bin crop size for now.') - - ymin, xmin, ymax, xmax = tf.unstack(boxes, axis=1) - spatial_bins_y, spatial_bins_x = num_spatial_bins - - # Split each box into spatial_bins_y * spatial_bins_x bins. 
- position_sensitive_boxes = [] - for bin_y in range(spatial_bins_y): - step_y = (ymax - ymin) / spatial_bins_y - for bin_x in range(spatial_bins_x): - step_x = (xmax - xmin) / spatial_bins_x - box_coordinates = [ymin + bin_y * step_y, - xmin + bin_x * step_x, - ymin + (bin_y + 1) * step_y, - xmin + (bin_x + 1) * step_x, - ] - position_sensitive_boxes.append(tf.stack(box_coordinates, axis=1)) - - image_splits = tf.split(value=image, num_or_size_splits=total_bins, axis=3) - - image_crops = [] - for (split, box) in zip(image_splits, position_sensitive_boxes): - crop = tf.image.crop_and_resize(split, box, box_ind, bin_crop_size, - extrapolation_value=extrapolation_value) - image_crops.append(crop) - - if global_pool: - # Average over all bins. - position_sensitive_features = tf.add_n(image_crops) / len(image_crops) - # Then average over spatial positions within the bins. - position_sensitive_features = tf.reduce_mean( - position_sensitive_features, [1, 2], keep_dims=True) - else: - # Reorder height/width to depth channel. - block_size = bin_crop_size[0] - if block_size >= 2: - image_crops = [tf.space_to_depth( - crop, block_size=block_size) for crop in image_crops] - - # Pack image_crops so that first dimension is for position-senstive boxes. - position_sensitive_features = tf.stack(image_crops, axis=0) - - # Unroll the position-sensitive boxes to spatial positions. - position_sensitive_features = tf.squeeze( - tf.batch_to_space_nd(position_sensitive_features, - block_shape=[1] + num_spatial_bins, - crops=tf.zeros((3, 2), dtype=tf.int32)), - squeeze_dims=[0]) - - # Reorder back the depth channel. - if block_size >= 2: - position_sensitive_features = tf.depth_to_space( - position_sensitive_features, block_size=block_size) - - return position_sensitive_features - - -def reframe_box_masks_to_image_masks(box_masks, boxes, image_height, - image_width): - """Transforms the box masks back to full image masks. 
- - Embeds masks in bounding boxes of larger masks whose shapes correspond to - image shape. - - Args: - box_masks: A tf.float32 tensor of size [num_masks, mask_height, mask_width]. - boxes: A tf.float32 tensor of size [num_masks, 4] containing the box - corners. Row i contains [ymin, xmin, ymax, xmax] of the box - corresponding to mask i. Note that the box corners are in - normalized coordinates. - image_height: Image height. The output mask will have the same height as - the image height. - image_width: Image width. The output mask will have the same width as the - image width. - - Returns: - A tf.float32 tensor of size [num_masks, image_height, image_width]. - """ - # TODO: Make this a public function. - def transform_boxes_relative_to_boxes(boxes, reference_boxes): - boxes = tf.reshape(boxes, [-1, 2, 2]) - min_corner = tf.expand_dims(reference_boxes[:, 0:2], 1) - max_corner = tf.expand_dims(reference_boxes[:, 2:4], 1) - transformed_boxes = (boxes - min_corner) / (max_corner - min_corner) - return tf.reshape(transformed_boxes, [-1, 4]) - - box_masks = tf.expand_dims(box_masks, axis=3) - num_boxes = tf.shape(box_masks)[0] - unit_boxes = tf.concat( - [tf.zeros([num_boxes, 2]), tf.ones([num_boxes, 2])], axis=1) - reverse_boxes = transform_boxes_relative_to_boxes(unit_boxes, boxes) - image_masks = tf.image.crop_and_resize(image=box_masks, - boxes=reverse_boxes, - box_ind=tf.range(num_boxes), - crop_size=[image_height, image_width], - extrapolation_value=0.0) - return tf.squeeze(image_masks, axis=3) - - -def merge_boxes_with_multiple_labels(boxes, classes, num_classes): - """Merges boxes with same coordinates and returns K-hot encoded classes. - - Args: - boxes: A tf.float32 tensor with shape [N, 4] holding N boxes. - classes: A tf.int32 tensor with shape [N] holding class indices. - The class index starts at 0. - num_classes: total number of classes to use for K-hot encoding. 
- - Returns: - merged_boxes: A tf.float32 tensor with shape [N', 4] holding boxes, - where N' <= N. - class_encodings: A tf.int32 tensor with shape [N', num_classes] holding - k-hot encodings for the merged boxes. - merged_box_indices: A tf.int32 tensor with shape [N'] holding original - indices of the boxes. - """ - def merge_numpy_boxes(boxes, classes, num_classes): - """Python function to merge numpy boxes.""" - if boxes.size < 1: - return (np.zeros([0, 4], dtype=np.float32), - np.zeros([0, num_classes], dtype=np.int32), - np.zeros([0], dtype=np.int32)) - box_to_class_indices = {} - for box_index in range(boxes.shape[0]): - box = tuple(boxes[box_index, :].tolist()) - class_index = classes[box_index] - if box not in box_to_class_indices: - box_to_class_indices[box] = [box_index, np.zeros([num_classes])] - box_to_class_indices[box][1][class_index] = 1 - merged_boxes = np.vstack(box_to_class_indices.keys()).astype(np.float32) - class_encodings = [item[1] for item in box_to_class_indices.values()] - class_encodings = np.vstack(class_encodings).astype(np.int32) - merged_box_indices = [item[0] for item in box_to_class_indices.values()] - merged_box_indices = np.array(merged_box_indices).astype(np.int32) - return merged_boxes, class_encodings, merged_box_indices - - merged_boxes, class_encodings, merged_box_indices = tf.py_func( - merge_numpy_boxes, [boxes, classes, num_classes], - [tf.float32, tf.int32, tf.int32]) - merged_boxes = tf.reshape(merged_boxes, [-1, 4]) - class_encodings = tf.reshape(class_encodings, [-1, num_classes]) - merged_box_indices = tf.reshape(merged_box_indices, [-1]) - return merged_boxes, class_encodings, merged_box_indices diff --git a/object_detection/utils/ops_test.py b/object_detection/utils/ops_test.py deleted file mode 100644 index 1bdd174b..00000000 --- a/object_detection/utils/ops_test.py +++ /dev/null @@ -1,1132 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.utils.ops.""" -import numpy as np -import tensorflow as tf - -from object_detection.core import standard_fields as fields -from object_detection.utils import ops - - -class NormalizedToImageCoordinatesTest(tf.test.TestCase): - - def test_normalized_to_image_coordinates(self): - normalized_boxes = tf.placeholder(tf.float32, shape=(None, 1, 4)) - normalized_boxes_np = np.array([[[0.0, 0.0, 1.0, 1.0]], - [[0.5, 0.5, 1.0, 1.0]]]) - image_shape = tf.convert_to_tensor([1, 4, 4, 3], dtype=tf.int32) - absolute_boxes = ops.normalized_to_image_coordinates(normalized_boxes, - image_shape, - parallel_iterations=2) - - expected_boxes = np.array([[[0, 0, 4, 4]], - [[2, 2, 4, 4]]]) - with self.test_session() as sess: - absolute_boxes = sess.run(absolute_boxes, - feed_dict={normalized_boxes: - normalized_boxes_np}) - - self.assertAllEqual(absolute_boxes, expected_boxes) - - -class MeshgridTest(tf.test.TestCase): - - def test_meshgrid_numpy_comparison(self): - """Tests meshgrid op with vectors, for which it should match numpy.""" - x = np.arange(4) - y = np.arange(6) - exp_xgrid, exp_ygrid = np.meshgrid(x, y) - xgrid, ygrid = ops.meshgrid(x, y) - with self.test_session() as sess: - xgrid_output, ygrid_output = sess.run([xgrid, ygrid]) - self.assertAllEqual(xgrid_output, exp_xgrid) - self.assertAllEqual(ygrid_output, 
exp_ygrid) - - def test_meshgrid_multidimensional(self): - np.random.seed(18) - x = np.random.rand(4, 1, 2).astype(np.float32) - y = np.random.rand(2, 3).astype(np.float32) - - xgrid, ygrid = ops.meshgrid(x, y) - - grid_shape = list(y.shape) + list(x.shape) - self.assertEqual(xgrid.get_shape().as_list(), grid_shape) - self.assertEqual(ygrid.get_shape().as_list(), grid_shape) - with self.test_session() as sess: - xgrid_output, ygrid_output = sess.run([xgrid, ygrid]) - - # Check the shape of the output grids - self.assertEqual(xgrid_output.shape, tuple(grid_shape)) - self.assertEqual(ygrid_output.shape, tuple(grid_shape)) - - # Check a few elements - test_elements = [((3, 0, 0), (1, 2)), - ((2, 0, 1), (0, 0)), - ((0, 0, 0), (1, 1))] - for xind, yind in test_elements: - # These are float equality tests, but the meshgrid op should not introduce - # rounding. - self.assertEqual(xgrid_output[yind + xind], x[xind]) - self.assertEqual(ygrid_output[yind + xind], y[yind]) - - -class OpsTestPadToMultiple(tf.test.TestCase): - - def test_zero_padding(self): - tensor = tf.constant([[[[0.], [0.]], [[0.], [0.]]]]) - padded_tensor = ops.pad_to_multiple(tensor, 1) - with self.test_session() as sess: - padded_tensor_out = sess.run(padded_tensor) - self.assertEqual((1, 2, 2, 1), padded_tensor_out.shape) - - def test_no_padding(self): - tensor = tf.constant([[[[0.], [0.]], [[0.], [0.]]]]) - padded_tensor = ops.pad_to_multiple(tensor, 2) - with self.test_session() as sess: - padded_tensor_out = sess.run(padded_tensor) - self.assertEqual((1, 2, 2, 1), padded_tensor_out.shape) - - def test_padding(self): - tensor = tf.constant([[[[0.], [0.]], [[0.], [0.]]]]) - padded_tensor = ops.pad_to_multiple(tensor, 4) - with self.test_session() as sess: - padded_tensor_out = sess.run(padded_tensor) - self.assertEqual((1, 4, 4, 1), padded_tensor_out.shape) - - -class OpsTestPaddedOneHotEncoding(tf.test.TestCase): - - def test_correct_one_hot_tensor_with_no_pad(self): - indices = tf.constant([1, 2, 3, 
5]) - one_hot_tensor = ops.padded_one_hot_encoding(indices, depth=6, left_pad=0) - expected_tensor = np.array([[0, 1, 0, 0, 0, 0], - [0, 0, 1, 0, 0, 0], - [0, 0, 0, 1, 0, 0], - [0, 0, 0, 0, 0, 1]], np.float32) - with self.test_session() as sess: - out_one_hot_tensor = sess.run(one_hot_tensor) - self.assertAllClose(out_one_hot_tensor, expected_tensor, rtol=1e-10, - atol=1e-10) - - def test_correct_one_hot_tensor_with_pad_one(self): - indices = tf.constant([1, 2, 3, 5]) - one_hot_tensor = ops.padded_one_hot_encoding(indices, depth=6, left_pad=1) - expected_tensor = np.array([[0, 0, 1, 0, 0, 0, 0], - [0, 0, 0, 1, 0, 0, 0], - [0, 0, 0, 0, 1, 0, 0], - [0, 0, 0, 0, 0, 0, 1]], np.float32) - with self.test_session() as sess: - out_one_hot_tensor = sess.run(one_hot_tensor) - self.assertAllClose(out_one_hot_tensor, expected_tensor, rtol=1e-10, - atol=1e-10) - - def test_correct_one_hot_tensor_with_pad_three(self): - indices = tf.constant([1, 2, 3, 5]) - one_hot_tensor = ops.padded_one_hot_encoding(indices, depth=6, left_pad=3) - expected_tensor = np.array([[0, 0, 0, 0, 1, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 1, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 1, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 1]], np.float32) - with self.test_session() as sess: - out_one_hot_tensor = sess.run(one_hot_tensor) - self.assertAllClose(out_one_hot_tensor, expected_tensor, rtol=1e-10, - atol=1e-10) - - def test_correct_padded_one_hot_tensor_with_empty_indices(self): - depth = 6 - pad = 2 - indices = tf.constant([]) - one_hot_tensor = ops.padded_one_hot_encoding( - indices, depth=depth, left_pad=pad) - expected_tensor = np.zeros((0, depth + pad)) - with self.test_session() as sess: - out_one_hot_tensor = sess.run(one_hot_tensor) - self.assertAllClose(out_one_hot_tensor, expected_tensor, rtol=1e-10, - atol=1e-10) - - def test_return_none_on_zero_depth(self): - indices = tf.constant([1, 2, 3, 4, 5]) - one_hot_tensor = ops.padded_one_hot_encoding(indices, depth=0, left_pad=2) - self.assertEqual(one_hot_tensor, None) - - 
def test_raise_value_error_on_rank_two_input(self): - indices = tf.constant(1.0, shape=(2, 3)) - with self.assertRaises(ValueError): - ops.padded_one_hot_encoding(indices, depth=6, left_pad=2) - - def test_raise_value_error_on_negative_pad(self): - indices = tf.constant(1.0, shape=(2, 3)) - with self.assertRaises(ValueError): - ops.padded_one_hot_encoding(indices, depth=6, left_pad=-1) - - def test_raise_value_error_on_float_pad(self): - indices = tf.constant(1.0, shape=(2, 3)) - with self.assertRaises(ValueError): - ops.padded_one_hot_encoding(indices, depth=6, left_pad=0.1) - - def test_raise_value_error_on_float_depth(self): - indices = tf.constant(1.0, shape=(2, 3)) - with self.assertRaises(ValueError): - ops.padded_one_hot_encoding(indices, depth=0.1, left_pad=2) - - -class OpsDenseToSparseBoxesTest(tf.test.TestCase): - - def test_return_all_boxes_when_all_input_boxes_are_valid(self): - num_classes = 4 - num_valid_boxes = 3 - code_size = 4 - dense_location_placeholder = tf.placeholder(tf.float32, - shape=(num_valid_boxes, - code_size)) - dense_num_boxes_placeholder = tf.placeholder(tf.int32, shape=(num_classes)) - box_locations, box_classes = ops.dense_to_sparse_boxes( - dense_location_placeholder, dense_num_boxes_placeholder, num_classes) - feed_dict = {dense_location_placeholder: np.random.uniform( - size=[num_valid_boxes, code_size]), - dense_num_boxes_placeholder: np.array([1, 0, 0, 2], - dtype=np.int32)} - - expected_box_locations = feed_dict[dense_location_placeholder] - expected_box_classses = np.array([0, 3, 3]) - with self.test_session() as sess: - box_locations, box_classes = sess.run([box_locations, box_classes], - feed_dict=feed_dict) - - self.assertAllClose(box_locations, expected_box_locations, rtol=1e-6, - atol=1e-6) - self.assertAllEqual(box_classes, expected_box_classses) - - def test_return_only_valid_boxes_when_input_contains_invalid_boxes(self): - num_classes = 4 - num_valid_boxes = 3 - num_boxes = 10 - code_size = 4 - - 
dense_location_placeholder = tf.placeholder(tf.float32, shape=(num_boxes, - code_size)) - dense_num_boxes_placeholder = tf.placeholder(tf.int32, shape=(num_classes)) - box_locations, box_classes = ops.dense_to_sparse_boxes( - dense_location_placeholder, dense_num_boxes_placeholder, num_classes) - feed_dict = {dense_location_placeholder: np.random.uniform( - size=[num_boxes, code_size]), - dense_num_boxes_placeholder: np.array([1, 0, 0, 2], - dtype=np.int32)} - - expected_box_locations = (feed_dict[dense_location_placeholder] - [:num_valid_boxes]) - expected_box_classses = np.array([0, 3, 3]) - with self.test_session() as sess: - box_locations, box_classes = sess.run([box_locations, box_classes], - feed_dict=feed_dict) - - self.assertAllClose(box_locations, expected_box_locations, rtol=1e-6, - atol=1e-6) - self.assertAllEqual(box_classes, expected_box_classses) - - -class OpsTestIndicesToDenseVector(tf.test.TestCase): - - def test_indices_to_dense_vector(self): - size = 10000 - num_indices = np.random.randint(size) - rand_indices = np.random.permutation(np.arange(size))[0:num_indices] - - expected_output = np.zeros(size, dtype=np.float32) - expected_output[rand_indices] = 1. - - tf_rand_indices = tf.constant(rand_indices) - indicator = ops.indices_to_dense_vector(tf_rand_indices, size) - - with self.test_session() as sess: - output = sess.run(indicator) - self.assertAllEqual(output, expected_output) - self.assertEqual(output.dtype, expected_output.dtype) - - def test_indices_to_dense_vector_size_at_inference(self): - size = 5000 - num_indices = 250 - all_indices = np.arange(size) - rand_indices = np.random.permutation(all_indices)[0:num_indices] - - expected_output = np.zeros(size, dtype=np.float32) - expected_output[rand_indices] = 1. 
- - tf_all_indices = tf.placeholder(tf.int32) - tf_rand_indices = tf.constant(rand_indices) - indicator = ops.indices_to_dense_vector(tf_rand_indices, - tf.shape(tf_all_indices)[0]) - feed_dict = {tf_all_indices: all_indices} - - with self.test_session() as sess: - output = sess.run(indicator, feed_dict=feed_dict) - self.assertAllEqual(output, expected_output) - self.assertEqual(output.dtype, expected_output.dtype) - - def test_indices_to_dense_vector_int(self): - size = 500 - num_indices = 25 - rand_indices = np.random.permutation(np.arange(size))[0:num_indices] - - expected_output = np.zeros(size, dtype=np.int64) - expected_output[rand_indices] = 1 - - tf_rand_indices = tf.constant(rand_indices) - indicator = ops.indices_to_dense_vector( - tf_rand_indices, size, 1, dtype=tf.int64) - - with self.test_session() as sess: - output = sess.run(indicator) - self.assertAllEqual(output, expected_output) - self.assertEqual(output.dtype, expected_output.dtype) - - def test_indices_to_dense_vector_custom_values(self): - size = 100 - num_indices = 10 - rand_indices = np.random.permutation(np.arange(size))[0:num_indices] - indices_value = np.random.rand(1) - default_value = np.random.rand(1) - - expected_output = np.float32(np.ones(size) * default_value) - expected_output[rand_indices] = indices_value - - tf_rand_indices = tf.constant(rand_indices) - indicator = ops.indices_to_dense_vector( - tf_rand_indices, - size, - indices_value=indices_value, - default_value=default_value) - - with self.test_session() as sess: - output = sess.run(indicator) - self.assertAllClose(output, expected_output) - self.assertEqual(output.dtype, expected_output.dtype) - - def test_indices_to_dense_vector_all_indices_as_input(self): - size = 500 - num_indices = 500 - rand_indices = np.random.permutation(np.arange(size))[0:num_indices] - - expected_output = np.ones(size, dtype=np.float32) - - tf_rand_indices = tf.constant(rand_indices) - indicator = ops.indices_to_dense_vector(tf_rand_indices, size) 
- - with self.test_session() as sess: - output = sess.run(indicator) - self.assertAllEqual(output, expected_output) - self.assertEqual(output.dtype, expected_output.dtype) - - def test_indices_to_dense_vector_empty_indices_as_input(self): - size = 500 - rand_indices = [] - - expected_output = np.zeros(size, dtype=np.float32) - - tf_rand_indices = tf.constant(rand_indices) - indicator = ops.indices_to_dense_vector(tf_rand_indices, size) - - with self.test_session() as sess: - output = sess.run(indicator) - self.assertAllEqual(output, expected_output) - self.assertEqual(output.dtype, expected_output.dtype) - - -class GroundtruthFilterTest(tf.test.TestCase): - - def test_filter_groundtruth(self): - input_image = tf.placeholder(tf.float32, shape=(None, None, 3)) - input_boxes = tf.placeholder(tf.float32, shape=(None, 4)) - input_classes = tf.placeholder(tf.int32, shape=(None,)) - input_is_crowd = tf.placeholder(tf.bool, shape=(None,)) - input_area = tf.placeholder(tf.float32, shape=(None,)) - input_difficult = tf.placeholder(tf.float32, shape=(None,)) - input_label_types = tf.placeholder(tf.string, shape=(None,)) - valid_indices = tf.placeholder(tf.int32, shape=(None,)) - input_tensors = { - fields.InputDataFields.image: input_image, - fields.InputDataFields.groundtruth_boxes: input_boxes, - fields.InputDataFields.groundtruth_classes: input_classes, - fields.InputDataFields.groundtruth_is_crowd: input_is_crowd, - fields.InputDataFields.groundtruth_area: input_area, - fields.InputDataFields.groundtruth_difficult: input_difficult, - fields.InputDataFields.groundtruth_label_types: input_label_types - } - output_tensors = ops.retain_groundtruth(input_tensors, valid_indices) - - image_tensor = np.random.rand(224, 224, 3) - feed_dict = { - input_image: image_tensor, - input_boxes: - np.array([[0.2, 0.4, 0.1, 0.8], [0.2, 0.4, 1.0, 0.8]], dtype=np.float), - input_classes: - np.array([1, 2], dtype=np.int32), - input_is_crowd: - np.array([False, True], dtype=np.bool), - 
input_area: - np.array([32, 48], dtype=np.float32), - input_difficult: - np.array([True, False], dtype=np.bool), - input_label_types: - np.array(['APPROPRIATE', 'INCORRECT'], dtype=np.string_), - valid_indices: - np.array([0], dtype=np.int32) - } - expected_tensors = { - fields.InputDataFields.image: - image_tensor, - fields.InputDataFields.groundtruth_boxes: - [[0.2, 0.4, 0.1, 0.8]], - fields.InputDataFields.groundtruth_classes: - [1], - fields.InputDataFields.groundtruth_is_crowd: - [False], - fields.InputDataFields.groundtruth_area: - [32], - fields.InputDataFields.groundtruth_difficult: - [True], - fields.InputDataFields.groundtruth_label_types: - ['APPROPRIATE'] - } - with self.test_session() as sess: - output_tensors = sess.run(output_tensors, feed_dict=feed_dict) - for key in [fields.InputDataFields.image, - fields.InputDataFields.groundtruth_boxes, - fields.InputDataFields.groundtruth_area]: - self.assertAllClose(expected_tensors[key], output_tensors[key]) - for key in [fields.InputDataFields.groundtruth_classes, - fields.InputDataFields.groundtruth_is_crowd, - fields.InputDataFields.groundtruth_label_types]: - self.assertAllEqual(expected_tensors[key], output_tensors[key]) - - def test_filter_with_missing_fields(self): - input_boxes = tf.placeholder(tf.float32, shape=(None, 4)) - input_classes = tf.placeholder(tf.int32, shape=(None,)) - input_tensors = { - fields.InputDataFields.groundtruth_boxes: input_boxes, - fields.InputDataFields.groundtruth_classes: input_classes - } - valid_indices = tf.placeholder(tf.int32, shape=(None,)) - - feed_dict = { - input_boxes: - np.array([[0.2, 0.4, 0.1, 0.8], [0.2, 0.4, 1.0, 0.8]], dtype=np.float), - input_classes: - np.array([1, 2], dtype=np.int32), - valid_indices: - np.array([0], dtype=np.int32) - } - expected_tensors = { - fields.InputDataFields.groundtruth_boxes: - [[0.2, 0.4, 0.1, 0.8]], - fields.InputDataFields.groundtruth_classes: - [1] - } - - output_tensors = ops.retain_groundtruth(input_tensors, 
valid_indices) - with self.test_session() as sess: - output_tensors = sess.run(output_tensors, feed_dict=feed_dict) - for key in [fields.InputDataFields.groundtruth_boxes]: - self.assertAllClose(expected_tensors[key], output_tensors[key]) - for key in [fields.InputDataFields.groundtruth_classes]: - self.assertAllEqual(expected_tensors[key], output_tensors[key]) - - def test_filter_with_empty_fields(self): - input_boxes = tf.placeholder(tf.float32, shape=(None, 4)) - input_classes = tf.placeholder(tf.int32, shape=(None,)) - input_is_crowd = tf.placeholder(tf.bool, shape=(None,)) - input_area = tf.placeholder(tf.float32, shape=(None,)) - input_difficult = tf.placeholder(tf.float32, shape=(None,)) - valid_indices = tf.placeholder(tf.int32, shape=(None,)) - input_tensors = { - fields.InputDataFields.groundtruth_boxes: input_boxes, - fields.InputDataFields.groundtruth_classes: input_classes, - fields.InputDataFields.groundtruth_is_crowd: input_is_crowd, - fields.InputDataFields.groundtruth_area: input_area, - fields.InputDataFields.groundtruth_difficult: input_difficult - } - output_tensors = ops.retain_groundtruth(input_tensors, valid_indices) - - feed_dict = { - input_boxes: - np.array([[0.2, 0.4, 0.1, 0.8], [0.2, 0.4, 1.0, 0.8]], dtype=np.float), - input_classes: - np.array([1, 2], dtype=np.int32), - input_is_crowd: - np.array([False, True], dtype=np.bool), - input_area: - np.array([], dtype=np.float32), - input_difficult: - np.array([], dtype=np.float32), - valid_indices: - np.array([0], dtype=np.int32) - } - expected_tensors = { - fields.InputDataFields.groundtruth_boxes: - [[0.2, 0.4, 0.1, 0.8]], - fields.InputDataFields.groundtruth_classes: - [1], - fields.InputDataFields.groundtruth_is_crowd: - [False], - fields.InputDataFields.groundtruth_area: - [], - fields.InputDataFields.groundtruth_difficult: - [] - } - with self.test_session() as sess: - output_tensors = sess.run(output_tensors, feed_dict=feed_dict) - for key in [fields.InputDataFields.groundtruth_boxes, 
- fields.InputDataFields.groundtruth_area]: - self.assertAllClose(expected_tensors[key], output_tensors[key]) - for key in [fields.InputDataFields.groundtruth_classes, - fields.InputDataFields.groundtruth_is_crowd]: - self.assertAllEqual(expected_tensors[key], output_tensors[key]) - - def test_filter_with_empty_groundtruth_boxes(self): - input_boxes = tf.placeholder(tf.float32, shape=(None, 4)) - input_classes = tf.placeholder(tf.int32, shape=(None,)) - input_is_crowd = tf.placeholder(tf.bool, shape=(None,)) - input_area = tf.placeholder(tf.float32, shape=(None,)) - input_difficult = tf.placeholder(tf.float32, shape=(None,)) - valid_indices = tf.placeholder(tf.int32, shape=(None,)) - input_tensors = { - fields.InputDataFields.groundtruth_boxes: input_boxes, - fields.InputDataFields.groundtruth_classes: input_classes, - fields.InputDataFields.groundtruth_is_crowd: input_is_crowd, - fields.InputDataFields.groundtruth_area: input_area, - fields.InputDataFields.groundtruth_difficult: input_difficult - } - output_tensors = ops.retain_groundtruth(input_tensors, valid_indices) - - feed_dict = { - input_boxes: - np.array([], dtype=np.float).reshape(0, 4), - input_classes: - np.array([], dtype=np.int32), - input_is_crowd: - np.array([], dtype=np.bool), - input_area: - np.array([], dtype=np.float32), - input_difficult: - np.array([], dtype=np.float32), - valid_indices: - np.array([], dtype=np.int32) - } - with self.test_session() as sess: - output_tensors = sess.run(output_tensors, feed_dict=feed_dict) - for key in input_tensors: - if key == fields.InputDataFields.groundtruth_boxes: - self.assertAllEqual([0, 4], output_tensors[key].shape) - else: - self.assertAllEqual([0], output_tensors[key].shape) - - -class RetainGroundTruthWithPositiveClasses(tf.test.TestCase): - - def test_filter_groundtruth_with_positive_classes(self): - input_image = tf.placeholder(tf.float32, shape=(None, None, 3)) - input_boxes = tf.placeholder(tf.float32, shape=(None, 4)) - input_classes = 
tf.placeholder(tf.int32, shape=(None,)) - input_is_crowd = tf.placeholder(tf.bool, shape=(None,)) - input_area = tf.placeholder(tf.float32, shape=(None,)) - input_difficult = tf.placeholder(tf.float32, shape=(None,)) - input_label_types = tf.placeholder(tf.string, shape=(None,)) - valid_indices = tf.placeholder(tf.int32, shape=(None,)) - input_tensors = { - fields.InputDataFields.image: input_image, - fields.InputDataFields.groundtruth_boxes: input_boxes, - fields.InputDataFields.groundtruth_classes: input_classes, - fields.InputDataFields.groundtruth_is_crowd: input_is_crowd, - fields.InputDataFields.groundtruth_area: input_area, - fields.InputDataFields.groundtruth_difficult: input_difficult, - fields.InputDataFields.groundtruth_label_types: input_label_types - } - output_tensors = ops.retain_groundtruth_with_positive_classes(input_tensors) - - image_tensor = np.random.rand(224, 224, 3) - feed_dict = { - input_image: image_tensor, - input_boxes: - np.array([[0.2, 0.4, 0.1, 0.8], [0.2, 0.4, 1.0, 0.8]], dtype=np.float), - input_classes: - np.array([1, 0], dtype=np.int32), - input_is_crowd: - np.array([False, True], dtype=np.bool), - input_area: - np.array([32, 48], dtype=np.float32), - input_difficult: - np.array([True, False], dtype=np.bool), - input_label_types: - np.array(['APPROPRIATE', 'INCORRECT'], dtype=np.string_), - valid_indices: - np.array([0], dtype=np.int32) - } - expected_tensors = { - fields.InputDataFields.image: - image_tensor, - fields.InputDataFields.groundtruth_boxes: - [[0.2, 0.4, 0.1, 0.8]], - fields.InputDataFields.groundtruth_classes: - [1], - fields.InputDataFields.groundtruth_is_crowd: - [False], - fields.InputDataFields.groundtruth_area: - [32], - fields.InputDataFields.groundtruth_difficult: - [True], - fields.InputDataFields.groundtruth_label_types: - ['APPROPRIATE'] - } - with self.test_session() as sess: - output_tensors = sess.run(output_tensors, feed_dict=feed_dict) - for key in [fields.InputDataFields.image, - 
fields.InputDataFields.groundtruth_boxes, - fields.InputDataFields.groundtruth_area]: - self.assertAllClose(expected_tensors[key], output_tensors[key]) - for key in [fields.InputDataFields.groundtruth_classes, - fields.InputDataFields.groundtruth_is_crowd, - fields.InputDataFields.groundtruth_label_types]: - self.assertAllEqual(expected_tensors[key], output_tensors[key]) - - -class ReplaceNaNGroundtruthLabelScoresWithOnes(tf.test.TestCase): - - def test_replace_nan_groundtruth_label_scores_with_ones(self): - label_scores = tf.constant([np.nan, 1.0, np.nan]) - output_tensor = ops.replace_nan_groundtruth_label_scores_with_ones( - label_scores) - expected_tensor = [1.0, 1.0, 1.0] - with self.test_session(): - output_tensor = output_tensor.eval() - self.assertAllClose(expected_tensor, output_tensor) - - def test_input_equals_output_when_no_nans(self): - input_label_scores = [0.5, 1.0, 1.0] - label_scores_tensor = tf.constant(input_label_scores) - output_label_scores = ops.replace_nan_groundtruth_label_scores_with_ones( - label_scores_tensor) - with self.test_session(): - output_label_scores = output_label_scores.eval() - self.assertAllClose(input_label_scores, output_label_scores) - - -class GroundtruthFilterWithCrowdBoxesTest(tf.test.TestCase): - - def test_filter_groundtruth_with_crowd_boxes(self): - input_tensors = { - fields.InputDataFields.groundtruth_boxes: - [[0.1, 0.2, 0.6, 0.8], [0.2, 0.4, 0.1, 0.8]], - fields.InputDataFields.groundtruth_classes: - [1, 2], - fields.InputDataFields.groundtruth_is_crowd: - [True, False], - fields.InputDataFields.groundtruth_area: - [100.0, 238.7] - } - - expected_tensors = { - fields.InputDataFields.groundtruth_boxes: - [[0.2, 0.4, 0.1, 0.8]], - fields.InputDataFields.groundtruth_classes: - [2], - fields.InputDataFields.groundtruth_is_crowd: - [False], - fields.InputDataFields.groundtruth_area: - [238.7] - } - - output_tensors = ops.filter_groundtruth_with_crowd_boxes( - input_tensors) - with self.test_session() as sess: - 
output_tensors = sess.run(output_tensors) - for key in [fields.InputDataFields.groundtruth_boxes, - fields.InputDataFields.groundtruth_area]: - self.assertAllClose(expected_tensors[key], output_tensors[key]) - for key in [fields.InputDataFields.groundtruth_classes, - fields.InputDataFields.groundtruth_is_crowd]: - self.assertAllEqual(expected_tensors[key], output_tensors[key]) - - -class GroundtruthFilterWithNanBoxTest(tf.test.TestCase): - - def test_filter_groundtruth_with_nan_box_coordinates(self): - input_tensors = { - fields.InputDataFields.groundtruth_boxes: - [[np.nan, np.nan, np.nan, np.nan], [0.2, 0.4, 0.1, 0.8]], - fields.InputDataFields.groundtruth_classes: - [1, 2], - fields.InputDataFields.groundtruth_is_crowd: - [False, True], - fields.InputDataFields.groundtruth_area: - [100.0, 238.7] - } - - expected_tensors = { - fields.InputDataFields.groundtruth_boxes: - [[0.2, 0.4, 0.1, 0.8]], - fields.InputDataFields.groundtruth_classes: - [2], - fields.InputDataFields.groundtruth_is_crowd: - [True], - fields.InputDataFields.groundtruth_area: - [238.7] - } - - output_tensors = ops.filter_groundtruth_with_nan_box_coordinates( - input_tensors) - with self.test_session() as sess: - output_tensors = sess.run(output_tensors) - for key in [fields.InputDataFields.groundtruth_boxes, - fields.InputDataFields.groundtruth_area]: - self.assertAllClose(expected_tensors[key], output_tensors[key]) - for key in [fields.InputDataFields.groundtruth_classes, - fields.InputDataFields.groundtruth_is_crowd]: - self.assertAllEqual(expected_tensors[key], output_tensors[key]) - - -class OpsTestNormalizeToTarget(tf.test.TestCase): - - def test_create_normalize_to_target(self): - inputs = tf.random_uniform([5, 10, 12, 3]) - target_norm_value = 4.0 - dim = 3 - with self.test_session(): - output = ops.normalize_to_target(inputs, target_norm_value, dim) - self.assertEqual(output.op.name, 'NormalizeToTarget/mul') - var_name = tf.contrib.framework.get_variables()[0].name - 
self.assertEqual(var_name, 'NormalizeToTarget/weights:0') - - def test_invalid_dim(self): - inputs = tf.random_uniform([5, 10, 12, 3]) - target_norm_value = 4.0 - dim = 10 - with self.assertRaisesRegexp( - ValueError, - 'dim must be non-negative but smaller than the input rank.'): - ops.normalize_to_target(inputs, target_norm_value, dim) - - def test_invalid_target_norm_values(self): - inputs = tf.random_uniform([5, 10, 12, 3]) - target_norm_value = [4.0, 4.0] - dim = 3 - with self.assertRaisesRegexp( - ValueError, 'target_norm_value must be a float or a list of floats'): - ops.normalize_to_target(inputs, target_norm_value, dim) - - def test_correct_output_shape(self): - inputs = tf.random_uniform([5, 10, 12, 3]) - target_norm_value = 4.0 - dim = 3 - with self.test_session(): - output = ops.normalize_to_target(inputs, target_norm_value, dim) - self.assertEqual(output.get_shape().as_list(), - inputs.get_shape().as_list()) - - def test_correct_initial_output_values(self): - inputs = tf.constant([[[[3, 4], [7, 24]], - [[5, -12], [-1, 0]]]], tf.float32) - target_norm_value = 10.0 - dim = 3 - expected_output = [[[[30/5.0, 40/5.0], [70/25.0, 240/25.0]], - [[50/13.0, -120/13.0], [-10, 0]]]] - with self.test_session() as sess: - normalized_inputs = ops.normalize_to_target(inputs, target_norm_value, - dim) - sess.run(tf.global_variables_initializer()) - output = normalized_inputs.eval() - self.assertAllClose(output, expected_output) - - def test_multiple_target_norm_values(self): - inputs = tf.constant([[[[3, 4], [7, 24]], - [[5, -12], [-1, 0]]]], tf.float32) - target_norm_value = [10.0, 20.0] - dim = 3 - expected_output = [[[[30/5.0, 80/5.0], [70/25.0, 480/25.0]], - [[50/13.0, -240/13.0], [-10, 0]]]] - with self.test_session() as sess: - normalized_inputs = ops.normalize_to_target(inputs, target_norm_value, - dim) - sess.run(tf.global_variables_initializer()) - output = normalized_inputs.eval() - self.assertAllClose(output, expected_output) - - -class 
OpsTestPositionSensitiveCropRegions(tf.test.TestCase): - - def test_position_sensitive(self): - num_spatial_bins = [3, 2] - image_shape = [1, 3, 2, 6] - - # First channel is 1's, second channel is 2's, etc. - image = tf.constant(range(1, 3 * 2 + 1) * 6, dtype=tf.float32, - shape=image_shape) - boxes = tf.random_uniform((2, 4)) - box_ind = tf.constant([0, 0], dtype=tf.int32) - - # The result for both boxes should be [[1, 2], [3, 4], [5, 6]] - # before averaging. - expected_output = np.array([3.5, 3.5]).reshape([2, 1, 1, 1]) - - for crop_size_mult in range(1, 3): - crop_size = [3 * crop_size_mult, 2 * crop_size_mult] - ps_crop_and_pool = ops.position_sensitive_crop_regions( - image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True) - - with self.test_session() as sess: - output = sess.run(ps_crop_and_pool) - self.assertAllClose(output, expected_output) - - def test_position_sensitive_with_equal_channels(self): - num_spatial_bins = [2, 2] - image_shape = [1, 3, 3, 4] - crop_size = [2, 2] - - image = tf.constant(range(1, 3 * 3 + 1), dtype=tf.float32, - shape=[1, 3, 3, 1]) - tiled_image = tf.tile(image, [1, 1, 1, image_shape[3]]) - boxes = tf.random_uniform((3, 4)) - box_ind = tf.constant([0, 0, 0], dtype=tf.int32) - - # All channels are equal so position-sensitive crop and resize should - # work as the usual crop and resize for just one channel. 
- crop = tf.image.crop_and_resize(image, boxes, box_ind, crop_size) - crop_and_pool = tf.reduce_mean(crop, [1, 2], keep_dims=True) - - ps_crop_and_pool = ops.position_sensitive_crop_regions( - tiled_image, - boxes, - box_ind, - crop_size, - num_spatial_bins, - global_pool=True) - - with self.test_session() as sess: - expected_output, output = sess.run((crop_and_pool, ps_crop_and_pool)) - self.assertAllClose(output, expected_output) - - def test_position_sensitive_with_single_bin(self): - num_spatial_bins = [1, 1] - image_shape = [2, 3, 3, 4] - crop_size = [2, 2] - - image = tf.random_uniform(image_shape) - boxes = tf.random_uniform((6, 4)) - box_ind = tf.constant([0, 0, 0, 1, 1, 1], dtype=tf.int32) - - # When a single bin is used, position-sensitive crop and pool should be - # the same as non-position sensitive crop and pool. - crop = tf.image.crop_and_resize(image, boxes, box_ind, crop_size) - crop_and_pool = tf.reduce_mean(crop, [1, 2], keep_dims=True) - - ps_crop_and_pool = ops.position_sensitive_crop_regions( - image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True) - - with self.test_session() as sess: - expected_output, output = sess.run((crop_and_pool, ps_crop_and_pool)) - self.assertAllClose(output, expected_output) - - def test_raise_value_error_on_num_bins_less_than_one(self): - num_spatial_bins = [1, -1] - image_shape = [1, 1, 1, 2] - crop_size = [2, 2] - - image = tf.constant(1, dtype=tf.float32, shape=image_shape) - boxes = tf.constant([[0, 0, 1, 1]], dtype=tf.float32) - box_ind = tf.constant([0], dtype=tf.int32) - - with self.assertRaisesRegexp(ValueError, 'num_spatial_bins should be >= 1'): - ops.position_sensitive_crop_regions( - image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True) - - def test_raise_value_error_on_non_divisible_crop_size(self): - num_spatial_bins = [2, 3] - image_shape = [1, 1, 1, 6] - crop_size = [3, 2] - - image = tf.constant(1, dtype=tf.float32, shape=image_shape) - boxes = tf.constant([[0, 0, 1, 
1]], dtype=tf.float32) - box_ind = tf.constant([0], dtype=tf.int32) - - with self.assertRaisesRegexp( - ValueError, 'crop_size should be divisible by num_spatial_bins'): - ops.position_sensitive_crop_regions( - image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True) - - def test_raise_value_error_on_non_divisible_num_channels(self): - num_spatial_bins = [2, 2] - image_shape = [1, 1, 1, 5] - crop_size = [2, 2] - - image = tf.constant(1, dtype=tf.float32, shape=image_shape) - boxes = tf.constant([[0, 0, 1, 1]], dtype=tf.float32) - box_ind = tf.constant([0], dtype=tf.int32) - - with self.assertRaisesRegexp( - ValueError, 'Dimension size must be evenly divisible by 4 but is 5'): - ops.position_sensitive_crop_regions( - image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True) - - def test_position_sensitive_with_global_pool_false(self): - num_spatial_bins = [3, 2] - image_shape = [1, 3, 2, 6] - num_boxes = 2 - - # First channel is 1's, second channel is 2's, etc. - image = tf.constant(range(1, 3 * 2 + 1) * 6, dtype=tf.float32, - shape=image_shape) - boxes = tf.random_uniform((num_boxes, 4)) - box_ind = tf.constant([0, 0], dtype=tf.int32) - - expected_output = [] - - # Expected output, when crop_size = [3, 2]. - expected_output.append(np.expand_dims( - np.tile(np.array([[1, 2], - [3, 4], - [5, 6]]), (num_boxes, 1, 1)), - axis=-1)) - - # Expected output, when crop_size = [6, 4]. 
- expected_output.append(np.expand_dims( - np.tile(np.array([[1, 1, 2, 2], - [1, 1, 2, 2], - [3, 3, 4, 4], - [3, 3, 4, 4], - [5, 5, 6, 6], - [5, 5, 6, 6]]), (num_boxes, 1, 1)), - axis=-1)) - - for crop_size_mult in range(1, 3): - crop_size = [3 * crop_size_mult, 2 * crop_size_mult] - ps_crop = ops.position_sensitive_crop_regions( - image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=False) - with self.test_session() as sess: - output = sess.run(ps_crop) - - self.assertAllEqual(output, expected_output[crop_size_mult - 1]) - - def test_position_sensitive_with_global_pool_false_and_known_boxes(self): - num_spatial_bins = [2, 2] - image_shape = [2, 2, 2, 4] - crop_size = [2, 2] - - image = tf.constant(range(1, 2 * 2 * 4 + 1) * 2, dtype=tf.float32, - shape=image_shape) - - # First box contains whole image, and second box contains only first row. - boxes = tf.constant(np.array([[0., 0., 1., 1.], - [0., 0., 0.5, 1.]]), dtype=tf.float32) - box_ind = tf.constant([0, 1], dtype=tf.int32) - - expected_output = [] - - # Expected output, when the box containing whole image. - expected_output.append( - np.reshape(np.array([[4, 7], - [10, 13]]), - (1, 2, 2, 1)) - ) - - # Expected output, when the box containing only first row. 
- expected_output.append( - np.reshape(np.array([[3, 6], - [7, 10]]), - (1, 2, 2, 1)) - ) - expected_output = np.concatenate(expected_output, axis=0) - - ps_crop = ops.position_sensitive_crop_regions( - image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=False) - - with self.test_session() as sess: - output = sess.run(ps_crop) - self.assertAllEqual(output, expected_output) - - def test_position_sensitive_with_global_pool_false_and_single_bin(self): - num_spatial_bins = [1, 1] - image_shape = [2, 3, 3, 4] - crop_size = [1, 1] - - image = tf.random_uniform(image_shape) - boxes = tf.random_uniform((6, 4)) - box_ind = tf.constant([0, 0, 0, 1, 1, 1], dtype=tf.int32) - - # Since single_bin is used and crop_size = [1, 1] (i.e., no crop resize), - # the outputs are the same whatever the global_pool value is. - ps_crop_and_pool = ops.position_sensitive_crop_regions( - image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True) - ps_crop = ops.position_sensitive_crop_regions( - image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=False) - - with self.test_session() as sess: - pooled_output, unpooled_output = sess.run((ps_crop_and_pool, ps_crop)) - self.assertAllClose(pooled_output, unpooled_output) - - def test_position_sensitive_with_global_pool_false_and_do_global_pool(self): - num_spatial_bins = [3, 2] - image_shape = [1, 3, 2, 6] - num_boxes = 2 - - # First channel is 1's, second channel is 2's, etc. - image = tf.constant(range(1, 3 * 2 + 1) * 6, dtype=tf.float32, - shape=image_shape) - boxes = tf.random_uniform((num_boxes, 4)) - box_ind = tf.constant([0, 0], dtype=tf.int32) - - expected_output = [] - - # Expected output, when crop_size = [3, 2]. - expected_output.append(np.mean( - np.expand_dims( - np.tile(np.array([[1, 2], - [3, 4], - [5, 6]]), (num_boxes, 1, 1)), - axis=-1), - axis=(1, 2), keepdims=True)) - - # Expected output, when crop_size = [6, 4]. 
- expected_output.append(np.mean( - np.expand_dims( - np.tile(np.array([[1, 1, 2, 2], - [1, 1, 2, 2], - [3, 3, 4, 4], - [3, 3, 4, 4], - [5, 5, 6, 6], - [5, 5, 6, 6]]), (num_boxes, 1, 1)), - axis=-1), - axis=(1, 2), keepdims=True)) - - for crop_size_mult in range(1, 3): - crop_size = [3 * crop_size_mult, 2 * crop_size_mult] - - # Perform global_pooling after running the function with - # global_pool=False. - ps_crop = ops.position_sensitive_crop_regions( - image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=False) - ps_crop_and_pool = tf.reduce_mean( - ps_crop, reduction_indices=(1, 2), keep_dims=True) - - with self.test_session() as sess: - output = sess.run(ps_crop_and_pool) - - self.assertAllEqual(output, expected_output[crop_size_mult - 1]) - - def test_raise_value_error_on_non_square_block_size(self): - num_spatial_bins = [3, 2] - image_shape = [1, 3, 2, 6] - crop_size = [6, 2] - - image = tf.constant(1, dtype=tf.float32, shape=image_shape) - boxes = tf.constant([[0, 0, 1, 1]], dtype=tf.float32) - box_ind = tf.constant([0], dtype=tf.int32) - - with self.assertRaisesRegexp( - ValueError, 'Only support square bin crop size for now.'): - ops.position_sensitive_crop_regions( - image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=False) - - -class ReframeBoxMasksToImageMasksTest(tf.test.TestCase): - - def testZeroImageOnEmptyMask(self): - box_masks = tf.constant([[[0, 0], - [0, 0]]], dtype=tf.float32) - boxes = tf.constant([[0.0, 0.0, 1.0, 1.0]], dtype=tf.float32) - image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes, - image_height=4, - image_width=4) - np_expected_image_masks = np.array([[[0, 0, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 0]]], dtype=np.float32) - with self.test_session() as sess: - np_image_masks = sess.run(image_masks) - self.assertAllClose(np_image_masks, np_expected_image_masks) - - def testMaskIsCenteredInImageWhenBoxIsCentered(self): - box_masks = tf.constant([[[1, 1], - [1, 1]]], 
dtype=tf.float32) - boxes = tf.constant([[0.25, 0.25, 0.75, 0.75]], dtype=tf.float32) - image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes, - image_height=4, - image_width=4) - np_expected_image_masks = np.array([[[0, 0, 0, 0], - [0, 1, 1, 0], - [0, 1, 1, 0], - [0, 0, 0, 0]]], dtype=np.float32) - with self.test_session() as sess: - np_image_masks = sess.run(image_masks) - self.assertAllClose(np_image_masks, np_expected_image_masks) - - def testMaskOffCenterRemainsOffCenterInImage(self): - box_masks = tf.constant([[[1, 0], - [0, 1]]], dtype=tf.float32) - boxes = tf.constant([[0.25, 0.5, 0.75, 1.0]], dtype=tf.float32) - image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes, - image_height=4, - image_width=4) - np_expected_image_masks = np.array([[[0, 0, 0, 0], - [0, 0, 0.6111111, 0.16666669], - [0, 0, 0.3888889, 0.83333337], - [0, 0, 0, 0]]], dtype=np.float32) - with self.test_session() as sess: - np_image_masks = sess.run(image_masks) - self.assertAllClose(np_image_masks, np_expected_image_masks) - - -class MergeBoxesWithMultipleLabelsTest(tf.test.TestCase): - - def testMergeBoxesWithMultipleLabels(self): - boxes = tf.constant( - [[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75], - [0.25, 0.25, 0.75, 0.75]], - dtype=tf.float32) - class_indices = tf.constant([0, 4, 2], dtype=tf.int32) - num_classes = 5 - merged_boxes, merged_classes, merged_box_indices = ( - ops.merge_boxes_with_multiple_labels(boxes, class_indices, num_classes)) - expected_merged_boxes = np.array( - [[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]], dtype=np.float32) - expected_merged_classes = np.array( - [[1, 0, 1, 0, 0], [0, 0, 0, 0, 1]], dtype=np.int32) - expected_merged_box_indices = np.array([0, 1], dtype=np.int32) - with self.test_session() as sess: - np_merged_boxes, np_merged_classes, np_merged_box_indices = sess.run( - [merged_boxes, merged_classes, merged_box_indices]) - if np_merged_classes[0, 0] != 1: - expected_merged_boxes = expected_merged_boxes[::-1, :] 
- expected_merged_classes = expected_merged_classes[::-1, :] - expected_merged_box_indices = expected_merged_box_indices[::-1, :] - self.assertAllClose(np_merged_boxes, expected_merged_boxes) - self.assertAllClose(np_merged_classes, expected_merged_classes) - self.assertAllClose(np_merged_box_indices, expected_merged_box_indices) - - def testMergeBoxesWithEmptyInputs(self): - boxes = tf.constant([[]]) - class_indices = tf.constant([]) - num_classes = 5 - merged_boxes, merged_classes, merged_box_indices = ( - ops.merge_boxes_with_multiple_labels(boxes, class_indices, num_classes)) - with self.test_session() as sess: - np_merged_boxes, np_merged_classes, np_merged_box_indices = sess.run( - [merged_boxes, merged_classes, merged_box_indices]) - self.assertAllEqual(np_merged_boxes.shape, [0, 4]) - self.assertAllEqual(np_merged_classes.shape, [0, 5]) - self.assertAllEqual(np_merged_box_indices.shape, [0]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/utils/per_image_evaluation.py b/object_detection/utils/per_image_evaluation.py deleted file mode 100644 index eb7001fc..00000000 --- a/object_detection/utils/per_image_evaluation.py +++ /dev/null @@ -1,312 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Evaluate Object Detection result on a single image. 
- -Annotate each detected result as true positives or false positive according to -a predefined IOU ratio. Non Maximum Supression is used by default. Multi class -detection is supported by default. -""" -import numpy as np - -from object_detection.utils import np_box_list -from object_detection.utils import np_box_list_ops - - -class PerImageEvaluation(object): - """Evaluate detection result of a single image.""" - - def __init__(self, - num_groundtruth_classes, - matching_iou_threshold=0.5, - nms_iou_threshold=0.3, - nms_max_output_boxes=50): - """Initialized PerImageEvaluation by evaluation parameters. - - Args: - num_groundtruth_classes: Number of ground truth object classes - matching_iou_threshold: A ratio of area intersection to union, which is - the threshold to consider whether a detection is true positive or not - nms_iou_threshold: IOU threshold used in Non Maximum Suppression. - nms_max_output_boxes: Number of maximum output boxes in NMS. - """ - self.matching_iou_threshold = matching_iou_threshold - self.nms_iou_threshold = nms_iou_threshold - self.nms_max_output_boxes = nms_max_output_boxes - self.num_groundtruth_classes = num_groundtruth_classes - - def compute_object_detection_metrics( - self, detected_boxes, detected_scores, detected_class_labels, - groundtruth_boxes, groundtruth_class_labels, - groundtruth_is_difficult_lists, groundtruth_is_group_of_list): - """Evaluates detections as being tp, fp or ignored from a single image. - - The evaluation is done in two stages: - 1. All detections are matched to non group-of boxes; true positives are - determined and detections matched to difficult boxes are ignored. - 2. Detections that are determined as false positives are matched against - group-of boxes and ignored if matched. - - Args: - detected_boxes: A float numpy array of shape [N, 4], representing N - regions of detected object regions. 
- Each row is of the format [y_min, x_min, y_max, x_max] - detected_scores: A float numpy array of shape [N, 1], representing - the confidence scores of the detected N object instances. - detected_class_labels: A integer numpy array of shape [N, 1], repreneting - the class labels of the detected N object instances. - groundtruth_boxes: A float numpy array of shape [M, 4], representing M - regions of object instances in ground truth - groundtruth_class_labels: An integer numpy array of shape [M, 1], - representing M class labels of object instances in ground truth - groundtruth_is_difficult_lists: A boolean numpy array of length M denoting - whether a ground truth box is a difficult instance or not - groundtruth_is_group_of_list: A boolean numpy array of length M denoting - whether a ground truth box has group-of tag - - Returns: - scores: A list of C float numpy arrays. Each numpy array is of - shape [K, 1], representing K scores detected with object class - label c - tp_fp_labels: A list of C boolean numpy arrays. 
Each numpy array - is of shape [K, 1], representing K True/False positive label of - object instances detected with class label c - is_class_correctly_detected_in_image: a numpy integer array of - shape [C, 1], indicating whether the correponding class has a least - one instance being correctly detected in the image - """ - detected_boxes, detected_scores, detected_class_labels = ( - self._remove_invalid_boxes(detected_boxes, detected_scores, - detected_class_labels)) - scores, tp_fp_labels = self._compute_tp_fp( - detected_boxes, detected_scores, detected_class_labels, - groundtruth_boxes, groundtruth_class_labels, - groundtruth_is_difficult_lists, groundtruth_is_group_of_list) - - is_class_correctly_detected_in_image = self._compute_cor_loc( - detected_boxes, detected_scores, detected_class_labels, - groundtruth_boxes, groundtruth_class_labels) - return scores, tp_fp_labels, is_class_correctly_detected_in_image - - def _compute_cor_loc(self, detected_boxes, detected_scores, - detected_class_labels, groundtruth_boxes, - groundtruth_class_labels): - """Compute CorLoc score for object detection result. - - Args: - detected_boxes: A float numpy array of shape [N, 4], representing N - regions of detected object regions. - Each row is of the format [y_min, x_min, y_max, x_max] - detected_scores: A float numpy array of shape [N, 1], representing - the confidence scores of the detected N object instances. - detected_class_labels: A integer numpy array of shape [N, 1], repreneting - the class labels of the detected N object instances. 
- groundtruth_boxes: A float numpy array of shape [M, 4], representing M - regions of object instances in ground truth - groundtruth_class_labels: An integer numpy array of shape [M, 1], - representing M class labels of object instances in ground truth - Returns: - is_class_correctly_detected_in_image: a numpy integer array of - shape [C, 1], indicating whether the correponding class has a least - one instance being correctly detected in the image - """ - is_class_correctly_detected_in_image = np.zeros( - self.num_groundtruth_classes, dtype=int) - for i in range(self.num_groundtruth_classes): - gt_boxes_at_ith_class = groundtruth_boxes[groundtruth_class_labels == - i, :] - detected_boxes_at_ith_class = detected_boxes[detected_class_labels == - i, :] - detected_scores_at_ith_class = detected_scores[detected_class_labels == i] - is_class_correctly_detected_in_image[i] = ( - self._compute_is_aclass_correctly_detected_in_image( - detected_boxes_at_ith_class, detected_scores_at_ith_class, - gt_boxes_at_ith_class)) - - return is_class_correctly_detected_in_image - - def _compute_is_aclass_correctly_detected_in_image( - self, detected_boxes, detected_scores, groundtruth_boxes): - """Compute CorLoc score for a single class. 
- - Args: - detected_boxes: A numpy array of shape [N, 4] representing detected box - coordinates - detected_scores: A 1-d numpy array of length N representing classification - score - groundtruth_boxes: A numpy array of shape [M, 4] representing ground truth - box coordinates - - Returns: - is_class_correctly_detected_in_image: An integer 1 or 0 denoting whether a - class is correctly detected in the image or not - """ - if detected_boxes.size > 0: - if groundtruth_boxes.size > 0: - max_score_id = np.argmax(detected_scores) - detected_boxlist = np_box_list.BoxList( - np.expand_dims(detected_boxes[max_score_id, :], axis=0)) - gt_boxlist = np_box_list.BoxList(groundtruth_boxes) - iou = np_box_list_ops.iou(detected_boxlist, gt_boxlist) - if np.max(iou) >= self.matching_iou_threshold: - return 1 - return 0 - - def _compute_tp_fp(self, detected_boxes, detected_scores, - detected_class_labels, groundtruth_boxes, - groundtruth_class_labels, groundtruth_is_difficult_lists, - groundtruth_is_group_of_list): - """Labels true/false positives of detections of an image across all classes. - - Args: - detected_boxes: A float numpy array of shape [N, 4], representing N - regions of detected object regions. - Each row is of the format [y_min, x_min, y_max, x_max] - detected_scores: A float numpy array of shape [N, 1], representing - the confidence scores of the detected N object instances. - detected_class_labels: A integer numpy array of shape [N, 1], repreneting - the class labels of the detected N object instances. 
- groundtruth_boxes: A float numpy array of shape [M, 4], representing M - regions of object instances in ground truth - groundtruth_class_labels: An integer numpy array of shape [M, 1], - representing M class labels of object instances in ground truth - groundtruth_is_difficult_lists: A boolean numpy array of length M denoting - whether a ground truth box is a difficult instance or not - groundtruth_is_group_of_list: A boolean numpy array of length M denoting - whether a ground truth box has group-of tag - - Returns: - result_scores: A list of float numpy arrays. Each numpy array is of - shape [K, 1], representing K scores detected with object class - label c - result_tp_fp_labels: A list of boolean numpy array. Each numpy array is of - shape [K, 1], representing K True/False positive label of object - instances detected with class label c - """ - result_scores = [] - result_tp_fp_labels = [] - for i in range(self.num_groundtruth_classes): - gt_boxes_at_ith_class = groundtruth_boxes[(groundtruth_class_labels == i - ), :] - groundtruth_is_difficult_list_at_ith_class = ( - groundtruth_is_difficult_lists[groundtruth_class_labels == i]) - groundtruth_is_group_of_list_at_ith_class = ( - groundtruth_is_group_of_list[groundtruth_class_labels == i]) - detected_boxes_at_ith_class = detected_boxes[(detected_class_labels == i - ), :] - detected_scores_at_ith_class = detected_scores[detected_class_labels == i] - scores, tp_fp_labels = self._compute_tp_fp_for_single_class( - detected_boxes_at_ith_class, detected_scores_at_ith_class, - gt_boxes_at_ith_class, groundtruth_is_difficult_list_at_ith_class, - groundtruth_is_group_of_list_at_ith_class) - result_scores.append(scores) - result_tp_fp_labels.append(tp_fp_labels) - return result_scores, result_tp_fp_labels - - def _remove_invalid_boxes(self, detected_boxes, detected_scores, - detected_class_labels): - valid_indices = np.logical_and(detected_boxes[:, 0] < detected_boxes[:, 2], - detected_boxes[:, 1] < detected_boxes[:, 3]) 
- return (detected_boxes[valid_indices, :], detected_scores[valid_indices], - detected_class_labels[valid_indices]) - - def _compute_tp_fp_for_single_class( - self, detected_boxes, detected_scores, groundtruth_boxes, - groundtruth_is_difficult_list, groundtruth_is_group_of_list): - """Labels boxes detected with the same class from the same image as tp/fp. - - Args: - detected_boxes: A numpy array of shape [N, 4] representing detected box - coordinates - detected_scores: A 1-d numpy array of length N representing classification - score - groundtruth_boxes: A numpy array of shape [M, 4] representing ground truth - box coordinates - groundtruth_is_difficult_list: A boolean numpy array of length M denoting - whether a ground truth box is a difficult instance or not. If a - groundtruth box is difficult, every detection matching this box - is ignored. - groundtruth_is_group_of_list: A boolean numpy array of length M denoting - whether a ground truth box has group-of tag. If a groundtruth box - is group-of box, every detection matching this box is ignored. - - Returns: - Two arrays of the same size, containing all boxes that were evaluated as - being true positives or false positives; if a box matched to a difficult - box or to a group-of box, it is ignored. - - scores: A numpy array representing the detection scores. - tp_fp_labels: a boolean numpy array indicating whether a detection is a - true positive. 
- - """ - if detected_boxes.size == 0: - return np.array([], dtype=float), np.array([], dtype=bool) - detected_boxlist = np_box_list.BoxList(detected_boxes) - detected_boxlist.add_field('scores', detected_scores) - detected_boxlist = np_box_list_ops.non_max_suppression( - detected_boxlist, self.nms_max_output_boxes, self.nms_iou_threshold) - - scores = detected_boxlist.get_field('scores') - - if groundtruth_boxes.size == 0: - return scores, np.zeros(detected_boxlist.num_boxes(), dtype=bool) - - tp_fp_labels = np.zeros(detected_boxlist.num_boxes(), dtype=bool) - is_matched_to_difficult_box = np.zeros( - detected_boxlist.num_boxes(), dtype=bool) - is_matched_to_group_of_box = np.zeros( - detected_boxlist.num_boxes(), dtype=bool) - - # The evaluation is done in two stages: - # 1. All detections are matched to non group-of boxes; true positives are - # determined and detections matched to difficult boxes are ignored. - # 2. Detections that are determined as false positives are matched against - # group-of boxes and ignored if matched. - - # Tp-fp evaluation for non-group of boxes (if any). - gt_non_group_of_boxlist = np_box_list.BoxList( - groundtruth_boxes[~groundtruth_is_group_of_list, :]) - if gt_non_group_of_boxlist.num_boxes() > 0: - groundtruth_nongroup_of_is_difficult_list = groundtruth_is_difficult_list[ - ~groundtruth_is_group_of_list] - iou = np_box_list_ops.iou(detected_boxlist, gt_non_group_of_boxlist) - max_overlap_gt_ids = np.argmax(iou, axis=1) - is_gt_box_detected = np.zeros( - gt_non_group_of_boxlist.num_boxes(), dtype=bool) - for i in range(detected_boxlist.num_boxes()): - gt_id = max_overlap_gt_ids[i] - if iou[i, gt_id] >= self.matching_iou_threshold: - if not groundtruth_nongroup_of_is_difficult_list[gt_id]: - if not is_gt_box_detected[gt_id]: - tp_fp_labels[i] = True - is_gt_box_detected[gt_id] = True - else: - is_matched_to_difficult_box[i] = True - - # Tp-fp evaluation for group of boxes. 
- gt_group_of_boxlist = np_box_list.BoxList( - groundtruth_boxes[groundtruth_is_group_of_list, :]) - if gt_group_of_boxlist.num_boxes() > 0: - ioa = np_box_list_ops.ioa(gt_group_of_boxlist, detected_boxlist) - max_overlap_group_of_gt = np.max(ioa, axis=0) - for i in range(detected_boxlist.num_boxes()): - if (not tp_fp_labels[i] and not is_matched_to_difficult_box[i] and - max_overlap_group_of_gt[i] >= self.matching_iou_threshold): - is_matched_to_group_of_box[i] = True - - return scores[~is_matched_to_difficult_box - & ~is_matched_to_group_of_box], tp_fp_labels[ - ~is_matched_to_difficult_box - & ~is_matched_to_group_of_box] diff --git a/object_detection/utils/per_image_evaluation_test.py b/object_detection/utils/per_image_evaluation_test.py deleted file mode 100644 index ffd089bf..00000000 --- a/object_detection/utils/per_image_evaluation_test.py +++ /dev/null @@ -1,276 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for object_detection.utils.per_image_evaluation.""" - -import numpy as np -import tensorflow as tf - -from object_detection.utils import per_image_evaluation - - -class SingleClassTpFpWithDifficultBoxesTest(tf.test.TestCase): - - def setUp(self): - num_groundtruth_classes = 1 - matching_iou_threshold = 0.5 - nms_iou_threshold = 1.0 - nms_max_output_boxes = 10000 - self.eval = per_image_evaluation.PerImageEvaluation( - num_groundtruth_classes, matching_iou_threshold, nms_iou_threshold, - nms_max_output_boxes) - - self.detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]], - dtype=float) - self.detected_scores = np.array([0.6, 0.8, 0.5], dtype=float) - self.groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 10, 10]], - dtype=float) - - def test_match_to_not_difficult_box(self): - groundtruth_groundtruth_is_difficult_list = np.array([False, True], - dtype=bool) - groundtruth_groundtruth_is_group_of_list = np.array( - [False, False], dtype=bool) - scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class( - self.detected_boxes, self.detected_scores, self.groundtruth_boxes, - groundtruth_groundtruth_is_difficult_list, - groundtruth_groundtruth_is_group_of_list) - expected_scores = np.array([0.8, 0.6, 0.5], dtype=float) - expected_tp_fp_labels = np.array([False, True, False], dtype=bool) - self.assertTrue(np.allclose(expected_scores, scores)) - self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels)) - - def test_match_to_difficult_box(self): - groundtruth_groundtruth_is_difficult_list = np.array([True, False], - dtype=bool) - groundtruth_groundtruth_is_group_of_list = np.array( - [False, False], dtype=bool) - scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class( - self.detected_boxes, self.detected_scores, self.groundtruth_boxes, - groundtruth_groundtruth_is_difficult_list, - groundtruth_groundtruth_is_group_of_list) - expected_scores 
= np.array([0.8, 0.5], dtype=float) - expected_tp_fp_labels = np.array([False, False], dtype=bool) - self.assertTrue(np.allclose(expected_scores, scores)) - self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels)) - - -class SingleClassTpFpWithGroupOfBoxesTest(tf.test.TestCase): - - def setUp(self): - num_groundtruth_classes = 1 - matching_iou_threshold = 0.5 - nms_iou_threshold = 1.0 - nms_max_output_boxes = 10000 - self.eval = per_image_evaluation.PerImageEvaluation( - num_groundtruth_classes, matching_iou_threshold, nms_iou_threshold, - nms_max_output_boxes) - - self.detected_boxes = np.array( - [[0, 0, 1, 1], [0, 0, 2, 1], [0, 0, 3, 1]], dtype=float) - self.detected_scores = np.array([0.8, 0.6, 0.5], dtype=float) - self.groundtruth_boxes = np.array( - [[0, 0, 1, 1], [0, 0, 5, 5], [10, 10, 20, 20]], dtype=float) - - def test_match_to_non_group_of_and_group_of_box(self): - groundtruth_groundtruth_is_difficult_list = np.array( - [False, False, False], dtype=bool) - groundtruth_groundtruth_is_group_of_list = np.array( - [False, True, True], dtype=bool) - expected_scores = np.array([0.8], dtype=float) - expected_tp_fp_labels = np.array([True], dtype=bool) - scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class( - self.detected_boxes, self.detected_scores, self.groundtruth_boxes, - groundtruth_groundtruth_is_difficult_list, - groundtruth_groundtruth_is_group_of_list) - self.assertTrue(np.allclose(expected_scores, scores)) - self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels)) - - def test_match_two_to_group_of_box(self): - groundtruth_groundtruth_is_difficult_list = np.array( - [False, False, False], dtype=bool) - groundtruth_groundtruth_is_group_of_list = np.array( - [True, False, True], dtype=bool) - expected_scores = np.array([0.5], dtype=float) - expected_tp_fp_labels = np.array([False], dtype=bool) - scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class( - self.detected_boxes, self.detected_scores, 
self.groundtruth_boxes, - groundtruth_groundtruth_is_difficult_list, - groundtruth_groundtruth_is_group_of_list) - self.assertTrue(np.allclose(expected_scores, scores)) - self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels)) - - -class SingleClassTpFpNoDifficultBoxesTest(tf.test.TestCase): - - def setUp(self): - num_groundtruth_classes = 1 - matching_iou_threshold1 = 0.5 - matching_iou_threshold2 = 0.1 - nms_iou_threshold = 1.0 - nms_max_output_boxes = 10000 - self.eval1 = per_image_evaluation.PerImageEvaluation( - num_groundtruth_classes, matching_iou_threshold1, nms_iou_threshold, - nms_max_output_boxes) - - self.eval2 = per_image_evaluation.PerImageEvaluation( - num_groundtruth_classes, matching_iou_threshold2, nms_iou_threshold, - nms_max_output_boxes) - - self.detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]], - dtype=float) - self.detected_scores = np.array([0.6, 0.8, 0.5], dtype=float) - - def test_no_true_positives(self): - groundtruth_boxes = np.array([[100, 100, 105, 105]], dtype=float) - groundtruth_groundtruth_is_difficult_list = np.zeros(1, dtype=bool) - groundtruth_groundtruth_is_group_of_list = np.array([False], dtype=bool) - scores, tp_fp_labels = self.eval1._compute_tp_fp_for_single_class( - self.detected_boxes, self.detected_scores, groundtruth_boxes, - groundtruth_groundtruth_is_difficult_list, - groundtruth_groundtruth_is_group_of_list) - expected_scores = np.array([0.8, 0.6, 0.5], dtype=float) - expected_tp_fp_labels = np.array([False, False, False], dtype=bool) - self.assertTrue(np.allclose(expected_scores, scores)) - self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels)) - - def test_one_true_positives_with_large_iou_threshold(self): - groundtruth_boxes = np.array([[0, 0, 1, 1]], dtype=float) - groundtruth_groundtruth_is_difficult_list = np.zeros(1, dtype=bool) - groundtruth_groundtruth_is_group_of_list = np.array([False], dtype=bool) - scores, tp_fp_labels = 
self.eval1._compute_tp_fp_for_single_class( - self.detected_boxes, self.detected_scores, groundtruth_boxes, - groundtruth_groundtruth_is_difficult_list, - groundtruth_groundtruth_is_group_of_list) - expected_scores = np.array([0.8, 0.6, 0.5], dtype=float) - expected_tp_fp_labels = np.array([False, True, False], dtype=bool) - self.assertTrue(np.allclose(expected_scores, scores)) - self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels)) - - def test_one_true_positives_with_very_small_iou_threshold(self): - groundtruth_boxes = np.array([[0, 0, 1, 1]], dtype=float) - groundtruth_groundtruth_is_difficult_list = np.zeros(1, dtype=bool) - groundtruth_groundtruth_is_group_of_list = np.array([False], dtype=bool) - scores, tp_fp_labels = self.eval2._compute_tp_fp_for_single_class( - self.detected_boxes, self.detected_scores, groundtruth_boxes, - groundtruth_groundtruth_is_difficult_list, - groundtruth_groundtruth_is_group_of_list) - expected_scores = np.array([0.8, 0.6, 0.5], dtype=float) - expected_tp_fp_labels = np.array([True, False, False], dtype=bool) - self.assertTrue(np.allclose(expected_scores, scores)) - self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels)) - - def test_two_true_positives_with_large_iou_threshold(self): - groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 3.5, 3.5]], dtype=float) - groundtruth_groundtruth_is_difficult_list = np.zeros(2, dtype=bool) - groundtruth_groundtruth_is_group_of_list = np.array( - [False, False], dtype=bool) - scores, tp_fp_labels = self.eval1._compute_tp_fp_for_single_class( - self.detected_boxes, self.detected_scores, groundtruth_boxes, - groundtruth_groundtruth_is_difficult_list, - groundtruth_groundtruth_is_group_of_list) - expected_scores = np.array([0.8, 0.6, 0.5], dtype=float) - expected_tp_fp_labels = np.array([False, True, True], dtype=bool) - self.assertTrue(np.allclose(expected_scores, scores)) - self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels)) - - -class 
MultiClassesTpFpTest(tf.test.TestCase): - - def test_tp_fp(self): - num_groundtruth_classes = 3 - matching_iou_threshold = 0.5 - nms_iou_threshold = 1.0 - nms_max_output_boxes = 10000 - eval1 = per_image_evaluation.PerImageEvaluation(num_groundtruth_classes, - matching_iou_threshold, - nms_iou_threshold, - nms_max_output_boxes) - detected_boxes = np.array([[0, 0, 1, 1], [10, 10, 5, 5], [0, 0, 2, 2], - [5, 10, 10, 5], [10, 5, 5, 10], [0, 0, 3, 3]], - dtype=float) - detected_scores = np.array([0.8, 0.1, 0.8, 0.9, 0.7, 0.8], dtype=float) - detected_class_labels = np.array([0, 1, 1, 2, 0, 2], dtype=int) - groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 3.5, 3.5]], dtype=float) - groundtruth_class_labels = np.array([0, 2], dtype=int) - groundtruth_groundtruth_is_difficult_list = np.zeros(2, dtype=float) - groundtruth_groundtruth_is_group_of_list = np.array( - [False, False], dtype=bool) - scores, tp_fp_labels, _ = eval1.compute_object_detection_metrics( - detected_boxes, detected_scores, detected_class_labels, - groundtruth_boxes, groundtruth_class_labels, - groundtruth_groundtruth_is_difficult_list, - groundtruth_groundtruth_is_group_of_list) - expected_scores = [np.array([0.8], dtype=float)] * 3 - expected_tp_fp_labels = [np.array([True]), np.array([False]), np.array([True - ])] - for i in range(len(expected_scores)): - self.assertTrue(np.allclose(expected_scores[i], scores[i])) - self.assertTrue(np.array_equal(expected_tp_fp_labels[i], tp_fp_labels[i])) - - -class CorLocTest(tf.test.TestCase): - - def test_compute_corloc_with_normal_iou_threshold(self): - num_groundtruth_classes = 3 - matching_iou_threshold = 0.5 - nms_iou_threshold = 1.0 - nms_max_output_boxes = 10000 - eval1 = per_image_evaluation.PerImageEvaluation(num_groundtruth_classes, - matching_iou_threshold, - nms_iou_threshold, - nms_max_output_boxes) - detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3], - [0, 0, 5, 5]], dtype=float) - detected_scores = np.array([0.9, 0.9, 0.1, 0.9], 
dtype=float) - detected_class_labels = np.array([0, 1, 0, 2], dtype=int) - groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 3, 3], [0, 0, 6, 6]], - dtype=float) - groundtruth_class_labels = np.array([0, 0, 2], dtype=int) - - is_class_correctly_detected_in_image = eval1._compute_cor_loc( - detected_boxes, detected_scores, detected_class_labels, - groundtruth_boxes, groundtruth_class_labels) - expected_result = np.array([1, 0, 1], dtype=int) - self.assertTrue(np.array_equal(expected_result, - is_class_correctly_detected_in_image)) - - def test_compute_corloc_with_very_large_iou_threshold(self): - num_groundtruth_classes = 3 - matching_iou_threshold = 0.9 - nms_iou_threshold = 1.0 - nms_max_output_boxes = 10000 - eval1 = per_image_evaluation.PerImageEvaluation(num_groundtruth_classes, - matching_iou_threshold, - nms_iou_threshold, - nms_max_output_boxes) - detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3], - [0, 0, 5, 5]], dtype=float) - detected_scores = np.array([0.9, 0.9, 0.1, 0.9], dtype=float) - detected_class_labels = np.array([0, 1, 0, 2], dtype=int) - groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 3, 3], [0, 0, 6, 6]], - dtype=float) - groundtruth_class_labels = np.array([0, 0, 2], dtype=int) - - is_class_correctly_detected_in_image = eval1._compute_cor_loc( - detected_boxes, detected_scores, detected_class_labels, - groundtruth_boxes, groundtruth_class_labels) - expected_result = np.array([1, 0, 0], dtype=int) - self.assertTrue(np.array_equal(expected_result, - is_class_correctly_detected_in_image)) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/utils/shape_utils.py b/object_detection/utils/shape_utils.py deleted file mode 100644 index 880d367e..00000000 --- a/object_detection/utils/shape_utils.py +++ /dev/null @@ -1,136 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Utils used to manipulate tensor shapes.""" - -import tensorflow as tf - - -def _is_tensor(t): - """Returns a boolean indicating whether the input is a tensor. - - Args: - t: the input to be tested. - - Returns: - a boolean that indicates whether t is a tensor. - """ - return isinstance(t, (tf.Tensor, tf.SparseTensor, tf.Variable)) - - -def _set_dim_0(t, d0): - """Sets the 0-th dimension of the input tensor. - - Args: - t: the input tensor, assuming the rank is at least 1. - d0: an integer indicating the 0-th dimension of the input tensor. - - Returns: - the tensor t with the 0-th dimension set. - """ - t_shape = t.get_shape().as_list() - t_shape[0] = d0 - t.set_shape(t_shape) - return t - - -def pad_tensor(t, length): - """Pads the input tensor with 0s along the first dimension up to the length. - - Args: - t: the input tensor, assuming the rank is at least 1. - length: a tensor of shape [1] or an integer, indicating the first dimension - of the input tensor t after padding, assuming length <= t.shape[0]. - - Returns: - padded_t: the padded tensor, whose first dimension is length. If the length - is an integer, the first dimension of padded_t is set to length - statically. 
- """ - t_rank = tf.rank(t) - t_shape = tf.shape(t) - t_d0 = t_shape[0] - pad_d0 = tf.expand_dims(length - t_d0, 0) - pad_shape = tf.cond( - tf.greater(t_rank, 1), lambda: tf.concat([pad_d0, t_shape[1:]], 0), - lambda: tf.expand_dims(length - t_d0, 0)) - padded_t = tf.concat([t, tf.zeros(pad_shape, dtype=t.dtype)], 0) - if not _is_tensor(length): - padded_t = _set_dim_0(padded_t, length) - return padded_t - - -def clip_tensor(t, length): - """Clips the input tensor along the first dimension up to the length. - - Args: - t: the input tensor, assuming the rank is at least 1. - length: a tensor of shape [1] or an integer, indicating the first dimension - of the input tensor t after clipping, assuming length <= t.shape[0]. - - Returns: - clipped_t: the clipped tensor, whose first dimension is length. If the - length is an integer, the first dimension of clipped_t is set to length - statically. - """ - clipped_t = tf.gather(t, tf.range(length)) - if not _is_tensor(length): - clipped_t = _set_dim_0(clipped_t, length) - return clipped_t - - -def pad_or_clip_tensor(t, length): - """Pad or clip the input tensor along the first dimension. - - Args: - t: the input tensor, assuming the rank is at least 1. - length: a tensor of shape [1] or an integer, indicating the first dimension - of the input tensor t after processing. - - Returns: - processed_t: the processed tensor, whose first dimension is length. If the - length is an integer, the first dimension of the processed tensor is set - to length statically. - """ - processed_t = tf.cond( - tf.greater(tf.shape(t)[0], length), - lambda: clip_tensor(t, length), - lambda: pad_tensor(t, length)) - if not _is_tensor(length): - processed_t = _set_dim_0(processed_t, length) - return processed_t - - -def combined_static_and_dynamic_shape(tensor): - """Returns a list containing static and dynamic values for the dimensions. - - Returns a list of static and dynamic values for shape dimensions. 
This is - useful to preserve static shapes when available in reshape operation. - - Args: - tensor: A tensor of any type. - - Returns: - A list of size tensor.shape.ndims containing integers or a scalar tensor. - """ - static_shape = tensor.shape.as_list() - dynamic_shape = tf.shape(tensor) - combined_shape = [] - for index, dim in enumerate(static_shape): - if dim is not None: - combined_shape.append(dim) - else: - combined_shape.append(dynamic_shape[index]) - return combined_shape diff --git a/object_detection/utils/shape_utils_test.py b/object_detection/utils/shape_utils_test.py deleted file mode 100644 index abeacac8..00000000 --- a/object_detection/utils/shape_utils_test.py +++ /dev/null @@ -1,127 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for object_detection.utils.shape_utils.""" - -import tensorflow as tf - -from object_detection.utils import shape_utils - - -class UtilTest(tf.test.TestCase): - - def test_pad_tensor_using_integer_input(self): - t1 = tf.constant([1], dtype=tf.int32) - pad_t1 = shape_utils.pad_tensor(t1, 2) - t2 = tf.constant([[0.1, 0.2]], dtype=tf.float32) - pad_t2 = shape_utils.pad_tensor(t2, 2) - - self.assertEqual(2, pad_t1.get_shape()[0]) - self.assertEqual(2, pad_t2.get_shape()[0]) - - with self.test_session() as sess: - pad_t1_result, pad_t2_result = sess.run([pad_t1, pad_t2]) - self.assertAllEqual([1, 0], pad_t1_result) - self.assertAllClose([[0.1, 0.2], [0, 0]], pad_t2_result) - - def test_pad_tensor_using_tensor_input(self): - t1 = tf.constant([1], dtype=tf.int32) - pad_t1 = shape_utils.pad_tensor(t1, tf.constant(2)) - t2 = tf.constant([[0.1, 0.2]], dtype=tf.float32) - pad_t2 = shape_utils.pad_tensor(t2, tf.constant(2)) - - with self.test_session() as sess: - pad_t1_result, pad_t2_result = sess.run([pad_t1, pad_t2]) - self.assertAllEqual([1, 0], pad_t1_result) - self.assertAllClose([[0.1, 0.2], [0, 0]], pad_t2_result) - - def test_clip_tensor_using_integer_input(self): - t1 = tf.constant([1, 2, 3], dtype=tf.int32) - clip_t1 = shape_utils.clip_tensor(t1, 2) - t2 = tf.constant([[0.1, 0.2], [0.2, 0.4], [0.5, 0.8]], dtype=tf.float32) - clip_t2 = shape_utils.clip_tensor(t2, 2) - - self.assertEqual(2, clip_t1.get_shape()[0]) - self.assertEqual(2, clip_t2.get_shape()[0]) - - with self.test_session() as sess: - clip_t1_result, clip_t2_result = sess.run([clip_t1, clip_t2]) - self.assertAllEqual([1, 2], clip_t1_result) - self.assertAllClose([[0.1, 0.2], [0.2, 0.4]], clip_t2_result) - - def test_clip_tensor_using_tensor_input(self): - t1 = tf.constant([1, 2, 3], dtype=tf.int32) - clip_t1 = shape_utils.clip_tensor(t1, tf.constant(2)) - t2 = tf.constant([[0.1, 0.2], [0.2, 0.4], [0.5, 0.8]], 
dtype=tf.float32) - clip_t2 = shape_utils.clip_tensor(t2, tf.constant(2)) - - with self.test_session() as sess: - clip_t1_result, clip_t2_result = sess.run([clip_t1, clip_t2]) - self.assertAllEqual([1, 2], clip_t1_result) - self.assertAllClose([[0.1, 0.2], [0.2, 0.4]], clip_t2_result) - - def test_pad_or_clip_tensor_using_integer_input(self): - t1 = tf.constant([1], dtype=tf.int32) - tt1 = shape_utils.pad_or_clip_tensor(t1, 2) - t2 = tf.constant([[0.1, 0.2]], dtype=tf.float32) - tt2 = shape_utils.pad_or_clip_tensor(t2, 2) - - t3 = tf.constant([1, 2, 3], dtype=tf.int32) - tt3 = shape_utils.clip_tensor(t3, 2) - t4 = tf.constant([[0.1, 0.2], [0.2, 0.4], [0.5, 0.8]], dtype=tf.float32) - tt4 = shape_utils.clip_tensor(t4, 2) - - self.assertEqual(2, tt1.get_shape()[0]) - self.assertEqual(2, tt2.get_shape()[0]) - self.assertEqual(2, tt3.get_shape()[0]) - self.assertEqual(2, tt4.get_shape()[0]) - - with self.test_session() as sess: - tt1_result, tt2_result, tt3_result, tt4_result = sess.run( - [tt1, tt2, tt3, tt4]) - self.assertAllEqual([1, 0], tt1_result) - self.assertAllClose([[0.1, 0.2], [0, 0]], tt2_result) - self.assertAllEqual([1, 2], tt3_result) - self.assertAllClose([[0.1, 0.2], [0.2, 0.4]], tt4_result) - - def test_pad_or_clip_tensor_using_tensor_input(self): - t1 = tf.constant([1], dtype=tf.int32) - tt1 = shape_utils.pad_or_clip_tensor(t1, tf.constant(2)) - t2 = tf.constant([[0.1, 0.2]], dtype=tf.float32) - tt2 = shape_utils.pad_or_clip_tensor(t2, tf.constant(2)) - - t3 = tf.constant([1, 2, 3], dtype=tf.int32) - tt3 = shape_utils.clip_tensor(t3, tf.constant(2)) - t4 = tf.constant([[0.1, 0.2], [0.2, 0.4], [0.5, 0.8]], dtype=tf.float32) - tt4 = shape_utils.clip_tensor(t4, tf.constant(2)) - - with self.test_session() as sess: - tt1_result, tt2_result, tt3_result, tt4_result = sess.run( - [tt1, tt2, tt3, tt4]) - self.assertAllEqual([1, 0], tt1_result) - self.assertAllClose([[0.1, 0.2], [0, 0]], tt2_result) - self.assertAllEqual([1, 2], tt3_result) - 
self.assertAllClose([[0.1, 0.2], [0.2, 0.4]], tt4_result) - - def test_combines_static_dynamic_shape(self): - tensor = tf.placeholder(tf.float32, shape=(None, 2, 3)) - combined_shape = shape_utils.combined_static_and_dynamic_shape( - tensor) - self.assertTrue(tf.contrib.framework.is_tensor(combined_shape[0])) - self.assertListEqual(combined_shape[1:], [2, 3]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/utils/static_shape.py b/object_detection/utils/static_shape.py deleted file mode 100644 index 8e4e522f..00000000 --- a/object_detection/utils/static_shape.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Helper functions to access TensorShape values. - -The rank 4 tensor_shape must be of the form [batch_size, height, width, depth]. -""" - - -def get_batch_size(tensor_shape): - """Returns batch size from the tensor shape. - - Args: - tensor_shape: A rank 4 TensorShape. - - Returns: - An integer representing the batch size of the tensor. - """ - tensor_shape.assert_has_rank(rank=4) - return tensor_shape[0].value - - -def get_height(tensor_shape): - """Returns height from the tensor shape. - - Args: - tensor_shape: A rank 4 TensorShape. - - Returns: - An integer representing the height of the tensor. 
- """ - tensor_shape.assert_has_rank(rank=4) - return tensor_shape[1].value - - -def get_width(tensor_shape): - """Returns width from the tensor shape. - - Args: - tensor_shape: A rank 4 TensorShape. - - Returns: - An integer representing the width of the tensor. - """ - tensor_shape.assert_has_rank(rank=4) - return tensor_shape[2].value - - -def get_depth(tensor_shape): - """Returns depth from the tensor shape. - - Args: - tensor_shape: A rank 4 TensorShape. - - Returns: - An integer representing the depth of the tensor. - """ - tensor_shape.assert_has_rank(rank=4) - return tensor_shape[3].value diff --git a/object_detection/utils/static_shape_test.py b/object_detection/utils/static_shape_test.py deleted file mode 100644 index 99307e93..00000000 --- a/object_detection/utils/static_shape_test.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for object_detection.utils.static_shape.""" - -import tensorflow as tf - -from object_detection.utils import static_shape - - -class StaticShapeTest(tf.test.TestCase): - - def test_return_correct_batchSize(self): - tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3]) - self.assertEqual(32, static_shape.get_batch_size(tensor_shape)) - - def test_return_correct_height(self): - tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3]) - self.assertEqual(299, static_shape.get_height(tensor_shape)) - - def test_return_correct_width(self): - tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3]) - self.assertEqual(384, static_shape.get_width(tensor_shape)) - - def test_return_correct_depth(self): - tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3]) - self.assertEqual(3, static_shape.get_depth(tensor_shape)) - - def test_die_on_tensor_shape_with_rank_three(self): - tensor_shape = tf.TensorShape(dims=[32, 299, 384]) - with self.assertRaises(ValueError): - static_shape.get_batch_size(tensor_shape) - static_shape.get_height(tensor_shape) - static_shape.get_width(tensor_shape) - static_shape.get_depth(tensor_shape) - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/utils/test_utils.py b/object_detection/utils/test_utils.py deleted file mode 100644 index e6277ea5..00000000 --- a/object_detection/utils/test_utils.py +++ /dev/null @@ -1,139 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Contains functions which are convenient for unit testing.""" -import numpy as np -import tensorflow as tf - -from object_detection.core import anchor_generator -from object_detection.core import box_coder -from object_detection.core import box_list -from object_detection.core import box_predictor -from object_detection.core import matcher -from object_detection.utils import shape_utils - - -class MockBoxCoder(box_coder.BoxCoder): - """Simple `difference` BoxCoder.""" - - @property - def code_size(self): - return 4 - - def _encode(self, boxes, anchors): - return boxes.get() - anchors.get() - - def _decode(self, rel_codes, anchors): - return box_list.BoxList(rel_codes + anchors.get()) - - -class MockBoxPredictor(box_predictor.BoxPredictor): - """Simple box predictor that ignores inputs and outputs all zeros.""" - - def __init__(self, is_training, num_classes): - super(MockBoxPredictor, self).__init__(is_training, num_classes) - - def _predict(self, image_features, num_predictions_per_location): - combined_feature_shape = shape_utils.combined_static_and_dynamic_shape( - image_features) - batch_size = combined_feature_shape[0] - num_anchors = (combined_feature_shape[1] * combined_feature_shape[2]) - code_size = 4 - zero = tf.reduce_sum(0 * image_features) - box_encodings = zero + tf.zeros( - (batch_size, num_anchors, 1, code_size), dtype=tf.float32) - class_predictions_with_background = zero + tf.zeros( - (batch_size, num_anchors, self.num_classes + 1), dtype=tf.float32) - return {box_predictor.BOX_ENCODINGS: box_encodings, - box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND: - class_predictions_with_background} - - -class MockAnchorGenerator(anchor_generator.AnchorGenerator): - """Mock anchor generator.""" - - def name_scope(self): - return 'MockAnchorGenerator' - - def 
num_anchors_per_location(self): - return [1] - - def _generate(self, feature_map_shape_list): - num_anchors = sum([shape[0] * shape[1] for shape in feature_map_shape_list]) - return box_list.BoxList(tf.zeros((num_anchors, 4), dtype=tf.float32)) - - -class MockMatcher(matcher.Matcher): - """Simple matcher that matches first anchor to first groundtruth box.""" - - def _match(self, similarity_matrix): - return tf.constant([0, -1, -1, -1], dtype=tf.int32) - - -def create_diagonal_gradient_image(height, width, depth): - """Creates pyramid image. Useful for testing. - - For example, pyramid_image(5, 6, 1) looks like: - # [[[ 5. 4. 3. 2. 1. 0.] - # [ 6. 5. 4. 3. 2. 1.] - # [ 7. 6. 5. 4. 3. 2.] - # [ 8. 7. 6. 5. 4. 3.] - # [ 9. 8. 7. 6. 5. 4.]]] - - Args: - height: height of image - width: width of image - depth: depth of image - - Returns: - pyramid image - """ - row = np.arange(height) - col = np.arange(width)[::-1] - image_layer = np.expand_dims(row, 1) + col - image_layer = np.expand_dims(image_layer, 2) - - image = image_layer - for i in range(1, depth): - image = np.concatenate((image, image_layer * pow(10, i)), 2) - - return image.astype(np.float32) - - -def create_random_boxes(num_boxes, max_height, max_width): - """Creates random bounding boxes of specific maximum height and width. - - Args: - num_boxes: number of boxes. - max_height: maximum height of boxes. - max_width: maximum width of boxes. - - Returns: - boxes: numpy array of shape [num_boxes, 4]. Each row is in form - [y_min, x_min, y_max, x_max]. 
- """ - - y_1 = np.random.uniform(size=(1, num_boxes)) * max_height - y_2 = np.random.uniform(size=(1, num_boxes)) * max_height - x_1 = np.random.uniform(size=(1, num_boxes)) * max_width - x_2 = np.random.uniform(size=(1, num_boxes)) * max_width - - boxes = np.zeros(shape=(num_boxes, 4)) - boxes[:, 0] = np.minimum(y_1, y_2) - boxes[:, 1] = np.minimum(x_1, x_2) - boxes[:, 2] = np.maximum(y_1, y_2) - boxes[:, 3] = np.maximum(x_1, x_2) - - return boxes.astype(np.float32) diff --git a/object_detection/utils/test_utils_test.py b/object_detection/utils/test_utils_test.py deleted file mode 100644 index 1a4799c6..00000000 --- a/object_detection/utils/test_utils_test.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for object_detection.utils.test_utils.""" - -import numpy as np -import tensorflow as tf - -from object_detection.utils import test_utils - - -class TestUtilsTest(tf.test.TestCase): - - def test_diagonal_gradient_image(self): - """Tests if a good pyramid image is created.""" - pyramid_image = test_utils.create_diagonal_gradient_image(3, 4, 2) - - # Test which is easy to understand. 
- expected_first_channel = np.array([[3, 2, 1, 0], - [4, 3, 2, 1], - [5, 4, 3, 2]], dtype=np.float32) - self.assertAllEqual(np.squeeze(pyramid_image[:, :, 0]), - expected_first_channel) - - # Actual test. - expected_image = np.array([[[3, 30], - [2, 20], - [1, 10], - [0, 0]], - [[4, 40], - [3, 30], - [2, 20], - [1, 10]], - [[5, 50], - [4, 40], - [3, 30], - [2, 20]]], dtype=np.float32) - - self.assertAllEqual(pyramid_image, expected_image) - - def test_random_boxes(self): - """Tests if valid random boxes are created.""" - num_boxes = 1000 - max_height = 3 - max_width = 5 - boxes = test_utils.create_random_boxes(num_boxes, - max_height, - max_width) - - true_column = np.ones(shape=(num_boxes)) == 1 - self.assertAllEqual(boxes[:, 0] < boxes[:, 2], true_column) - self.assertAllEqual(boxes[:, 1] < boxes[:, 3], true_column) - - self.assertTrue(boxes[:, 0].min() >= 0) - self.assertTrue(boxes[:, 1].min() >= 0) - self.assertTrue(boxes[:, 2].max() <= max_height) - self.assertTrue(boxes[:, 3].max() <= max_width) - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/utils/variables_helper.py b/object_detection/utils/variables_helper.py deleted file mode 100644 index b27f814f..00000000 --- a/object_detection/utils/variables_helper.py +++ /dev/null @@ -1,133 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Helper functions for manipulating collections of variables during training. -""" -import logging -import re - -import tensorflow as tf - -slim = tf.contrib.slim - - -# TODO: Consider replacing with tf.contrib.filter_variables in -# tensorflow/contrib/framework/python/ops/variables.py -def filter_variables(variables, filter_regex_list, invert=False): - """Filters out the variables matching the filter_regex. - - Filter out the variables whose name matches the any of the regular - expressions in filter_regex_list and returns the remaining variables. - Optionally, if invert=True, the complement set is returned. - - Args: - variables: a list of tensorflow variables. - filter_regex_list: a list of string regular expressions. - invert: (boolean). If True, returns the complement of the filter set; that - is, all variables matching filter_regex are kept and all others discarded. - - Returns: - a list of filtered variables. - """ - kept_vars = [] - variables_to_ignore_patterns = filter(None, filter_regex_list) - for var in variables: - add = True - for pattern in variables_to_ignore_patterns: - if re.match(pattern, var.op.name): - add = False - break - if add != invert: - kept_vars.append(var) - return kept_vars - - -def multiply_gradients_matching_regex(grads_and_vars, regex_list, multiplier): - """Multiply gradients whose variable names match a regular expression. - - Args: - grads_and_vars: A list of gradient to variable pairs (tuples). - regex_list: A list of string regular expressions. - multiplier: A (float) multiplier to apply to each gradient matching the - regular expression. - - Returns: - grads_and_vars: A list of gradient to variable pairs (tuples). 
- """ - variables = [pair[1] for pair in grads_and_vars] - matching_vars = filter_variables(variables, regex_list, invert=True) - for var in matching_vars: - logging.info('Applying multiplier %f to variable [%s]', - multiplier, var.op.name) - grad_multipliers = {var: float(multiplier) for var in matching_vars} - return slim.learning.multiply_gradients(grads_and_vars, - grad_multipliers) - - -def freeze_gradients_matching_regex(grads_and_vars, regex_list): - """Freeze gradients whose variable names match a regular expression. - - Args: - grads_and_vars: A list of gradient to variable pairs (tuples). - regex_list: A list of string regular expressions. - - Returns: - grads_and_vars: A list of gradient to variable pairs (tuples) that do not - contain the variables and gradients matching the regex. - """ - variables = [pair[1] for pair in grads_and_vars] - matching_vars = filter_variables(variables, regex_list, invert=True) - kept_grads_and_vars = [pair for pair in grads_and_vars - if pair[1] not in matching_vars] - for var in matching_vars: - logging.info('Freezing variable [%s]', var.op.name) - return kept_grads_and_vars - - -def get_variables_available_in_checkpoint(variables, checkpoint_path): - """Returns the subset of variables available in the checkpoint. - - Inspects given checkpoint and returns the subset of variables that are - available in it. - - TODO: force input and output to be a dictionary. - - Args: - variables: a list or dictionary of variables to find in checkpoint. - checkpoint_path: path to the checkpoint to restore variables from. - - Returns: - A list or dictionary of variables. - Raises: - ValueError: if `variables` is not a list or dict. 
- """ - if isinstance(variables, list): - variable_names_map = {variable.op.name: variable for variable in variables} - elif isinstance(variables, dict): - variable_names_map = variables - else: - raise ValueError('`variables` is expected to be a list or dict.') - ckpt_reader = tf.train.NewCheckpointReader(checkpoint_path) - ckpt_vars = ckpt_reader.get_variable_to_shape_map().keys() - vars_in_ckpt = {} - for variable_name, variable in sorted(variable_names_map.items()): - if variable_name in ckpt_vars: - vars_in_ckpt[variable_name] = variable - else: - logging.warning('Variable [%s] not available in checkpoint', - variable_name) - if isinstance(variables, list): - return vars_in_ckpt.values() - return vars_in_ckpt diff --git a/object_detection/utils/variables_helper_test.py b/object_detection/utils/variables_helper_test.py deleted file mode 100644 index c04b1191..00000000 --- a/object_detection/utils/variables_helper_test.py +++ /dev/null @@ -1,185 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for object_detection.utils.variables_helper.""" -import os - -import tensorflow as tf - -from object_detection.utils import variables_helper - - -class FilterVariablesTest(tf.test.TestCase): - - def _create_variables(self): - return [tf.Variable(1.0, name='FeatureExtractor/InceptionV3/weights'), - tf.Variable(1.0, name='FeatureExtractor/InceptionV3/biases'), - tf.Variable(1.0, name='StackProposalGenerator/weights'), - tf.Variable(1.0, name='StackProposalGenerator/biases')] - - def test_return_all_variables_when_empty_regex(self): - variables = self._create_variables() - out_variables = variables_helper.filter_variables(variables, ['']) - self.assertItemsEqual(out_variables, variables) - - def test_return_variables_which_do_not_match_single_regex(self): - variables = self._create_variables() - out_variables = variables_helper.filter_variables(variables, - ['FeatureExtractor/.*']) - self.assertItemsEqual(out_variables, variables[2:]) - - def test_return_variables_which_do_not_match_any_regex_in_list(self): - variables = self._create_variables() - out_variables = variables_helper.filter_variables(variables, [ - 'FeatureExtractor.*biases', 'StackProposalGenerator.*biases' - ]) - self.assertItemsEqual(out_variables, [variables[0], variables[2]]) - - def test_return_variables_matching_empty_regex_list(self): - variables = self._create_variables() - out_variables = variables_helper.filter_variables( - variables, [''], invert=True) - self.assertItemsEqual(out_variables, []) - - def test_return_variables_matching_some_regex_in_list(self): - variables = self._create_variables() - out_variables = variables_helper.filter_variables( - variables, - ['FeatureExtractor.*biases', 'StackProposalGenerator.*biases'], - invert=True) - self.assertItemsEqual(out_variables, [variables[1], variables[3]]) - - -class MultiplyGradientsMatchingRegexTest(tf.test.TestCase): - - def 
_create_grads_and_vars(self): - return [(tf.constant(1.0), - tf.Variable(1.0, name='FeatureExtractor/InceptionV3/weights')), - (tf.constant(2.0), - tf.Variable(2.0, name='FeatureExtractor/InceptionV3/biases')), - (tf.constant(3.0), - tf.Variable(3.0, name='StackProposalGenerator/weights')), - (tf.constant(4.0), - tf.Variable(4.0, name='StackProposalGenerator/biases'))] - - def test_multiply_all_feature_extractor_variables(self): - grads_and_vars = self._create_grads_and_vars() - regex_list = ['FeatureExtractor/.*'] - multiplier = 0.0 - grads_and_vars = variables_helper.multiply_gradients_matching_regex( - grads_and_vars, regex_list, multiplier) - exp_output = [(0.0, 1.0), (0.0, 2.0), (3.0, 3.0), (4.0, 4.0)] - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - output = sess.run(grads_and_vars) - self.assertItemsEqual(output, exp_output) - - def test_multiply_all_bias_variables(self): - grads_and_vars = self._create_grads_and_vars() - regex_list = ['.*/biases'] - multiplier = 0.0 - grads_and_vars = variables_helper.multiply_gradients_matching_regex( - grads_and_vars, regex_list, multiplier) - exp_output = [(1.0, 1.0), (0.0, 2.0), (3.0, 3.0), (0.0, 4.0)] - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - output = sess.run(grads_and_vars) - self.assertItemsEqual(output, exp_output) - - -class FreezeGradientsMatchingRegexTest(tf.test.TestCase): - - def _create_grads_and_vars(self): - return [(tf.constant(1.0), - tf.Variable(1.0, name='FeatureExtractor/InceptionV3/weights')), - (tf.constant(2.0), - tf.Variable(2.0, name='FeatureExtractor/InceptionV3/biases')), - (tf.constant(3.0), - tf.Variable(3.0, name='StackProposalGenerator/weights')), - (tf.constant(4.0), - tf.Variable(4.0, name='StackProposalGenerator/biases'))] - - def test_freeze_all_feature_extractor_variables(self): - grads_and_vars = self._create_grads_and_vars() - regex_list = ['FeatureExtractor/.*'] - 
grads_and_vars = variables_helper.freeze_gradients_matching_regex( - grads_and_vars, regex_list) - exp_output = [(3.0, 3.0), (4.0, 4.0)] - init_op = tf.global_variables_initializer() - with self.test_session() as sess: - sess.run(init_op) - output = sess.run(grads_and_vars) - self.assertItemsEqual(output, exp_output) - - -class GetVariablesAvailableInCheckpointTest(tf.test.TestCase): - - def test_return_all_variables_from_checkpoint(self): - variables = [ - tf.Variable(1.0, name='weights'), - tf.Variable(1.0, name='biases') - ] - checkpoint_path = os.path.join(self.get_temp_dir(), 'graph.pb') - init_op = tf.global_variables_initializer() - saver = tf.train.Saver(variables) - with self.test_session() as sess: - sess.run(init_op) - saver.save(sess, checkpoint_path) - out_variables = variables_helper.get_variables_available_in_checkpoint( - variables, checkpoint_path) - self.assertItemsEqual(out_variables, variables) - - def test_return_variables_available_in_checkpoint(self): - checkpoint_path = os.path.join(self.get_temp_dir(), 'graph.pb') - graph1_variables = [ - tf.Variable(1.0, name='weights'), - ] - init_op = tf.global_variables_initializer() - saver = tf.train.Saver(graph1_variables) - with self.test_session() as sess: - sess.run(init_op) - saver.save(sess, checkpoint_path) - - graph2_variables = graph1_variables + [tf.Variable(1.0, name='biases')] - out_variables = variables_helper.get_variables_available_in_checkpoint( - graph2_variables, checkpoint_path) - self.assertItemsEqual(out_variables, graph1_variables) - - def test_return_variables_available_an_checkpoint_with_dict_inputs(self): - checkpoint_path = os.path.join(self.get_temp_dir(), 'graph.pb') - graph1_variables = [ - tf.Variable(1.0, name='ckpt_weights'), - ] - init_op = tf.global_variables_initializer() - saver = tf.train.Saver(graph1_variables) - with self.test_session() as sess: - sess.run(init_op) - saver.save(sess, checkpoint_path) - - graph2_variables_dict = { - 'ckpt_weights': 
tf.Variable(1.0, name='weights'), - 'ckpt_biases': tf.Variable(1.0, name='biases') - } - out_variables = variables_helper.get_variables_available_in_checkpoint( - graph2_variables_dict, checkpoint_path) - self.assertTrue(isinstance(out_variables, dict)) - self.assertItemsEqual(out_variables.keys(), ['ckpt_weights']) - self.assertTrue(out_variables['ckpt_weights'].op.name == 'weights') - - -if __name__ == '__main__': - tf.test.main() diff --git a/object_detection/utils/visualization_utils.py b/object_detection/utils/visualization_utils.py deleted file mode 100644 index 1bce2ca4..00000000 --- a/object_detection/utils/visualization_utils.py +++ /dev/null @@ -1,524 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""A set of functions that are used for visualization. - -These functions often receive an image, perform some visualization on the image. -The functions do not return a value, instead they modify the image itself. 
- -""" -import collections -import functools -import matplotlib.pyplot as plt -import numpy as np -import PIL.Image as Image -import PIL.ImageColor as ImageColor -import PIL.ImageDraw as ImageDraw -import PIL.ImageFont as ImageFont -import six -import tensorflow as tf - - -_TITLE_LEFT_MARGIN = 10 -_TITLE_TOP_MARGIN = 10 -STANDARD_COLORS = [ - 'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque', - 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite', - 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan', - 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange', - 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet', - 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite', - 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod', - 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki', - 'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue', - 'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey', - 'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue', - 'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime', - 'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid', - 'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen', - 'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin', - 'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed', - 'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed', - 'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple', - 'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown', - 'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue', - 'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow', - 'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White', - 
'WhiteSmoke', 'Yellow', 'YellowGreen' -] - - -def save_image_array_as_png(image, output_path): - """Saves an image (represented as a numpy array) to PNG. - - Args: - image: a numpy array with shape [height, width, 3]. - output_path: path to which image should be written. - """ - image_pil = Image.fromarray(np.uint8(image)).convert('RGB') - with tf.gfile.Open(output_path, 'w') as fid: - image_pil.save(fid, 'PNG') - - -def encode_image_array_as_png_str(image): - """Encodes a numpy array into a PNG string. - - Args: - image: a numpy array with shape [height, width, 3]. - - Returns: - PNG encoded image string. - """ - image_pil = Image.fromarray(np.uint8(image)) - output = six.BytesIO() - image_pil.save(output, format='PNG') - png_string = output.getvalue() - output.close() - return png_string - - -def draw_bounding_box_on_image_array(image, - ymin, - xmin, - ymax, - xmax, - color='red', - thickness=4, - display_str_list=(), - use_normalized_coordinates=True): - """Adds a bounding box to an image (numpy array). - - Args: - image: a numpy array with shape [height, width, 3]. - ymin: ymin of bounding box in normalized coordinates (same below). - xmin: xmin of bounding box. - ymax: ymax of bounding box. - xmax: xmax of bounding box. - color: color to draw bounding box. Default is red. - thickness: line thickness. Default value is 4. - display_str_list: list of strings to display in box - (each to be shown on its own line). - use_normalized_coordinates: If True (default), treat coordinates - ymin, xmin, ymax, xmax as relative to the image. Otherwise treat - coordinates as absolute. 
- """ - image_pil = Image.fromarray(np.uint8(image)).convert('RGB') - draw_bounding_box_on_image(image_pil, ymin, xmin, ymax, xmax, color, - thickness, display_str_list, - use_normalized_coordinates) - np.copyto(image, np.array(image_pil)) - - -def draw_bounding_box_on_image(image, - ymin, - xmin, - ymax, - xmax, - color='red', - thickness=4, - display_str_list=(), - use_normalized_coordinates=True): - """Adds a bounding box to an image. - - Each string in display_str_list is displayed on a separate line above the - bounding box in black text on a rectangle filled with the input 'color'. - If the top of the bounding box extends to the edge of the image, the strings - are displayed below the bounding box. - - Args: - image: a PIL.Image object. - ymin: ymin of bounding box. - xmin: xmin of bounding box. - ymax: ymax of bounding box. - xmax: xmax of bounding box. - color: color to draw bounding box. Default is red. - thickness: line thickness. Default value is 4. - display_str_list: list of strings to display in box - (each to be shown on its own line). - use_normalized_coordinates: If True (default), treat coordinates - ymin, xmin, ymax, xmax as relative to the image. Otherwise treat - coordinates as absolute. - """ - draw = ImageDraw.Draw(image) - im_width, im_height = image.size - if use_normalized_coordinates: - (left, right, top, bottom) = (xmin * im_width, xmax * im_width, - ymin * im_height, ymax * im_height) - else: - (left, right, top, bottom) = (xmin, xmax, ymin, ymax) - draw.line([(left, top), (left, bottom), (right, bottom), - (right, top), (left, top)], width=thickness, fill=color) - try: - font = ImageFont.truetype('arial.ttf', 24) - except IOError: - font = ImageFont.load_default() - - # If the total height of the display strings added to the top of the bounding - # box exceeds the top of the image, stack the strings below the bounding box - # instead of above. 
- display_str_heights = [font.getsize(ds)[1] for ds in display_str_list] - # Each display_str has a top and bottom margin of 0.05x. - total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights) - - if top > total_display_str_height: - text_bottom = top - else: - text_bottom = bottom + total_display_str_height - # Reverse list and print from bottom to top. - for display_str in display_str_list[::-1]: - text_width, text_height = font.getsize(display_str) - margin = np.ceil(0.05 * text_height) - draw.rectangle( - [(left, text_bottom - text_height - 2 * margin), (left + text_width, - text_bottom)], - fill=color) - draw.text( - (left + margin, text_bottom - text_height - margin), - display_str, - fill='black', - font=font) - text_bottom -= text_height - 2 * margin - - -def draw_bounding_boxes_on_image_array(image, - boxes, - color='red', - thickness=4, - display_str_list_list=()): - """Draws bounding boxes on image (numpy array). - - Args: - image: a numpy array object. - boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax). - The coordinates are in normalized format between [0, 1]. - color: color to draw bounding box. Default is red. - thickness: line thickness. Default value is 4. - display_str_list_list: list of list of strings. - a list of strings for each bounding box. - The reason to pass a list of strings for a - bounding box is that it might contain - multiple labels. - - Raises: - ValueError: if boxes is not a [N, 4] array - """ - image_pil = Image.fromarray(image) - draw_bounding_boxes_on_image(image_pil, boxes, color, thickness, - display_str_list_list) - np.copyto(image, np.array(image_pil)) - - -def draw_bounding_boxes_on_image(image, - boxes, - color='red', - thickness=4, - display_str_list_list=()): - """Draws bounding boxes on image. - - Args: - image: a PIL.Image object. - boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax). - The coordinates are in normalized format between [0, 1]. 
- color: color to draw bounding box. Default is red. - thickness: line thickness. Default value is 4. - display_str_list_list: list of list of strings. - a list of strings for each bounding box. - The reason to pass a list of strings for a - bounding box is that it might contain - multiple labels. - - Raises: - ValueError: if boxes is not a [N, 4] array - """ - boxes_shape = boxes.shape - if not boxes_shape: - return - if len(boxes_shape) != 2 or boxes_shape[1] != 4: - raise ValueError('Input must be of size [N, 4]') - for i in range(boxes_shape[0]): - display_str_list = () - if display_str_list_list: - display_str_list = display_str_list_list[i] - draw_bounding_box_on_image(image, boxes[i, 0], boxes[i, 1], boxes[i, 2], - boxes[i, 3], color, thickness, display_str_list) - - -def draw_bounding_boxes_on_image_tensors(images, - boxes, - classes, - scores, - category_index, - max_boxes_to_draw=20, - min_score_thresh=0.2): - """Draws bounding boxes on batch of image tensors. - - Args: - images: A 4D uint8 image tensor of shape [N, H, W, C]. - boxes: [N, max_detections, 4] float32 tensor of detection boxes. - classes: [N, max_detections] int tensor of detection classes. Note that - classes are 1-indexed. - scores: [N, max_detections] float32 tensor of detection scores. - category_index: a dict that maps integer ids to category dicts. e.g. - {1: {1: 'dog'}, 2: {2: 'cat'}, ...} - max_boxes_to_draw: Maximum number of boxes to draw on an image. Default 20. - min_score_thresh: Minimum score threshold for visualization. Default 0.2. - - Returns: - 4D image tensor of type uint8, with boxes drawn on top. 
- """ - visualize_boxes_fn = functools.partial( - visualize_boxes_and_labels_on_image_array, - category_index=category_index, - instance_masks=None, - keypoints=None, - use_normalized_coordinates=True, - max_boxes_to_draw=max_boxes_to_draw, - min_score_thresh=min_score_thresh, - agnostic_mode=False, - line_thickness=4) - - def draw_boxes(image_boxes_classes_scores): - """Draws boxes on image.""" - (image, boxes, classes, scores) = image_boxes_classes_scores - image_with_boxes = tf.py_func(visualize_boxes_fn, - [image, boxes, classes, scores], tf.uint8) - return image_with_boxes - - images = tf.map_fn( - draw_boxes, (images, boxes, classes, scores), - dtype=tf.uint8, - back_prop=False) - return images - - -def draw_keypoints_on_image_array(image, - keypoints, - color='red', - radius=2, - use_normalized_coordinates=True): - """Draws keypoints on an image (numpy array). - - Args: - image: a numpy array with shape [height, width, 3]. - keypoints: a numpy array with shape [num_keypoints, 2]. - color: color to draw the keypoints with. Default is red. - radius: keypoint radius. Default value is 2. - use_normalized_coordinates: if True (default), treat keypoint values as - relative to the image. Otherwise treat them as absolute. - """ - image_pil = Image.fromarray(np.uint8(image)).convert('RGB') - draw_keypoints_on_image(image_pil, keypoints, color, radius, - use_normalized_coordinates) - np.copyto(image, np.array(image_pil)) - - -def draw_keypoints_on_image(image, - keypoints, - color='red', - radius=2, - use_normalized_coordinates=True): - """Draws keypoints on an image. - - Args: - image: a PIL.Image object. - keypoints: a numpy array with shape [num_keypoints, 2]. - color: color to draw the keypoints with. Default is red. - radius: keypoint radius. Default value is 2. - use_normalized_coordinates: if True (default), treat keypoint values as - relative to the image. Otherwise treat them as absolute. 
- """ - draw = ImageDraw.Draw(image) - im_width, im_height = image.size - keypoints_x = [k[1] for k in keypoints] - keypoints_y = [k[0] for k in keypoints] - if use_normalized_coordinates: - keypoints_x = tuple([im_width * x for x in keypoints_x]) - keypoints_y = tuple([im_height * y for y in keypoints_y]) - for keypoint_x, keypoint_y in zip(keypoints_x, keypoints_y): - draw.ellipse([(keypoint_x - radius, keypoint_y - radius), - (keypoint_x + radius, keypoint_y + radius)], - outline=color, fill=color) - - -def draw_mask_on_image_array(image, mask, color='red', alpha=0.7): - """Draws mask on an image. - - Args: - image: uint8 numpy array with shape (img_height, img_height, 3) - mask: a uint8 numpy array of shape (img_height, img_height) with - values between either 0 or 1. - color: color to draw the keypoints with. Default is red. - alpha: transparency value between 0 and 1. (default: 0.7) - - Raises: - ValueError: On incorrect data type for image or masks. - """ - if image.dtype != np.uint8: - raise ValueError('`image` not of type np.uint8') - if mask.dtype != np.uint8: - raise ValueError('`mask` not of type np.uint8') - if np.any(np.logical_and(mask != 1, mask != 0)): - raise ValueError('`mask` elements should be in [0, 1]') - rgb = ImageColor.getrgb(color) - pil_image = Image.fromarray(image) - - solid_color = np.expand_dims( - np.ones_like(mask), axis=2) * np.reshape(list(rgb), [1, 1, 3]) - pil_solid_color = Image.fromarray(np.uint8(solid_color)).convert('RGBA') - pil_mask = Image.fromarray(np.uint8(255.0*alpha*mask)).convert('L') - pil_image = Image.composite(pil_solid_color, pil_image, pil_mask) - np.copyto(image, np.array(pil_image.convert('RGB'))) - - -def visualize_boxes_and_labels_on_image_array(image, - boxes, - classes, - scores, - category_index, - instance_masks=None, - keypoints=None, - use_normalized_coordinates=False, - max_boxes_to_draw=20, - min_score_thresh=.5, - agnostic_mode=False, - line_thickness=4): - """Overlay labeled boxes on an image 
with formatted scores and label names. - - This function groups boxes that correspond to the same location - and creates a display string for each detection and overlays these - on the image. Note that this function modifies the image in place, and returns - that same image. - - Args: - image: uint8 numpy array with shape (img_height, img_width, 3) - boxes: a numpy array of shape [N, 4] - classes: a numpy array of shape [N]. Note that class indices are 1-based, - and match the keys in the label map. - scores: a numpy array of shape [N] or None. If scores=None, then - this function assumes that the boxes to be plotted are groundtruth - boxes and plot all boxes as black with no classes or scores. - category_index: a dict containing category dictionaries (each holding - category index `id` and category name `name`) keyed by category indices. - instance_masks: a numpy array of shape [N, image_height, image_width], can - be None - keypoints: a numpy array of shape [N, num_keypoints, 2], can - be None - use_normalized_coordinates: whether boxes is to be interpreted as - normalized coordinates or not. - max_boxes_to_draw: maximum number of boxes to visualize. If None, draw - all boxes. - min_score_thresh: minimum score threshold for a box to be visualized - agnostic_mode: boolean (default: False) controlling whether to evaluate in - class-agnostic mode or not. This mode will display scores but ignore - classes. - line_thickness: integer (default: 4) controlling line width of the boxes. - - Returns: - uint8 numpy array with shape (img_height, img_width, 3) with overlaid boxes. - """ - # Create a display string (and color) for every box location, group any boxes - # that correspond to the same location. 
- box_to_display_str_map = collections.defaultdict(list) - box_to_color_map = collections.defaultdict(str) - box_to_instance_masks_map = {} - box_to_keypoints_map = collections.defaultdict(list) - if not max_boxes_to_draw: - max_boxes_to_draw = boxes.shape[0] - for i in range(min(max_boxes_to_draw, boxes.shape[0])): - if scores is None or scores[i] > min_score_thresh: - box = tuple(boxes[i].tolist()) - if instance_masks is not None: - box_to_instance_masks_map[box] = instance_masks[i] - if keypoints is not None: - box_to_keypoints_map[box].extend(keypoints[i]) - if scores is None: - box_to_color_map[box] = 'black' - else: - if not agnostic_mode: - if classes[i] in category_index.keys(): - class_name = category_index[classes[i]]['name'] - else: - class_name = 'N/A' - display_str = '{}: {}%'.format( - class_name, - int(100*scores[i])) - else: - display_str = 'score: {}%'.format(int(100 * scores[i])) - box_to_display_str_map[box].append(display_str) - if agnostic_mode: - box_to_color_map[box] = 'DarkOrange' - else: - box_to_color_map[box] = STANDARD_COLORS[ - classes[i] % len(STANDARD_COLORS)] - - # Draw all boxes onto image. - for box, color in box_to_color_map.items(): - ymin, xmin, ymax, xmax = box - if instance_masks is not None: - draw_mask_on_image_array( - image, - box_to_instance_masks_map[box], - color=color - ) - draw_bounding_box_on_image_array( - image, - ymin, - xmin, - ymax, - xmax, - color=color, - thickness=line_thickness, - display_str_list=box_to_display_str_map[box], - use_normalized_coordinates=use_normalized_coordinates) - if keypoints is not None: - draw_keypoints_on_image_array( - image, - box_to_keypoints_map[box], - color=color, - radius=line_thickness / 2, - use_normalized_coordinates=use_normalized_coordinates) - - return image - - -def add_cdf_image_summary(values, name): - """Adds a tf.summary.image for a CDF plot of the values. 
- - Normalizes `values` such that they sum to 1, plots the cumulative distribution - function and creates a tf image summary. - - Args: - values: a 1-D float32 tensor containing the values. - name: name for the image summary. - """ - def cdf_plot(values): - """Numpy function to plot CDF.""" - normalized_values = values / np.sum(values) - sorted_values = np.sort(normalized_values) - cumulative_values = np.cumsum(sorted_values) - fraction_of_examples = (np.arange(cumulative_values.size, dtype=np.float32) - / cumulative_values.size) - fig = plt.figure(frameon=False) - ax = fig.add_subplot('111') - ax.plot(fraction_of_examples, cumulative_values) - ax.set_ylabel('cumulative normalized values') - ax.set_xlabel('fraction of examples') - fig.canvas.draw() - width, height = fig.get_size_inches() * fig.get_dpi() - image = np.fromstring(fig.canvas.tostring_rgb(), dtype='uint8').reshape( - 1, height, width, 3) - return image - cdf_plot = tf.py_func(cdf_plot, [values], tf.uint8) - tf.summary.image(name, cdf_plot) diff --git a/object_detection/utils/visualization_utils_test.py b/object_detection/utils/visualization_utils_test.py deleted file mode 100644 index dffe1cd6..00000000 --- a/object_detection/utils/visualization_utils_test.py +++ /dev/null @@ -1,197 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for image.understanding.object_detection.core.visualization_utils. - -Testing with visualization in the following colab: -https://drive.google.com/a/google.com/file/d/0B5HnKS_hMsNARERpU3MtU3I5RFE/view?usp=sharing - -""" - -import os - -import numpy as np -import PIL.Image as Image -import tensorflow as tf - -from object_detection.utils import visualization_utils - -_TESTDATA_PATH = 'object_detection/test_images' - - -class VisualizationUtilsTest(tf.test.TestCase): - - def create_colorful_test_image(self): - """This function creates an image that can be used to test vis functions. - - It makes an image composed of four colored rectangles. - - Returns: - colorful test numpy array image. - """ - ch255 = np.full([100, 200, 1], 255, dtype=np.uint8) - ch128 = np.full([100, 200, 1], 128, dtype=np.uint8) - ch0 = np.full([100, 200, 1], 0, dtype=np.uint8) - imr = np.concatenate((ch255, ch128, ch128), axis=2) - img = np.concatenate((ch255, ch255, ch0), axis=2) - imb = np.concatenate((ch255, ch0, ch255), axis=2) - imw = np.concatenate((ch128, ch128, ch128), axis=2) - imu = np.concatenate((imr, img), axis=1) - imd = np.concatenate((imb, imw), axis=1) - image = np.concatenate((imu, imd), axis=0) - return image - - def test_draw_bounding_box_on_image(self): - test_image = self.create_colorful_test_image() - test_image = Image.fromarray(test_image) - width_original, height_original = test_image.size - ymin = 0.25 - ymax = 0.75 - xmin = 0.4 - xmax = 0.6 - - visualization_utils.draw_bounding_box_on_image(test_image, ymin, xmin, ymax, - xmax) - width_final, height_final = test_image.size - - self.assertEqual(width_original, width_final) - self.assertEqual(height_original, height_final) - - def test_draw_bounding_box_on_image_array(self): - test_image = self.create_colorful_test_image() - width_original = test_image.shape[0] - height_original = test_image.shape[1] - ymin = 0.25 - ymax = 0.75 
- xmin = 0.4 - xmax = 0.6 - - visualization_utils.draw_bounding_box_on_image_array( - test_image, ymin, xmin, ymax, xmax) - width_final = test_image.shape[0] - height_final = test_image.shape[1] - - self.assertEqual(width_original, width_final) - self.assertEqual(height_original, height_final) - - def test_draw_bounding_boxes_on_image(self): - test_image = self.create_colorful_test_image() - test_image = Image.fromarray(test_image) - width_original, height_original = test_image.size - boxes = np.array([[0.25, 0.75, 0.4, 0.6], - [0.1, 0.1, 0.9, 0.9]]) - - visualization_utils.draw_bounding_boxes_on_image(test_image, boxes) - width_final, height_final = test_image.size - - self.assertEqual(width_original, width_final) - self.assertEqual(height_original, height_final) - - def test_draw_bounding_boxes_on_image_array(self): - test_image = self.create_colorful_test_image() - width_original = test_image.shape[0] - height_original = test_image.shape[1] - boxes = np.array([[0.25, 0.75, 0.4, 0.6], - [0.1, 0.1, 0.9, 0.9]]) - - visualization_utils.draw_bounding_boxes_on_image_array(test_image, boxes) - width_final = test_image.shape[0] - height_final = test_image.shape[1] - - self.assertEqual(width_original, width_final) - self.assertEqual(height_original, height_final) - - def test_draw_bounding_boxes_on_image_tensors(self): - """Tests that bounding box utility produces reasonable results.""" - category_index = {1: {'id': 1, 'name': 'dog'}, 2: {'id': 2, 'name': 'cat'}} - - fname = os.path.join(_TESTDATA_PATH, 'image1.jpg') - image_np = np.array(Image.open(fname)) - images_np = np.stack((image_np, image_np), axis=0) - - with tf.Graph().as_default(): - images_tensor = tf.constant(value=images_np, dtype=tf.uint8) - boxes = tf.constant([[[0.4, 0.25, 0.75, 0.75], [0.5, 0.3, 0.6, 0.9]], - [[0.25, 0.25, 0.75, 0.75], [0.1, 0.3, 0.6, 1.0]]]) - classes = tf.constant([[1, 1], [1, 2]], dtype=tf.int64) - scores = tf.constant([[0.8, 0.1], [0.6, 0.5]]) - images_with_boxes = ( - 
visualization_utils.draw_bounding_boxes_on_image_tensors( - images_tensor, - boxes, - classes, - scores, - category_index, - min_score_thresh=0.2)) - - with self.test_session() as sess: - sess.run(tf.global_variables_initializer()) - - # Write output images for visualization. - images_with_boxes_np = sess.run(images_with_boxes) - self.assertEqual(images_np.shape, images_with_boxes_np.shape) - for i in range(images_with_boxes_np.shape[0]): - img_name = 'image_' + str(i) + '.png' - output_file = os.path.join(self.get_temp_dir(), img_name) - print 'Writing output image %d to %s' % (i, output_file) - image_pil = Image.fromarray(images_with_boxes_np[i, ...]) - image_pil.save(output_file) - - def test_draw_keypoints_on_image(self): - test_image = self.create_colorful_test_image() - test_image = Image.fromarray(test_image) - width_original, height_original = test_image.size - keypoints = [[0.25, 0.75], [0.4, 0.6], [0.1, 0.1], [0.9, 0.9]] - - visualization_utils.draw_keypoints_on_image(test_image, keypoints) - width_final, height_final = test_image.size - - self.assertEqual(width_original, width_final) - self.assertEqual(height_original, height_final) - - def test_draw_keypoints_on_image_array(self): - test_image = self.create_colorful_test_image() - width_original = test_image.shape[0] - height_original = test_image.shape[1] - keypoints = [[0.25, 0.75], [0.4, 0.6], [0.1, 0.1], [0.9, 0.9]] - - visualization_utils.draw_keypoints_on_image_array(test_image, keypoints) - width_final = test_image.shape[0] - height_final = test_image.shape[1] - - self.assertEqual(width_original, width_final) - self.assertEqual(height_original, height_final) - - def test_draw_mask_on_image_array(self): - test_image = np.asarray([[[0, 0, 0], [0, 0, 0]], - [[0, 0, 0], [0, 0, 0]]], dtype=np.uint8) - mask = np.asarray([[0, 1], - [1, 1]], dtype=np.uint8) - expected_result = np.asarray([[[0, 0, 0], [0, 0, 127]], - [[0, 0, 127], [0, 0, 127]]], dtype=np.uint8) - 
visualization_utils.draw_mask_on_image_array(test_image, mask, - color='Blue', alpha=.5) - self.assertAllEqual(test_image, expected_result) - - def test_add_cdf_image_summary(self): - values = [0.1, 0.2, 0.3, 0.4, 0.42, 0.44, 0.46, 0.48, 0.50] - visualization_utils.add_cdf_image_summary(values, 'PositiveAnchorLoss') - cdf_image_summary = tf.get_collection(key=tf.GraphKeys.SUMMARIES)[0] - with self.test_session(): - cdf_image_summary.eval() - - -if __name__ == '__main__': - tf.test.main() diff --git a/test_extract_towncentre.py b/test_extract_towncentre.py new file mode 100644 index 00000000..0bb7c233 --- /dev/null +++ b/test_extract_towncentre.py @@ -0,0 +1,52 @@ +import unittest +from extract_towncentre import validate_video_path, process_video_cmd_args +import tempfile +from unittest.mock import Mock + +class TestExtractTownCentre(unittest.TestCase): + def test_validate_video_path_invalid(self): + file = "/tmp/xyz" + result = validate_video_path("/tmp/xyz") + self.assertIsNotNone(result, "validation should not produce a None result") + status, msg = result + self.assertFalse(status, "validation should fail for an non-existent path") + self.assertEqual(msg, f"{file} does not exist") + + def test_validate_video_path_valid(self): + file = tempfile.NamedTemporaryFile(suffix=".vid") + result = validate_video_path(file.name) + self.assertIsNotNone(result, "validation should not produce a None result") + status, msg = result + self.assertTrue(status, f"validation should succeed for valid path {file.name}") + self.assertEqual(msg, f"Processing {file.name}...") + file.close() + + def test_process_video_cmd_args_no_cmd_args(self): + extract_mock = Mock() + # when no args are passed, only the script name is present + argv = ['script/path'] + process_video_cmd_args(argv, extract_mock.validate_video_path, extract_mock.video2im,) + extract_mock.video2im.assert_called_once() + extract_mock.validate_video_path.assert_not_called() + + def 
test_process_video_cmd_args_with_invalid_cmd_args(self): + extract_mock = Mock() + mock_path = 'video-file' + extract_mock.validate_video_path = Mock(return_value=(False, f"Bad {mock_path}")) + argv = ['script/path', mock_path] + process_video_cmd_args(argv, extract_mock.validate_video_path, extract_mock.video2im,) + extract_mock.video2im.assert_not_called() + extract_mock.validate_video_path.assert_called_once_with(mock_path) + + def test_process_video_cmd_args_with_valid_cmd_args(self): + extract_mock = Mock() + mock_path = 'video-file' + extract_mock.validate_video_path = Mock(return_value=(True, f"Good {mock_path}")) + argv = ['script/path', mock_path] + process_video_cmd_args(argv, extract_mock.validate_video_path, extract_mock.video2im,) + extract_mock.video2im.assert_called_once_with(src=mock_path) + extract_mock.validate_video_path.assert_called_once_with(mock_path) + + +if __name__ == '__main__': + unittest.main()