diff --git a/.gitignore b/.gitignore
index 69435bb8..c0f4fe14 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,8 @@
*.pyc
.ipynb_checkpoints
+images
+test_images
+*.avi
+train.record
+val.record
+object_detection
diff --git a/create_tf_record.py b/create_tf_record.py
index 9774a306..e0969e19 100644
--- a/create_tf_record.py
+++ b/create_tf_record.py
@@ -148,6 +148,7 @@ def create_tf_record(output_filename,
writer.close()
def main(_):
+ logging.getLogger().setLevel(logging.INFO)
label_map_dict = label_map_util.get_label_map_dict('annotations/label_map.pbtxt')
logging.info('Reading from Pet dataset.')
diff --git a/extract_towncentre.py b/extract_towncentre.py
index 5d3f64a4..52397ce5 100644
--- a/extract_towncentre.py
+++ b/extract_towncentre.py
@@ -1,23 +1,30 @@
import os
import cv2
import numpy as np
+import logging as log
#Dataset from http://www.robots.ox.ac.uk/ActiveVision/Research/Projects/2009bbenfold_headpose/project.html#datasets
-def video2im(src, train_path='images', test_path='test_images', factor=2):
+def video2im(src='TownCentreXVID.avi', train_path='images', test_path='test_images', factor=2):
"""
Extracts all frames from a video and saves them as jpgs
"""
- os.mkdir(train_path)
- os.mkdir(test_path)
+    try:
+        os.mkdir(train_path)
+        os.mkdir(test_path)
+    except FileExistsError as fee:
+        log.error(f"Error creating output directories - {fee.strerror}: {fee.filename}")
+        log.getLogger().setLevel(log.INFO)  # the module is imported as `log`; the bare name `logging` is undefined in this file
+        log.info("delete or rename offending directory")
+        return
frame = 0
cap = cv2.VideoCapture(src)
length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-
+
print('Total Frame Count:', length )
-
+
while True:
check, img = cap.read()
if check:
@@ -25,17 +32,46 @@ def video2im(src, train_path='images', test_path='test_images', factor=2):
path = train_path
else:
path = test_path
-
+
img = cv2.resize(img, (1920 // factor, 1080 // factor))
cv2.imwrite(os.path.join(path, str(frame) + ".jpg"), img)
frame += 1
print('Processed: ',frame, end = '\r')
-
+
else:
break
-
+
cap.release()
+def validate_video_path(path):
+    """
+    Check that path names an existing regular file (a directory exists but is not a video file).
+    Returns a (succeeded, message) tuple; the message is meant to be logged by the caller.
+    """
+    if os.path.isfile(path):
+        return (True, f"Processing {path}...")
+    reason = "is not a file" if os.path.exists(path) else "does not exist"
+    return (False, f"{path} {reason}")
+
+def process_video_cmd_args(argv, validator=validate_video_path, processor=video2im):
+    """
+    Run processor(src=argv[1]) when validator(argv[1]) reports success, otherwise log the validator's message as an error.
+    With no argv[1], processor() is called without arguments so that it falls back to its own default path.
+    """
+    if len(argv) < 2:  # explicit check: an IndexError raised inside validator/processor must not be mistaken for a missing argument
+        log.warning('Video file path was not passed to script arguements, trying default location, name')
+        processor()
+        return
+    path = argv[1]
+    status, msg = validator(path)
+    if status:
+        log.info(msg)  # announce the work before it starts rather than after it has finished
+        processor(src=path)
+    else:
+        log.error(msg)
+
+
if __name__ == '__main__':
- video2im('TownCentreXVID.avi')
+ import sys
+ process_video_cmd_args(sys.argv, validate_video_path, video2im)
diff --git a/object_detection/BUILD b/object_detection/BUILD
deleted file mode 100644
index df835b74..00000000
--- a/object_detection/BUILD
+++ /dev/null
@@ -1,136 +0,0 @@
-# Tensorflow Object Detection API: main runnables.
-
-package(
- default_visibility = ["//visibility:public"],
-)
-
-licenses(["notice"])
-
-# Apache 2.0
-py_binary(
- name = "train",
- srcs = [
- "train.py",
- ],
- deps = [
- ":trainer",
- "//tensorflow",
- "//tensorflow_models/object_detection/builders:input_reader_builder",
- "//tensorflow_models/object_detection/builders:model_builder",
- "//tensorflow_models/object_detection/utils:config_util",
- ],
-)
-
-py_library(
- name = "trainer",
- srcs = ["trainer.py"],
- deps = [
- "//tensorflow",
- "//tensorflow_models/object_detection/builders:optimizer_builder",
- "//tensorflow_models/object_detection/builders:preprocessor_builder",
- "//tensorflow_models/object_detection/core:batcher",
- "//tensorflow_models/object_detection/core:preprocessor",
- "//tensorflow_models/object_detection/core:standard_fields",
- "//tensorflow_models/object_detection/utils:ops",
- "//tensorflow_models/object_detection/utils:variables_helper",
- "//tensorflow_models/slim:model_deploy",
- ],
-)
-
-py_test(
- name = "trainer_test",
- srcs = ["trainer_test.py"],
- deps = [
- ":trainer",
- "//tensorflow",
- "//tensorflow_models/object_detection/core:losses",
- "//tensorflow_models/object_detection/core:model",
- "//tensorflow_models/object_detection/core:standard_fields",
- "//tensorflow_models/object_detection/protos:train_py_pb2",
- ],
-)
-
-py_library(
- name = "eval_util",
- srcs = [
- "eval_util.py",
- ],
- deps = [
- "//tensorflow",
- "//tensorflow_models/object_detection/core:box_list",
- "//tensorflow_models/object_detection/core:box_list_ops",
- "//tensorflow_models/object_detection/core:keypoint_ops",
- "//tensorflow_models/object_detection/core:standard_fields",
- "//tensorflow_models/object_detection/utils:label_map_util",
- "//tensorflow_models/object_detection/utils:ops",
- "//tensorflow_models/object_detection/utils:visualization_utils",
- ],
-)
-
-py_library(
- name = "evaluator",
- srcs = ["evaluator.py"],
- deps = [
- "//tensorflow",
- "//tensorflow_models/object_detection:eval_util",
- "//tensorflow_models/object_detection/core:prefetcher",
- "//tensorflow_models/object_detection/core:standard_fields",
- "//tensorflow_models/object_detection/protos:eval_py_pb2",
- "//tensorflow_models/object_detection/utils:object_detection_evaluation",
- ],
-)
-
-py_binary(
- name = "eval",
- srcs = [
- "eval.py",
- ],
- deps = [
- ":evaluator",
- "//tensorflow",
- "//tensorflow_models/object_detection/builders:input_reader_builder",
- "//tensorflow_models/object_detection/builders:model_builder",
- "//tensorflow_models/object_detection/utils:config_util",
- "//tensorflow_models/object_detection/utils:label_map_util",
- ],
-)
-
-py_library(
- name = "exporter",
- srcs = [
- "exporter.py",
- ],
- deps = [
- "//tensorflow",
- "//tensorflow/python/tools:freeze_graph_lib",
- "//tensorflow_models/object_detection/builders:model_builder",
- "//tensorflow_models/object_detection/core:standard_fields",
- "//tensorflow_models/object_detection/data_decoders:tf_example_decoder",
- ],
-)
-
-py_test(
- name = "exporter_test",
- srcs = [
- "exporter_test.py",
- ],
- deps = [
- ":exporter",
- "//tensorflow",
- "//tensorflow_models/object_detection/builders:model_builder",
- "//tensorflow_models/object_detection/core:model",
- "//tensorflow_models/object_detection/protos:pipeline_py_pb2",
- ],
-)
-
-py_binary(
- name = "export_inference_graph",
- srcs = [
- "export_inference_graph.py",
- ],
- deps = [
- ":exporter",
- "//tensorflow",
- "//tensorflow_models/object_detection/protos:pipeline_py_pb2",
- ],
-)
diff --git a/object_detection/CONTRIBUTING.md b/object_detection/CONTRIBUTING.md
deleted file mode 100644
index e3d87e3c..00000000
--- a/object_detection/CONTRIBUTING.md
+++ /dev/null
@@ -1,13 +0,0 @@
-# Contributing to the Tensorflow Object Detection API
-
-Patches to Tensorflow Object Detection API are welcome!
-
-We require contributors to fill out either the individual or corporate
-Contributor License Agreement (CLA).
-
- * If you are an individual writing original source code and you're sure you own the intellectual property, then you'll need to sign an [individual CLA](http://code.google.com/legal/individual-cla-v1.0.html).
- * If you work for a company that wants to allow you to contribute your work, then you'll need to sign a [corporate CLA](http://code.google.com/legal/corporate-cla-v1.0.html).
-
-Please follow the
-[Tensorflow contributing guidelines](https://github.com/tensorflow/tensorflow/blob/master/CONTRIBUTING.md)
-when submitting pull requests.
diff --git a/object_detection/README.md b/object_detection/README.md
deleted file mode 100644
index a0d8ddf2..00000000
--- a/object_detection/README.md
+++ /dev/null
@@ -1,155 +0,0 @@
-
-# Tensorflow Object Detection API
-Creating accurate machine learning models capable of localizing and identifying
-multiple objects in a single image remains a core challenge in computer vision.
-The TensorFlow Object Detection API is an open source framework built on top of
-TensorFlow that makes it easy to construct, train and deploy object detection
-models. At Google we’ve certainly found this codebase to be useful for our
-computer vision needs, and we hope that you will as well.
-
-
-
-Contributions to the codebase are welcome and we would love to hear back from
-you if you find this API useful. Finally if you use the Tensorflow Object
-Detection API for a research publication, please consider citing:
-
-```
-"Speed/accuracy trade-offs for modern convolutional object detectors."
-Huang J, Rathod V, Sun C, Zhu M, Korattikara A, Fathi A, Fischer I, Wojna Z,
-Song Y, Guadarrama S, Murphy K, CVPR 2017
-```
-\[[link](https://arxiv.org/abs/1611.10012)\]\[[bibtex](
-https://scholar.googleusercontent.com/scholar.bib?q=info:l291WsrB-hQJ:scholar.google.com/&output=citation&scisig=AAGBfm0AAAAAWUIIlnPZ_L9jxvPwcC49kDlELtaeIyU-&scisf=4&ct=citation&cd=-1&hl=en&scfhb=1)\]
-
-
-
-
-
-## Maintainers
-
-* Jonathan Huang, github: [jch1](https://github.com/jch1)
-* Vivek Rathod, github: [tombstone](https://github.com/tombstone)
-* Derek Chow, github: [derekjchow](https://github.com/derekjchow)
-* Chen Sun, github: [jesu9](https://github.com/jesu9)
-* Menglong Zhu, github: [dreamdragon](https://github.com/dreamdragon)
-
-
-## Table of contents
-
-Quick Start:
-
- *
- Quick Start: Jupyter notebook for off-the-shelf inference
- * Quick Start: Training a pet detector
-
-Setup:
-
- * Installation
- *
- Configuring an object detection pipeline
- * Preparing inputs
-
-Running:
-
- * Running locally
- * Running on the cloud
-
-Extras:
-
- * Tensorflow detection model zoo
- *
- Exporting a trained model for inference
- *
- Defining your own model architecture
- *
- Bringing in your own dataset
- *
- Supported object detection evaluation protocols
- *
- Inference and evaluation on the Open Images dataset
-
-## Getting Help
-
-To get help with issues you may encounter using the Tensorflow Object Detection
-API, create a new question on [StackOverflow](https://stackoverflow.com/) with
-the tags "tensorflow" and "object-detection".
-
-Please report bugs (actually broken code, not usage questions) to the
-tensorflow/models Github
-[issue tracker](https://github.com/tensorflow/models/issues), prefixing the
-issue name with "object_detection".
-
-
-
-## Release information
-
-### November 17, 2017
-
-As a part of the Open Images V3 release we have released:
-
-* An implementation of the Open Images evaluation metric and the [protocol](g3doc/evaluation_protocols.md#open-images).
-* Additional tools to separate inference of detection and evaluation (see [this tutorial](g3doc/oid_inference_and_evaluation.md)).
-* A new detection model trained on the Open Images V2 data release (see [Open Images model](g3doc/detection_model_zoo.md#open-images-models)).
-
-See more information on the [Open Images website](https://github.com/openimages/dataset)!
-
-Thanks to contributors: Stefan Popov, Alina Kuznetsova
-
-### November 6, 2017
-
-We have re-released faster versions of our (pre-trained) models in the
-model zoo. In addition to what
-was available before, we are also adding Faster R-CNN models trained on COCO
-with Inception V2 and Resnet-50 feature extractors, as well as a Faster R-CNN
-with Resnet-101 model trained on the KITTI dataset.
-
-Thanks to contributors: Jonathan Huang, Vivek Rathod, Derek Chow,
-Tal Remez, Chen Sun.
-
-### October 31, 2017
-
-We have released a new state-of-the-art model for object detection using
-the Faster-RCNN with the
-[NASNet-A image featurization](https://arxiv.org/abs/1707.07012). This
-model achieves mAP of 43.1% on the test-dev validation dataset for COCO,
-improving on the best available model in the zoo by 6% in terms
-of absolute mAP.
-
-Thanks to contributors: Barret Zoph, Vijay Vasudevan, Jonathon Shlens, Quoc Le
-
-### August 11, 2017
-
-We have released an update to the [Android Detect
-demo](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android)
-which will now run models trained using the Tensorflow Object
-Detection API on an Android device. By default, it currently runs a
-frozen SSD w/Mobilenet detector trained on COCO, but we encourage
-you to try out other detection models!
-
-Thanks to contributors: Jonathan Huang, Andrew Harp
-
-
-### June 15, 2017
-
-In addition to our base Tensorflow detection model definitions, this
-release includes:
-
-* A selection of trainable detection models, including:
- * Single Shot Multibox Detector (SSD) with MobileNet,
- * SSD with Inception V2,
- * Region-Based Fully Convolutional Networks (R-FCN) with Resnet 101,
- * Faster RCNN with Resnet 101,
- * Faster RCNN with Inception Resnet v2
-* Frozen weights (trained on the COCO dataset) for each of the above models to
- be used for out-of-the-box inference purposes.
-* A [Jupyter notebook](object_detection_tutorial.ipynb) for performing
- out-of-the-box inference with one of our released models
-* Convenient [local training](g3doc/running_locally.md) scripts as well as
- distributed training and evaluation pipelines via
- [Google Cloud](g3doc/running_on_cloud.md).
-
-
-Thanks to contributors: Jonathan Huang, Vivek Rathod, Derek Chow,
-Chen Sun, Menglong Zhu, Matthew Tang, Anoop Korattikara, Alireza Fathi, Ian Fischer, Zbigniew Wojna, Yang Song, Sergio Guadarrama, Jasper Uijlings,
-Viacheslav Kovalevskyi, Kevin Murphy
-
diff --git a/object_detection/__init__.py b/object_detection/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/object_detection/__pycache__/__init__.cpython-35.pyc b/object_detection/__pycache__/__init__.cpython-35.pyc
deleted file mode 100644
index a9a89667..00000000
Binary files a/object_detection/__pycache__/__init__.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/__pycache__/exporter.cpython-35.pyc b/object_detection/__pycache__/exporter.cpython-35.pyc
deleted file mode 100644
index b0148785..00000000
Binary files a/object_detection/__pycache__/exporter.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/anchor_generators/BUILD b/object_detection/anchor_generators/BUILD
deleted file mode 100644
index cb421a0c..00000000
--- a/object_detection/anchor_generators/BUILD
+++ /dev/null
@@ -1,56 +0,0 @@
-# Tensorflow Object Detection API: Anchor Generator implementations.
-
-package(
- default_visibility = ["//visibility:public"],
-)
-
-licenses(["notice"])
-
-# Apache 2.0
-py_library(
- name = "grid_anchor_generator",
- srcs = [
- "grid_anchor_generator.py",
- ],
- deps = [
- "//tensorflow",
- "//tensorflow_models/object_detection/core:anchor_generator",
- "//tensorflow_models/object_detection/core:box_list",
- "//tensorflow_models/object_detection/utils:ops",
- ],
-)
-
-py_test(
- name = "grid_anchor_generator_test",
- srcs = [
- "grid_anchor_generator_test.py",
- ],
- deps = [
- ":grid_anchor_generator",
- "//tensorflow",
- ],
-)
-
-py_library(
- name = "multiple_grid_anchor_generator",
- srcs = [
- "multiple_grid_anchor_generator.py",
- ],
- deps = [
- ":grid_anchor_generator",
- "//tensorflow",
- "//tensorflow_models/object_detection/core:anchor_generator",
- "//tensorflow_models/object_detection/core:box_list_ops",
- ],
-)
-
-py_test(
- name = "multiple_grid_anchor_generator_test",
- srcs = [
- "multiple_grid_anchor_generator_test.py",
- ],
- deps = [
- ":multiple_grid_anchor_generator",
- "//third_party/py/numpy",
- ],
-)
diff --git a/object_detection/anchor_generators/__init__.py b/object_detection/anchor_generators/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/object_detection/anchor_generators/__pycache__/__init__.cpython-35.pyc b/object_detection/anchor_generators/__pycache__/__init__.cpython-35.pyc
deleted file mode 100644
index 23341928..00000000
Binary files a/object_detection/anchor_generators/__pycache__/__init__.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/anchor_generators/__pycache__/grid_anchor_generator.cpython-35.pyc b/object_detection/anchor_generators/__pycache__/grid_anchor_generator.cpython-35.pyc
deleted file mode 100644
index 1d43e0c5..00000000
Binary files a/object_detection/anchor_generators/__pycache__/grid_anchor_generator.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/anchor_generators/__pycache__/multiple_grid_anchor_generator.cpython-35.pyc b/object_detection/anchor_generators/__pycache__/multiple_grid_anchor_generator.cpython-35.pyc
deleted file mode 100644
index e3e39400..00000000
Binary files a/object_detection/anchor_generators/__pycache__/multiple_grid_anchor_generator.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/anchor_generators/grid_anchor_generator.py b/object_detection/anchor_generators/grid_anchor_generator.py
deleted file mode 100644
index d2ea2c07..00000000
--- a/object_detection/anchor_generators/grid_anchor_generator.py
+++ /dev/null
@@ -1,194 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Generates grid anchors on the fly as used in Faster RCNN.
-
-Generates grid anchors on the fly as described in:
-"Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"
-Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun.
-"""
-
-import tensorflow as tf
-
-from object_detection.core import anchor_generator
-from object_detection.core import box_list
-from object_detection.utils import ops
-
-
-class GridAnchorGenerator(anchor_generator.AnchorGenerator):
- """Generates a grid of anchors at given scales and aspect ratios."""
-
- def __init__(self,
- scales=(0.5, 1.0, 2.0),
- aspect_ratios=(0.5, 1.0, 2.0),
- base_anchor_size=None,
- anchor_stride=None,
- anchor_offset=None):
- """Constructs a GridAnchorGenerator.
-
- Args:
- scales: a list of (float) scales, default=(0.5, 1.0, 2.0)
- aspect_ratios: a list of (float) aspect ratios, default=(0.5, 1.0, 2.0)
- base_anchor_size: base anchor size as height, width (
- (length-2 float32 list, default=[256, 256])
- anchor_stride: difference in centers between base anchors for adjacent
- grid positions (length-2 float32 list, default=[16, 16])
- anchor_offset: center of the anchor with scale and aspect ratio 1 for the
- upper left element of the grid, this should be zero for
- feature networks with only VALID padding and even receptive
- field size, but may need additional calculation if other
- padding is used (length-2 float32 tensor, default=[0, 0])
- """
- # Handle argument defaults
- if base_anchor_size is None:
- base_anchor_size = [256, 256]
- base_anchor_size = tf.constant(base_anchor_size, tf.float32)
- if anchor_stride is None:
- anchor_stride = [16, 16]
- anchor_stride = tf.constant(anchor_stride, dtype=tf.float32)
- if anchor_offset is None:
- anchor_offset = [0, 0]
- anchor_offset = tf.constant(anchor_offset, dtype=tf.float32)
-
- self._scales = scales
- self._aspect_ratios = aspect_ratios
- self._base_anchor_size = base_anchor_size
- self._anchor_stride = anchor_stride
- self._anchor_offset = anchor_offset
-
- def name_scope(self):
- return 'GridAnchorGenerator'
-
- def num_anchors_per_location(self):
- """Returns the number of anchors per spatial location.
-
- Returns:
- a list of integers, one for each expected feature map to be passed to
- the `generate` function.
- """
- return [len(self._scales) * len(self._aspect_ratios)]
-
- def _generate(self, feature_map_shape_list):
- """Generates a collection of bounding boxes to be used as anchors.
-
- Args:
- feature_map_shape_list: list of pairs of convnet layer resolutions in the
- format [(height_0, width_0)]. For example, setting
- feature_map_shape_list=[(8, 8)] asks for anchors that correspond
- to an 8x8 layer. For this anchor generator, only lists of length 1 are
- allowed.
-
- Returns:
- boxes: a BoxList holding a collection of N anchor boxes
- Raises:
- ValueError: if feature_map_shape_list, box_specs_list do not have the same
- length.
- ValueError: if feature_map_shape_list does not consist of pairs of
- integers
- """
- if not (isinstance(feature_map_shape_list, list)
- and len(feature_map_shape_list) == 1):
- raise ValueError('feature_map_shape_list must be a list of length 1.')
- if not all([isinstance(list_item, tuple) and len(list_item) == 2
- for list_item in feature_map_shape_list]):
- raise ValueError('feature_map_shape_list must be a list of pairs.')
- grid_height, grid_width = feature_map_shape_list[0]
- scales_grid, aspect_ratios_grid = ops.meshgrid(self._scales,
- self._aspect_ratios)
- scales_grid = tf.reshape(scales_grid, [-1])
- aspect_ratios_grid = tf.reshape(aspect_ratios_grid, [-1])
- return tile_anchors(grid_height,
- grid_width,
- scales_grid,
- aspect_ratios_grid,
- self._base_anchor_size,
- self._anchor_stride,
- self._anchor_offset)
-
-
-def tile_anchors(grid_height,
- grid_width,
- scales,
- aspect_ratios,
- base_anchor_size,
- anchor_stride,
- anchor_offset):
- """Create a tiled set of anchors strided along a grid in image space.
-
- This op creates a set of anchor boxes by placing a "basis" collection of
- boxes with user-specified scales and aspect ratios centered at evenly
- distributed points along a grid. The basis collection is specified via the
- scale and aspect_ratios arguments. For example, setting scales=[.1, .2, .2]
- and aspect ratios = [2,2,1/2] means that we create three boxes: one with scale
- .1, aspect ratio 2, one with scale .2, aspect ratio 2, and one with scale .2
- and aspect ratio 1/2. Each box is multiplied by "base_anchor_size" before
- placing it over its respective center.
-
- Grid points are specified via grid_height, grid_width parameters as well as
- the anchor_stride and anchor_offset parameters.
-
- Args:
- grid_height: size of the grid in the y direction (int or int scalar tensor)
- grid_width: size of the grid in the x direction (int or int scalar tensor)
- scales: a 1-d (float) tensor representing the scale of each box in the
- basis set.
- aspect_ratios: a 1-d (float) tensor representing the aspect ratio of each
- box in the basis set. The length of the scales and aspect_ratios tensors
- must be equal.
- base_anchor_size: base anchor size as [height, width]
- (float tensor of shape [2])
- anchor_stride: difference in centers between base anchors for adjacent grid
- positions (float tensor of shape [2])
- anchor_offset: center of the anchor with scale and aspect ratio 1 for the
- upper left element of the grid, this should be zero for
- feature networks with only VALID padding and even receptive
- field size, but may need some additional calculation if other
- padding is used (float tensor of shape [2])
- Returns:
- a BoxList holding a collection of N anchor boxes
- """
- ratio_sqrts = tf.sqrt(aspect_ratios)
- heights = scales / ratio_sqrts * base_anchor_size[0]
- widths = scales * ratio_sqrts * base_anchor_size[1]
-
- # Get a grid of box centers
- y_centers = tf.to_float(tf.range(grid_height))
- y_centers = y_centers * anchor_stride[0] + anchor_offset[0]
- x_centers = tf.to_float(tf.range(grid_width))
- x_centers = x_centers * anchor_stride[1] + anchor_offset[1]
- x_centers, y_centers = ops.meshgrid(x_centers, y_centers)
-
- widths_grid, x_centers_grid = ops.meshgrid(widths, x_centers)
- heights_grid, y_centers_grid = ops.meshgrid(heights, y_centers)
- bbox_centers = tf.stack([y_centers_grid, x_centers_grid], axis=3)
- bbox_sizes = tf.stack([heights_grid, widths_grid], axis=3)
- bbox_centers = tf.reshape(bbox_centers, [-1, 2])
- bbox_sizes = tf.reshape(bbox_sizes, [-1, 2])
- bbox_corners = _center_size_bbox_to_corners_bbox(bbox_centers, bbox_sizes)
- return box_list.BoxList(bbox_corners)
-
-
-def _center_size_bbox_to_corners_bbox(centers, sizes):
- """Converts bbox center-size representation to corners representation.
-
- Args:
- centers: a tensor with shape [N, 2] representing bounding box centers
- sizes: a tensor with shape [N, 2] representing bounding boxes
-
- Returns:
- corners: tensor with shape [N, 4] representing bounding boxes in corners
- representation
- """
- return tf.concat([centers - .5 * sizes, centers + .5 * sizes], 1)
diff --git a/object_detection/anchor_generators/grid_anchor_generator_test.py b/object_detection/anchor_generators/grid_anchor_generator_test.py
deleted file mode 100644
index 80a82a39..00000000
--- a/object_detection/anchor_generators/grid_anchor_generator_test.py
+++ /dev/null
@@ -1,76 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.grid_anchor_generator."""
-
-import tensorflow as tf
-
-from object_detection.anchor_generators import grid_anchor_generator
-
-
-class GridAnchorGeneratorTest(tf.test.TestCase):
-
- def test_construct_single_anchor(self):
- """Builds a 1x1 anchor grid to test the size of the output boxes."""
- scales = [0.5, 1.0, 2.0]
- aspect_ratios = [0.25, 1.0, 4.0]
- anchor_offset = [7, -3]
- exp_anchor_corners = [[-121, -35, 135, 29], [-249, -67, 263, 61],
- [-505, -131, 519, 125], [-57, -67, 71, 61],
- [-121, -131, 135, 125], [-249, -259, 263, 253],
- [-25, -131, 39, 125], [-57, -259, 71, 253],
- [-121, -515, 135, 509]]
-
- anchor_generator = grid_anchor_generator.GridAnchorGenerator(
- scales, aspect_ratios,
- anchor_offset=anchor_offset)
- anchors = anchor_generator.generate(feature_map_shape_list=[(1, 1)])
- anchor_corners = anchors.get()
-
- with self.test_session():
- anchor_corners_out = anchor_corners.eval()
- self.assertAllClose(anchor_corners_out, exp_anchor_corners)
-
- def test_construct_anchor_grid(self):
- base_anchor_size = [10, 10]
- anchor_stride = [19, 19]
- anchor_offset = [0, 0]
- scales = [0.5, 1.0, 2.0]
- aspect_ratios = [1.0]
-
- exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.],
- [-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5],
- [-5., 14., 5, 24], [-10., 9., 10, 29],
- [16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5],
- [9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5],
- [14., 14., 24, 24], [9., 9., 29, 29]]
-
- anchor_generator = grid_anchor_generator.GridAnchorGenerator(
- scales,
- aspect_ratios,
- base_anchor_size=base_anchor_size,
- anchor_stride=anchor_stride,
- anchor_offset=anchor_offset)
-
- anchors = anchor_generator.generate(feature_map_shape_list=[(2, 2)])
- anchor_corners = anchors.get()
-
- with self.test_session():
- anchor_corners_out = anchor_corners.eval()
- self.assertAllClose(anchor_corners_out, exp_anchor_corners)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/anchor_generators/multiple_grid_anchor_generator.py b/object_detection/anchor_generators/multiple_grid_anchor_generator.py
deleted file mode 100644
index b49f12dc..00000000
--- a/object_detection/anchor_generators/multiple_grid_anchor_generator.py
+++ /dev/null
@@ -1,338 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Generates grid anchors on the fly corresponding to multiple CNN layers.
-
-Generates grid anchors on the fly corresponding to multiple CNN layers as
-described in:
-"SSD: Single Shot MultiBox Detector"
-Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
-Cheng-Yang Fu, Alexander C. Berg
-(see Section 2.2: Choosing scales and aspect ratios for default boxes)
-"""
-
-import numpy as np
-
-import tensorflow as tf
-
-from object_detection.anchor_generators import grid_anchor_generator
-from object_detection.core import anchor_generator
-from object_detection.core import box_list_ops
-
-
-class MultipleGridAnchorGenerator(anchor_generator.AnchorGenerator):
- """Generate a grid of anchors for multiple CNN layers."""
-
- def __init__(self,
- box_specs_list,
- base_anchor_size=None,
- anchor_strides=None,
- anchor_offsets=None,
- clip_window=None):
- """Constructs a MultipleGridAnchorGenerator.
-
- To construct anchors, at multiple grid resolutions, one must provide a
- list of feature_map_shape_list (e.g., [(8, 8), (4, 4)]), and for each grid
- size, a corresponding list of (scale, aspect ratio) box specifications.
-
- For example:
- box_specs_list = [[(.1, 1.0), (.1, 2.0)], # for 8x8 grid
- [(.2, 1.0), (.3, 1.0), (.2, 2.0)]] # for 4x4 grid
-
- To support the fully convolutional setting, we pass grid sizes in at
- generation time, while scale and aspect ratios are fixed at construction
- time.
-
- Args:
- box_specs_list: list of list of (scale, aspect ratio) pairs with the
- outside list having the same number of entries as feature_map_shape_list
- (which is passed in at generation time).
- base_anchor_size: base anchor size as [height, width]
- (length-2 float tensor, default=[1.0, 1.0]).
- The height and width values are normalized to the
- minimum dimension of the input height and width, so that
- when the base anchor height equals the base anchor
- width, the resulting anchor is square even if the input
- image is not square.
- anchor_strides: list of pairs of strides in pixels (in y and x directions
- respectively). For example, setting anchor_strides=[(25, 25), (50, 50)]
- means that we want the anchors corresponding to the first layer to be
- strided by 25 pixels and those in the second layer to be strided by 50
- pixels in both y and x directions. If anchor_strides=None, they are set
- to be the reciprocal of the corresponding feature map shapes.
- anchor_offsets: list of pairs of offsets in pixels (in y and x directions
- respectively). The offset specifies where we want the center of the
- (0, 0)-th anchor to lie for each layer. For example, setting
- anchor_offsets=[(10, 10), (20, 20)]) means that we want the
- (0, 0)-th anchor of the first layer to lie at (10, 10) in pixel space
- and likewise that we want the (0, 0)-th anchor of the second layer to
- lie at (25, 25) in pixel space. If anchor_offsets=None, then they are
- set to be half of the corresponding anchor stride.
- clip_window: a tensor of shape [4] specifying a window to which all
- anchors should be clipped. If clip_window is None, then no clipping
- is performed.
-
- Raises:
- ValueError: if box_specs_list is not a list of list of pairs
- ValueError: if clip_window is not either None or a tensor of shape [4]
- """
- if isinstance(box_specs_list, list) and all(
- [isinstance(list_item, list) for list_item in box_specs_list]):
- self._box_specs = box_specs_list
- else:
- raise ValueError('box_specs_list is expected to be a '
- 'list of lists of pairs')
- if base_anchor_size is None:
- base_anchor_size = tf.constant([256, 256], dtype=tf.float32)
- self._base_anchor_size = base_anchor_size
- self._anchor_strides = anchor_strides
- self._anchor_offsets = anchor_offsets
- if clip_window is not None and clip_window.get_shape().as_list() != [4]:
- raise ValueError('clip_window must either be None or a shape [4] tensor')
- self._clip_window = clip_window
- self._scales = []
- self._aspect_ratios = []
- for box_spec in self._box_specs:
- if not all([isinstance(entry, tuple) and len(entry) == 2
- for entry in box_spec]):
- raise ValueError('box_specs_list is expected to be a '
- 'list of lists of pairs')
- scales, aspect_ratios = zip(*box_spec)
- self._scales.append(scales)
- self._aspect_ratios.append(aspect_ratios)
-
- for arg, arg_name in zip([self._anchor_strides, self._anchor_offsets],
- ['anchor_strides', 'anchor_offsets']):
- if arg and not (isinstance(arg, list) and
- len(arg) == len(self._box_specs)):
- raise ValueError('%s must be a list with the same length '
- 'as self._box_specs' % arg_name)
- if arg and not all([
- isinstance(list_item, tuple) and len(list_item) == 2
- for list_item in arg
- ]):
- raise ValueError('%s must be a list of pairs.' % arg_name)
-
- def name_scope(self):
- return 'MultipleGridAnchorGenerator'
-
- def num_anchors_per_location(self):
- """Returns the number of anchors per spatial location.
-
- Returns:
- a list of integers, one for each expected feature map to be passed to
- the Generate function.
- """
- return [len(box_specs) for box_specs in self._box_specs]
-
- def _generate(self, feature_map_shape_list, im_height=1, im_width=1):
- """Generates a collection of bounding boxes to be used as anchors.
-
- The number of anchors generated for a single grid with shape MxM where we
- place k boxes over each grid center is k*M^2 and thus the total number of
- anchors is the sum over all grids. In our box_specs_list example
- (see the constructor docstring), we would place two boxes over each grid
- point on an 8x8 grid and three boxes over each grid point on a 4x4 grid and
- thus end up with 2*8^2 + 3*4^2 = 176 anchors in total. The layout of the
- output anchors follows the order of how the grid sizes and box_specs are
- specified (with box_spec index varying the fastest, followed by width
- index, then height index, then grid index).
-
- Args:
- feature_map_shape_list: list of pairs of convnet layer resolutions in the
- format [(height_0, width_0), (height_1, width_1), ...]. For example,
- setting feature_map_shape_list=[(8, 8), (7, 7)] asks for anchors that
- correspond to an 8x8 layer followed by a 7x7 layer.
- im_height: the height of the image to generate the grid for. If both
- im_height and im_width are 1, the generated anchors default to
- normalized coordinates, otherwise absolute coordinates are used for the
- grid.
- im_width: the width of the image to generate the grid for. If both
- im_height and im_width are 1, the generated anchors default to
- normalized coordinates, otherwise absolute coordinates are used for the
- grid.
-
- Returns:
- boxes: a BoxList holding a collection of N anchor boxes
- Raises:
- ValueError: if feature_map_shape_list, box_specs_list do not have the same
- length.
- ValueError: if feature_map_shape_list does not consist of pairs of
- integers
- """
- if not (isinstance(feature_map_shape_list, list)
- and len(feature_map_shape_list) == len(self._box_specs)):
- raise ValueError('feature_map_shape_list must be a list with the same '
- 'length as self._box_specs')
- if not all([isinstance(list_item, tuple) and len(list_item) == 2
- for list_item in feature_map_shape_list]):
- raise ValueError('feature_map_shape_list must be a list of pairs.')
-
- im_height = tf.to_float(im_height)
- im_width = tf.to_float(im_width)
-
- if not self._anchor_strides:
- anchor_strides = [(1.0 / tf.to_float(pair[0]), 1.0 / tf.to_float(pair[1]))
- for pair in feature_map_shape_list]
- else:
- anchor_strides = [(tf.to_float(stride[0]) / im_height,
- tf.to_float(stride[1]) / im_width)
- for stride in self._anchor_strides]
- if not self._anchor_offsets:
- anchor_offsets = [(0.5 * stride[0], 0.5 * stride[1])
- for stride in anchor_strides]
- else:
- anchor_offsets = [(tf.to_float(offset[0]) / im_height,
- tf.to_float(offset[1]) / im_width)
- for offset in self._anchor_offsets]
-
- for arg, arg_name in zip([anchor_strides, anchor_offsets],
- ['anchor_strides', 'anchor_offsets']):
- if not (isinstance(arg, list) and len(arg) == len(self._box_specs)):
- raise ValueError('%s must be a list with the same length '
- 'as self._box_specs' % arg_name)
- if not all([isinstance(list_item, tuple) and len(list_item) == 2
- for list_item in arg]):
- raise ValueError('%s must be a list of pairs.' % arg_name)
-
- anchor_grid_list = []
- min_im_shape = tf.minimum(im_height, im_width)
- scale_height = min_im_shape / im_height
- scale_width = min_im_shape / im_width
- base_anchor_size = [
- scale_height * self._base_anchor_size[0],
- scale_width * self._base_anchor_size[1]
- ]
- for grid_size, scales, aspect_ratios, stride, offset in zip(
- feature_map_shape_list, self._scales, self._aspect_ratios,
- anchor_strides, anchor_offsets):
- anchor_grid_list.append(
- grid_anchor_generator.tile_anchors(
- grid_height=grid_size[0],
- grid_width=grid_size[1],
- scales=scales,
- aspect_ratios=aspect_ratios,
- base_anchor_size=base_anchor_size,
- anchor_stride=stride,
- anchor_offset=offset))
- concatenated_anchors = box_list_ops.concatenate(anchor_grid_list)
- num_anchors = concatenated_anchors.num_boxes_static()
- if num_anchors is None:
- num_anchors = concatenated_anchors.num_boxes()
- if self._clip_window is not None:
- concatenated_anchors = box_list_ops.clip_to_window(
- concatenated_anchors, self._clip_window, filter_nonoverlapping=False)
- # TODO(jonathanhuang): make reshape an option for the clip_to_window op
- concatenated_anchors.set(
- tf.reshape(concatenated_anchors.get(), [num_anchors, 4]))
-
- stddevs_tensor = 0.01 * tf.ones(
- [num_anchors, 4], dtype=tf.float32, name='stddevs')
- concatenated_anchors.add_field('stddev', stddevs_tensor)
-
- return concatenated_anchors
-
-
-def create_ssd_anchors(num_layers=6,
- min_scale=0.2,
- max_scale=0.95,
- scales=None,
- aspect_ratios=(1.0, 2.0, 3.0, 1.0 / 2, 1.0 / 3),
- interpolated_scale_aspect_ratio=1.0,
- base_anchor_size=None,
- anchor_strides=None,
- anchor_offsets=None,
- reduce_boxes_in_lowest_layer=True):
- """Creates MultipleGridAnchorGenerator for SSD anchors.
-
- This function instantiates a MultipleGridAnchorGenerator that reproduces
- ``default box`` construction proposed by Liu et al in the SSD paper.
- See Section 2.2 for details. Grid sizes are assumed to be passed in
- at generation time from finest resolution to coarsest resolution --- this is
- used to (linearly) interpolate scales of anchor boxes corresponding to the
- intermediate grid sizes.
-
- Anchors that are returned by calling the `generate` method on the returned
- MultipleGridAnchorGenerator object are always in normalized coordinates
- and clipped to the unit square: (i.e. all coordinates lie in [0, 1]x[0, 1]).
-
- Args:
- num_layers: integer number of grid layers to create anchors for (actual
- grid sizes passed in at generation time)
- min_scale: scale of anchors corresponding to finest resolution (float)
- max_scale: scale of anchors corresponding to coarsest resolution (float)
- scales: As list of anchor scales to use. When not None and not emtpy,
- min_scale and max_scale are not used.
- aspect_ratios: list or tuple of (float) aspect ratios to place on each
- grid point.
- interpolated_scale_aspect_ratio: An additional anchor is added with this
- aspect ratio and a scale interpolated between the scale for a layer
- and the scale for the next layer (1.0 for the last layer).
- This anchor is not included if this value is 0.
- base_anchor_size: base anchor size as [height, width].
- The height and width values are normalized to the minimum dimension of the
- input height and width, so that when the base anchor height equals the
- base anchor width, the resulting anchor is square even if the input image
- is not square.
- anchor_strides: list of pairs of strides in pixels (in y and x directions
- respectively). For example, setting anchor_strides=[(25, 25), (50, 50)]
- means that we want the anchors corresponding to the first layer to be
- strided by 25 pixels and those in the second layer to be strided by 50
- pixels in both y and x directions. If anchor_strides=None, they are set to
- be the reciprocal of the corresponding feature map shapes.
- anchor_offsets: list of pairs of offsets in pixels (in y and x directions
- respectively). The offset specifies where we want the center of the
- (0, 0)-th anchor to lie for each layer. For example, setting
- anchor_offsets=[(10, 10), (20, 20)]) means that we want the
- (0, 0)-th anchor of the first layer to lie at (10, 10) in pixel space
- and likewise that we want the (0, 0)-th anchor of the second layer to lie
- at (25, 25) in pixel space. If anchor_offsets=None, then they are set to
- be half of the corresponding anchor stride.
- reduce_boxes_in_lowest_layer: a boolean to indicate whether the fixed 3
- boxes per location is used in the lowest layer.
-
- Returns:
- a MultipleGridAnchorGenerator
- """
- if base_anchor_size is None:
- base_anchor_size = [1.0, 1.0]
- base_anchor_size = tf.constant(base_anchor_size, dtype=tf.float32)
- box_specs_list = []
- if scales is None or not scales:
- scales = [min_scale + (max_scale - min_scale) * i / (num_layers - 1)
- for i in range(num_layers)] + [1.0]
- else:
- # Add 1.0 to the end, which will only be used in scale_next below and used
- # for computing an interpolated scale for the largest scale in the list.
- scales += [1.0]
-
- for layer, scale, scale_next in zip(
- range(num_layers), scales[:-1], scales[1:]):
- layer_box_specs = []
- if layer == 0 and reduce_boxes_in_lowest_layer:
- layer_box_specs = [(0.1, 1.0), (scale, 2.0), (scale, 0.5)]
- else:
- for aspect_ratio in aspect_ratios:
- layer_box_specs.append((scale, aspect_ratio))
- # Add one more anchor, with a scale between the current scale, and the
- # scale for the next layer, with a specified aspect ratio (1.0 by
- # default).
- if interpolated_scale_aspect_ratio > 0.0:
- layer_box_specs.append((np.sqrt(scale*scale_next),
- interpolated_scale_aspect_ratio))
- box_specs_list.append(layer_box_specs)
-
- return MultipleGridAnchorGenerator(box_specs_list, base_anchor_size,
- anchor_strides, anchor_offsets)
diff --git a/object_detection/anchor_generators/multiple_grid_anchor_generator_test.py b/object_detection/anchor_generators/multiple_grid_anchor_generator_test.py
deleted file mode 100644
index 03ec970b..00000000
--- a/object_detection/anchor_generators/multiple_grid_anchor_generator_test.py
+++ /dev/null
@@ -1,267 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for anchor_generators.multiple_grid_anchor_generator_test.py."""
-
-import numpy as np
-
-import tensorflow as tf
-
-from object_detection.anchor_generators import multiple_grid_anchor_generator as ag
-
-
-class MultipleGridAnchorGeneratorTest(tf.test.TestCase):
-
- def test_construct_single_anchor_grid(self):
- """Builds a 1x1 anchor grid to test the size of the output boxes."""
- exp_anchor_corners = [[-121, -35, 135, 29], [-249, -67, 263, 61],
- [-505, -131, 519, 125], [-57, -67, 71, 61],
- [-121, -131, 135, 125], [-249, -259, 263, 253],
- [-25, -131, 39, 125], [-57, -259, 71, 253],
- [-121, -515, 135, 509]]
-
- box_specs_list = [[(.5, .25), (1.0, .25), (2.0, .25),
- (.5, 1.0), (1.0, 1.0), (2.0, 1.0),
- (.5, 4.0), (1.0, 4.0), (2.0, 4.0)]]
- anchor_generator = ag.MultipleGridAnchorGenerator(
- box_specs_list,
- base_anchor_size=tf.constant([256, 256], dtype=tf.float32),
- anchor_strides=[(16, 16)],
- anchor_offsets=[(7, -3)])
- anchors = anchor_generator.generate(feature_map_shape_list=[(1, 1)])
- anchor_corners = anchors.get()
- with self.test_session():
- anchor_corners_out = anchor_corners.eval()
- self.assertAllClose(anchor_corners_out, exp_anchor_corners)
-
- def test_construct_anchor_grid(self):
- box_specs_list = [[(0.5, 1.0), (1.0, 1.0), (2.0, 1.0)]]
-
- exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.],
- [-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5],
- [-5., 14., 5, 24], [-10., 9., 10, 29],
- [16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5],
- [9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5],
- [14., 14., 24, 24], [9., 9., 29, 29]]
-
- anchor_generator = ag.MultipleGridAnchorGenerator(
- box_specs_list,
- base_anchor_size=tf.constant([10, 10], dtype=tf.float32),
- anchor_strides=[(19, 19)],
- anchor_offsets=[(0, 0)])
- anchors = anchor_generator.generate(feature_map_shape_list=[(2, 2)])
- anchor_corners = anchors.get()
-
- with self.test_session():
- anchor_corners_out = anchor_corners.eval()
- self.assertAllClose(anchor_corners_out, exp_anchor_corners)
-
- def test_construct_anchor_grid_non_square(self):
- box_specs_list = [[(1.0, 1.0)]]
-
- exp_anchor_corners = [[0., -0.25, 1., 0.75], [0., 0.25, 1., 1.25]]
-
- anchor_generator = ag.MultipleGridAnchorGenerator(
- box_specs_list, base_anchor_size=tf.constant([1, 1], dtype=tf.float32))
- anchors = anchor_generator.generate(feature_map_shape_list=[(tf.constant(
- 1, dtype=tf.int32), tf.constant(2, dtype=tf.int32))])
- anchor_corners = anchors.get()
-
- with self.test_session():
- anchor_corners_out = anchor_corners.eval()
- self.assertAllClose(anchor_corners_out, exp_anchor_corners)
-
- def test_construct_anchor_grid_normalized(self):
- box_specs_list = [[(1.0, 1.0)]]
-
- exp_anchor_corners = [[0., 0., 1., 0.5], [0., 0.5, 1., 1.]]
-
- anchor_generator = ag.MultipleGridAnchorGenerator(
- box_specs_list, base_anchor_size=tf.constant([1, 1], dtype=tf.float32))
- anchors = anchor_generator.generate(
- feature_map_shape_list=[(tf.constant(1, dtype=tf.int32), tf.constant(
- 2, dtype=tf.int32))],
- im_height=320,
- im_width=640)
- anchor_corners = anchors.get()
-
- with self.test_session():
- anchor_corners_out = anchor_corners.eval()
- self.assertAllClose(anchor_corners_out, exp_anchor_corners)
-
- def test_construct_multiple_grids(self):
- box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
- [(1.0, 1.0), (1.0, 0.5)]]
-
- # height and width of box with .5 aspect ratio
- h = np.sqrt(2)
- w = 1.0/np.sqrt(2)
- exp_small_grid_corners = [[-.25, -.25, .75, .75],
- [.25-.5*h, .25-.5*w, .25+.5*h, .25+.5*w],
- [-.25, .25, .75, 1.25],
- [.25-.5*h, .75-.5*w, .25+.5*h, .75+.5*w],
- [.25, -.25, 1.25, .75],
- [.75-.5*h, .25-.5*w, .75+.5*h, .25+.5*w],
- [.25, .25, 1.25, 1.25],
- [.75-.5*h, .75-.5*w, .75+.5*h, .75+.5*w]]
- # only test first entry of larger set of anchors
- exp_big_grid_corners = [[.125-.5, .125-.5, .125+.5, .125+.5],
- [.125-1.0, .125-1.0, .125+1.0, .125+1.0],
- [.125-.5*h, .125-.5*w, .125+.5*h, .125+.5*w],]
-
- anchor_generator = ag.MultipleGridAnchorGenerator(
- box_specs_list,
- base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
- anchor_strides=[(.25, .25), (.5, .5)],
- anchor_offsets=[(.125, .125), (.25, .25)])
- anchors = anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)])
- anchor_corners = anchors.get()
-
- with self.test_session():
- anchor_corners_out = anchor_corners.eval()
- self.assertEquals(anchor_corners_out.shape, (56, 4))
- big_grid_corners = anchor_corners_out[0:3, :]
- small_grid_corners = anchor_corners_out[48:, :]
- self.assertAllClose(small_grid_corners, exp_small_grid_corners)
- self.assertAllClose(big_grid_corners, exp_big_grid_corners)
-
- def test_construct_multiple_grids_with_clipping(self):
- box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
- [(1.0, 1.0), (1.0, 0.5)]]
-
- # height and width of box with .5 aspect ratio
- h = np.sqrt(2)
- w = 1.0/np.sqrt(2)
- exp_small_grid_corners = [[0, 0, .75, .75],
- [0, 0, .25+.5*h, .25+.5*w],
- [0, .25, .75, 1],
- [0, .75-.5*w, .25+.5*h, 1],
- [.25, 0, 1, .75],
- [.75-.5*h, 0, 1, .25+.5*w],
- [.25, .25, 1, 1],
- [.75-.5*h, .75-.5*w, 1, 1]]
-
- clip_window = tf.constant([0, 0, 1, 1], dtype=tf.float32)
- anchor_generator = ag.MultipleGridAnchorGenerator(
- box_specs_list,
- base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
- clip_window=clip_window)
- anchors = anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)])
- anchor_corners = anchors.get()
-
- with self.test_session():
- anchor_corners_out = anchor_corners.eval()
- small_grid_corners = anchor_corners_out[48:, :]
- self.assertAllClose(small_grid_corners, exp_small_grid_corners)
-
- def test_invalid_box_specs(self):
- # not all box specs are pairs
- box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
- [(1.0, 1.0), (1.0, 0.5, .3)]]
- with self.assertRaises(ValueError):
- ag.MultipleGridAnchorGenerator(box_specs_list)
-
- # box_specs_list is not a list of lists
- box_specs_list = [(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)]
- with self.assertRaises(ValueError):
- ag.MultipleGridAnchorGenerator(box_specs_list)
-
- def test_invalid_generate_arguments(self):
- box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
- [(1.0, 1.0), (1.0, 0.5)]]
-
- # incompatible lengths with box_specs_list
- with self.assertRaises(ValueError):
- anchor_generator = ag.MultipleGridAnchorGenerator(
- box_specs_list,
- base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
- anchor_strides=[(.25, .25)],
- anchor_offsets=[(.125, .125), (.25, .25)])
- anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)])
- with self.assertRaises(ValueError):
- anchor_generator = ag.MultipleGridAnchorGenerator(
- box_specs_list,
- base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
- anchor_strides=[(.25, .25), (.5, .5)],
- anchor_offsets=[(.125, .125), (.25, .25)])
- anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2), (1, 1)])
- with self.assertRaises(ValueError):
- anchor_generator = ag.MultipleGridAnchorGenerator(
- box_specs_list,
- base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
- anchor_strides=[(.5, .5)],
- anchor_offsets=[(.25, .25)])
- anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)])
-
- # not pairs
- with self.assertRaises(ValueError):
- anchor_generator = ag.MultipleGridAnchorGenerator(
- box_specs_list,
- base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
- anchor_strides=[(.25, .25), (.5, .5)],
- anchor_offsets=[(.125, .125), (.25, .25)])
- anchor_generator.generate(feature_map_shape_list=[(4, 4, 4), (2, 2)])
- with self.assertRaises(ValueError):
- anchor_generator = ag.MultipleGridAnchorGenerator(
- box_specs_list,
- base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
- anchor_strides=[(.25, .25, .1), (.5, .5)],
- anchor_offsets=[(.125, .125), (.25, .25)])
- anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)])
- with self.assertRaises(ValueError):
- anchor_generator = ag.MultipleGridAnchorGenerator(
- box_specs_list,
- base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
- anchor_strides=[(.25, .25), (.5, .5)],
- anchor_offsets=[(.125, .125), (.25, .25)])
- anchor_generator.generate(feature_map_shape_list=[(4), (2, 2)])
-
-
-class CreateSSDAnchorsTest(tf.test.TestCase):
-
- def test_create_ssd_anchors_returns_correct_shape(self):
- anchor_generator = ag.create_ssd_anchors(
- num_layers=6,
- min_scale=0.2,
- max_scale=0.95,
- aspect_ratios=(1.0, 2.0, 3.0, 1.0 / 2, 1.0 / 3),
- reduce_boxes_in_lowest_layer=True)
-
- feature_map_shape_list = [(38, 38), (19, 19), (10, 10),
- (5, 5), (3, 3), (1, 1)]
- anchors = anchor_generator.generate(
- feature_map_shape_list=feature_map_shape_list)
- anchor_corners = anchors.get()
- with self.test_session():
- anchor_corners_out = anchor_corners.eval()
- self.assertEquals(anchor_corners_out.shape, (7308, 4))
-
- anchor_generator = ag.create_ssd_anchors(
- num_layers=6, min_scale=0.2, max_scale=0.95,
- aspect_ratios=(1.0, 2.0, 3.0, 1.0/2, 1.0/3),
- reduce_boxes_in_lowest_layer=False)
-
- feature_map_shape_list = [(38, 38), (19, 19), (10, 10),
- (5, 5), (3, 3), (1, 1)]
- anchors = anchor_generator.generate(
- feature_map_shape_list=feature_map_shape_list)
- anchor_corners = anchors.get()
- with self.test_session():
- anchor_corners_out = anchor_corners.eval()
- self.assertEquals(anchor_corners_out.shape, (11640, 4))
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/box_coders/BUILD b/object_detection/box_coders/BUILD
deleted file mode 100644
index ecb3cc7a..00000000
--- a/object_detection/box_coders/BUILD
+++ /dev/null
@@ -1,102 +0,0 @@
-# Tensorflow Object Detection API: Box Coder implementations.
-
-package(
- default_visibility = ["//visibility:public"],
-)
-
-licenses(["notice"])
-
-# Apache 2.0
-py_library(
- name = "faster_rcnn_box_coder",
- srcs = [
- "faster_rcnn_box_coder.py",
- ],
- deps = [
- "//tensorflow_models/object_detection/core:box_coder",
- "//tensorflow_models/object_detection/core:box_list",
- ],
-)
-
-py_test(
- name = "faster_rcnn_box_coder_test",
- srcs = [
- "faster_rcnn_box_coder_test.py",
- ],
- deps = [
- ":faster_rcnn_box_coder",
- "//tensorflow",
- "//tensorflow_models/object_detection/core:box_list",
- ],
-)
-
-py_library(
- name = "keypoint_box_coder",
- srcs = [
- "keypoint_box_coder.py",
- ],
- deps = [
- "//tensorflow_models/object_detection/core:box_coder",
- "//tensorflow_models/object_detection/core:box_list",
- "//tensorflow_models/object_detection/core:standard_fields",
- ],
-)
-
-py_test(
- name = "keypoint_box_coder_test",
- srcs = [
- "keypoint_box_coder_test.py",
- ],
- deps = [
- ":keypoint_box_coder",
- "//tensorflow",
- "//tensorflow_models/object_detection/core:box_list",
- "//tensorflow_models/object_detection/core:standard_fields",
- ],
-)
-
-py_library(
- name = "mean_stddev_box_coder",
- srcs = [
- "mean_stddev_box_coder.py",
- ],
- deps = [
- "//tensorflow_models/object_detection/core:box_coder",
- "//tensorflow_models/object_detection/core:box_list",
- ],
-)
-
-py_test(
- name = "mean_stddev_box_coder_test",
- srcs = [
- "mean_stddev_box_coder_test.py",
- ],
- deps = [
- ":mean_stddev_box_coder",
- "//tensorflow",
- "//tensorflow_models/object_detection/core:box_list",
- ],
-)
-
-py_library(
- name = "square_box_coder",
- srcs = [
- "square_box_coder.py",
- ],
- deps = [
- "//tensorflow_models/object_detection/core:box_coder",
- "//tensorflow_models/object_detection/core:box_list",
- ],
-)
-
-py_test(
- name = "square_box_coder_test",
- srcs = [
- "square_box_coder_test.py",
- ],
- deps = [
- ":square_box_coder",
- "//tensorflow",
- "//tensorflow_models/object_detection/core:box_list",
- ],
-)
diff --git a/object_detection/box_coders/__init__.py b/object_detection/box_coders/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/object_detection/box_coders/__pycache__/__init__.cpython-35.pyc b/object_detection/box_coders/__pycache__/__init__.cpython-35.pyc
deleted file mode 100644
index 975284e4..00000000
Binary files a/object_detection/box_coders/__pycache__/__init__.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/box_coders/__pycache__/faster_rcnn_box_coder.cpython-35.pyc b/object_detection/box_coders/__pycache__/faster_rcnn_box_coder.cpython-35.pyc
deleted file mode 100644
index 8dcd397f..00000000
Binary files a/object_detection/box_coders/__pycache__/faster_rcnn_box_coder.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/box_coders/__pycache__/keypoint_box_coder.cpython-35.pyc b/object_detection/box_coders/__pycache__/keypoint_box_coder.cpython-35.pyc
deleted file mode 100644
index f0c7151d..00000000
Binary files a/object_detection/box_coders/__pycache__/keypoint_box_coder.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/box_coders/__pycache__/mean_stddev_box_coder.cpython-35.pyc b/object_detection/box_coders/__pycache__/mean_stddev_box_coder.cpython-35.pyc
deleted file mode 100644
index fbee9205..00000000
Binary files a/object_detection/box_coders/__pycache__/mean_stddev_box_coder.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/box_coders/__pycache__/square_box_coder.cpython-35.pyc b/object_detection/box_coders/__pycache__/square_box_coder.cpython-35.pyc
deleted file mode 100644
index 6f3a5280..00000000
Binary files a/object_detection/box_coders/__pycache__/square_box_coder.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/box_coders/faster_rcnn_box_coder.py b/object_detection/box_coders/faster_rcnn_box_coder.py
deleted file mode 100644
index af25e21a..00000000
--- a/object_detection/box_coders/faster_rcnn_box_coder.py
+++ /dev/null
@@ -1,118 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Faster RCNN box coder.
-
-Faster RCNN box coder follows the coding schema described below:
- ty = (y - ya) / ha
- tx = (x - xa) / wa
- th = log(h / ha)
- tw = log(w / wa)
- where x, y, w, h denote the box's center coordinates, width and height
- respectively. Similarly, xa, ya, wa, ha denote the anchor's center
- coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
- center, width and height respectively.
-
- See http://arxiv.org/abs/1506.01497 for details.
-"""
-
-import tensorflow as tf
-
-from object_detection.core import box_coder
-from object_detection.core import box_list
-
-EPSILON = 1e-8
-
-
-class FasterRcnnBoxCoder(box_coder.BoxCoder):
- """Faster RCNN box coder."""
-
- def __init__(self, scale_factors=None):
- """Constructor for FasterRcnnBoxCoder.
-
- Args:
- scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
- If set to None, does not perform scaling. For Faster RCNN,
- the open-source implementation recommends using [10.0, 10.0, 5.0, 5.0].
- """
- if scale_factors:
- assert len(scale_factors) == 4
- for scalar in scale_factors:
- assert scalar > 0
- self._scale_factors = scale_factors
-
- @property
- def code_size(self):
- return 4
-
- def _encode(self, boxes, anchors):
- """Encode a box collection with respect to anchor collection.
-
- Args:
- boxes: BoxList holding N boxes to be encoded.
- anchors: BoxList of anchors.
-
- Returns:
- a tensor representing N anchor-encoded boxes of the format
- [ty, tx, th, tw].
- """
- # Convert anchors to the center coordinate representation.
- ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
- ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
- # Avoid NaN in division and log below.
- ha += EPSILON
- wa += EPSILON
- h += EPSILON
- w += EPSILON
-
- tx = (xcenter - xcenter_a) / wa
- ty = (ycenter - ycenter_a) / ha
- tw = tf.log(w / wa)
- th = tf.log(h / ha)
- # Scales location targets as used in paper for joint training.
- if self._scale_factors:
- ty *= self._scale_factors[0]
- tx *= self._scale_factors[1]
- th *= self._scale_factors[2]
- tw *= self._scale_factors[3]
- return tf.transpose(tf.stack([ty, tx, th, tw]))
-
- def _decode(self, rel_codes, anchors):
- """Decode relative codes to boxes.
-
- Args:
- rel_codes: a tensor representing N anchor-encoded boxes.
- anchors: BoxList of anchors.
-
- Returns:
- boxes: BoxList holding N bounding boxes.
- """
- ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
-
- ty, tx, th, tw = tf.unstack(tf.transpose(rel_codes))
- if self._scale_factors:
- ty /= self._scale_factors[0]
- tx /= self._scale_factors[1]
- th /= self._scale_factors[2]
- tw /= self._scale_factors[3]
- w = tf.exp(tw) * wa
- h = tf.exp(th) * ha
- ycenter = ty * ha + ycenter_a
- xcenter = tx * wa + xcenter_a
- ymin = ycenter - h / 2.
- xmin = xcenter - w / 2.
- ymax = ycenter + h / 2.
- xmax = xcenter + w / 2.
- return box_list.BoxList(tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))
diff --git a/object_detection/box_coders/faster_rcnn_box_coder_test.py b/object_detection/box_coders/faster_rcnn_box_coder_test.py
deleted file mode 100644
index b2135f06..00000000
--- a/object_detection/box_coders/faster_rcnn_box_coder_test.py
+++ /dev/null
@@ -1,94 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.box_coder.faster_rcnn_box_coder."""
-
-import tensorflow as tf
-
-from object_detection.box_coders import faster_rcnn_box_coder
-from object_detection.core import box_list
-
-
-class FasterRcnnBoxCoderTest(tf.test.TestCase):
-
- def test_get_correct_relative_codes_after_encoding(self):
- boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
- anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
- expected_rel_codes = [[-0.5, -0.416666, -0.405465, -0.182321],
- [-0.083333, -0.222222, -0.693147, -1.098612]]
- boxes = box_list.BoxList(tf.constant(boxes))
- anchors = box_list.BoxList(tf.constant(anchors))
- coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
- rel_codes = coder.encode(boxes, anchors)
- with self.test_session() as sess:
- rel_codes_out, = sess.run([rel_codes])
- self.assertAllClose(rel_codes_out, expected_rel_codes)
-
- def test_get_correct_relative_codes_after_encoding_with_scaling(self):
- boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
- anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
- scale_factors = [2, 3, 4, 5]
- expected_rel_codes = [[-1., -1.25, -1.62186, -0.911608],
- [-0.166667, -0.666667, -2.772588, -5.493062]]
- boxes = box_list.BoxList(tf.constant(boxes))
- anchors = box_list.BoxList(tf.constant(anchors))
- coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
- scale_factors=scale_factors)
- rel_codes = coder.encode(boxes, anchors)
- with self.test_session() as sess:
- rel_codes_out, = sess.run([rel_codes])
- self.assertAllClose(rel_codes_out, expected_rel_codes)
-
- def test_get_correct_boxes_after_decoding(self):
- anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
- rel_codes = [[-0.5, -0.416666, -0.405465, -0.182321],
- [-0.083333, -0.222222, -0.693147, -1.098612]]
- expected_boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
- anchors = box_list.BoxList(tf.constant(anchors))
- coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
- boxes = coder.decode(rel_codes, anchors)
- with self.test_session() as sess:
- boxes_out, = sess.run([boxes.get()])
- self.assertAllClose(boxes_out, expected_boxes)
-
- def test_get_correct_boxes_after_decoding_with_scaling(self):
- anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
- rel_codes = [[-1., -1.25, -1.62186, -0.911608],
- [-0.166667, -0.666667, -2.772588, -5.493062]]
- scale_factors = [2, 3, 4, 5]
- expected_boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
- anchors = box_list.BoxList(tf.constant(anchors))
- coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
- scale_factors=scale_factors)
- boxes = coder.decode(rel_codes, anchors)
- with self.test_session() as sess:
- boxes_out, = sess.run([boxes.get()])
- self.assertAllClose(boxes_out, expected_boxes)
-
- def test_very_small_Width_nan_after_encoding(self):
- boxes = [[10.0, 10.0, 10.0000001, 20.0]]
- anchors = [[15.0, 12.0, 30.0, 18.0]]
- expected_rel_codes = [[-0.833333, 0., -21.128731, 0.510826]]
- boxes = box_list.BoxList(tf.constant(boxes))
- anchors = box_list.BoxList(tf.constant(anchors))
- coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
- rel_codes = coder.encode(boxes, anchors)
- with self.test_session() as sess:
- rel_codes_out, = sess.run([rel_codes])
- self.assertAllClose(rel_codes_out, expected_rel_codes)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/box_coders/keypoint_box_coder.py b/object_detection/box_coders/keypoint_box_coder.py
deleted file mode 100644
index 67df3b82..00000000
--- a/object_detection/box_coders/keypoint_box_coder.py
+++ /dev/null
@@ -1,171 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Keypoint box coder.
-
-The keypoint box coder follows the coding schema described below (this is
-similar to the FasterRcnnBoxCoder, except that it encodes keypoints in addition
-to box coordinates):
- ty = (y - ya) / ha
- tx = (x - xa) / wa
- th = log(h / ha)
- tw = log(w / wa)
- tky0 = (ky0 - ya) / ha
- tkx0 = (kx0 - xa) / wa
- tky1 = (ky1 - ya) / ha
- tkx1 = (kx1 - xa) / wa
- ...
- where x, y, w, h denote the box's center coordinates, width and height
- respectively. Similarly, xa, ya, wa, ha denote the anchor's center
- coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
- center, width and height respectively. ky0, kx0, ky1, kx1, ... denote the
- keypoints' coordinates, and tky0, tkx0, tky1, tkx1, ... denote the
- anchor-encoded keypoint coordinates.
-"""
-
-import tensorflow as tf
-
-from object_detection.core import box_coder
-from object_detection.core import box_list
-from object_detection.core import standard_fields as fields
-
-EPSILON = 1e-8
-
-
-class KeypointBoxCoder(box_coder.BoxCoder):
- """Keypoint box coder."""
-
- def __init__(self, num_keypoints, scale_factors=None):
- """Constructor for KeypointBoxCoder.
-
- Args:
- num_keypoints: Number of keypoints to encode/decode.
- scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
- In addition to scaling ty and tx, the first 2 scalars are used to scale
- the y and x coordinates of the keypoints as well. If set to None, does
- not perform scaling.
- """
- self._num_keypoints = num_keypoints
-
- if scale_factors:
- assert len(scale_factors) == 4
- for scalar in scale_factors:
- assert scalar > 0
- self._scale_factors = scale_factors
- self._keypoint_scale_factors = None
- if scale_factors is not None:
- self._keypoint_scale_factors = tf.expand_dims(tf.tile(
- [tf.to_float(scale_factors[0]), tf.to_float(scale_factors[1])],
- [num_keypoints]), 1)
-
- @property
- def code_size(self):
- return 4 + self._num_keypoints * 2
-
- def _encode(self, boxes, anchors):
- """Encode a box and keypoint collection with respect to anchor collection.
-
- Args:
- boxes: BoxList holding N boxes and keypoints to be encoded. Boxes are
- tensors with the shape [N, 4], and keypoints are tensors with the shape
- [N, num_keypoints, 2].
- anchors: BoxList of anchors.
-
- Returns:
- a tensor representing N anchor-encoded boxes of the format
- [ty, tx, th, tw, tky0, tkx0, tky1, tkx1, ...] where tky0 and tkx0
- represent the y and x coordinates of the first keypoint, tky1 and tkx1
- represent the y and x coordinates of the second keypoint, and so on.
- """
- # Convert anchors to the center coordinate representation.
- ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
- ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
- keypoints = boxes.get_field(fields.BoxListFields.keypoints)
- keypoints = tf.transpose(tf.reshape(keypoints,
- [-1, self._num_keypoints * 2]))
- num_boxes = boxes.num_boxes()
-
- # Avoid NaN in division and log below.
- ha += EPSILON
- wa += EPSILON
- h += EPSILON
- w += EPSILON
-
- tx = (xcenter - xcenter_a) / wa
- ty = (ycenter - ycenter_a) / ha
- tw = tf.log(w / wa)
- th = tf.log(h / ha)
-
- tiled_anchor_centers = tf.tile(
- tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1])
- tiled_anchor_sizes = tf.tile(
- tf.stack([ha, wa]), [self._num_keypoints, 1])
- tkeypoints = (keypoints - tiled_anchor_centers) / tiled_anchor_sizes
-
- # Scales location targets as used in paper for joint training.
- if self._scale_factors:
- ty *= self._scale_factors[0]
- tx *= self._scale_factors[1]
- th *= self._scale_factors[2]
- tw *= self._scale_factors[3]
- tkeypoints *= tf.tile(self._keypoint_scale_factors, [1, num_boxes])
-
- tboxes = tf.stack([ty, tx, th, tw])
- return tf.transpose(tf.concat([tboxes, tkeypoints], 0))
-
- def _decode(self, rel_codes, anchors):
- """Decode relative codes to boxes and keypoints.
-
- Args:
- rel_codes: a tensor with shape [N, 4 + 2 * num_keypoints] representing N
- anchor-encoded boxes and keypoints
- anchors: BoxList of anchors.
-
- Returns:
- boxes: BoxList holding N bounding boxes and keypoints.
- """
- ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
-
- num_codes = tf.shape(rel_codes)[0]
- result = tf.unstack(tf.transpose(rel_codes))
- ty, tx, th, tw = result[:4]
- tkeypoints = result[4:]
- if self._scale_factors:
- ty /= self._scale_factors[0]
- tx /= self._scale_factors[1]
- th /= self._scale_factors[2]
- tw /= self._scale_factors[3]
- tkeypoints /= tf.tile(self._keypoint_scale_factors, [1, num_codes])
-
- w = tf.exp(tw) * wa
- h = tf.exp(th) * ha
- ycenter = ty * ha + ycenter_a
- xcenter = tx * wa + xcenter_a
- ymin = ycenter - h / 2.
- xmin = xcenter - w / 2.
- ymax = ycenter + h / 2.
- xmax = xcenter + w / 2.
- decoded_boxes_keypoints = box_list.BoxList(
- tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))
-
- tiled_anchor_centers = tf.tile(
- tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1])
- tiled_anchor_sizes = tf.tile(
- tf.stack([ha, wa]), [self._num_keypoints, 1])
- keypoints = tkeypoints * tiled_anchor_sizes + tiled_anchor_centers
- keypoints = tf.reshape(tf.transpose(keypoints),
- [-1, self._num_keypoints, 2])
- decoded_boxes_keypoints.add_field(fields.BoxListFields.keypoints, keypoints)
- return decoded_boxes_keypoints
diff --git a/object_detection/box_coders/keypoint_box_coder_test.py b/object_detection/box_coders/keypoint_box_coder_test.py
deleted file mode 100644
index 330641e5..00000000
--- a/object_detection/box_coders/keypoint_box_coder_test.py
+++ /dev/null
@@ -1,140 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.box_coder.keypoint_box_coder."""
-
-import tensorflow as tf
-
-from object_detection.box_coders import keypoint_box_coder
-from object_detection.core import box_list
-from object_detection.core import standard_fields as fields
-
-
-class KeypointBoxCoderTest(tf.test.TestCase):
-
- def test_get_correct_relative_codes_after_encoding(self):
- boxes = [[10., 10., 20., 15.],
- [0.2, 0.1, 0.5, 0.4]]
- keypoints = [[[15., 12.], [10., 15.]],
- [[0.5, 0.3], [0.2, 0.4]]]
- num_keypoints = len(keypoints[0])
- anchors = [[15., 12., 30., 18.],
- [0.1, 0.0, 0.7, 0.9]]
- expected_rel_codes = [
- [-0.5, -0.416666, -0.405465, -0.182321,
- -0.5, -0.5, -0.833333, 0.],
- [-0.083333, -0.222222, -0.693147, -1.098612,
- 0.166667, -0.166667, -0.333333, -0.055556]
- ]
- boxes = box_list.BoxList(tf.constant(boxes))
- boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints))
- anchors = box_list.BoxList(tf.constant(anchors))
- coder = keypoint_box_coder.KeypointBoxCoder(num_keypoints)
- rel_codes = coder.encode(boxes, anchors)
- with self.test_session() as sess:
- rel_codes_out, = sess.run([rel_codes])
- self.assertAllClose(rel_codes_out, expected_rel_codes)
-
- def test_get_correct_relative_codes_after_encoding_with_scaling(self):
- boxes = [[10., 10., 20., 15.],
- [0.2, 0.1, 0.5, 0.4]]
- keypoints = [[[15., 12.], [10., 15.]],
- [[0.5, 0.3], [0.2, 0.4]]]
- num_keypoints = len(keypoints[0])
- anchors = [[15., 12., 30., 18.],
- [0.1, 0.0, 0.7, 0.9]]
- scale_factors = [2, 3, 4, 5]
- expected_rel_codes = [
- [-1., -1.25, -1.62186, -0.911608,
- -1.0, -1.5, -1.666667, 0.],
- [-0.166667, -0.666667, -2.772588, -5.493062,
- 0.333333, -0.5, -0.666667, -0.166667]
- ]
- boxes = box_list.BoxList(tf.constant(boxes))
- boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints))
- anchors = box_list.BoxList(tf.constant(anchors))
- coder = keypoint_box_coder.KeypointBoxCoder(
- num_keypoints, scale_factors=scale_factors)
- rel_codes = coder.encode(boxes, anchors)
- with self.test_session() as sess:
- rel_codes_out, = sess.run([rel_codes])
- self.assertAllClose(rel_codes_out, expected_rel_codes)
-
- def test_get_correct_boxes_after_decoding(self):
- anchors = [[15., 12., 30., 18.],
- [0.1, 0.0, 0.7, 0.9]]
- rel_codes = [
- [-0.5, -0.416666, -0.405465, -0.182321,
- -0.5, -0.5, -0.833333, 0.],
- [-0.083333, -0.222222, -0.693147, -1.098612,
- 0.166667, -0.166667, -0.333333, -0.055556]
- ]
- expected_boxes = [[10., 10., 20., 15.],
- [0.2, 0.1, 0.5, 0.4]]
- expected_keypoints = [[[15., 12.], [10., 15.]],
- [[0.5, 0.3], [0.2, 0.4]]]
- num_keypoints = len(expected_keypoints[0])
- anchors = box_list.BoxList(tf.constant(anchors))
- coder = keypoint_box_coder.KeypointBoxCoder(num_keypoints)
- boxes = coder.decode(rel_codes, anchors)
- with self.test_session() as sess:
- boxes_out, keypoints_out = sess.run(
- [boxes.get(), boxes.get_field(fields.BoxListFields.keypoints)])
- self.assertAllClose(boxes_out, expected_boxes)
- self.assertAllClose(keypoints_out, expected_keypoints)
-
- def test_get_correct_boxes_after_decoding_with_scaling(self):
- anchors = [[15., 12., 30., 18.],
- [0.1, 0.0, 0.7, 0.9]]
- rel_codes = [
- [-1., -1.25, -1.62186, -0.911608,
- -1.0, -1.5, -1.666667, 0.],
- [-0.166667, -0.666667, -2.772588, -5.493062,
- 0.333333, -0.5, -0.666667, -0.166667]
- ]
- scale_factors = [2, 3, 4, 5]
- expected_boxes = [[10., 10., 20., 15.],
- [0.2, 0.1, 0.5, 0.4]]
- expected_keypoints = [[[15., 12.], [10., 15.]],
- [[0.5, 0.3], [0.2, 0.4]]]
- num_keypoints = len(expected_keypoints[0])
- anchors = box_list.BoxList(tf.constant(anchors))
- coder = keypoint_box_coder.KeypointBoxCoder(
- num_keypoints, scale_factors=scale_factors)
- boxes = coder.decode(rel_codes, anchors)
- with self.test_session() as sess:
- boxes_out, keypoints_out = sess.run(
- [boxes.get(), boxes.get_field(fields.BoxListFields.keypoints)])
- self.assertAllClose(boxes_out, expected_boxes)
- self.assertAllClose(keypoints_out, expected_keypoints)
-
- def test_very_small_width_nan_after_encoding(self):
- boxes = [[10., 10., 10.0000001, 20.]]
- keypoints = [[[10., 10.], [10.0000001, 20.]]]
- anchors = [[15., 12., 30., 18.]]
- expected_rel_codes = [[-0.833333, 0., -21.128731, 0.510826,
- -0.833333, -0.833333, -0.833333, 0.833333]]
- boxes = box_list.BoxList(tf.constant(boxes))
- boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints))
- anchors = box_list.BoxList(tf.constant(anchors))
- coder = keypoint_box_coder.KeypointBoxCoder(2)
- rel_codes = coder.encode(boxes, anchors)
- with self.test_session() as sess:
- rel_codes_out, = sess.run([rel_codes])
- self.assertAllClose(rel_codes_out, expected_rel_codes)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/box_coders/mean_stddev_box_coder.py b/object_detection/box_coders/mean_stddev_box_coder.py
deleted file mode 100644
index 726b4a61..00000000
--- a/object_detection/box_coders/mean_stddev_box_coder.py
+++ /dev/null
@@ -1,70 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Mean stddev box coder.
-
-This box coder use the following coding schema to encode boxes:
-rel_code = (box_corner - anchor_corner_mean) / anchor_corner_stddev.
-"""
-from object_detection.core import box_coder
-from object_detection.core import box_list
-
-
-class MeanStddevBoxCoder(box_coder.BoxCoder):
- """Mean stddev box coder."""
-
- @property
- def code_size(self):
- return 4
-
- def _encode(self, boxes, anchors):
- """Encode a box collection with respect to anchor collection.
-
- Args:
- boxes: BoxList holding N boxes to be encoded.
- anchors: BoxList of N anchors. We assume that anchors has an associated
- stddev field.
-
- Returns:
- a tensor representing N anchor-encoded boxes
- Raises:
- ValueError: if the anchors BoxList does not have a stddev field
- """
- if not anchors.has_field('stddev'):
- raise ValueError('anchors must have a stddev field')
- box_corners = boxes.get()
- means = anchors.get()
- stddev = anchors.get_field('stddev')
- return (box_corners - means) / stddev
-
- def _decode(self, rel_codes, anchors):
- """Decode.
-
- Args:
- rel_codes: a tensor representing N anchor-encoded boxes.
- anchors: BoxList of anchors. We assume that anchors has an associated
- stddev field.
-
- Returns:
- boxes: BoxList holding N bounding boxes
- Raises:
- ValueError: if the anchors BoxList does not have a stddev field
- """
- if not anchors.has_field('stddev'):
- raise ValueError('anchors must have a stddev field')
- means = anchors.get()
- stddevs = anchors.get_field('stddev')
- box_corners = rel_codes * stddevs + means
- return box_list.BoxList(box_corners)
diff --git a/object_detection/box_coders/mean_stddev_box_coder_test.py b/object_detection/box_coders/mean_stddev_box_coder_test.py
deleted file mode 100644
index 0d3a8952..00000000
--- a/object_detection/box_coders/mean_stddev_box_coder_test.py
+++ /dev/null
@@ -1,58 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.box_coder.mean_stddev_boxcoder."""
-
-import tensorflow as tf
-
-from object_detection.box_coders import mean_stddev_box_coder
-from object_detection.core import box_list
-
-
-class MeanStddevBoxCoderTest(tf.test.TestCase):
-
- def testGetCorrectRelativeCodesAfterEncoding(self):
- box_corners = [[0.0, 0.0, 0.5, 0.5], [0.0, 0.0, 0.5, 0.5]]
- boxes = box_list.BoxList(tf.constant(box_corners))
- expected_rel_codes = [[0.0, 0.0, 0.0, 0.0], [-5.0, -5.0, -5.0, -3.0]]
- prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 1.0, 0.8]])
- prior_stddevs = tf.constant(2 * [4 * [.1]])
- priors = box_list.BoxList(prior_means)
- priors.add_field('stddev', prior_stddevs)
-
- coder = mean_stddev_box_coder.MeanStddevBoxCoder()
- rel_codes = coder.encode(boxes, priors)
- with self.test_session() as sess:
- rel_codes_out = sess.run(rel_codes)
- self.assertAllClose(rel_codes_out, expected_rel_codes)
-
- def testGetCorrectBoxesAfterDecoding(self):
- rel_codes = tf.constant([[0.0, 0.0, 0.0, 0.0], [-5.0, -5.0, -5.0, -3.0]])
- expected_box_corners = [[0.0, 0.0, 0.5, 0.5], [0.0, 0.0, 0.5, 0.5]]
- prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 1.0, 0.8]])
- prior_stddevs = tf.constant(2 * [4 * [.1]])
- priors = box_list.BoxList(prior_means)
- priors.add_field('stddev', prior_stddevs)
-
- coder = mean_stddev_box_coder.MeanStddevBoxCoder()
- decoded_boxes = coder.decode(rel_codes, priors)
- decoded_box_corners = decoded_boxes.get()
- with self.test_session() as sess:
- decoded_out = sess.run(decoded_box_corners)
- self.assertAllClose(decoded_out, expected_box_corners)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/box_coders/square_box_coder.py b/object_detection/box_coders/square_box_coder.py
deleted file mode 100644
index ee46b689..00000000
--- a/object_detection/box_coders/square_box_coder.py
+++ /dev/null
@@ -1,126 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Square box coder.
-
-Square box coder follows the coding schema described below:
-l = sqrt(h * w)
-la = sqrt(ha * wa)
-ty = (y - ya) / la
-tx = (x - xa) / la
-tl = log(l / la)
-where x, y, w, h denote the box's center coordinates, width, and height,
-respectively. Similarly, xa, ya, wa, ha denote the anchor's center
-coordinates, width and height. tx, ty, tl denote the anchor-encoded
-center, and length, respectively. Because the encoded box is a square, only
-one length is encoded.
-
-This has shown to provide performance improvements over the Faster RCNN box
-coder when the objects being detected tend to be square (e.g. faces) and when
-the input images are not distorted via resizing.
-"""
-
-import tensorflow as tf
-
-from object_detection.core import box_coder
-from object_detection.core import box_list
-
-EPSILON = 1e-8
-
-
-class SquareBoxCoder(box_coder.BoxCoder):
- """Encodes a 3-scalar representation of a square box."""
-
- def __init__(self, scale_factors=None):
- """Constructor for SquareBoxCoder.
-
- Args:
- scale_factors: List of 3 positive scalars to scale ty, tx, and tl.
- If set to None, does not perform scaling. For faster RCNN,
- the open-source implementation recommends using [10.0, 10.0, 5.0].
-
- Raises:
- ValueError: If scale_factors is not length 3 or contains values less than
- or equal to 0.
- """
- if scale_factors:
- if len(scale_factors) != 3:
- raise ValueError('The argument scale_factors must be a list of length '
- '3.')
- if any(scalar <= 0 for scalar in scale_factors):
- raise ValueError('The values in scale_factors must all be greater '
- 'than 0.')
- self._scale_factors = scale_factors
-
- @property
- def code_size(self):
- return 3
-
- def _encode(self, boxes, anchors):
- """Encodes a box collection with respect to an anchor collection.
-
- Args:
- boxes: BoxList holding N boxes to be encoded.
- anchors: BoxList of anchors.
-
- Returns:
- a tensor representing N anchor-encoded boxes of the format
- [ty, tx, tl].
- """
- # Convert anchors to the center coordinate representation.
- ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
- la = tf.sqrt(ha * wa)
- ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
- l = tf.sqrt(h * w)
- # Avoid NaN in division and log below.
- la += EPSILON
- l += EPSILON
-
- tx = (xcenter - xcenter_a) / la
- ty = (ycenter - ycenter_a) / la
- tl = tf.log(l / la)
- # Scales location targets for joint training.
- if self._scale_factors:
- ty *= self._scale_factors[0]
- tx *= self._scale_factors[1]
- tl *= self._scale_factors[2]
- return tf.transpose(tf.stack([ty, tx, tl]))
-
- def _decode(self, rel_codes, anchors):
- """Decodes relative codes to boxes.
-
- Args:
- rel_codes: a tensor representing N anchor-encoded boxes.
- anchors: BoxList of anchors.
-
- Returns:
- boxes: BoxList holding N bounding boxes.
- """
- ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
- la = tf.sqrt(ha * wa)
-
- ty, tx, tl = tf.unstack(tf.transpose(rel_codes))
- if self._scale_factors:
- ty /= self._scale_factors[0]
- tx /= self._scale_factors[1]
- tl /= self._scale_factors[2]
- l = tf.exp(tl) * la
- ycenter = ty * la + ycenter_a
- xcenter = tx * la + xcenter_a
- ymin = ycenter - l / 2.
- xmin = xcenter - l / 2.
- ymax = ycenter + l / 2.
- xmax = xcenter + l / 2.
- return box_list.BoxList(tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))
diff --git a/object_detection/box_coders/square_box_coder_test.py b/object_detection/box_coders/square_box_coder_test.py
deleted file mode 100644
index 7f739c6b..00000000
--- a/object_detection/box_coders/square_box_coder_test.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.box_coder.square_box_coder."""
-
-import tensorflow as tf
-
-from object_detection.box_coders import square_box_coder
-from object_detection.core import box_list
-
-
-class SquareBoxCoderTest(tf.test.TestCase):
-
- def test_correct_relative_codes_with_default_scale(self):
- boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
- anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
- scale_factors = None
- expected_rel_codes = [[-0.790569, -0.263523, -0.293893],
- [-0.068041, -0.272166, -0.89588]]
-
- boxes = box_list.BoxList(tf.constant(boxes))
- anchors = box_list.BoxList(tf.constant(anchors))
- coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors)
- rel_codes = coder.encode(boxes, anchors)
- with self.test_session() as sess:
- (rel_codes_out,) = sess.run([rel_codes])
- self.assertAllClose(rel_codes_out, expected_rel_codes)
-
- def test_correct_relative_codes_with_non_default_scale(self):
- boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
- anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
- scale_factors = [2, 3, 4]
- expected_rel_codes = [[-1.581139, -0.790569, -1.175573],
- [-0.136083, -0.816497, -3.583519]]
- boxes = box_list.BoxList(tf.constant(boxes))
- anchors = box_list.BoxList(tf.constant(anchors))
- coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors)
- rel_codes = coder.encode(boxes, anchors)
- with self.test_session() as sess:
- (rel_codes_out,) = sess.run([rel_codes])
- self.assertAllClose(rel_codes_out, expected_rel_codes)
-
- def test_correct_relative_codes_with_small_width(self):
- boxes = [[10.0, 10.0, 10.0000001, 20.0]]
- anchors = [[15.0, 12.0, 30.0, 18.0]]
- scale_factors = None
- expected_rel_codes = [[-1.317616, 0., -20.670586]]
- boxes = box_list.BoxList(tf.constant(boxes))
- anchors = box_list.BoxList(tf.constant(anchors))
- coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors)
- rel_codes = coder.encode(boxes, anchors)
- with self.test_session() as sess:
- (rel_codes_out,) = sess.run([rel_codes])
- self.assertAllClose(rel_codes_out, expected_rel_codes)
-
- def test_correct_boxes_with_default_scale(self):
- anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
- rel_codes = [[-0.5, -0.416666, -0.405465],
- [-0.083333, -0.222222, -0.693147]]
- scale_factors = None
- expected_boxes = [[14.594306, 7.884875, 20.918861, 14.209432],
- [0.155051, 0.102989, 0.522474, 0.470412]]
- anchors = box_list.BoxList(tf.constant(anchors))
- coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors)
- boxes = coder.decode(rel_codes, anchors)
- with self.test_session() as sess:
- (boxes_out,) = sess.run([boxes.get()])
- self.assertAllClose(boxes_out, expected_boxes)
-
- def test_correct_boxes_with_non_default_scale(self):
- anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
- rel_codes = [[-1., -1.25, -1.62186], [-0.166667, -0.666667, -2.772588]]
- scale_factors = [2, 3, 4]
- expected_boxes = [[14.594306, 7.884875, 20.918861, 14.209432],
- [0.155051, 0.102989, 0.522474, 0.470412]]
- anchors = box_list.BoxList(tf.constant(anchors))
- coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors)
- boxes = coder.decode(rel_codes, anchors)
- with self.test_session() as sess:
- (boxes_out,) = sess.run([boxes.get()])
- self.assertAllClose(boxes_out, expected_boxes)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/builders/BUILD b/object_detection/builders/BUILD
deleted file mode 100644
index d1bb3f03..00000000
--- a/object_detection/builders/BUILD
+++ /dev/null
@@ -1,305 +0,0 @@
-# Tensorflow Object Detection API: component builders.
-
-package(
- default_visibility = ["//visibility:public"],
-)
-
-licenses(["notice"])
-
-# Apache 2.0
-py_library(
- name = "model_builder",
- srcs = ["model_builder.py"],
- deps = [
- ":anchor_generator_builder",
- ":box_coder_builder",
- ":box_predictor_builder",
- ":hyperparams_builder",
- ":image_resizer_builder",
- ":losses_builder",
- ":matcher_builder",
- ":post_processing_builder",
- ":region_similarity_calculator_builder",
- "//tensorflow_models/object_detection/core:box_predictor",
- "//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch",
- "//tensorflow_models/object_detection/meta_architectures:rfcn_meta_arch",
- "//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch",
- "//tensorflow_models/object_detection/models:embedded_ssd_mobilenet_v1_feature_extractor",
- "//tensorflow_models/object_detection/models:faster_rcnn_inception_resnet_v2_feature_extractor",
- "//tensorflow_models/object_detection/models:faster_rcnn_inception_v2_feature_extractor",
- "//tensorflow_models/object_detection/models:faster_rcnn_resnet_v1_feature_extractor",
- "//tensorflow_models/object_detection/models:ssd_inception_v2_feature_extractor",
- "//tensorflow_models/object_detection/models:ssd_inception_v3_feature_extractor",
- "//tensorflow_models/object_detection/models:ssd_mobilenet_v1_feature_extractor",
- "//tensorflow_models/object_detection/protos:model_py_pb2",
- ],
-)
-
-py_test(
- name = "model_builder_test",
- srcs = ["model_builder_test.py"],
- deps = [
- ":model_builder",
- "//tensorflow",
- "//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch",
- "//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch",
- "//tensorflow_models/object_detection/models:faster_rcnn_inception_resnet_v2_feature_extractor",
- "//tensorflow_models/object_detection/models:faster_rcnn_inception_v2_feature_extractor",
- "//tensorflow_models/object_detection/models:faster_rcnn_resnet_v1_feature_extractor",
- "//tensorflow_models/object_detection/models:ssd_inception_v2_feature_extractor",
- "//tensorflow_models/object_detection/models:ssd_inception_v3_feature_extractor",
- "//tensorflow_models/object_detection/models:ssd_mobilenet_v1_feature_extractor",
- "//tensorflow_models/object_detection/protos:model_py_pb2",
- ],
-)
-
-py_library(
- name = "matcher_builder",
- srcs = ["matcher_builder.py"],
- deps = [
- "//tensorflow_models/object_detection/matchers:argmax_matcher",
- "//tensorflow_models/object_detection/matchers:bipartite_matcher",
- "//tensorflow_models/object_detection/protos:matcher_py_pb2",
- ],
-)
-
-py_test(
- name = "matcher_builder_test",
- srcs = ["matcher_builder_test.py"],
- deps = [
- ":matcher_builder",
- "//tensorflow_models/object_detection/matchers:argmax_matcher",
- "//tensorflow_models/object_detection/matchers:bipartite_matcher",
- "//tensorflow_models/object_detection/protos:matcher_py_pb2",
- ],
-)
-
-py_library(
- name = "box_coder_builder",
- srcs = ["box_coder_builder.py"],
- deps = [
- "//tensorflow_models/object_detection/box_coders:faster_rcnn_box_coder",
- "//tensorflow_models/object_detection/box_coders:keypoint_box_coder",
- "//tensorflow_models/object_detection/box_coders:mean_stddev_box_coder",
- "//tensorflow_models/object_detection/box_coders:square_box_coder",
- "//tensorflow_models/object_detection/protos:box_coder_py_pb2",
- ],
-)
-
-py_test(
- name = "box_coder_builder_test",
- srcs = ["box_coder_builder_test.py"],
- deps = [
- ":box_coder_builder",
- "//tensorflow",
- "//tensorflow_models/object_detection/box_coders:faster_rcnn_box_coder",
- "//tensorflow_models/object_detection/box_coders:keypoint_box_coder",
- "//tensorflow_models/object_detection/box_coders:mean_stddev_box_coder",
- "//tensorflow_models/object_detection/box_coders:square_box_coder",
- "//tensorflow_models/object_detection/protos:box_coder_py_pb2",
- ],
-)
-
-py_library(
- name = "anchor_generator_builder",
- srcs = ["anchor_generator_builder.py"],
- deps = [
- "//tensorflow_models/object_detection/anchor_generators:grid_anchor_generator",
- "//tensorflow_models/object_detection/anchor_generators:multiple_grid_anchor_generator",
- "//tensorflow_models/object_detection/protos:anchor_generator_py_pb2",
- ],
-)
-
-py_test(
- name = "anchor_generator_builder_test",
- srcs = ["anchor_generator_builder_test.py"],
- deps = [
- ":anchor_generator_builder",
- "//tensorflow",
- "//tensorflow_models/object_detection/anchor_generators:grid_anchor_generator",
- "//tensorflow_models/object_detection/anchor_generators:multiple_grid_anchor_generator",
- "//tensorflow_models/object_detection/protos:anchor_generator_py_pb2",
- ],
-)
-
-py_library(
- name = "input_reader_builder",
- srcs = ["input_reader_builder.py"],
- deps = [
- "//tensorflow",
- "//tensorflow_models/object_detection/data_decoders:tf_example_decoder",
- "//tensorflow_models/object_detection/protos:input_reader_py_pb2",
- ],
-)
-
-py_test(
- name = "input_reader_builder_test",
- srcs = [
- "input_reader_builder_test.py",
- ],
- deps = [
- ":input_reader_builder",
- "//tensorflow",
- "//tensorflow_models/object_detection/core:standard_fields",
- "//tensorflow_models/object_detection/protos:input_reader_py_pb2",
- ],
-)
-
-py_library(
- name = "losses_builder",
- srcs = ["losses_builder.py"],
- deps = [
- "//tensorflow_models/object_detection/core:losses",
- "//tensorflow_models/object_detection/protos:losses_py_pb2",
- ],
-)
-
-py_test(
- name = "losses_builder_test",
- srcs = ["losses_builder_test.py"],
- deps = [
- ":losses_builder",
- "//tensorflow_models/object_detection/core:losses",
- "//tensorflow_models/object_detection/protos:losses_py_pb2",
- ],
-)
-
-py_library(
- name = "optimizer_builder",
- srcs = ["optimizer_builder.py"],
- deps = [
- "//tensorflow",
- "//tensorflow_models/object_detection/utils:learning_schedules",
- ],
-)
-
-py_test(
- name = "optimizer_builder_test",
- srcs = ["optimizer_builder_test.py"],
- deps = [
- ":optimizer_builder",
- "//tensorflow",
- "//tensorflow_models/object_detection/protos:optimizer_py_pb2",
- ],
-)
-
-py_library(
- name = "post_processing_builder",
- srcs = ["post_processing_builder.py"],
- deps = [
- "//tensorflow",
- "//tensorflow_models/object_detection/core:post_processing",
- "//tensorflow_models/object_detection/protos:post_processing_py_pb2",
- ],
-)
-
-py_test(
- name = "post_processing_builder_test",
- srcs = ["post_processing_builder_test.py"],
- deps = [
- ":post_processing_builder",
- "//tensorflow",
- "//tensorflow_models/object_detection/protos:post_processing_py_pb2",
- ],
-)
-
-py_library(
- name = "hyperparams_builder",
- srcs = ["hyperparams_builder.py"],
- deps = [
- "//tensorflow_models/object_detection/protos:hyperparams_py_pb2",
- ],
-)
-
-py_test(
- name = "hyperparams_builder_test",
- srcs = ["hyperparams_builder_test.py"],
- deps = [
- ":hyperparams_builder",
- "//tensorflow",
- "//tensorflow_models/object_detection/protos:hyperparams_py_pb2",
- ],
-)
-
-py_library(
- name = "box_predictor_builder",
- srcs = ["box_predictor_builder.py"],
- deps = [
- ":hyperparams_builder",
- "//tensorflow_models/object_detection/core:box_predictor",
- "//tensorflow_models/object_detection/protos:box_predictor_py_pb2",
- ],
-)
-
-py_test(
- name = "box_predictor_builder_test",
- srcs = ["box_predictor_builder_test.py"],
- deps = [
- ":box_predictor_builder",
- ":hyperparams_builder",
- "//tensorflow",
- "//tensorflow_models/object_detection/protos:box_predictor_py_pb2",
- "//tensorflow_models/object_detection/protos:hyperparams_py_pb2",
- ],
-)
-
-py_library(
- name = "region_similarity_calculator_builder",
- srcs = ["region_similarity_calculator_builder.py"],
- deps = [
- "//tensorflow_models/object_detection/core:region_similarity_calculator",
- "//tensorflow_models/object_detection/protos:region_similarity_calculator_py_pb2",
- ],
-)
-
-py_test(
- name = "region_similarity_calculator_builder_test",
- srcs = ["region_similarity_calculator_builder_test.py"],
- deps = [
- ":region_similarity_calculator_builder",
- "//tensorflow",
- ],
-)
-
-py_library(
- name = "preprocessor_builder",
- srcs = ["preprocessor_builder.py"],
- deps = [
- "//tensorflow",
- "//tensorflow_models/object_detection/core:preprocessor",
- "//tensorflow_models/object_detection/protos:preprocessor_py_pb2",
- ],
-)
-
-py_test(
- name = "preprocessor_builder_test",
- srcs = [
- "preprocessor_builder_test.py",
- ],
- deps = [
- ":preprocessor_builder",
- "//tensorflow",
- "//tensorflow_models/object_detection/core:preprocessor",
- "//tensorflow_models/object_detection/protos:preprocessor_py_pb2",
- ],
-)
-
-py_library(
- name = "image_resizer_builder",
- srcs = ["image_resizer_builder.py"],
- deps = [
- "//tensorflow",
- "//tensorflow_models/object_detection/core:preprocessor",
- "//tensorflow_models/object_detection/protos:image_resizer_py_pb2",
- ],
-)
-
-py_test(
- name = "image_resizer_builder_test",
- srcs = ["image_resizer_builder_test.py"],
- deps = [
- ":image_resizer_builder",
- "//tensorflow",
- "//tensorflow_models/object_detection/protos:image_resizer_py_pb2",
- ],
-)
diff --git a/object_detection/builders/__init__.py b/object_detection/builders/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/object_detection/builders/__pycache__/__init__.cpython-35.pyc b/object_detection/builders/__pycache__/__init__.cpython-35.pyc
deleted file mode 100644
index 39958001..00000000
Binary files a/object_detection/builders/__pycache__/__init__.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/builders/__pycache__/anchor_generator_builder.cpython-35.pyc b/object_detection/builders/__pycache__/anchor_generator_builder.cpython-35.pyc
deleted file mode 100644
index deee9d21..00000000
Binary files a/object_detection/builders/__pycache__/anchor_generator_builder.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/builders/__pycache__/box_coder_builder.cpython-35.pyc b/object_detection/builders/__pycache__/box_coder_builder.cpython-35.pyc
deleted file mode 100644
index 7fdb466c..00000000
Binary files a/object_detection/builders/__pycache__/box_coder_builder.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/builders/__pycache__/box_predictor_builder.cpython-35.pyc b/object_detection/builders/__pycache__/box_predictor_builder.cpython-35.pyc
deleted file mode 100644
index c84f9f90..00000000
Binary files a/object_detection/builders/__pycache__/box_predictor_builder.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/builders/__pycache__/hyperparams_builder.cpython-35.pyc b/object_detection/builders/__pycache__/hyperparams_builder.cpython-35.pyc
deleted file mode 100644
index cbc66011..00000000
Binary files a/object_detection/builders/__pycache__/hyperparams_builder.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/builders/__pycache__/image_resizer_builder.cpython-35.pyc b/object_detection/builders/__pycache__/image_resizer_builder.cpython-35.pyc
deleted file mode 100644
index 87f79381..00000000
Binary files a/object_detection/builders/__pycache__/image_resizer_builder.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/builders/__pycache__/losses_builder.cpython-35.pyc b/object_detection/builders/__pycache__/losses_builder.cpython-35.pyc
deleted file mode 100644
index e8dd90e8..00000000
Binary files a/object_detection/builders/__pycache__/losses_builder.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/builders/__pycache__/matcher_builder.cpython-35.pyc b/object_detection/builders/__pycache__/matcher_builder.cpython-35.pyc
deleted file mode 100644
index c20c37d6..00000000
Binary files a/object_detection/builders/__pycache__/matcher_builder.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/builders/__pycache__/model_builder.cpython-35.pyc b/object_detection/builders/__pycache__/model_builder.cpython-35.pyc
deleted file mode 100644
index ca02ac33..00000000
Binary files a/object_detection/builders/__pycache__/model_builder.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/builders/__pycache__/post_processing_builder.cpython-35.pyc b/object_detection/builders/__pycache__/post_processing_builder.cpython-35.pyc
deleted file mode 100644
index f4c63a23..00000000
Binary files a/object_detection/builders/__pycache__/post_processing_builder.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/builders/__pycache__/region_similarity_calculator_builder.cpython-35.pyc b/object_detection/builders/__pycache__/region_similarity_calculator_builder.cpython-35.pyc
deleted file mode 100644
index 80a61106..00000000
Binary files a/object_detection/builders/__pycache__/region_similarity_calculator_builder.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/builders/anchor_generator_builder.py b/object_detection/builders/anchor_generator_builder.py
deleted file mode 100644
index 40a65c5c..00000000
--- a/object_detection/builders/anchor_generator_builder.py
+++ /dev/null
@@ -1,82 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""A function to build an object detection anchor generator from config."""
-
-from object_detection.anchor_generators import grid_anchor_generator
-from object_detection.anchor_generators import multiple_grid_anchor_generator
-from object_detection.protos import anchor_generator_pb2
-
-
-def build(anchor_generator_config):
- """Builds an anchor generator based on the config.
-
- Args:
- anchor_generator_config: An anchor_generator.proto object containing the
- config for the desired anchor generator.
-
- Returns:
- Anchor generator based on the config.
-
- Raises:
- ValueError: On empty anchor generator proto.
- """
- if not isinstance(anchor_generator_config,
- anchor_generator_pb2.AnchorGenerator):
- raise ValueError('anchor_generator_config not of type '
- 'anchor_generator_pb2.AnchorGenerator')
- if anchor_generator_config.WhichOneof(
- 'anchor_generator_oneof') == 'grid_anchor_generator':
- grid_anchor_generator_config = anchor_generator_config.grid_anchor_generator
- return grid_anchor_generator.GridAnchorGenerator(
- scales=[float(scale) for scale in grid_anchor_generator_config.scales],
- aspect_ratios=[float(aspect_ratio)
- for aspect_ratio
- in grid_anchor_generator_config.aspect_ratios],
- base_anchor_size=[grid_anchor_generator_config.height,
- grid_anchor_generator_config.width],
- anchor_stride=[grid_anchor_generator_config.height_stride,
- grid_anchor_generator_config.width_stride],
- anchor_offset=[grid_anchor_generator_config.height_offset,
- grid_anchor_generator_config.width_offset])
- elif anchor_generator_config.WhichOneof(
- 'anchor_generator_oneof') == 'ssd_anchor_generator':
- ssd_anchor_generator_config = anchor_generator_config.ssd_anchor_generator
- anchor_strides = None
- if ssd_anchor_generator_config.height_stride:
- anchor_strides = zip(ssd_anchor_generator_config.height_stride,
- ssd_anchor_generator_config.width_stride)
- anchor_offsets = None
- if ssd_anchor_generator_config.height_offset:
- anchor_offsets = zip(ssd_anchor_generator_config.height_offset,
- ssd_anchor_generator_config.width_offset)
- return multiple_grid_anchor_generator.create_ssd_anchors(
- num_layers=ssd_anchor_generator_config.num_layers,
- min_scale=ssd_anchor_generator_config.min_scale,
- max_scale=ssd_anchor_generator_config.max_scale,
- scales=[float(scale) for scale in ssd_anchor_generator_config.scales],
- aspect_ratios=ssd_anchor_generator_config.aspect_ratios,
- interpolated_scale_aspect_ratio=(
- ssd_anchor_generator_config.interpolated_scale_aspect_ratio),
- base_anchor_size=[
- ssd_anchor_generator_config.base_anchor_height,
- ssd_anchor_generator_config.base_anchor_width
- ],
- anchor_strides=anchor_strides,
- anchor_offsets=anchor_offsets,
- reduce_boxes_in_lowest_layer=(
- ssd_anchor_generator_config.reduce_boxes_in_lowest_layer))
- else:
- raise ValueError('Empty anchor generator.')
diff --git a/object_detection/builders/anchor_generator_builder_test.py b/object_detection/builders/anchor_generator_builder_test.py
deleted file mode 100644
index ecc1eca1..00000000
--- a/object_detection/builders/anchor_generator_builder_test.py
+++ /dev/null
@@ -1,257 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for anchor_generator_builder."""
-
-import math
-
-import tensorflow as tf
-
-from google.protobuf import text_format
-from object_detection.anchor_generators import grid_anchor_generator
-from object_detection.anchor_generators import multiple_grid_anchor_generator
-from object_detection.builders import anchor_generator_builder
-from object_detection.protos import anchor_generator_pb2
-
-
-class AnchorGeneratorBuilderTest(tf.test.TestCase):
-
- def assert_almost_list_equal(self, expected_list, actual_list, delta=None):
- self.assertEqual(len(expected_list), len(actual_list))
- for expected_item, actual_item in zip(expected_list, actual_list):
- self.assertAlmostEqual(expected_item, actual_item, delta=delta)
-
- def test_build_grid_anchor_generator_with_defaults(self):
- anchor_generator_text_proto = """
- grid_anchor_generator {
- }
- """
- anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
- text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
- anchor_generator_object = anchor_generator_builder.build(
- anchor_generator_proto)
- self.assertTrue(isinstance(anchor_generator_object,
- grid_anchor_generator.GridAnchorGenerator))
- self.assertListEqual(anchor_generator_object._scales, [])
- self.assertListEqual(anchor_generator_object._aspect_ratios, [])
- with self.test_session() as sess:
- base_anchor_size, anchor_offset, anchor_stride = sess.run(
- [anchor_generator_object._base_anchor_size,
- anchor_generator_object._anchor_offset,
- anchor_generator_object._anchor_stride])
- self.assertAllEqual(anchor_offset, [0, 0])
- self.assertAllEqual(anchor_stride, [16, 16])
- self.assertAllEqual(base_anchor_size, [256, 256])
-
- def test_build_grid_anchor_generator_with_non_default_parameters(self):
- anchor_generator_text_proto = """
- grid_anchor_generator {
- height: 128
- width: 512
- height_stride: 10
- width_stride: 20
- height_offset: 30
- width_offset: 40
- scales: [0.4, 2.2]
- aspect_ratios: [0.3, 4.5]
- }
- """
- anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
- text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
- anchor_generator_object = anchor_generator_builder.build(
- anchor_generator_proto)
- self.assertTrue(isinstance(anchor_generator_object,
- grid_anchor_generator.GridAnchorGenerator))
- self.assert_almost_list_equal(anchor_generator_object._scales,
- [0.4, 2.2])
- self.assert_almost_list_equal(anchor_generator_object._aspect_ratios,
- [0.3, 4.5])
- with self.test_session() as sess:
- base_anchor_size, anchor_offset, anchor_stride = sess.run(
- [anchor_generator_object._base_anchor_size,
- anchor_generator_object._anchor_offset,
- anchor_generator_object._anchor_stride])
- self.assertAllEqual(anchor_offset, [30, 40])
- self.assertAllEqual(anchor_stride, [10, 20])
- self.assertAllEqual(base_anchor_size, [128, 512])
-
- def test_build_ssd_anchor_generator_with_defaults(self):
- anchor_generator_text_proto = """
- ssd_anchor_generator {
- aspect_ratios: [1.0]
- }
- """
- anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
- text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
- anchor_generator_object = anchor_generator_builder.build(
- anchor_generator_proto)
- self.assertTrue(isinstance(anchor_generator_object,
- multiple_grid_anchor_generator.
- MultipleGridAnchorGenerator))
- for actual_scales, expected_scales in zip(
- list(anchor_generator_object._scales),
- [(0.1, 0.2, 0.2),
- (0.35, 0.418),
- (0.499, 0.570),
- (0.649, 0.721),
- (0.799, 0.871),
- (0.949, 0.974)]):
- self.assert_almost_list_equal(expected_scales, actual_scales, delta=1e-2)
- for actual_aspect_ratio, expected_aspect_ratio in zip(
- list(anchor_generator_object._aspect_ratios),
- [(1.0, 2.0, 0.5)] + 5 * [(1.0, 1.0)]):
- self.assert_almost_list_equal(expected_aspect_ratio, actual_aspect_ratio)
-
- with self.test_session() as sess:
- base_anchor_size = sess.run(anchor_generator_object._base_anchor_size)
- self.assertAllClose(base_anchor_size, [1.0, 1.0])
-
- def test_build_ssd_anchor_generator_with_custom_scales(self):
- anchor_generator_text_proto = """
- ssd_anchor_generator {
- aspect_ratios: [1.0]
- scales: [0.1, 0.15, 0.2, 0.4, 0.6, 0.8]
- reduce_boxes_in_lowest_layer: false
- }
- """
- anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
- text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
- anchor_generator_object = anchor_generator_builder.build(
- anchor_generator_proto)
- self.assertTrue(isinstance(anchor_generator_object,
- multiple_grid_anchor_generator.
- MultipleGridAnchorGenerator))
- for actual_scales, expected_scales in zip(
- list(anchor_generator_object._scales),
- [(0.1, math.sqrt(0.1 * 0.15)),
- (0.15, math.sqrt(0.15 * 0.2)),
- (0.2, math.sqrt(0.2 * 0.4)),
- (0.4, math.sqrt(0.4 * 0.6)),
- (0.6, math.sqrt(0.6 * 0.8)),
- (0.8, math.sqrt(0.8 * 1.0))]):
- self.assert_almost_list_equal(expected_scales, actual_scales, delta=1e-2)
-
- def test_build_ssd_anchor_generator_with_custom_interpolated_scale(self):
- anchor_generator_text_proto = """
- ssd_anchor_generator {
- aspect_ratios: [0.5]
- interpolated_scale_aspect_ratio: 0.5
- reduce_boxes_in_lowest_layer: false
- }
- """
- anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
- text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
- anchor_generator_object = anchor_generator_builder.build(
- anchor_generator_proto)
- self.assertTrue(isinstance(anchor_generator_object,
- multiple_grid_anchor_generator.
- MultipleGridAnchorGenerator))
- for actual_aspect_ratio, expected_aspect_ratio in zip(
- list(anchor_generator_object._aspect_ratios),
- 6 * [(0.5, 0.5)]):
- self.assert_almost_list_equal(expected_aspect_ratio, actual_aspect_ratio)
-
- def test_build_ssd_anchor_generator_without_reduced_boxes(self):
- anchor_generator_text_proto = """
- ssd_anchor_generator {
- aspect_ratios: [1.0]
- reduce_boxes_in_lowest_layer: false
- }
- """
- anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
- text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
- anchor_generator_object = anchor_generator_builder.build(
- anchor_generator_proto)
- self.assertTrue(isinstance(anchor_generator_object,
- multiple_grid_anchor_generator.
- MultipleGridAnchorGenerator))
-
- for actual_scales, expected_scales in zip(
- list(anchor_generator_object._scales),
- [(0.2, 0.264),
- (0.35, 0.418),
- (0.499, 0.570),
- (0.649, 0.721),
- (0.799, 0.871),
- (0.949, 0.974)]):
- self.assert_almost_list_equal(expected_scales, actual_scales, delta=1e-2)
-
- for actual_aspect_ratio, expected_aspect_ratio in zip(
- list(anchor_generator_object._aspect_ratios),
- 6 * [(1.0, 1.0)]):
- self.assert_almost_list_equal(expected_aspect_ratio, actual_aspect_ratio)
-
- with self.test_session() as sess:
- base_anchor_size = sess.run(anchor_generator_object._base_anchor_size)
- self.assertAllClose(base_anchor_size, [1.0, 1.0])
-
- def test_build_ssd_anchor_generator_with_non_default_parameters(self):
- anchor_generator_text_proto = """
- ssd_anchor_generator {
- num_layers: 2
- min_scale: 0.3
- max_scale: 0.8
- aspect_ratios: [2.0]
- height_stride: 16
- height_stride: 32
- width_stride: 20
- width_stride: 30
- height_offset: 8
- height_offset: 16
- width_offset: 0
- width_offset: 10
- }
- """
- anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
- text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
- anchor_generator_object = anchor_generator_builder.build(
- anchor_generator_proto)
- self.assertTrue(isinstance(anchor_generator_object,
- multiple_grid_anchor_generator.
- MultipleGridAnchorGenerator))
-
- for actual_scales, expected_scales in zip(
- list(anchor_generator_object._scales),
- [(0.1, 0.3, 0.3), (0.8, 0.894)]):
- self.assert_almost_list_equal(expected_scales, actual_scales, delta=1e-2)
-
- for actual_aspect_ratio, expected_aspect_ratio in zip(
- list(anchor_generator_object._aspect_ratios),
- [(1.0, 2.0, 0.5), (2.0, 1.0)]):
- self.assert_almost_list_equal(expected_aspect_ratio, actual_aspect_ratio)
-
- for actual_strides, expected_strides in zip(
- list(anchor_generator_object._anchor_strides), [(16, 20), (32, 30)]):
- self.assert_almost_list_equal(expected_strides, actual_strides)
-
- for actual_offsets, expected_offsets in zip(
- list(anchor_generator_object._anchor_offsets), [(8, 0), (16, 10)]):
- self.assert_almost_list_equal(expected_offsets, actual_offsets)
-
- with self.test_session() as sess:
- base_anchor_size = sess.run(anchor_generator_object._base_anchor_size)
- self.assertAllClose(base_anchor_size, [1.0, 1.0])
-
- def test_raise_value_error_on_empty_anchor_genertor(self):
- anchor_generator_text_proto = """
- """
- anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
- text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
- with self.assertRaises(ValueError):
- anchor_generator_builder.build(anchor_generator_proto)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/builders/box_coder_builder.py b/object_detection/builders/box_coder_builder.py
deleted file mode 100644
index edfc2fca..00000000
--- a/object_detection/builders/box_coder_builder.py
+++ /dev/null
@@ -1,65 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""A function to build an object detection box coder from configuration."""
-from object_detection.box_coders import faster_rcnn_box_coder
-from object_detection.box_coders import keypoint_box_coder
-from object_detection.box_coders import mean_stddev_box_coder
-from object_detection.box_coders import square_box_coder
-from object_detection.protos import box_coder_pb2
-
-
-def build(box_coder_config):
- """Builds a box coder object based on the box coder config.
-
- Args:
- box_coder_config: A box_coder.proto object containing the config for the
- desired box coder.
-
- Returns:
- BoxCoder based on the config.
-
- Raises:
- ValueError: On empty box coder proto.
- """
- if not isinstance(box_coder_config, box_coder_pb2.BoxCoder):
- raise ValueError('box_coder_config not of type box_coder_pb2.BoxCoder.')
-
- if box_coder_config.WhichOneof('box_coder_oneof') == 'faster_rcnn_box_coder':
- return faster_rcnn_box_coder.FasterRcnnBoxCoder(scale_factors=[
- box_coder_config.faster_rcnn_box_coder.y_scale,
- box_coder_config.faster_rcnn_box_coder.x_scale,
- box_coder_config.faster_rcnn_box_coder.height_scale,
- box_coder_config.faster_rcnn_box_coder.width_scale
- ])
- if box_coder_config.WhichOneof('box_coder_oneof') == 'keypoint_box_coder':
- return keypoint_box_coder.KeypointBoxCoder(
- box_coder_config.keypoint_box_coder.num_keypoints,
- scale_factors=[
- box_coder_config.keypoint_box_coder.y_scale,
- box_coder_config.keypoint_box_coder.x_scale,
- box_coder_config.keypoint_box_coder.height_scale,
- box_coder_config.keypoint_box_coder.width_scale
- ])
- if (box_coder_config.WhichOneof('box_coder_oneof') ==
- 'mean_stddev_box_coder'):
- return mean_stddev_box_coder.MeanStddevBoxCoder()
- if box_coder_config.WhichOneof('box_coder_oneof') == 'square_box_coder':
- return square_box_coder.SquareBoxCoder(scale_factors=[
- box_coder_config.square_box_coder.y_scale,
- box_coder_config.square_box_coder.x_scale,
- box_coder_config.square_box_coder.length_scale
- ])
- raise ValueError('Empty box coder.')
diff --git a/object_detection/builders/box_coder_builder_test.py b/object_detection/builders/box_coder_builder_test.py
deleted file mode 100644
index 286012e9..00000000
--- a/object_detection/builders/box_coder_builder_test.py
+++ /dev/null
@@ -1,136 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for box_coder_builder."""
-
-import tensorflow as tf
-
-from google.protobuf import text_format
-from object_detection.box_coders import faster_rcnn_box_coder
-from object_detection.box_coders import keypoint_box_coder
-from object_detection.box_coders import mean_stddev_box_coder
-from object_detection.box_coders import square_box_coder
-from object_detection.builders import box_coder_builder
-from object_detection.protos import box_coder_pb2
-
-
-class BoxCoderBuilderTest(tf.test.TestCase):
-
- def test_build_faster_rcnn_box_coder_with_defaults(self):
- box_coder_text_proto = """
- faster_rcnn_box_coder {
- }
- """
- box_coder_proto = box_coder_pb2.BoxCoder()
- text_format.Merge(box_coder_text_proto, box_coder_proto)
- box_coder_object = box_coder_builder.build(box_coder_proto)
- self.assertIsInstance(box_coder_object,
- faster_rcnn_box_coder.FasterRcnnBoxCoder)
- self.assertEqual(box_coder_object._scale_factors, [10.0, 10.0, 5.0, 5.0])
-
- def test_build_faster_rcnn_box_coder_with_non_default_parameters(self):
- box_coder_text_proto = """
- faster_rcnn_box_coder {
- y_scale: 6.0
- x_scale: 3.0
- height_scale: 7.0
- width_scale: 8.0
- }
- """
- box_coder_proto = box_coder_pb2.BoxCoder()
- text_format.Merge(box_coder_text_proto, box_coder_proto)
- box_coder_object = box_coder_builder.build(box_coder_proto)
- self.assertIsInstance(box_coder_object,
- faster_rcnn_box_coder.FasterRcnnBoxCoder)
- self.assertEqual(box_coder_object._scale_factors, [6.0, 3.0, 7.0, 8.0])
-
- def test_build_keypoint_box_coder_with_defaults(self):
- box_coder_text_proto = """
- keypoint_box_coder {
- }
- """
- box_coder_proto = box_coder_pb2.BoxCoder()
- text_format.Merge(box_coder_text_proto, box_coder_proto)
- box_coder_object = box_coder_builder.build(box_coder_proto)
- self.assertIsInstance(box_coder_object, keypoint_box_coder.KeypointBoxCoder)
- self.assertEqual(box_coder_object._scale_factors, [10.0, 10.0, 5.0, 5.0])
-
- def test_build_keypoint_box_coder_with_non_default_parameters(self):
- box_coder_text_proto = """
- keypoint_box_coder {
- num_keypoints: 6
- y_scale: 6.0
- x_scale: 3.0
- height_scale: 7.0
- width_scale: 8.0
- }
- """
- box_coder_proto = box_coder_pb2.BoxCoder()
- text_format.Merge(box_coder_text_proto, box_coder_proto)
- box_coder_object = box_coder_builder.build(box_coder_proto)
- self.assertIsInstance(box_coder_object, keypoint_box_coder.KeypointBoxCoder)
- self.assertEqual(box_coder_object._num_keypoints, 6)
- self.assertEqual(box_coder_object._scale_factors, [6.0, 3.0, 7.0, 8.0])
-
- def test_build_mean_stddev_box_coder(self):
- box_coder_text_proto = """
- mean_stddev_box_coder {
- }
- """
- box_coder_proto = box_coder_pb2.BoxCoder()
- text_format.Merge(box_coder_text_proto, box_coder_proto)
- box_coder_object = box_coder_builder.build(box_coder_proto)
- self.assertTrue(
- isinstance(box_coder_object,
- mean_stddev_box_coder.MeanStddevBoxCoder))
-
- def test_build_square_box_coder_with_defaults(self):
- box_coder_text_proto = """
- square_box_coder {
- }
- """
- box_coder_proto = box_coder_pb2.BoxCoder()
- text_format.Merge(box_coder_text_proto, box_coder_proto)
- box_coder_object = box_coder_builder.build(box_coder_proto)
- self.assertTrue(
- isinstance(box_coder_object, square_box_coder.SquareBoxCoder))
- self.assertEqual(box_coder_object._scale_factors, [10.0, 10.0, 5.0])
-
- def test_build_square_box_coder_with_non_default_parameters(self):
- box_coder_text_proto = """
- square_box_coder {
- y_scale: 6.0
- x_scale: 3.0
- length_scale: 7.0
- }
- """
- box_coder_proto = box_coder_pb2.BoxCoder()
- text_format.Merge(box_coder_text_proto, box_coder_proto)
- box_coder_object = box_coder_builder.build(box_coder_proto)
- self.assertTrue(
- isinstance(box_coder_object, square_box_coder.SquareBoxCoder))
- self.assertEqual(box_coder_object._scale_factors, [6.0, 3.0, 7.0])
-
- def test_raise_error_on_empty_box_coder(self):
- box_coder_text_proto = """
- """
- box_coder_proto = box_coder_pb2.BoxCoder()
- text_format.Merge(box_coder_text_proto, box_coder_proto)
- with self.assertRaises(ValueError):
- box_coder_builder.build(box_coder_proto)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/builders/box_predictor_builder.py b/object_detection/builders/box_predictor_builder.py
deleted file mode 100644
index 3e10b394..00000000
--- a/object_detection/builders/box_predictor_builder.py
+++ /dev/null
@@ -1,108 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Function to build box predictor from configuration."""
-
-from object_detection.core import box_predictor
-from object_detection.protos import box_predictor_pb2
-
-
-def build(argscope_fn, box_predictor_config, is_training, num_classes):
- """Builds box predictor based on the configuration.
-
- Builds box predictor based on the configuration. See box_predictor.proto for
- configurable options. Also, see box_predictor.py for more details.
-
- Args:
- argscope_fn: A function that takes the following inputs:
- * hyperparams_pb2.Hyperparams proto
- * a boolean indicating if the model is in training mode.
- and returns a tf slim argscope for Conv and FC hyperparameters.
- box_predictor_config: box_predictor_pb2.BoxPredictor proto containing
- configuration.
- is_training: Whether the models is in training mode.
- num_classes: Number of classes to predict.
-
- Returns:
- box_predictor: box_predictor.BoxPredictor object.
-
- Raises:
- ValueError: On unknown box predictor.
- """
- if not isinstance(box_predictor_config, box_predictor_pb2.BoxPredictor):
- raise ValueError('box_predictor_config not of type '
- 'box_predictor_pb2.BoxPredictor.')
-
- box_predictor_oneof = box_predictor_config.WhichOneof('box_predictor_oneof')
-
- if box_predictor_oneof == 'convolutional_box_predictor':
- conv_box_predictor = box_predictor_config.convolutional_box_predictor
- conv_hyperparams = argscope_fn(conv_box_predictor.conv_hyperparams,
- is_training)
- box_predictor_object = box_predictor.ConvolutionalBoxPredictor(
- is_training=is_training,
- num_classes=num_classes,
- conv_hyperparams=conv_hyperparams,
- min_depth=conv_box_predictor.min_depth,
- max_depth=conv_box_predictor.max_depth,
- num_layers_before_predictor=(conv_box_predictor.
- num_layers_before_predictor),
- use_dropout=conv_box_predictor.use_dropout,
- dropout_keep_prob=conv_box_predictor.dropout_keep_probability,
- kernel_size=conv_box_predictor.kernel_size,
- box_code_size=conv_box_predictor.box_code_size,
- apply_sigmoid_to_scores=conv_box_predictor.apply_sigmoid_to_scores,
- class_prediction_bias_init=conv_box_predictor.class_prediction_bias_init
- )
- return box_predictor_object
-
- if box_predictor_oneof == 'mask_rcnn_box_predictor':
- mask_rcnn_box_predictor = box_predictor_config.mask_rcnn_box_predictor
- fc_hyperparams = argscope_fn(mask_rcnn_box_predictor.fc_hyperparams,
- is_training)
- conv_hyperparams = None
- if mask_rcnn_box_predictor.HasField('conv_hyperparams'):
- conv_hyperparams = argscope_fn(mask_rcnn_box_predictor.conv_hyperparams,
- is_training)
- box_predictor_object = box_predictor.MaskRCNNBoxPredictor(
- is_training=is_training,
- num_classes=num_classes,
- fc_hyperparams=fc_hyperparams,
- use_dropout=mask_rcnn_box_predictor.use_dropout,
- dropout_keep_prob=mask_rcnn_box_predictor.dropout_keep_probability,
- box_code_size=mask_rcnn_box_predictor.box_code_size,
- conv_hyperparams=conv_hyperparams,
- predict_instance_masks=mask_rcnn_box_predictor.predict_instance_masks,
- mask_prediction_conv_depth=(mask_rcnn_box_predictor.
- mask_prediction_conv_depth),
- predict_keypoints=mask_rcnn_box_predictor.predict_keypoints)
- return box_predictor_object
-
- if box_predictor_oneof == 'rfcn_box_predictor':
- rfcn_box_predictor = box_predictor_config.rfcn_box_predictor
- conv_hyperparams = argscope_fn(rfcn_box_predictor.conv_hyperparams,
- is_training)
- box_predictor_object = box_predictor.RfcnBoxPredictor(
- is_training=is_training,
- num_classes=num_classes,
- conv_hyperparams=conv_hyperparams,
- crop_size=[rfcn_box_predictor.crop_height,
- rfcn_box_predictor.crop_width],
- num_spatial_bins=[rfcn_box_predictor.num_spatial_bins_height,
- rfcn_box_predictor.num_spatial_bins_width],
- depth=rfcn_box_predictor.depth,
- box_code_size=rfcn_box_predictor.box_code_size)
- return box_predictor_object
- raise ValueError('Unknown box predictor: {}'.format(box_predictor_oneof))
diff --git a/object_detection/builders/box_predictor_builder_test.py b/object_detection/builders/box_predictor_builder_test.py
deleted file mode 100644
index 6bafd482..00000000
--- a/object_detection/builders/box_predictor_builder_test.py
+++ /dev/null
@@ -1,393 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for box_predictor_builder."""
-import mock
-import tensorflow as tf
-
-from google.protobuf import text_format
-from object_detection.builders import box_predictor_builder
-from object_detection.builders import hyperparams_builder
-from object_detection.protos import box_predictor_pb2
-from object_detection.protos import hyperparams_pb2
-
-
-class ConvolutionalBoxPredictorBuilderTest(tf.test.TestCase):
-
- def test_box_predictor_calls_conv_argscope_fn(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l1_regularizer {
- weight: 0.0003
- }
- }
- initializer {
- truncated_normal_initializer {
- mean: 0.0
- stddev: 0.3
- }
- }
- activation: RELU_6
- """
- hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto)
- def mock_conv_argscope_builder(conv_hyperparams_arg, is_training):
- return (conv_hyperparams_arg, is_training)
-
- box_predictor_proto = box_predictor_pb2.BoxPredictor()
- box_predictor_proto.convolutional_box_predictor.conv_hyperparams.CopyFrom(
- hyperparams_proto)
- box_predictor = box_predictor_builder.build(
- argscope_fn=mock_conv_argscope_builder,
- box_predictor_config=box_predictor_proto,
- is_training=False,
- num_classes=10)
- (conv_hyperparams_actual, is_training) = box_predictor._conv_hyperparams
- self.assertAlmostEqual((hyperparams_proto.regularizer.
- l1_regularizer.weight),
- (conv_hyperparams_actual.regularizer.l1_regularizer.
- weight))
- self.assertAlmostEqual((hyperparams_proto.initializer.
- truncated_normal_initializer.stddev),
- (conv_hyperparams_actual.initializer.
- truncated_normal_initializer.stddev))
- self.assertAlmostEqual((hyperparams_proto.initializer.
- truncated_normal_initializer.mean),
- (conv_hyperparams_actual.initializer.
- truncated_normal_initializer.mean))
- self.assertEqual(hyperparams_proto.activation,
- conv_hyperparams_actual.activation)
- self.assertFalse(is_training)
-
- def test_construct_non_default_conv_box_predictor(self):
- box_predictor_text_proto = """
- convolutional_box_predictor {
- min_depth: 2
- max_depth: 16
- num_layers_before_predictor: 2
- use_dropout: false
- dropout_keep_probability: 0.4
- kernel_size: 3
- box_code_size: 3
- apply_sigmoid_to_scores: true
- class_prediction_bias_init: 4.0
- }
- """
- conv_hyperparams_text_proto = """
- regularizer {
- l1_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- """
- hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto)
- def mock_conv_argscope_builder(conv_hyperparams_arg, is_training):
- return (conv_hyperparams_arg, is_training)
-
- box_predictor_proto = box_predictor_pb2.BoxPredictor()
- text_format.Merge(box_predictor_text_proto, box_predictor_proto)
- box_predictor_proto.convolutional_box_predictor.conv_hyperparams.CopyFrom(
- hyperparams_proto)
- box_predictor = box_predictor_builder.build(
- argscope_fn=mock_conv_argscope_builder,
- box_predictor_config=box_predictor_proto,
- is_training=False,
- num_classes=10)
- self.assertEqual(box_predictor._min_depth, 2)
- self.assertEqual(box_predictor._max_depth, 16)
- self.assertEqual(box_predictor._num_layers_before_predictor, 2)
- self.assertFalse(box_predictor._use_dropout)
- self.assertAlmostEqual(box_predictor._dropout_keep_prob, 0.4)
- self.assertTrue(box_predictor._apply_sigmoid_to_scores)
- self.assertAlmostEqual(box_predictor._class_prediction_bias_init, 4.0)
- self.assertEqual(box_predictor.num_classes, 10)
- self.assertFalse(box_predictor._is_training)
-
- def test_construct_default_conv_box_predictor(self):
- box_predictor_text_proto = """
- convolutional_box_predictor {
- conv_hyperparams {
- regularizer {
- l1_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }"""
- box_predictor_proto = box_predictor_pb2.BoxPredictor()
- text_format.Merge(box_predictor_text_proto, box_predictor_proto)
- box_predictor = box_predictor_builder.build(
- argscope_fn=hyperparams_builder.build,
- box_predictor_config=box_predictor_proto,
- is_training=True,
- num_classes=90)
- self.assertEqual(box_predictor._min_depth, 0)
- self.assertEqual(box_predictor._max_depth, 0)
- self.assertEqual(box_predictor._num_layers_before_predictor, 0)
- self.assertTrue(box_predictor._use_dropout)
- self.assertAlmostEqual(box_predictor._dropout_keep_prob, 0.8)
- self.assertFalse(box_predictor._apply_sigmoid_to_scores)
- self.assertEqual(box_predictor.num_classes, 90)
- self.assertTrue(box_predictor._is_training)
-
-
-class MaskRCNNBoxPredictorBuilderTest(tf.test.TestCase):
-
- def test_box_predictor_builder_calls_fc_argscope_fn(self):
- fc_hyperparams_text_proto = """
- regularizer {
- l1_regularizer {
- weight: 0.0003
- }
- }
- initializer {
- truncated_normal_initializer {
- mean: 0.0
- stddev: 0.3
- }
- }
- activation: RELU_6
- op: FC
- """
- hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(fc_hyperparams_text_proto, hyperparams_proto)
- box_predictor_proto = box_predictor_pb2.BoxPredictor()
- box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams.CopyFrom(
- hyperparams_proto)
- mock_argscope_fn = mock.Mock(return_value='arg_scope')
- box_predictor = box_predictor_builder.build(
- argscope_fn=mock_argscope_fn,
- box_predictor_config=box_predictor_proto,
- is_training=False,
- num_classes=10)
- mock_argscope_fn.assert_called_with(hyperparams_proto, False)
- self.assertEqual(box_predictor._fc_hyperparams, 'arg_scope')
-
- def test_non_default_mask_rcnn_box_predictor(self):
- fc_hyperparams_text_proto = """
- regularizer {
- l1_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- activation: RELU_6
- op: FC
- """
- box_predictor_text_proto = """
- mask_rcnn_box_predictor {
- use_dropout: true
- dropout_keep_probability: 0.8
- box_code_size: 3
- }
- """
- hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(fc_hyperparams_text_proto, hyperparams_proto)
- def mock_fc_argscope_builder(fc_hyperparams_arg, is_training):
- return (fc_hyperparams_arg, is_training)
-
- box_predictor_proto = box_predictor_pb2.BoxPredictor()
- text_format.Merge(box_predictor_text_proto, box_predictor_proto)
- box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams.CopyFrom(
- hyperparams_proto)
- box_predictor = box_predictor_builder.build(
- argscope_fn=mock_fc_argscope_builder,
- box_predictor_config=box_predictor_proto,
- is_training=True,
- num_classes=90)
- self.assertTrue(box_predictor._use_dropout)
- self.assertAlmostEqual(box_predictor._dropout_keep_prob, 0.8)
- self.assertEqual(box_predictor.num_classes, 90)
- self.assertTrue(box_predictor._is_training)
- self.assertEqual(box_predictor._box_code_size, 3)
-
- def test_build_default_mask_rcnn_box_predictor(self):
- box_predictor_proto = box_predictor_pb2.BoxPredictor()
- box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams.op = (
- hyperparams_pb2.Hyperparams.FC)
- box_predictor = box_predictor_builder.build(
- argscope_fn=mock.Mock(return_value='arg_scope'),
- box_predictor_config=box_predictor_proto,
- is_training=True,
- num_classes=90)
- self.assertFalse(box_predictor._use_dropout)
- self.assertAlmostEqual(box_predictor._dropout_keep_prob, 0.5)
- self.assertEqual(box_predictor.num_classes, 90)
- self.assertTrue(box_predictor._is_training)
- self.assertEqual(box_predictor._box_code_size, 4)
- self.assertFalse(box_predictor._predict_instance_masks)
- self.assertFalse(box_predictor._predict_keypoints)
-
- def test_build_box_predictor_with_mask_branch(self):
- box_predictor_proto = box_predictor_pb2.BoxPredictor()
- box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams.op = (
- hyperparams_pb2.Hyperparams.FC)
- box_predictor_proto.mask_rcnn_box_predictor.conv_hyperparams.op = (
- hyperparams_pb2.Hyperparams.CONV)
- box_predictor_proto.mask_rcnn_box_predictor.predict_instance_masks = True
- box_predictor_proto.mask_rcnn_box_predictor.mask_prediction_conv_depth = 512
- mock_argscope_fn = mock.Mock(return_value='arg_scope')
- box_predictor = box_predictor_builder.build(
- argscope_fn=mock_argscope_fn,
- box_predictor_config=box_predictor_proto,
- is_training=True,
- num_classes=90)
- mock_argscope_fn.assert_has_calls(
- [mock.call(box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams,
- True),
- mock.call(box_predictor_proto.mask_rcnn_box_predictor.conv_hyperparams,
- True)], any_order=True)
- self.assertFalse(box_predictor._use_dropout)
- self.assertAlmostEqual(box_predictor._dropout_keep_prob, 0.5)
- self.assertEqual(box_predictor.num_classes, 90)
- self.assertTrue(box_predictor._is_training)
- self.assertEqual(box_predictor._box_code_size, 4)
- self.assertTrue(box_predictor._predict_instance_masks)
- self.assertEqual(box_predictor._mask_prediction_conv_depth, 512)
- self.assertFalse(box_predictor._predict_keypoints)
-
-
-class RfcnBoxPredictorBuilderTest(tf.test.TestCase):
-
- def test_box_predictor_calls_fc_argscope_fn(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l1_regularizer {
- weight: 0.0003
- }
- }
- initializer {
- truncated_normal_initializer {
- mean: 0.0
- stddev: 0.3
- }
- }
- activation: RELU_6
- """
- hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto)
- def mock_conv_argscope_builder(conv_hyperparams_arg, is_training):
- return (conv_hyperparams_arg, is_training)
-
- box_predictor_proto = box_predictor_pb2.BoxPredictor()
- box_predictor_proto.rfcn_box_predictor.conv_hyperparams.CopyFrom(
- hyperparams_proto)
- box_predictor = box_predictor_builder.build(
- argscope_fn=mock_conv_argscope_builder,
- box_predictor_config=box_predictor_proto,
- is_training=False,
- num_classes=10)
- (conv_hyperparams_actual, is_training) = box_predictor._conv_hyperparams
- self.assertAlmostEqual((hyperparams_proto.regularizer.
- l1_regularizer.weight),
- (conv_hyperparams_actual.regularizer.l1_regularizer.
- weight))
- self.assertAlmostEqual((hyperparams_proto.initializer.
- truncated_normal_initializer.stddev),
- (conv_hyperparams_actual.initializer.
- truncated_normal_initializer.stddev))
- self.assertAlmostEqual((hyperparams_proto.initializer.
- truncated_normal_initializer.mean),
- (conv_hyperparams_actual.initializer.
- truncated_normal_initializer.mean))
- self.assertEqual(hyperparams_proto.activation,
- conv_hyperparams_actual.activation)
- self.assertFalse(is_training)
-
- def test_non_default_rfcn_box_predictor(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l1_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- activation: RELU_6
- """
- box_predictor_text_proto = """
- rfcn_box_predictor {
- num_spatial_bins_height: 4
- num_spatial_bins_width: 4
- depth: 4
- box_code_size: 3
- crop_height: 16
- crop_width: 16
- }
- """
- hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto)
- def mock_conv_argscope_builder(conv_hyperparams_arg, is_training):
- return (conv_hyperparams_arg, is_training)
-
- box_predictor_proto = box_predictor_pb2.BoxPredictor()
- text_format.Merge(box_predictor_text_proto, box_predictor_proto)
- box_predictor_proto.rfcn_box_predictor.conv_hyperparams.CopyFrom(
- hyperparams_proto)
- box_predictor = box_predictor_builder.build(
- argscope_fn=mock_conv_argscope_builder,
- box_predictor_config=box_predictor_proto,
- is_training=True,
- num_classes=90)
- self.assertEqual(box_predictor.num_classes, 90)
- self.assertTrue(box_predictor._is_training)
- self.assertEqual(box_predictor._box_code_size, 3)
- self.assertEqual(box_predictor._num_spatial_bins, [4, 4])
- self.assertEqual(box_predictor._crop_size, [16, 16])
-
- def test_default_rfcn_box_predictor(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l1_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- activation: RELU_6
- """
- hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto)
- def mock_conv_argscope_builder(conv_hyperparams_arg, is_training):
- return (conv_hyperparams_arg, is_training)
-
- box_predictor_proto = box_predictor_pb2.BoxPredictor()
- box_predictor_proto.rfcn_box_predictor.conv_hyperparams.CopyFrom(
- hyperparams_proto)
- box_predictor = box_predictor_builder.build(
- argscope_fn=mock_conv_argscope_builder,
- box_predictor_config=box_predictor_proto,
- is_training=True,
- num_classes=90)
- self.assertEqual(box_predictor.num_classes, 90)
- self.assertTrue(box_predictor._is_training)
- self.assertEqual(box_predictor._box_code_size, 4)
- self.assertEqual(box_predictor._num_spatial_bins, [3, 3])
- self.assertEqual(box_predictor._crop_size, [12, 12])
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/builders/hyperparams_builder.py b/object_detection/builders/hyperparams_builder.py
deleted file mode 100644
index 094ff023..00000000
--- a/object_detection/builders/hyperparams_builder.py
+++ /dev/null
@@ -1,168 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Builder function to construct tf-slim arg_scope for convolution, fc ops."""
-import tensorflow as tf
-
-from object_detection.protos import hyperparams_pb2
-
-slim = tf.contrib.slim
-
-
-def build(hyperparams_config, is_training):
- """Builds tf-slim arg_scope for convolution ops based on the config.
-
- Returns an arg_scope to use for convolution ops containing weights
- initializer, weights regularizer, activation function, batch norm function
- and batch norm parameters based on the configuration.
-
- Note that if the batch_norm parameteres are not specified in the config
- (i.e. left to default) then batch norm is excluded from the arg_scope.
-
- The batch norm parameters are set for updates based on `is_training` argument
- and conv_hyperparams_config.batch_norm.train parameter. During training, they
- are updated only if batch_norm.train parameter is true. However, during eval,
- no updates are made to the batch norm variables. In both cases, their current
- values are used during forward pass.
-
- Args:
- hyperparams_config: hyperparams.proto object containing
- hyperparameters.
- is_training: Whether the network is in training mode.
-
- Returns:
- arg_scope: tf-slim arg_scope containing hyperparameters for ops.
-
- Raises:
- ValueError: if hyperparams_config is not of type hyperparams.Hyperparams.
- """
- if not isinstance(hyperparams_config,
- hyperparams_pb2.Hyperparams):
- raise ValueError('hyperparams_config not of type '
- 'hyperparams_pb.Hyperparams.')
-
- batch_norm = None
- batch_norm_params = None
- if hyperparams_config.HasField('batch_norm'):
- batch_norm = slim.batch_norm
- batch_norm_params = _build_batch_norm_params(
- hyperparams_config.batch_norm, is_training)
-
- affected_ops = [slim.conv2d, slim.separable_conv2d, slim.conv2d_transpose]
- if hyperparams_config.HasField('op') and (
- hyperparams_config.op == hyperparams_pb2.Hyperparams.FC):
- affected_ops = [slim.fully_connected]
- with slim.arg_scope(
- affected_ops,
- weights_regularizer=_build_regularizer(
- hyperparams_config.regularizer),
- weights_initializer=_build_initializer(
- hyperparams_config.initializer),
- activation_fn=_build_activation_fn(hyperparams_config.activation),
- normalizer_fn=batch_norm,
- normalizer_params=batch_norm_params) as sc:
- return sc
-
-
-def _build_activation_fn(activation_fn):
- """Builds a callable activation from config.
-
- Args:
- activation_fn: hyperparams_pb2.Hyperparams.activation
-
- Returns:
- Callable activation function.
-
- Raises:
- ValueError: On unknown activation function.
- """
- if activation_fn == hyperparams_pb2.Hyperparams.NONE:
- return None
- if activation_fn == hyperparams_pb2.Hyperparams.RELU:
- return tf.nn.relu
- if activation_fn == hyperparams_pb2.Hyperparams.RELU_6:
- return tf.nn.relu6
- raise ValueError('Unknown activation function: {}'.format(activation_fn))
-
-
-def _build_regularizer(regularizer):
- """Builds a tf-slim regularizer from config.
-
- Args:
- regularizer: hyperparams_pb2.Hyperparams.regularizer proto.
-
- Returns:
- tf-slim regularizer.
-
- Raises:
- ValueError: On unknown regularizer.
- """
- regularizer_oneof = regularizer.WhichOneof('regularizer_oneof')
- if regularizer_oneof == 'l1_regularizer':
- return slim.l1_regularizer(scale=float(regularizer.l1_regularizer.weight))
- if regularizer_oneof == 'l2_regularizer':
- return slim.l2_regularizer(scale=float(regularizer.l2_regularizer.weight))
- raise ValueError('Unknown regularizer function: {}'.format(regularizer_oneof))
-
-
-def _build_initializer(initializer):
- """Build a tf initializer from config.
-
- Args:
- initializer: hyperparams_pb2.Hyperparams.regularizer proto.
-
- Returns:
- tf initializer.
-
- Raises:
- ValueError: On unknown initializer.
- """
- initializer_oneof = initializer.WhichOneof('initializer_oneof')
- if initializer_oneof == 'truncated_normal_initializer':
- return tf.truncated_normal_initializer(
- mean=initializer.truncated_normal_initializer.mean,
- stddev=initializer.truncated_normal_initializer.stddev)
- if initializer_oneof == 'variance_scaling_initializer':
- enum_descriptor = (hyperparams_pb2.VarianceScalingInitializer.
- DESCRIPTOR.enum_types_by_name['Mode'])
- mode = enum_descriptor.values_by_number[initializer.
- variance_scaling_initializer.
- mode].name
- return slim.variance_scaling_initializer(
- factor=initializer.variance_scaling_initializer.factor,
- mode=mode,
- uniform=initializer.variance_scaling_initializer.uniform)
- raise ValueError('Unknown initializer function: {}'.format(
- initializer_oneof))
-
-
-def _build_batch_norm_params(batch_norm, is_training):
- """Build a dictionary of batch_norm params from config.
-
- Args:
- batch_norm: hyperparams_pb2.ConvHyperparams.batch_norm proto.
- is_training: Whether the models is in training mode.
-
- Returns:
- A dictionary containing batch_norm parameters.
- """
- batch_norm_params = {
- 'decay': batch_norm.decay,
- 'center': batch_norm.center,
- 'scale': batch_norm.scale,
- 'epsilon': batch_norm.epsilon,
- 'is_training': is_training and batch_norm.train,
- }
- return batch_norm_params
diff --git a/object_detection/builders/hyperparams_builder_test.py b/object_detection/builders/hyperparams_builder_test.py
deleted file mode 100644
index a9808076..00000000
--- a/object_detection/builders/hyperparams_builder_test.py
+++ /dev/null
@@ -1,449 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests object_detection.core.hyperparams_builder."""
-
-import numpy as np
-import tensorflow as tf
-
-from google.protobuf import text_format
-
-from object_detection.builders import hyperparams_builder
-from object_detection.protos import hyperparams_pb2
-
-slim = tf.contrib.slim
-
-
-class HyperparamsBuilderTest(tf.test.TestCase):
-
- # TODO: Make this a public api in slim arg_scope.py.
- def _get_scope_key(self, op):
- return getattr(op, '_key_op', str(op))
-
- def test_default_arg_scope_has_conv2d_op(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l1_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
- self.assertTrue(self._get_scope_key(slim.conv2d) in scope)
-
- def test_default_arg_scope_has_separable_conv2d_op(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l1_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
- self.assertTrue(self._get_scope_key(slim.separable_conv2d) in scope)
-
- def test_default_arg_scope_has_conv2d_transpose_op(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l1_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
- self.assertTrue(self._get_scope_key(slim.conv2d_transpose) in scope)
-
- def test_explicit_fc_op_arg_scope_has_fully_connected_op(self):
- conv_hyperparams_text_proto = """
- op: FC
- regularizer {
- l1_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
- self.assertTrue(self._get_scope_key(slim.fully_connected) in scope)
-
- def test_separable_conv2d_and_conv2d_and_transpose_have_same_parameters(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l1_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
- kwargs_1, kwargs_2, kwargs_3 = scope.values()
- self.assertDictEqual(kwargs_1, kwargs_2)
- self.assertDictEqual(kwargs_1, kwargs_3)
-
- def test_return_l1_regularized_weights(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l1_regularizer {
- weight: 0.5
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
- conv_scope_arguments = scope.values()[0]
- regularizer = conv_scope_arguments['weights_regularizer']
- weights = np.array([1., -1, 4., 2.])
- with self.test_session() as sess:
- result = sess.run(regularizer(tf.constant(weights)))
- self.assertAllClose(np.abs(weights).sum() * 0.5, result)
-
- def test_return_l2_regularizer_weights(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- weight: 0.42
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
- conv_scope_arguments = scope.values()[0]
-
- regularizer = conv_scope_arguments['weights_regularizer']
- weights = np.array([1., -1, 4., 2.])
- with self.test_session() as sess:
- result = sess.run(regularizer(tf.constant(weights)))
- self.assertAllClose(np.power(weights, 2).sum() / 2.0 * 0.42, result)
-
- def test_return_non_default_batch_norm_params_with_train_during_train(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- batch_norm {
- decay: 0.7
- center: false
- scale: true
- epsilon: 0.03
- train: true
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
- conv_scope_arguments = scope.values()[0]
- self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
- batch_norm_params = conv_scope_arguments['normalizer_params']
- self.assertAlmostEqual(batch_norm_params['decay'], 0.7)
- self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
- self.assertFalse(batch_norm_params['center'])
- self.assertTrue(batch_norm_params['scale'])
- self.assertTrue(batch_norm_params['is_training'])
-
- def test_return_batch_norm_params_with_notrain_during_eval(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- batch_norm {
- decay: 0.7
- center: false
- scale: true
- epsilon: 0.03
- train: true
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=False)
- conv_scope_arguments = scope.values()[0]
- self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
- batch_norm_params = conv_scope_arguments['normalizer_params']
- self.assertAlmostEqual(batch_norm_params['decay'], 0.7)
- self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
- self.assertFalse(batch_norm_params['center'])
- self.assertTrue(batch_norm_params['scale'])
- self.assertFalse(batch_norm_params['is_training'])
-
- def test_return_batch_norm_params_with_notrain_when_train_is_false(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- batch_norm {
- decay: 0.7
- center: false
- scale: true
- epsilon: 0.03
- train: false
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
- conv_scope_arguments = scope.values()[0]
- self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
- batch_norm_params = conv_scope_arguments['normalizer_params']
- self.assertAlmostEqual(batch_norm_params['decay'], 0.7)
- self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
- self.assertFalse(batch_norm_params['center'])
- self.assertTrue(batch_norm_params['scale'])
- self.assertFalse(batch_norm_params['is_training'])
-
- def test_do_not_use_batch_norm_if_default(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
- conv_scope_arguments = scope.values()[0]
- self.assertEqual(conv_scope_arguments['normalizer_fn'], None)
- self.assertEqual(conv_scope_arguments['normalizer_params'], None)
-
- def test_use_none_activation(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- activation: NONE
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
- conv_scope_arguments = scope.values()[0]
- self.assertEqual(conv_scope_arguments['activation_fn'], None)
-
- def test_use_relu_activation(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- activation: RELU
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
- conv_scope_arguments = scope.values()[0]
- self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu)
-
- def test_use_relu_6_activation(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- activation: RELU_6
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
- conv_scope_arguments = scope.values()[0]
- self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu6)
-
- def _assert_variance_in_range(self, initializer, shape, variance,
- tol=1e-2):
- with tf.Graph().as_default() as g:
- with self.test_session(graph=g) as sess:
- var = tf.get_variable(
- name='test',
- shape=shape,
- dtype=tf.float32,
- initializer=initializer)
- sess.run(tf.global_variables_initializer())
- values = sess.run(var)
- self.assertAllClose(np.var(values), variance, tol, tol)
-
- def test_variance_in_range_with_variance_scaling_initializer_fan_in(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- variance_scaling_initializer {
- factor: 2.0
- mode: FAN_IN
- uniform: false
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
- conv_scope_arguments = scope.values()[0]
- initializer = conv_scope_arguments['weights_initializer']
- self._assert_variance_in_range(initializer, shape=[100, 40],
- variance=2. / 100.)
-
- def test_variance_in_range_with_variance_scaling_initializer_fan_out(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- variance_scaling_initializer {
- factor: 2.0
- mode: FAN_OUT
- uniform: false
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
- conv_scope_arguments = scope.values()[0]
- initializer = conv_scope_arguments['weights_initializer']
- self._assert_variance_in_range(initializer, shape=[100, 40],
- variance=2. / 40.)
-
- def test_variance_in_range_with_variance_scaling_initializer_fan_avg(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- variance_scaling_initializer {
- factor: 2.0
- mode: FAN_AVG
- uniform: false
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
- conv_scope_arguments = scope.values()[0]
- initializer = conv_scope_arguments['weights_initializer']
- self._assert_variance_in_range(initializer, shape=[100, 40],
- variance=4. / (100. + 40.))
-
- def test_variance_in_range_with_variance_scaling_initializer_uniform(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- variance_scaling_initializer {
- factor: 2.0
- mode: FAN_IN
- uniform: true
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
- conv_scope_arguments = scope.values()[0]
- initializer = conv_scope_arguments['weights_initializer']
- self._assert_variance_in_range(initializer, shape=[100, 40],
- variance=2. / 100.)
-
- def test_variance_in_range_with_truncated_normal_initializer(self):
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- mean: 0.0
- stddev: 0.8
- }
- }
- """
- conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
- scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
- conv_scope_arguments = scope.values()[0]
- initializer = conv_scope_arguments['weights_initializer']
- self._assert_variance_in_range(initializer, shape=[100, 40],
- variance=0.49, tol=1e-1)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/builders/image_resizer_builder.py b/object_detection/builders/image_resizer_builder.py
deleted file mode 100644
index 9d81c7d3..00000000
--- a/object_detection/builders/image_resizer_builder.py
+++ /dev/null
@@ -1,96 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Builder function for image resizing operations."""
-import functools
-import tensorflow as tf
-
-from object_detection.core import preprocessor
-from object_detection.protos import image_resizer_pb2
-
-
-def _tf_resize_method(resize_method):
- """Maps image resize method from enumeration type to TensorFlow.
-
- Args:
- resize_method: The resize_method attribute of keep_aspect_ratio_resizer or
- fixed_shape_resizer.
-
- Returns:
- method: The corresponding TensorFlow ResizeMethod.
-
- Raises:
- ValueError: if `resize_method` is of unknown type.
- """
- dict_method = {
- image_resizer_pb2.BILINEAR:
- tf.image.ResizeMethod.BILINEAR,
- image_resizer_pb2.NEAREST_NEIGHBOR:
- tf.image.ResizeMethod.NEAREST_NEIGHBOR,
- image_resizer_pb2.BICUBIC:
- tf.image.ResizeMethod.BICUBIC,
- image_resizer_pb2.AREA:
- tf.image.ResizeMethod.AREA
- }
- if resize_method in dict_method:
- return dict_method[resize_method]
- else:
- raise ValueError('Unknown resize_method')
-
-
-def build(image_resizer_config):
- """Builds callable for image resizing operations.
-
- Args:
- image_resizer_config: image_resizer.proto object containing parameters for
- an image resizing operation.
-
- Returns:
- image_resizer_fn: Callable for image resizing. This callable always takes
- a rank-3 image tensor (corresponding to a single image) and returns a
- rank-3 image tensor, possibly with new spatial dimensions.
-
- Raises:
- ValueError: if `image_resizer_config` is of incorrect type.
- ValueError: if `image_resizer_config.image_resizer_oneof` is of expected
- type.
- ValueError: if min_dimension > max_dimension when keep_aspect_ratio_resizer
- is used.
- """
- if not isinstance(image_resizer_config, image_resizer_pb2.ImageResizer):
- raise ValueError('image_resizer_config not of type '
- 'image_resizer_pb2.ImageResizer.')
-
- if image_resizer_config.WhichOneof(
- 'image_resizer_oneof') == 'keep_aspect_ratio_resizer':
- keep_aspect_ratio_config = image_resizer_config.keep_aspect_ratio_resizer
- if not (keep_aspect_ratio_config.min_dimension <=
- keep_aspect_ratio_config.max_dimension):
- raise ValueError('min_dimension > max_dimension')
- method = _tf_resize_method(keep_aspect_ratio_config.resize_method)
- return functools.partial(
- preprocessor.resize_to_range,
- min_dimension=keep_aspect_ratio_config.min_dimension,
- max_dimension=keep_aspect_ratio_config.max_dimension,
- method=method)
- if image_resizer_config.WhichOneof(
- 'image_resizer_oneof') == 'fixed_shape_resizer':
- fixed_shape_resizer_config = image_resizer_config.fixed_shape_resizer
- method = _tf_resize_method(fixed_shape_resizer_config.resize_method)
- return functools.partial(
- preprocessor.resize_image,
- new_height=fixed_shape_resizer_config.height,
- new_width=fixed_shape_resizer_config.width,
- method=method)
- raise ValueError('Invalid image resizer option.')
diff --git a/object_detection/builders/image_resizer_builder_test.py b/object_detection/builders/image_resizer_builder_test.py
deleted file mode 100644
index 4ef557a5..00000000
--- a/object_detection/builders/image_resizer_builder_test.py
+++ /dev/null
@@ -1,96 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for object_detection.builders.image_resizer_builder."""
-import numpy as np
-import tensorflow as tf
-from google.protobuf import text_format
-from object_detection.builders import image_resizer_builder
-from object_detection.protos import image_resizer_pb2
-
-
-class ImageResizerBuilderTest(tf.test.TestCase):
-
- def _shape_of_resized_random_image_given_text_proto(self, input_shape,
- text_proto):
- image_resizer_config = image_resizer_pb2.ImageResizer()
- text_format.Merge(text_proto, image_resizer_config)
- image_resizer_fn = image_resizer_builder.build(image_resizer_config)
- images = tf.to_float(
- tf.random_uniform(input_shape, minval=0, maxval=255, dtype=tf.int32))
- resized_images = image_resizer_fn(images)
- with self.test_session() as sess:
- return sess.run(resized_images).shape
-
- def test_built_keep_aspect_ratio_resizer_returns_expected_shape(self):
- image_resizer_text_proto = """
- keep_aspect_ratio_resizer {
- min_dimension: 10
- max_dimension: 20
- }
- """
- input_shape = (50, 25, 3)
- expected_output_shape = (20, 10, 3)
- output_shape = self._shape_of_resized_random_image_given_text_proto(
- input_shape, image_resizer_text_proto)
- self.assertEqual(output_shape, expected_output_shape)
-
- def test_built_fixed_shape_resizer_returns_expected_shape(self):
- image_resizer_text_proto = """
- fixed_shape_resizer {
- height: 10
- width: 20
- }
- """
- input_shape = (50, 25, 3)
- expected_output_shape = (10, 20, 3)
- output_shape = self._shape_of_resized_random_image_given_text_proto(
- input_shape, image_resizer_text_proto)
- self.assertEqual(output_shape, expected_output_shape)
-
- def test_raises_error_on_invalid_input(self):
- invalid_input = 'invalid_input'
- with self.assertRaises(ValueError):
- image_resizer_builder.build(invalid_input)
-
- def _resized_image_given_text_proto(self, image, text_proto):
- image_resizer_config = image_resizer_pb2.ImageResizer()
- text_format.Merge(text_proto, image_resizer_config)
- image_resizer_fn = image_resizer_builder.build(image_resizer_config)
- image_placeholder = tf.placeholder(tf.uint8, [1, None, None, 3])
- resized_image = image_resizer_fn(image_placeholder)
- with self.test_session() as sess:
- return sess.run(resized_image, feed_dict={image_placeholder: image})
-
- def test_fixed_shape_resizer_nearest_neighbor_method(self):
- image_resizer_text_proto = """
- fixed_shape_resizer {
- height: 1
- width: 1
- resize_method: NEAREST_NEIGHBOR
- }
- """
- image = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
- image = np.expand_dims(image, axis=2)
- image = np.tile(image, (1, 1, 3))
- image = np.expand_dims(image, axis=0)
- resized_image = self._resized_image_given_text_proto(
- image, image_resizer_text_proto)
- vals = np.unique(resized_image).tolist()
- self.assertEqual(len(vals), 1)
- self.assertEqual(vals[0], 1)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/builders/input_reader_builder.py b/object_detection/builders/input_reader_builder.py
deleted file mode 100644
index 530e879c..00000000
--- a/object_detection/builders/input_reader_builder.py
+++ /dev/null
@@ -1,75 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Input reader builder.
-
-Creates data sources for DetectionModels from an InputReader config. See
-input_reader.proto for options.
-
-Note: If users wishes to also use their own InputReaders with the Object
-Detection configuration framework, they should define their own builder function
-that wraps the build function.
-"""
-
-import tensorflow as tf
-
-from object_detection.data_decoders import tf_example_decoder
-from object_detection.protos import input_reader_pb2
-
-parallel_reader = tf.contrib.slim.parallel_reader
-
-
-def build(input_reader_config):
- """Builds a tensor dictionary based on the InputReader config.
-
- Args:
- input_reader_config: A input_reader_pb2.InputReader object.
-
- Returns:
- A tensor dict based on the input_reader_config.
-
- Raises:
- ValueError: On invalid input reader proto.
- ValueError: If no input paths are specified.
- """
- if not isinstance(input_reader_config, input_reader_pb2.InputReader):
- raise ValueError('input_reader_config not of type '
- 'input_reader_pb2.InputReader.')
-
- if input_reader_config.WhichOneof('input_reader') == 'tf_record_input_reader':
- config = input_reader_config.tf_record_input_reader
- if not config.input_path:
- raise ValueError('At least one input path must be specified in '
- '`input_reader_config`.')
- _, string_tensor = parallel_reader.parallel_read(
- config.input_path[:], # Convert `RepeatedScalarContainer` to list.
- reader_class=tf.TFRecordReader,
- num_epochs=(input_reader_config.num_epochs
- if input_reader_config.num_epochs else None),
- num_readers=input_reader_config.num_readers,
- shuffle=input_reader_config.shuffle,
- dtypes=[tf.string, tf.string],
- capacity=input_reader_config.queue_capacity,
- min_after_dequeue=input_reader_config.min_after_dequeue)
-
- label_map_proto_file = None
- if input_reader_config.HasField('label_map_path'):
- label_map_proto_file = input_reader_config.label_map_path
- decoder = tf_example_decoder.TfExampleDecoder(
- load_instance_masks=input_reader_config.load_instance_masks,
- label_map_proto_file=label_map_proto_file)
- return decoder.decode(string_tensor)
-
- raise ValueError('Unsupported input_reader_config.')
diff --git a/object_detection/builders/input_reader_builder_test.py b/object_detection/builders/input_reader_builder_test.py
deleted file mode 100644
index f09f60e5..00000000
--- a/object_detection/builders/input_reader_builder_test.py
+++ /dev/null
@@ -1,144 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for input_reader_builder."""
-
-import os
-import numpy as np
-import tensorflow as tf
-
-from google.protobuf import text_format
-
-from tensorflow.core.example import example_pb2
-from tensorflow.core.example import feature_pb2
-from object_detection.builders import input_reader_builder
-from object_detection.core import standard_fields as fields
-from object_detection.protos import input_reader_pb2
-
-
-class InputReaderBuilderTest(tf.test.TestCase):
-
- def create_tf_record(self):
- path = os.path.join(self.get_temp_dir(), 'tfrecord')
- writer = tf.python_io.TFRecordWriter(path)
-
- image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
- flat_mask = (4 * 5) * [1.0]
- with self.test_session():
- encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
- example = example_pb2.Example(features=feature_pb2.Features(feature={
- 'image/encoded': feature_pb2.Feature(
- bytes_list=feature_pb2.BytesList(value=[encoded_jpeg])),
- 'image/format': feature_pb2.Feature(
- bytes_list=feature_pb2.BytesList(value=['jpeg'.encode('utf-8')])),
- 'image/height': feature_pb2.Feature(
- int64_list=feature_pb2.Int64List(value=[4])),
- 'image/width': feature_pb2.Feature(
- int64_list=feature_pb2.Int64List(value=[5])),
- 'image/object/bbox/xmin': feature_pb2.Feature(
- float_list=feature_pb2.FloatList(value=[0.0])),
- 'image/object/bbox/xmax': feature_pb2.Feature(
- float_list=feature_pb2.FloatList(value=[1.0])),
- 'image/object/bbox/ymin': feature_pb2.Feature(
- float_list=feature_pb2.FloatList(value=[0.0])),
- 'image/object/bbox/ymax': feature_pb2.Feature(
- float_list=feature_pb2.FloatList(value=[1.0])),
- 'image/object/class/label': feature_pb2.Feature(
- int64_list=feature_pb2.Int64List(value=[2])),
- 'image/object/mask': feature_pb2.Feature(
- float_list=feature_pb2.FloatList(value=flat_mask)),
- }))
- writer.write(example.SerializeToString())
- writer.close()
-
- return path
-
- def test_build_tf_record_input_reader(self):
- tf_record_path = self.create_tf_record()
-
- input_reader_text_proto = """
- shuffle: false
- num_readers: 1
- tf_record_input_reader {{
- input_path: '{0}'
- }}
- """.format(tf_record_path)
- input_reader_proto = input_reader_pb2.InputReader()
- text_format.Merge(input_reader_text_proto, input_reader_proto)
- tensor_dict = input_reader_builder.build(input_reader_proto)
-
- sv = tf.train.Supervisor(logdir=self.get_temp_dir())
- with sv.prepare_or_wait_for_session() as sess:
- sv.start_queue_runners(sess)
- output_dict = sess.run(tensor_dict)
-
- self.assertTrue(fields.InputDataFields.groundtruth_instance_masks
- not in output_dict)
- self.assertEquals(
- (4, 5, 3), output_dict[fields.InputDataFields.image].shape)
- self.assertEquals(
- [2], output_dict[fields.InputDataFields.groundtruth_classes])
- self.assertEquals(
- (1, 4), output_dict[fields.InputDataFields.groundtruth_boxes].shape)
- self.assertAllEqual(
- [0.0, 0.0, 1.0, 1.0],
- output_dict[fields.InputDataFields.groundtruth_boxes][0])
-
- def test_build_tf_record_input_reader_and_load_instance_masks(self):
- tf_record_path = self.create_tf_record()
-
- input_reader_text_proto = """
- shuffle: false
- num_readers: 1
- load_instance_masks: true
- tf_record_input_reader {{
- input_path: '{0}'
- }}
- """.format(tf_record_path)
- input_reader_proto = input_reader_pb2.InputReader()
- text_format.Merge(input_reader_text_proto, input_reader_proto)
- tensor_dict = input_reader_builder.build(input_reader_proto)
-
- sv = tf.train.Supervisor(logdir=self.get_temp_dir())
- with sv.prepare_or_wait_for_session() as sess:
- sv.start_queue_runners(sess)
- output_dict = sess.run(tensor_dict)
-
- self.assertEquals(
- (4, 5, 3), output_dict[fields.InputDataFields.image].shape)
- self.assertEquals(
- [2], output_dict[fields.InputDataFields.groundtruth_classes])
- self.assertEquals(
- (1, 4), output_dict[fields.InputDataFields.groundtruth_boxes].shape)
- self.assertAllEqual(
- [0.0, 0.0, 1.0, 1.0],
- output_dict[fields.InputDataFields.groundtruth_boxes][0])
- self.assertAllEqual(
- (1, 4, 5),
- output_dict[fields.InputDataFields.groundtruth_instance_masks].shape)
-
- def test_raises_error_with_no_input_paths(self):
- input_reader_text_proto = """
- shuffle: false
- num_readers: 1
- load_instance_masks: true
- """
- input_reader_proto = input_reader_pb2.InputReader()
- text_format.Merge(input_reader_text_proto, input_reader_proto)
- with self.assertRaises(ValueError):
- input_reader_builder.build(input_reader_proto)
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/builders/losses_builder.py b/object_detection/builders/losses_builder.py
deleted file mode 100644
index c2b0a1f1..00000000
--- a/object_detection/builders/losses_builder.py
+++ /dev/null
@@ -1,211 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""A function to build localization and classification losses from config."""
-
-from object_detection.core import losses
-from object_detection.protos import losses_pb2
-
-
-def build(loss_config):
- """Build losses based on the config.
-
- Builds classification, localization losses and optionally a hard example miner
- based on the config.
-
- Args:
- loss_config: A losses_pb2.Loss object.
-
- Returns:
- classification_loss: Classification loss object.
- localization_loss: Localization loss object.
- classification_weight: Classification loss weight.
- localization_weight: Localization loss weight.
- hard_example_miner: Hard example miner object.
-
- Raises:
- ValueError: If hard_example_miner is used with sigmoid_focal_loss.
- """
- classification_loss = _build_classification_loss(
- loss_config.classification_loss)
- localization_loss = _build_localization_loss(
- loss_config.localization_loss)
- classification_weight = loss_config.classification_weight
- localization_weight = loss_config.localization_weight
- hard_example_miner = None
- if loss_config.HasField('hard_example_miner'):
- if (loss_config.classification_loss.WhichOneof('classification_loss') ==
- 'weighted_sigmoid_focal'):
- raise ValueError('HardExampleMiner should not be used with sigmoid focal '
- 'loss')
- hard_example_miner = build_hard_example_miner(
- loss_config.hard_example_miner,
- classification_weight,
- localization_weight)
- return (classification_loss, localization_loss,
- classification_weight,
- localization_weight, hard_example_miner)
-
-
-def build_hard_example_miner(config,
- classification_weight,
- localization_weight):
- """Builds hard example miner based on the config.
-
- Args:
- config: A losses_pb2.HardExampleMiner object.
- classification_weight: Classification loss weight.
- localization_weight: Localization loss weight.
-
- Returns:
- Hard example miner.
-
- """
- loss_type = None
- if config.loss_type == losses_pb2.HardExampleMiner.BOTH:
- loss_type = 'both'
- if config.loss_type == losses_pb2.HardExampleMiner.CLASSIFICATION:
- loss_type = 'cls'
- if config.loss_type == losses_pb2.HardExampleMiner.LOCALIZATION:
- loss_type = 'loc'
-
- max_negatives_per_positive = None
- num_hard_examples = None
- if config.max_negatives_per_positive > 0:
- max_negatives_per_positive = config.max_negatives_per_positive
- if config.num_hard_examples > 0:
- num_hard_examples = config.num_hard_examples
- hard_example_miner = losses.HardExampleMiner(
- num_hard_examples=num_hard_examples,
- iou_threshold=config.iou_threshold,
- loss_type=loss_type,
- cls_loss_weight=classification_weight,
- loc_loss_weight=localization_weight,
- max_negatives_per_positive=max_negatives_per_positive,
- min_negatives_per_image=config.min_negatives_per_image)
- return hard_example_miner
-
-
-def build_faster_rcnn_classification_loss(loss_config):
- """Builds a classification loss for Faster RCNN based on the loss config.
-
- Args:
- loss_config: A losses_pb2.ClassificationLoss object.
-
- Returns:
- Loss based on the config.
-
- Raises:
- ValueError: On invalid loss_config.
- """
- if not isinstance(loss_config, losses_pb2.ClassificationLoss):
- raise ValueError('loss_config not of type losses_pb2.ClassificationLoss.')
-
- loss_type = loss_config.WhichOneof('classification_loss')
-
- if loss_type == 'weighted_sigmoid':
- config = loss_config.weighted_sigmoid
- return losses.WeightedSigmoidClassificationLoss(
- anchorwise_output=config.anchorwise_output)
- if loss_type == 'weighted_softmax':
- config = loss_config.weighted_softmax
- return losses.WeightedSoftmaxClassificationLoss(
- anchorwise_output=config.anchorwise_output)
-
- # By default, Faster RCNN second stage classifier uses Softmax loss
- # with anchor-wise outputs.
- return losses.WeightedSoftmaxClassificationLoss(
- anchorwise_output=True)
-
-
-def _build_localization_loss(loss_config):
- """Builds a localization loss based on the loss config.
-
- Args:
- loss_config: A losses_pb2.LocalizationLoss object.
-
- Returns:
- Loss based on the config.
-
- Raises:
- ValueError: On invalid loss_config.
- """
- if not isinstance(loss_config, losses_pb2.LocalizationLoss):
- raise ValueError('loss_config not of type losses_pb2.LocalizationLoss.')
-
- loss_type = loss_config.WhichOneof('localization_loss')
-
- if loss_type == 'weighted_l2':
- config = loss_config.weighted_l2
- return losses.WeightedL2LocalizationLoss(
- anchorwise_output=config.anchorwise_output)
-
- if loss_type == 'weighted_smooth_l1':
- config = loss_config.weighted_smooth_l1
- return losses.WeightedSmoothL1LocalizationLoss(
- anchorwise_output=config.anchorwise_output)
-
- if loss_type == 'weighted_iou':
- return losses.WeightedIOULocalizationLoss()
-
- raise ValueError('Empty loss config.')
-
-
-def _build_classification_loss(loss_config):
- """Builds a classification loss based on the loss config.
-
- Args:
- loss_config: A losses_pb2.ClassificationLoss object.
-
- Returns:
- Loss based on the config.
-
- Raises:
- ValueError: On invalid loss_config.
- """
- if not isinstance(loss_config, losses_pb2.ClassificationLoss):
- raise ValueError('loss_config not of type losses_pb2.ClassificationLoss.')
-
- loss_type = loss_config.WhichOneof('classification_loss')
-
- if loss_type == 'weighted_sigmoid':
- config = loss_config.weighted_sigmoid
- return losses.WeightedSigmoidClassificationLoss(
- anchorwise_output=config.anchorwise_output)
-
- if loss_type == 'weighted_sigmoid_focal':
- config = loss_config.weighted_sigmoid_focal
- alpha = None
- if config.HasField('alpha'):
- alpha = config.alpha
- return losses.SigmoidFocalClassificationLoss(
- anchorwise_output=config.anchorwise_output,
- gamma=config.gamma,
- alpha=alpha)
-
- if loss_type == 'weighted_softmax':
- config = loss_config.weighted_softmax
- return losses.WeightedSoftmaxClassificationLoss(
- anchorwise_output=config.anchorwise_output,
- logit_scale=config.logit_scale)
-
- if loss_type == 'bootstrapped_sigmoid':
- config = loss_config.bootstrapped_sigmoid
- return losses.BootstrappedSigmoidClassificationLoss(
- alpha=config.alpha,
- bootstrap_type=('hard' if config.hard_bootstrap else 'soft'),
- anchorwise_output=config.anchorwise_output)
-
- raise ValueError('Empty loss config.')
diff --git a/object_detection/builders/losses_builder_test.py b/object_detection/builders/losses_builder_test.py
deleted file mode 100644
index d4105203..00000000
--- a/object_detection/builders/losses_builder_test.py
+++ /dev/null
@@ -1,438 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for losses_builder."""
-
-import tensorflow as tf
-
-from google.protobuf import text_format
-from object_detection.builders import losses_builder
-from object_detection.core import losses
-from object_detection.protos import losses_pb2
-
-
-class LocalizationLossBuilderTest(tf.test.TestCase):
-
- def test_build_weighted_l2_localization_loss(self):
- losses_text_proto = """
- localization_loss {
- weighted_l2 {
- }
- }
- classification_loss {
- weighted_softmax {
- }
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- _, localization_loss, _, _, _ = losses_builder.build(losses_proto)
- self.assertTrue(isinstance(localization_loss,
- losses.WeightedL2LocalizationLoss))
-
- def test_build_weighted_smooth_l1_localization_loss(self):
- losses_text_proto = """
- localization_loss {
- weighted_smooth_l1 {
- }
- }
- classification_loss {
- weighted_softmax {
- }
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- _, localization_loss, _, _, _ = losses_builder.build(losses_proto)
- self.assertTrue(isinstance(localization_loss,
- losses.WeightedSmoothL1LocalizationLoss))
-
- def test_build_weighted_iou_localization_loss(self):
- losses_text_proto = """
- localization_loss {
- weighted_iou {
- }
- }
- classification_loss {
- weighted_softmax {
- }
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- _, localization_loss, _, _, _ = losses_builder.build(losses_proto)
- self.assertTrue(isinstance(localization_loss,
- losses.WeightedIOULocalizationLoss))
-
- def test_anchorwise_output(self):
- losses_text_proto = """
- localization_loss {
- weighted_smooth_l1 {
- anchorwise_output: true
- }
- }
- classification_loss {
- weighted_softmax {
- }
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- _, localization_loss, _, _, _ = losses_builder.build(losses_proto)
- self.assertTrue(isinstance(localization_loss,
- losses.WeightedSmoothL1LocalizationLoss))
- predictions = tf.constant([[[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]]])
- targets = tf.constant([[[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]]])
- weights = tf.constant([[1.0, 1.0]])
- loss = localization_loss(predictions, targets, weights=weights)
- self.assertEqual(loss.shape, [1, 2])
-
- def test_raise_error_on_empty_localization_config(self):
- losses_text_proto = """
- classification_loss {
- weighted_softmax {
- }
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- with self.assertRaises(ValueError):
- losses_builder._build_localization_loss(losses_proto)
-
-
-class ClassificationLossBuilderTest(tf.test.TestCase):
-
- def test_build_weighted_sigmoid_classification_loss(self):
- losses_text_proto = """
- classification_loss {
- weighted_sigmoid {
- }
- }
- localization_loss {
- weighted_l2 {
- }
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- classification_loss, _, _, _, _ = losses_builder.build(losses_proto)
- self.assertTrue(isinstance(classification_loss,
- losses.WeightedSigmoidClassificationLoss))
-
- def test_build_weighted_sigmoid_focal_classification_loss(self):
- losses_text_proto = """
- classification_loss {
- weighted_sigmoid_focal {
- }
- }
- localization_loss {
- weighted_l2 {
- }
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- classification_loss, _, _, _, _ = losses_builder.build(losses_proto)
- self.assertTrue(isinstance(classification_loss,
- losses.SigmoidFocalClassificationLoss))
- self.assertAlmostEqual(classification_loss._alpha, None)
- self.assertAlmostEqual(classification_loss._gamma, 2.0)
-
- def test_build_weighted_sigmoid_focal_loss_non_default(self):
- losses_text_proto = """
- classification_loss {
- weighted_sigmoid_focal {
- alpha: 0.25
- gamma: 3.0
- }
- }
- localization_loss {
- weighted_l2 {
- }
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- classification_loss, _, _, _, _ = losses_builder.build(losses_proto)
- self.assertTrue(isinstance(classification_loss,
- losses.SigmoidFocalClassificationLoss))
- self.assertAlmostEqual(classification_loss._alpha, 0.25)
- self.assertAlmostEqual(classification_loss._gamma, 3.0)
-
- def test_build_weighted_softmax_classification_loss(self):
- losses_text_proto = """
- classification_loss {
- weighted_softmax {
- }
- }
- localization_loss {
- weighted_l2 {
- }
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- classification_loss, _, _, _, _ = losses_builder.build(losses_proto)
- self.assertTrue(isinstance(classification_loss,
- losses.WeightedSoftmaxClassificationLoss))
-
- def test_build_weighted_softmax_classification_loss_with_logit_scale(self):
- losses_text_proto = """
- classification_loss {
- weighted_softmax {
- logit_scale: 2.0
- }
- }
- localization_loss {
- weighted_l2 {
- }
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- classification_loss, _, _, _, _ = losses_builder.build(losses_proto)
- self.assertTrue(isinstance(classification_loss,
- losses.WeightedSoftmaxClassificationLoss))
-
- def test_build_bootstrapped_sigmoid_classification_loss(self):
- losses_text_proto = """
- classification_loss {
- bootstrapped_sigmoid {
- alpha: 0.5
- }
- }
- localization_loss {
- weighted_l2 {
- }
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- classification_loss, _, _, _, _ = losses_builder.build(losses_proto)
- self.assertTrue(isinstance(classification_loss,
- losses.BootstrappedSigmoidClassificationLoss))
-
- def test_anchorwise_output(self):
- losses_text_proto = """
- classification_loss {
- weighted_sigmoid {
- anchorwise_output: true
- }
- }
- localization_loss {
- weighted_l2 {
- }
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- classification_loss, _, _, _, _ = losses_builder.build(losses_proto)
- self.assertTrue(isinstance(classification_loss,
- losses.WeightedSigmoidClassificationLoss))
- predictions = tf.constant([[[0.0, 1.0, 0.0], [0.0, 0.5, 0.5]]])
- targets = tf.constant([[[0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]])
- weights = tf.constant([[1.0, 1.0]])
- loss = classification_loss(predictions, targets, weights=weights)
- self.assertEqual(loss.shape, [1, 2])
-
- def test_raise_error_on_empty_config(self):
- losses_text_proto = """
- localization_loss {
- weighted_l2 {
- }
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- with self.assertRaises(ValueError):
- losses_builder.build(losses_proto)
-
-
-class HardExampleMinerBuilderTest(tf.test.TestCase):
-
- def test_do_not_build_hard_example_miner_by_default(self):
- losses_text_proto = """
- localization_loss {
- weighted_l2 {
- }
- }
- classification_loss {
- weighted_softmax {
- }
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- _, _, _, _, hard_example_miner = losses_builder.build(losses_proto)
- self.assertEqual(hard_example_miner, None)
-
- def test_build_hard_example_miner_for_classification_loss(self):
- losses_text_proto = """
- localization_loss {
- weighted_l2 {
- }
- }
- classification_loss {
- weighted_softmax {
- }
- }
- hard_example_miner {
- loss_type: CLASSIFICATION
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- _, _, _, _, hard_example_miner = losses_builder.build(losses_proto)
- self.assertTrue(isinstance(hard_example_miner, losses.HardExampleMiner))
- self.assertEqual(hard_example_miner._loss_type, 'cls')
-
- def test_build_hard_example_miner_for_localization_loss(self):
- losses_text_proto = """
- localization_loss {
- weighted_l2 {
- }
- }
- classification_loss {
- weighted_softmax {
- }
- }
- hard_example_miner {
- loss_type: LOCALIZATION
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- _, _, _, _, hard_example_miner = losses_builder.build(losses_proto)
- self.assertTrue(isinstance(hard_example_miner, losses.HardExampleMiner))
- self.assertEqual(hard_example_miner._loss_type, 'loc')
-
- def test_build_hard_example_miner_with_non_default_values(self):
- losses_text_proto = """
- localization_loss {
- weighted_l2 {
- }
- }
- classification_loss {
- weighted_softmax {
- }
- }
- hard_example_miner {
- num_hard_examples: 32
- iou_threshold: 0.5
- loss_type: LOCALIZATION
- max_negatives_per_positive: 10
- min_negatives_per_image: 3
- }
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- _, _, _, _, hard_example_miner = losses_builder.build(losses_proto)
- self.assertTrue(isinstance(hard_example_miner, losses.HardExampleMiner))
- self.assertEqual(hard_example_miner._num_hard_examples, 32)
- self.assertAlmostEqual(hard_example_miner._iou_threshold, 0.5)
- self.assertEqual(hard_example_miner._max_negatives_per_positive, 10)
- self.assertEqual(hard_example_miner._min_negatives_per_image, 3)
-
-
-class LossBuilderTest(tf.test.TestCase):
-
- def test_build_all_loss_parameters(self):
- losses_text_proto = """
- localization_loss {
- weighted_l2 {
- }
- }
- classification_loss {
- weighted_softmax {
- }
- }
- hard_example_miner {
- }
- classification_weight: 0.8
- localization_weight: 0.2
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- (classification_loss, localization_loss,
- classification_weight, localization_weight,
- hard_example_miner) = losses_builder.build(losses_proto)
- self.assertTrue(isinstance(hard_example_miner, losses.HardExampleMiner))
- self.assertTrue(isinstance(classification_loss,
- losses.WeightedSoftmaxClassificationLoss))
- self.assertTrue(isinstance(localization_loss,
- losses.WeightedL2LocalizationLoss))
- self.assertAlmostEqual(classification_weight, 0.8)
- self.assertAlmostEqual(localization_weight, 0.2)
-
- def test_raise_error_when_both_focal_loss_and_hard_example_miner(self):
- losses_text_proto = """
- localization_loss {
- weighted_l2 {
- }
- }
- classification_loss {
- weighted_sigmoid_focal {
- }
- }
- hard_example_miner {
- }
- classification_weight: 0.8
- localization_weight: 0.2
- """
- losses_proto = losses_pb2.Loss()
- text_format.Merge(losses_text_proto, losses_proto)
- with self.assertRaises(ValueError):
- losses_builder.build(losses_proto)
-
-
-class FasterRcnnClassificationLossBuilderTest(tf.test.TestCase):
-
- def test_build_sigmoid_loss(self):
- losses_text_proto = """
- weighted_sigmoid {
- }
- """
- losses_proto = losses_pb2.ClassificationLoss()
- text_format.Merge(losses_text_proto, losses_proto)
- classification_loss = losses_builder.build_faster_rcnn_classification_loss(
- losses_proto)
- self.assertTrue(isinstance(classification_loss,
- losses.WeightedSigmoidClassificationLoss))
-
- def test_build_softmax_loss(self):
- losses_text_proto = """
- weighted_softmax {
- }
- """
- losses_proto = losses_pb2.ClassificationLoss()
- text_format.Merge(losses_text_proto, losses_proto)
- classification_loss = losses_builder.build_faster_rcnn_classification_loss(
- losses_proto)
- self.assertTrue(isinstance(classification_loss,
- losses.WeightedSoftmaxClassificationLoss))
-
- def test_build_softmax_loss_by_default(self):
- losses_text_proto = """
- """
- losses_proto = losses_pb2.ClassificationLoss()
- text_format.Merge(losses_text_proto, losses_proto)
- classification_loss = losses_builder.build_faster_rcnn_classification_loss(
- losses_proto)
- self.assertTrue(isinstance(classification_loss,
- losses.WeightedSoftmaxClassificationLoss))
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/builders/matcher_builder.py b/object_detection/builders/matcher_builder.py
deleted file mode 100644
index 6ec49da9..00000000
--- a/object_detection/builders/matcher_builder.py
+++ /dev/null
@@ -1,51 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""A function to build an object detection matcher from configuration."""
-
-from object_detection.matchers import argmax_matcher
-from object_detection.matchers import bipartite_matcher
-from object_detection.protos import matcher_pb2
-
-
-def build(matcher_config):
- """Builds a matcher object based on the matcher config.
-
- Args:
- matcher_config: A matcher.proto object containing the config for the desired
- Matcher.
-
- Returns:
- Matcher based on the config.
-
- Raises:
- ValueError: On empty matcher proto.
- """
- if not isinstance(matcher_config, matcher_pb2.Matcher):
- raise ValueError('matcher_config not of type matcher_pb2.Matcher.')
- if matcher_config.WhichOneof('matcher_oneof') == 'argmax_matcher':
- matcher = matcher_config.argmax_matcher
- matched_threshold = unmatched_threshold = None
- if not matcher.ignore_thresholds:
- matched_threshold = matcher.matched_threshold
- unmatched_threshold = matcher.unmatched_threshold
- return argmax_matcher.ArgMaxMatcher(
- matched_threshold=matched_threshold,
- unmatched_threshold=unmatched_threshold,
- negatives_lower_than_unmatched=matcher.negatives_lower_than_unmatched,
- force_match_for_each_row=matcher.force_match_for_each_row)
- if matcher_config.WhichOneof('matcher_oneof') == 'bipartite_matcher':
- return bipartite_matcher.GreedyBipartiteMatcher()
- raise ValueError('Empty matcher.')
diff --git a/object_detection/builders/matcher_builder_test.py b/object_detection/builders/matcher_builder_test.py
deleted file mode 100644
index c4275aae..00000000
--- a/object_detection/builders/matcher_builder_test.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for matcher_builder."""
-
-import tensorflow as tf
-
-from google.protobuf import text_format
-from object_detection.builders import matcher_builder
-from object_detection.matchers import argmax_matcher
-from object_detection.matchers import bipartite_matcher
-from object_detection.protos import matcher_pb2
-
-
-class MatcherBuilderTest(tf.test.TestCase):
-
- def test_build_arg_max_matcher_with_defaults(self):
- matcher_text_proto = """
- argmax_matcher {
- }
- """
- matcher_proto = matcher_pb2.Matcher()
- text_format.Merge(matcher_text_proto, matcher_proto)
- matcher_object = matcher_builder.build(matcher_proto)
- self.assertTrue(isinstance(matcher_object, argmax_matcher.ArgMaxMatcher))
- self.assertAlmostEqual(matcher_object._matched_threshold, 0.5)
- self.assertAlmostEqual(matcher_object._unmatched_threshold, 0.5)
- self.assertTrue(matcher_object._negatives_lower_than_unmatched)
- self.assertFalse(matcher_object._force_match_for_each_row)
-
- def test_build_arg_max_matcher_without_thresholds(self):
- matcher_text_proto = """
- argmax_matcher {
- ignore_thresholds: true
- }
- """
- matcher_proto = matcher_pb2.Matcher()
- text_format.Merge(matcher_text_proto, matcher_proto)
- matcher_object = matcher_builder.build(matcher_proto)
- self.assertTrue(isinstance(matcher_object, argmax_matcher.ArgMaxMatcher))
- self.assertEqual(matcher_object._matched_threshold, None)
- self.assertEqual(matcher_object._unmatched_threshold, None)
- self.assertTrue(matcher_object._negatives_lower_than_unmatched)
- self.assertFalse(matcher_object._force_match_for_each_row)
-
- def test_build_arg_max_matcher_with_non_default_parameters(self):
- matcher_text_proto = """
- argmax_matcher {
- matched_threshold: 0.7
- unmatched_threshold: 0.3
- negatives_lower_than_unmatched: false
- force_match_for_each_row: true
- }
- """
- matcher_proto = matcher_pb2.Matcher()
- text_format.Merge(matcher_text_proto, matcher_proto)
- matcher_object = matcher_builder.build(matcher_proto)
- self.assertTrue(isinstance(matcher_object, argmax_matcher.ArgMaxMatcher))
- self.assertAlmostEqual(matcher_object._matched_threshold, 0.7)
- self.assertAlmostEqual(matcher_object._unmatched_threshold, 0.3)
- self.assertFalse(matcher_object._negatives_lower_than_unmatched)
- self.assertTrue(matcher_object._force_match_for_each_row)
-
- def test_build_bipartite_matcher(self):
- matcher_text_proto = """
- bipartite_matcher {
- }
- """
- matcher_proto = matcher_pb2.Matcher()
- text_format.Merge(matcher_text_proto, matcher_proto)
- matcher_object = matcher_builder.build(matcher_proto)
- self.assertTrue(
- isinstance(matcher_object, bipartite_matcher.GreedyBipartiteMatcher))
-
- def test_raise_error_on_empty_matcher(self):
- matcher_text_proto = """
- """
- matcher_proto = matcher_pb2.Matcher()
- text_format.Merge(matcher_text_proto, matcher_proto)
- with self.assertRaises(ValueError):
- matcher_builder.build(matcher_proto)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/builders/model_builder.py b/object_detection/builders/model_builder.py
deleted file mode 100644
index 5467a91b..00000000
--- a/object_detection/builders/model_builder.py
+++ /dev/null
@@ -1,327 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""A function to build a DetectionModel from configuration."""
-from object_detection.builders import anchor_generator_builder
-from object_detection.builders import box_coder_builder
-from object_detection.builders import box_predictor_builder
-from object_detection.builders import hyperparams_builder
-from object_detection.builders import image_resizer_builder
-from object_detection.builders import losses_builder
-from object_detection.builders import matcher_builder
-from object_detection.builders import post_processing_builder
-from object_detection.builders import region_similarity_calculator_builder as sim_calc
-from object_detection.core import box_predictor
-from object_detection.meta_architectures import faster_rcnn_meta_arch
-from object_detection.meta_architectures import rfcn_meta_arch
-from object_detection.meta_architectures import ssd_meta_arch
-from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res
-from object_detection.models import faster_rcnn_inception_v2_feature_extractor as frcnn_inc_v2
-from object_detection.models import faster_rcnn_nas_feature_extractor as frcnn_nas
-from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as frcnn_resnet_v1
-from object_detection.models.embedded_ssd_mobilenet_v1_feature_extractor import EmbeddedSSDMobileNetV1FeatureExtractor
-from object_detection.models.ssd_inception_v2_feature_extractor import SSDInceptionV2FeatureExtractor
-from object_detection.models.ssd_inception_v3_feature_extractor import SSDInceptionV3FeatureExtractor
-from object_detection.models.ssd_mobilenet_v1_feature_extractor import SSDMobileNetV1FeatureExtractor
-from object_detection.protos import model_pb2
-
-# A map of names to SSD feature extractors.
-SSD_FEATURE_EXTRACTOR_CLASS_MAP = {
- 'ssd_inception_v2': SSDInceptionV2FeatureExtractor,
- 'ssd_inception_v3': SSDInceptionV3FeatureExtractor,
- 'ssd_mobilenet_v1': SSDMobileNetV1FeatureExtractor,
- 'embedded_ssd_mobilenet_v1': EmbeddedSSDMobileNetV1FeatureExtractor,
-}
-
-# A map of names to Faster R-CNN feature extractors.
-FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP = {
- 'faster_rcnn_nas':
- frcnn_nas.FasterRCNNNASFeatureExtractor,
- 'faster_rcnn_inception_resnet_v2':
- frcnn_inc_res.FasterRCNNInceptionResnetV2FeatureExtractor,
- 'faster_rcnn_inception_v2':
- frcnn_inc_v2.FasterRCNNInceptionV2FeatureExtractor,
- 'faster_rcnn_resnet50':
- frcnn_resnet_v1.FasterRCNNResnet50FeatureExtractor,
- 'faster_rcnn_resnet101':
- frcnn_resnet_v1.FasterRCNNResnet101FeatureExtractor,
- 'faster_rcnn_resnet152':
- frcnn_resnet_v1.FasterRCNNResnet152FeatureExtractor,
-}
-
-
-def build(model_config, is_training):
- """Builds a DetectionModel based on the model config.
-
- Args:
- model_config: A model.proto object containing the config for the desired
- DetectionModel.
- is_training: True if this model is being built for training purposes.
-
- Returns:
- DetectionModel based on the config.
-
- Raises:
- ValueError: On invalid meta architecture or model.
- """
- if not isinstance(model_config, model_pb2.DetectionModel):
- raise ValueError('model_config not of type model_pb2.DetectionModel.')
- meta_architecture = model_config.WhichOneof('model')
- if meta_architecture == 'ssd':
- return _build_ssd_model(model_config.ssd, is_training)
- if meta_architecture == 'faster_rcnn':
- return _build_faster_rcnn_model(model_config.faster_rcnn, is_training)
- raise ValueError('Unknown meta architecture: {}'.format(meta_architecture))
-
-
-def _build_ssd_feature_extractor(feature_extractor_config, is_training,
- reuse_weights=None):
- """Builds a ssd_meta_arch.SSDFeatureExtractor based on config.
-
- Args:
- feature_extractor_config: A SSDFeatureExtractor proto config from ssd.proto.
- is_training: True if this feature extractor is being built for training.
- reuse_weights: if the feature extractor should reuse weights.
-
- Returns:
- ssd_meta_arch.SSDFeatureExtractor based on config.
-
- Raises:
- ValueError: On invalid feature extractor type.
- """
- feature_type = feature_extractor_config.type
- depth_multiplier = feature_extractor_config.depth_multiplier
- min_depth = feature_extractor_config.min_depth
- pad_to_multiple = feature_extractor_config.pad_to_multiple
- batch_norm_trainable = feature_extractor_config.batch_norm_trainable
- conv_hyperparams = hyperparams_builder.build(
- feature_extractor_config.conv_hyperparams, is_training)
-
- if feature_type not in SSD_FEATURE_EXTRACTOR_CLASS_MAP:
- raise ValueError('Unknown ssd feature_extractor: {}'.format(feature_type))
-
- feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type]
- return feature_extractor_class(is_training, depth_multiplier, min_depth,
- pad_to_multiple, conv_hyperparams,
- batch_norm_trainable, reuse_weights)
-
-
-def _build_ssd_model(ssd_config, is_training):
- """Builds an SSD detection model based on the model config.
-
- Args:
- ssd_config: A ssd.proto object containing the config for the desired
- SSDMetaArch.
- is_training: True if this model is being built for training purposes.
-
- Returns:
- SSDMetaArch based on the config.
- Raises:
- ValueError: If ssd_config.type is not recognized (i.e. not registered in
- model_class_map).
- """
- num_classes = ssd_config.num_classes
-
- # Feature extractor
- feature_extractor = _build_ssd_feature_extractor(ssd_config.feature_extractor,
- is_training)
-
- box_coder = box_coder_builder.build(ssd_config.box_coder)
- matcher = matcher_builder.build(ssd_config.matcher)
- region_similarity_calculator = sim_calc.build(
- ssd_config.similarity_calculator)
- ssd_box_predictor = box_predictor_builder.build(hyperparams_builder.build,
- ssd_config.box_predictor,
- is_training, num_classes)
- anchor_generator = anchor_generator_builder.build(
- ssd_config.anchor_generator)
- image_resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
- non_max_suppression_fn, score_conversion_fn = post_processing_builder.build(
- ssd_config.post_processing)
- (classification_loss, localization_loss, classification_weight,
- localization_weight,
- hard_example_miner) = losses_builder.build(ssd_config.loss)
- normalize_loss_by_num_matches = ssd_config.normalize_loss_by_num_matches
-
- return ssd_meta_arch.SSDMetaArch(
- is_training,
- anchor_generator,
- ssd_box_predictor,
- box_coder,
- feature_extractor,
- matcher,
- region_similarity_calculator,
- image_resizer_fn,
- non_max_suppression_fn,
- score_conversion_fn,
- classification_loss,
- localization_loss,
- classification_weight,
- localization_weight,
- normalize_loss_by_num_matches,
- hard_example_miner)
-
-
-def _build_faster_rcnn_feature_extractor(
- feature_extractor_config, is_training, reuse_weights=None):
- """Builds a faster_rcnn_meta_arch.FasterRCNNFeatureExtractor based on config.
-
- Args:
- feature_extractor_config: A FasterRcnnFeatureExtractor proto config from
- faster_rcnn.proto.
- is_training: True if this feature extractor is being built for training.
- reuse_weights: if the feature extractor should reuse weights.
-
- Returns:
- faster_rcnn_meta_arch.FasterRCNNFeatureExtractor based on config.
-
- Raises:
- ValueError: On invalid feature extractor type.
- """
- feature_type = feature_extractor_config.type
- first_stage_features_stride = (
- feature_extractor_config.first_stage_features_stride)
- batch_norm_trainable = feature_extractor_config.batch_norm_trainable
-
- if feature_type not in FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP:
- raise ValueError('Unknown Faster R-CNN feature_extractor: {}'.format(
- feature_type))
- feature_extractor_class = FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP[
- feature_type]
- return feature_extractor_class(
- is_training, first_stage_features_stride,
- batch_norm_trainable, reuse_weights)
-
-
-def _build_faster_rcnn_model(frcnn_config, is_training):
- """Builds a Faster R-CNN or R-FCN detection model based on the model config.
-
- Builds R-FCN model if the second_stage_box_predictor in the config is of type
- `rfcn_box_predictor` else builds a Faster R-CNN model.
-
- Args:
- frcnn_config: A faster_rcnn.proto object containing the config for the
- desired FasterRCNNMetaArch or RFCNMetaArch.
- is_training: True if this model is being built for training purposes.
-
- Returns:
- FasterRCNNMetaArch based on the config.
- Raises:
- ValueError: If frcnn_config.type is not recognized (i.e. not registered in
- model_class_map).
- """
- num_classes = frcnn_config.num_classes
- image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)
-
- feature_extractor = _build_faster_rcnn_feature_extractor(
- frcnn_config.feature_extractor, is_training)
-
- first_stage_only = frcnn_config.first_stage_only
- first_stage_anchor_generator = anchor_generator_builder.build(
- frcnn_config.first_stage_anchor_generator)
-
- first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate
- first_stage_box_predictor_arg_scope = hyperparams_builder.build(
- frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)
- first_stage_box_predictor_kernel_size = (
- frcnn_config.first_stage_box_predictor_kernel_size)
- first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth
- first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size
- first_stage_positive_balance_fraction = (
- frcnn_config.first_stage_positive_balance_fraction)
- first_stage_nms_score_threshold = frcnn_config.first_stage_nms_score_threshold
- first_stage_nms_iou_threshold = frcnn_config.first_stage_nms_iou_threshold
- first_stage_max_proposals = frcnn_config.first_stage_max_proposals
- first_stage_loc_loss_weight = (
- frcnn_config.first_stage_localization_loss_weight)
- first_stage_obj_loss_weight = frcnn_config.first_stage_objectness_loss_weight
-
- initial_crop_size = frcnn_config.initial_crop_size
- maxpool_kernel_size = frcnn_config.maxpool_kernel_size
- maxpool_stride = frcnn_config.maxpool_stride
-
- second_stage_box_predictor = box_predictor_builder.build(
- hyperparams_builder.build,
- frcnn_config.second_stage_box_predictor,
- is_training=is_training,
- num_classes=num_classes)
- second_stage_batch_size = frcnn_config.second_stage_batch_size
- second_stage_balance_fraction = frcnn_config.second_stage_balance_fraction
- (second_stage_non_max_suppression_fn, second_stage_score_conversion_fn
- ) = post_processing_builder.build(frcnn_config.second_stage_post_processing)
- second_stage_localization_loss_weight = (
- frcnn_config.second_stage_localization_loss_weight)
- second_stage_classification_loss = (
- losses_builder.build_faster_rcnn_classification_loss(
- frcnn_config.second_stage_classification_loss))
- second_stage_classification_loss_weight = (
- frcnn_config.second_stage_classification_loss_weight)
- second_stage_mask_prediction_loss_weight = (
- frcnn_config.second_stage_mask_prediction_loss_weight)
-
- hard_example_miner = None
- if frcnn_config.HasField('hard_example_miner'):
- hard_example_miner = losses_builder.build_hard_example_miner(
- frcnn_config.hard_example_miner,
- second_stage_classification_loss_weight,
- second_stage_localization_loss_weight)
-
- common_kwargs = {
- 'is_training': is_training,
- 'num_classes': num_classes,
- 'image_resizer_fn': image_resizer_fn,
- 'feature_extractor': feature_extractor,
- 'first_stage_only': first_stage_only,
- 'first_stage_anchor_generator': first_stage_anchor_generator,
- 'first_stage_atrous_rate': first_stage_atrous_rate,
- 'first_stage_box_predictor_arg_scope':
- first_stage_box_predictor_arg_scope,
- 'first_stage_box_predictor_kernel_size':
- first_stage_box_predictor_kernel_size,
- 'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
- 'first_stage_minibatch_size': first_stage_minibatch_size,
- 'first_stage_positive_balance_fraction':
- first_stage_positive_balance_fraction,
- 'first_stage_nms_score_threshold': first_stage_nms_score_threshold,
- 'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold,
- 'first_stage_max_proposals': first_stage_max_proposals,
- 'first_stage_localization_loss_weight': first_stage_loc_loss_weight,
- 'first_stage_objectness_loss_weight': first_stage_obj_loss_weight,
- 'second_stage_batch_size': second_stage_batch_size,
- 'second_stage_balance_fraction': second_stage_balance_fraction,
- 'second_stage_non_max_suppression_fn':
- second_stage_non_max_suppression_fn,
- 'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
- 'second_stage_localization_loss_weight':
- second_stage_localization_loss_weight,
- 'second_stage_classification_loss':
- second_stage_classification_loss,
- 'second_stage_classification_loss_weight':
- second_stage_classification_loss_weight,
- 'hard_example_miner': hard_example_miner}
-
- if isinstance(second_stage_box_predictor, box_predictor.RfcnBoxPredictor):
- return rfcn_meta_arch.RFCNMetaArch(
- second_stage_rfcn_box_predictor=second_stage_box_predictor,
- **common_kwargs)
- else:
- return faster_rcnn_meta_arch.FasterRCNNMetaArch(
- initial_crop_size=initial_crop_size,
- maxpool_kernel_size=maxpool_kernel_size,
- maxpool_stride=maxpool_stride,
- second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
- second_stage_mask_prediction_loss_weight=(
- second_stage_mask_prediction_loss_weight),
- **common_kwargs)
diff --git a/object_detection/builders/model_builder_test.py b/object_detection/builders/model_builder_test.py
deleted file mode 100644
index 5e217094..00000000
--- a/object_detection/builders/model_builder_test.py
+++ /dev/null
@@ -1,741 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.models.model_builder."""
-
-import tensorflow as tf
-
-from google.protobuf import text_format
-from object_detection.builders import model_builder
-from object_detection.meta_architectures import faster_rcnn_meta_arch
-from object_detection.meta_architectures import rfcn_meta_arch
-from object_detection.meta_architectures import ssd_meta_arch
-from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res
-from object_detection.models import faster_rcnn_inception_v2_feature_extractor as frcnn_inc_v2
-from object_detection.models import faster_rcnn_nas_feature_extractor as frcnn_nas
-from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as frcnn_resnet_v1
-from object_detection.models.ssd_inception_v2_feature_extractor import SSDInceptionV2FeatureExtractor
-from object_detection.models.ssd_inception_v3_feature_extractor import SSDInceptionV3FeatureExtractor
-from object_detection.models.ssd_mobilenet_v1_feature_extractor import SSDMobileNetV1FeatureExtractor
-from object_detection.protos import model_pb2
-
-FEATURE_EXTRACTOR_MAPS = {
- 'faster_rcnn_resnet50':
- frcnn_resnet_v1.FasterRCNNResnet50FeatureExtractor,
- 'faster_rcnn_resnet101':
- frcnn_resnet_v1.FasterRCNNResnet101FeatureExtractor,
- 'faster_rcnn_resnet152':
- frcnn_resnet_v1.FasterRCNNResnet152FeatureExtractor
-}
-
-
-class ModelBuilderTest(tf.test.TestCase):
-
- def create_model(self, model_config):
- """Builds a DetectionModel based on the model config.
-
- Args:
- model_config: A model.proto object containing the config for the desired
- DetectionModel.
-
- Returns:
- DetectionModel based on the config.
- """
- return model_builder.build(model_config, is_training=True)
-
- def test_create_ssd_inception_v2_model_from_config(self):
- model_text_proto = """
- ssd {
- feature_extractor {
- type: 'ssd_inception_v2'
- conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- box_coder {
- faster_rcnn_box_coder {
- }
- }
- matcher {
- argmax_matcher {
- }
- }
- similarity_calculator {
- iou_similarity {
- }
- }
- anchor_generator {
- ssd_anchor_generator {
- aspect_ratios: 1.0
- }
- }
- image_resizer {
- fixed_shape_resizer {
- height: 320
- width: 320
- }
- }
- box_predictor {
- convolutional_box_predictor {
- conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- }
- loss {
- classification_loss {
- weighted_softmax {
- }
- }
- localization_loss {
- weighted_smooth_l1 {
- }
- }
- }
- }"""
- model_proto = model_pb2.DetectionModel()
- text_format.Merge(model_text_proto, model_proto)
- model = self.create_model(model_proto)
- self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
- self.assertIsInstance(model._feature_extractor,
- SSDInceptionV2FeatureExtractor)
-
- def test_create_ssd_inception_v3_model_from_config(self):
- model_text_proto = """
- ssd {
- feature_extractor {
- type: 'ssd_inception_v3'
- conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- box_coder {
- faster_rcnn_box_coder {
- }
- }
- matcher {
- argmax_matcher {
- }
- }
- similarity_calculator {
- iou_similarity {
- }
- }
- anchor_generator {
- ssd_anchor_generator {
- aspect_ratios: 1.0
- }
- }
- image_resizer {
- fixed_shape_resizer {
- height: 320
- width: 320
- }
- }
- box_predictor {
- convolutional_box_predictor {
- conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- }
- loss {
- classification_loss {
- weighted_softmax {
- }
- }
- localization_loss {
- weighted_smooth_l1 {
- }
- }
- }
- }"""
- model_proto = model_pb2.DetectionModel()
- text_format.Merge(model_text_proto, model_proto)
- model = self.create_model(model_proto)
- self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
- self.assertIsInstance(model._feature_extractor,
- SSDInceptionV3FeatureExtractor)
-
- def test_create_ssd_mobilenet_v1_model_from_config(self):
- model_text_proto = """
- ssd {
- feature_extractor {
- type: 'ssd_mobilenet_v1'
- conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- batch_norm_trainable: true
- }
- box_coder {
- faster_rcnn_box_coder {
- }
- }
- matcher {
- argmax_matcher {
- }
- }
- similarity_calculator {
- iou_similarity {
- }
- }
- anchor_generator {
- ssd_anchor_generator {
- aspect_ratios: 1.0
- }
- }
- image_resizer {
- fixed_shape_resizer {
- height: 320
- width: 320
- }
- }
- box_predictor {
- convolutional_box_predictor {
- conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- }
- loss {
- classification_loss {
- weighted_softmax {
- }
- }
- localization_loss {
- weighted_smooth_l1 {
- }
- }
- }
- }"""
- model_proto = model_pb2.DetectionModel()
- text_format.Merge(model_text_proto, model_proto)
- model = self.create_model(model_proto)
- self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
- self.assertIsInstance(model._feature_extractor,
- SSDMobileNetV1FeatureExtractor)
- self.assertTrue(model._feature_extractor._batch_norm_trainable)
-
- def test_create_faster_rcnn_resnet_v1_models_from_config(self):
- model_text_proto = """
- faster_rcnn {
- num_classes: 3
- image_resizer {
- keep_aspect_ratio_resizer {
- min_dimension: 600
- max_dimension: 1024
- }
- }
- feature_extractor {
- type: 'faster_rcnn_resnet101'
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.25, 0.5, 1.0, 2.0]
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 16
- width_stride: 16
- }
- }
- first_stage_box_predictor_conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- initial_crop_size: 14
- maxpool_kernel_size: 2
- maxpool_stride: 2
- second_stage_box_predictor {
- mask_rcnn_box_predictor {
- fc_hyperparams {
- op: FC
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- }
- second_stage_post_processing {
- batch_non_max_suppression {
- score_threshold: 0.01
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 300
- }
- score_converter: SOFTMAX
- }
- }"""
- model_proto = model_pb2.DetectionModel()
- text_format.Merge(model_text_proto, model_proto)
- for extractor_type, extractor_class in FEATURE_EXTRACTOR_MAPS.items():
- model_proto.faster_rcnn.feature_extractor.type = extractor_type
- model = model_builder.build(model_proto, is_training=True)
- self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch)
- self.assertIsInstance(model._feature_extractor, extractor_class)
-
- def test_create_faster_rcnn_resnet101_with_mask_prediction_enabled(self):
- model_text_proto = """
- faster_rcnn {
- num_classes: 3
- image_resizer {
- keep_aspect_ratio_resizer {
- min_dimension: 600
- max_dimension: 1024
- }
- }
- feature_extractor {
- type: 'faster_rcnn_resnet101'
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.25, 0.5, 1.0, 2.0]
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 16
- width_stride: 16
- }
- }
- first_stage_box_predictor_conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- initial_crop_size: 14
- maxpool_kernel_size: 2
- maxpool_stride: 2
- second_stage_box_predictor {
- mask_rcnn_box_predictor {
- fc_hyperparams {
- op: FC
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- predict_instance_masks: true
- }
- }
- second_stage_mask_prediction_loss_weight: 3.0
- second_stage_post_processing {
- batch_non_max_suppression {
- score_threshold: 0.01
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 300
- }
- score_converter: SOFTMAX
- }
- }"""
- model_proto = model_pb2.DetectionModel()
- text_format.Merge(model_text_proto, model_proto)
- model = model_builder.build(model_proto, is_training=True)
- self.assertAlmostEqual(model._second_stage_mask_loss_weight, 3.0)
-
- def test_create_faster_rcnn_nas_model_from_config(self):
- model_text_proto = """
- faster_rcnn {
- num_classes: 3
- image_resizer {
- keep_aspect_ratio_resizer {
- min_dimension: 600
- max_dimension: 1024
- }
- }
- feature_extractor {
- type: 'faster_rcnn_nas'
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.25, 0.5, 1.0, 2.0]
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 16
- width_stride: 16
- }
- }
- first_stage_box_predictor_conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- initial_crop_size: 17
- maxpool_kernel_size: 1
- maxpool_stride: 1
- second_stage_box_predictor {
- mask_rcnn_box_predictor {
- fc_hyperparams {
- op: FC
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- }
- second_stage_post_processing {
- batch_non_max_suppression {
- score_threshold: 0.01
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 300
- }
- score_converter: SOFTMAX
- }
- }"""
- model_proto = model_pb2.DetectionModel()
- text_format.Merge(model_text_proto, model_proto)
- model = model_builder.build(model_proto, is_training=True)
- self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch)
- self.assertIsInstance(
- model._feature_extractor,
- frcnn_nas.FasterRCNNNASFeatureExtractor)
-
- def test_create_faster_rcnn_inception_resnet_v2_model_from_config(self):
- model_text_proto = """
- faster_rcnn {
- num_classes: 3
- image_resizer {
- keep_aspect_ratio_resizer {
- min_dimension: 600
- max_dimension: 1024
- }
- }
- feature_extractor {
- type: 'faster_rcnn_inception_resnet_v2'
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.25, 0.5, 1.0, 2.0]
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 16
- width_stride: 16
- }
- }
- first_stage_box_predictor_conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- initial_crop_size: 17
- maxpool_kernel_size: 1
- maxpool_stride: 1
- second_stage_box_predictor {
- mask_rcnn_box_predictor {
- fc_hyperparams {
- op: FC
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- }
- second_stage_post_processing {
- batch_non_max_suppression {
- score_threshold: 0.01
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 300
- }
- score_converter: SOFTMAX
- }
- }"""
- model_proto = model_pb2.DetectionModel()
- text_format.Merge(model_text_proto, model_proto)
- model = model_builder.build(model_proto, is_training=True)
- self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch)
- self.assertIsInstance(
- model._feature_extractor,
- frcnn_inc_res.FasterRCNNInceptionResnetV2FeatureExtractor)
-
- def test_create_faster_rcnn_inception_v2_model_from_config(self):
- model_text_proto = """
- faster_rcnn {
- num_classes: 3
- image_resizer {
- keep_aspect_ratio_resizer {
- min_dimension: 600
- max_dimension: 1024
- }
- }
- feature_extractor {
- type: 'faster_rcnn_inception_v2'
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.25, 0.5, 1.0, 2.0]
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 16
- width_stride: 16
- }
- }
- first_stage_box_predictor_conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- initial_crop_size: 14
- maxpool_kernel_size: 2
- maxpool_stride: 2
- second_stage_box_predictor {
- mask_rcnn_box_predictor {
- fc_hyperparams {
- op: FC
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- }
- second_stage_post_processing {
- batch_non_max_suppression {
- score_threshold: 0.01
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 300
- }
- score_converter: SOFTMAX
- }
- }"""
- model_proto = model_pb2.DetectionModel()
- text_format.Merge(model_text_proto, model_proto)
- model = model_builder.build(model_proto, is_training=True)
- self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch)
- self.assertIsInstance(model._feature_extractor,
- frcnn_inc_v2.FasterRCNNInceptionV2FeatureExtractor)
-
- def test_create_faster_rcnn_model_from_config_with_example_miner(self):
- model_text_proto = """
- faster_rcnn {
- num_classes: 3
- feature_extractor {
- type: 'faster_rcnn_inception_resnet_v2'
- }
- image_resizer {
- keep_aspect_ratio_resizer {
- min_dimension: 600
- max_dimension: 1024
- }
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.25, 0.5, 1.0, 2.0]
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 16
- width_stride: 16
- }
- }
- first_stage_box_predictor_conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- second_stage_box_predictor {
- mask_rcnn_box_predictor {
- fc_hyperparams {
- op: FC
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- }
- hard_example_miner {
- num_hard_examples: 10
- iou_threshold: 0.99
- }
- }"""
- model_proto = model_pb2.DetectionModel()
- text_format.Merge(model_text_proto, model_proto)
- model = model_builder.build(model_proto, is_training=True)
- self.assertIsNotNone(model._hard_example_miner)
-
- def test_create_rfcn_resnet_v1_model_from_config(self):
- model_text_proto = """
- faster_rcnn {
- num_classes: 3
- image_resizer {
- keep_aspect_ratio_resizer {
- min_dimension: 600
- max_dimension: 1024
- }
- }
- feature_extractor {
- type: 'faster_rcnn_resnet101'
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.25, 0.5, 1.0, 2.0]
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 16
- width_stride: 16
- }
- }
- first_stage_box_predictor_conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- initial_crop_size: 14
- maxpool_kernel_size: 2
- maxpool_stride: 2
- second_stage_box_predictor {
- rfcn_box_predictor {
- conv_hyperparams {
- op: CONV
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- }
- second_stage_post_processing {
- batch_non_max_suppression {
- score_threshold: 0.01
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 300
- }
- score_converter: SOFTMAX
- }
- }"""
- model_proto = model_pb2.DetectionModel()
- text_format.Merge(model_text_proto, model_proto)
- for extractor_type, extractor_class in FEATURE_EXTRACTOR_MAPS.items():
- model_proto.faster_rcnn.feature_extractor.type = extractor_type
- model = model_builder.build(model_proto, is_training=True)
- self.assertIsInstance(model, rfcn_meta_arch.RFCNMetaArch)
- self.assertIsInstance(model._feature_extractor, extractor_class)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/builders/optimizer_builder.py b/object_detection/builders/optimizer_builder.py
deleted file mode 100644
index cccaba99..00000000
--- a/object_detection/builders/optimizer_builder.py
+++ /dev/null
@@ -1,119 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Functions to build DetectionModel training optimizers."""
-
-import tensorflow as tf
-from object_detection.utils import learning_schedules
-
-
-def build(optimizer_config, global_summaries):
- """Create optimizer based on config.
-
- Args:
- optimizer_config: A Optimizer proto message.
- global_summaries: A set to attach learning rate summary to.
-
- Returns:
- An optimizer.
-
- Raises:
- ValueError: when using an unsupported input data type.
- """
- optimizer_type = optimizer_config.WhichOneof('optimizer')
- optimizer = None
-
- if optimizer_type == 'rms_prop_optimizer':
- config = optimizer_config.rms_prop_optimizer
- optimizer = tf.train.RMSPropOptimizer(
- _create_learning_rate(config.learning_rate, global_summaries),
- decay=config.decay,
- momentum=config.momentum_optimizer_value,
- epsilon=config.epsilon)
-
- if optimizer_type == 'momentum_optimizer':
- config = optimizer_config.momentum_optimizer
- optimizer = tf.train.MomentumOptimizer(
- _create_learning_rate(config.learning_rate, global_summaries),
- momentum=config.momentum_optimizer_value)
-
- if optimizer_type == 'adam_optimizer':
- config = optimizer_config.adam_optimizer
- optimizer = tf.train.AdamOptimizer(
- _create_learning_rate(config.learning_rate, global_summaries))
-
- if optimizer is None:
- raise ValueError('Optimizer %s not supported.' % optimizer_type)
-
- if optimizer_config.use_moving_average:
- optimizer = tf.contrib.opt.MovingAverageOptimizer(
- optimizer, average_decay=optimizer_config.moving_average_decay)
-
- return optimizer
-
-
-def _create_learning_rate(learning_rate_config, global_summaries):
- """Create optimizer learning rate based on config.
-
- Args:
- learning_rate_config: A LearningRate proto message.
- global_summaries: A set to attach learning rate summary to.
-
- Returns:
- A learning rate.
-
- Raises:
- ValueError: when using an unsupported input data type.
- """
- learning_rate = None
- learning_rate_type = learning_rate_config.WhichOneof('learning_rate')
- if learning_rate_type == 'constant_learning_rate':
- config = learning_rate_config.constant_learning_rate
- learning_rate = config.learning_rate
-
- if learning_rate_type == 'exponential_decay_learning_rate':
- config = learning_rate_config.exponential_decay_learning_rate
- learning_rate = tf.train.exponential_decay(
- config.initial_learning_rate,
- tf.train.get_or_create_global_step(),
- config.decay_steps,
- config.decay_factor,
- staircase=config.staircase)
-
- if learning_rate_type == 'manual_step_learning_rate':
- config = learning_rate_config.manual_step_learning_rate
- if not config.schedule:
- raise ValueError('Empty learning rate schedule.')
- learning_rate_step_boundaries = [x.step for x in config.schedule]
- learning_rate_sequence = [config.initial_learning_rate]
- learning_rate_sequence += [x.learning_rate for x in config.schedule]
- learning_rate = learning_schedules.manual_stepping(
- tf.train.get_or_create_global_step(), learning_rate_step_boundaries,
- learning_rate_sequence)
-
- if learning_rate_type == 'cosine_decay_learning_rate':
- config = learning_rate_config.cosine_decay_learning_rate
- learning_rate = learning_schedules.cosine_decay_with_warmup(
- tf.train.get_or_create_global_step(),
- config.learning_rate_base,
- config.total_steps,
- config.warmup_learning_rate,
- config.warmup_steps)
-
- if learning_rate is None:
- raise ValueError('Learning_rate %s not supported.' % learning_rate_type)
-
- global_summaries.add(tf.summary.scalar('Learning_Rate', learning_rate))
- return learning_rate
diff --git a/object_detection/builders/optimizer_builder_test.py b/object_detection/builders/optimizer_builder_test.py
deleted file mode 100644
index e5bcbba1..00000000
--- a/object_detection/builders/optimizer_builder_test.py
+++ /dev/null
@@ -1,213 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for optimizer_builder."""
-
-import tensorflow as tf
-
-from google.protobuf import text_format
-
-from object_detection.builders import optimizer_builder
-from object_detection.protos import optimizer_pb2
-
-
-class LearningRateBuilderTest(tf.test.TestCase):
-
- def testBuildConstantLearningRate(self):
- learning_rate_text_proto = """
- constant_learning_rate {
- learning_rate: 0.004
- }
- """
- global_summaries = set([])
- learning_rate_proto = optimizer_pb2.LearningRate()
- text_format.Merge(learning_rate_text_proto, learning_rate_proto)
- learning_rate = optimizer_builder._create_learning_rate(
- learning_rate_proto, global_summaries)
- self.assertAlmostEqual(learning_rate, 0.004)
-
- def testBuildExponentialDecayLearningRate(self):
- learning_rate_text_proto = """
- exponential_decay_learning_rate {
- initial_learning_rate: 0.004
- decay_steps: 99999
- decay_factor: 0.85
- staircase: false
- }
- """
- global_summaries = set([])
- learning_rate_proto = optimizer_pb2.LearningRate()
- text_format.Merge(learning_rate_text_proto, learning_rate_proto)
- learning_rate = optimizer_builder._create_learning_rate(
- learning_rate_proto, global_summaries)
- self.assertTrue(isinstance(learning_rate, tf.Tensor))
-
- def testBuildManualStepLearningRate(self):
- learning_rate_text_proto = """
- manual_step_learning_rate {
- schedule {
- step: 0
- learning_rate: 0.006
- }
- schedule {
- step: 90000
- learning_rate: 0.00006
- }
- }
- """
- global_summaries = set([])
- learning_rate_proto = optimizer_pb2.LearningRate()
- text_format.Merge(learning_rate_text_proto, learning_rate_proto)
- learning_rate = optimizer_builder._create_learning_rate(
- learning_rate_proto, global_summaries)
- self.assertTrue(isinstance(learning_rate, tf.Tensor))
-
- def testBuildCosineDecayLearningRate(self):
- learning_rate_text_proto = """
- cosine_decay_learning_rate {
- learning_rate_base: 0.002
- total_steps: 20000
- warmup_learning_rate: 0.0001
- warmup_steps: 1000
- }
- """
- global_summaries = set([])
- learning_rate_proto = optimizer_pb2.LearningRate()
- text_format.Merge(learning_rate_text_proto, learning_rate_proto)
- learning_rate = optimizer_builder._create_learning_rate(
- learning_rate_proto, global_summaries)
- self.assertTrue(isinstance(learning_rate, tf.Tensor))
-
- def testRaiseErrorOnEmptyLearningRate(self):
- learning_rate_text_proto = """
- """
- global_summaries = set([])
- learning_rate_proto = optimizer_pb2.LearningRate()
- text_format.Merge(learning_rate_text_proto, learning_rate_proto)
- with self.assertRaises(ValueError):
- optimizer_builder._create_learning_rate(
- learning_rate_proto, global_summaries)
-
-
-class OptimizerBuilderTest(tf.test.TestCase):
-
- def testBuildRMSPropOptimizer(self):
- optimizer_text_proto = """
- rms_prop_optimizer: {
- learning_rate: {
- exponential_decay_learning_rate {
- initial_learning_rate: 0.004
- decay_steps: 800720
- decay_factor: 0.95
- }
- }
- momentum_optimizer_value: 0.9
- decay: 0.9
- epsilon: 1.0
- }
- use_moving_average: false
- """
- global_summaries = set([])
- optimizer_proto = optimizer_pb2.Optimizer()
- text_format.Merge(optimizer_text_proto, optimizer_proto)
- optimizer = optimizer_builder.build(optimizer_proto, global_summaries)
- self.assertTrue(isinstance(optimizer, tf.train.RMSPropOptimizer))
-
- def testBuildMomentumOptimizer(self):
- optimizer_text_proto = """
- momentum_optimizer: {
- learning_rate: {
- constant_learning_rate {
- learning_rate: 0.001
- }
- }
- momentum_optimizer_value: 0.99
- }
- use_moving_average: false
- """
- global_summaries = set([])
- optimizer_proto = optimizer_pb2.Optimizer()
- text_format.Merge(optimizer_text_proto, optimizer_proto)
- optimizer = optimizer_builder.build(optimizer_proto, global_summaries)
- self.assertTrue(isinstance(optimizer, tf.train.MomentumOptimizer))
-
- def testBuildAdamOptimizer(self):
- optimizer_text_proto = """
- adam_optimizer: {
- learning_rate: {
- constant_learning_rate {
- learning_rate: 0.002
- }
- }
- }
- use_moving_average: false
- """
- global_summaries = set([])
- optimizer_proto = optimizer_pb2.Optimizer()
- text_format.Merge(optimizer_text_proto, optimizer_proto)
- optimizer = optimizer_builder.build(optimizer_proto, global_summaries)
- self.assertTrue(isinstance(optimizer, tf.train.AdamOptimizer))
-
- def testBuildMovingAverageOptimizer(self):
- optimizer_text_proto = """
- adam_optimizer: {
- learning_rate: {
- constant_learning_rate {
- learning_rate: 0.002
- }
- }
- }
- use_moving_average: True
- """
- global_summaries = set([])
- optimizer_proto = optimizer_pb2.Optimizer()
- text_format.Merge(optimizer_text_proto, optimizer_proto)
- optimizer = optimizer_builder.build(optimizer_proto, global_summaries)
- self.assertTrue(
- isinstance(optimizer, tf.contrib.opt.MovingAverageOptimizer))
-
- def testBuildMovingAverageOptimizerWithNonDefaultDecay(self):
- optimizer_text_proto = """
- adam_optimizer: {
- learning_rate: {
- constant_learning_rate {
- learning_rate: 0.002
- }
- }
- }
- use_moving_average: True
- moving_average_decay: 0.2
- """
- global_summaries = set([])
- optimizer_proto = optimizer_pb2.Optimizer()
- text_format.Merge(optimizer_text_proto, optimizer_proto)
- optimizer = optimizer_builder.build(optimizer_proto, global_summaries)
- self.assertTrue(
- isinstance(optimizer, tf.contrib.opt.MovingAverageOptimizer))
- # TODO(rathodv): Find a way to not depend on the private members.
- self.assertAlmostEqual(optimizer._ema._decay, 0.2)
-
- def testBuildEmptyOptimizer(self):
- optimizer_text_proto = """
- """
- global_summaries = set([])
- optimizer_proto = optimizer_pb2.Optimizer()
- text_format.Merge(optimizer_text_proto, optimizer_proto)
- with self.assertRaises(ValueError):
- optimizer_builder.build(optimizer_proto, global_summaries)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/builders/post_processing_builder.py b/object_detection/builders/post_processing_builder.py
deleted file mode 100644
index fa3a7728..00000000
--- a/object_detection/builders/post_processing_builder.py
+++ /dev/null
@@ -1,123 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Builder function for post processing operations."""
-import functools
-
-import tensorflow as tf
-from object_detection.core import post_processing
-from object_detection.protos import post_processing_pb2
-
-
-def build(post_processing_config):
- """Builds callables for post-processing operations.
-
- Builds callables for non-max suppression and score conversion based on the
- configuration.
-
- Non-max suppression callable takes `boxes`, `scores`, and optionally
- `clip_window`, `parallel_iterations` `masks, and `scope` as inputs. It returns
- `nms_boxes`, `nms_scores`, `nms_classes` `nms_masks` and `num_detections`. See
- post_processing.batch_multiclass_non_max_suppression for the type and shape
- of these tensors.
-
- Score converter callable should be called with `input` tensor. The callable
- returns the output from one of 3 tf operations based on the configuration -
- tf.identity, tf.sigmoid or tf.nn.softmax. See tensorflow documentation for
- argument and return value descriptions.
-
- Args:
- post_processing_config: post_processing.proto object containing the
- parameters for the post-processing operations.
-
- Returns:
- non_max_suppressor_fn: Callable for non-max suppression.
- score_converter_fn: Callable for score conversion.
-
- Raises:
- ValueError: if the post_processing_config is of incorrect type.
- """
- if not isinstance(post_processing_config, post_processing_pb2.PostProcessing):
- raise ValueError('post_processing_config not of type '
- 'post_processing_pb2.Postprocessing.')
- non_max_suppressor_fn = _build_non_max_suppressor(
- post_processing_config.batch_non_max_suppression)
- score_converter_fn = _build_score_converter(
- post_processing_config.score_converter,
- post_processing_config.logit_scale)
- return non_max_suppressor_fn, score_converter_fn
-
-
-def _build_non_max_suppressor(nms_config):
- """Builds non-max suppresson based on the nms config.
-
- Args:
- nms_config: post_processing_pb2.PostProcessing.BatchNonMaxSuppression proto.
-
- Returns:
- non_max_suppressor_fn: Callable non-max suppressor.
-
- Raises:
- ValueError: On incorrect iou_threshold or on incompatible values of
- max_total_detections and max_detections_per_class.
- """
- if nms_config.iou_threshold < 0 or nms_config.iou_threshold > 1.0:
- raise ValueError('iou_threshold not in [0, 1.0].')
- if nms_config.max_detections_per_class > nms_config.max_total_detections:
- raise ValueError('max_detections_per_class should be no greater than '
- 'max_total_detections.')
-
- non_max_suppressor_fn = functools.partial(
- post_processing.batch_multiclass_non_max_suppression,
- score_thresh=nms_config.score_threshold,
- iou_thresh=nms_config.iou_threshold,
- max_size_per_class=nms_config.max_detections_per_class,
- max_total_size=nms_config.max_total_detections)
- return non_max_suppressor_fn
-
-
-def _score_converter_fn_with_logit_scale(tf_score_converter_fn, logit_scale):
- """Create a function to scale logits then apply a Tensorflow function."""
- def score_converter_fn(logits):
- scaled_logits = tf.divide(logits, logit_scale, name='scale_logits')
- return tf_score_converter_fn(scaled_logits, name='convert_scores')
- score_converter_fn.__name__ = '%s_with_logit_scale' % (
- tf_score_converter_fn.__name__)
- return score_converter_fn
-
-
-def _build_score_converter(score_converter_config, logit_scale):
- """Builds score converter based on the config.
-
- Builds one of [tf.identity, tf.sigmoid, tf.softmax] score converters based on
- the config.
-
- Args:
- score_converter_config: post_processing_pb2.PostProcessing.score_converter.
- logit_scale: temperature to use for SOFTMAX score_converter.
-
- Returns:
- Callable score converter op.
-
- Raises:
- ValueError: On unknown score converter.
- """
- if score_converter_config == post_processing_pb2.PostProcessing.IDENTITY:
- return _score_converter_fn_with_logit_scale(tf.identity, logit_scale)
- if score_converter_config == post_processing_pb2.PostProcessing.SIGMOID:
- return _score_converter_fn_with_logit_scale(tf.sigmoid, logit_scale)
- if score_converter_config == post_processing_pb2.PostProcessing.SOFTMAX:
- return _score_converter_fn_with_logit_scale(tf.nn.softmax, logit_scale)
- raise ValueError('Unknown score converter.')
diff --git a/object_detection/builders/post_processing_builder_test.py b/object_detection/builders/post_processing_builder_test.py
deleted file mode 100644
index c39fbfb4..00000000
--- a/object_detection/builders/post_processing_builder_test.py
+++ /dev/null
@@ -1,107 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for post_processing_builder."""
-
-import tensorflow as tf
-from google.protobuf import text_format
-from object_detection.builders import post_processing_builder
-from object_detection.protos import post_processing_pb2
-
-
-class PostProcessingBuilderTest(tf.test.TestCase):
-
- def test_build_non_max_suppressor_with_correct_parameters(self):
- post_processing_text_proto = """
- batch_non_max_suppression {
- score_threshold: 0.7
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 300
- }
- """
- post_processing_config = post_processing_pb2.PostProcessing()
- text_format.Merge(post_processing_text_proto, post_processing_config)
- non_max_suppressor, _ = post_processing_builder.build(
- post_processing_config)
- self.assertEqual(non_max_suppressor.keywords['max_size_per_class'], 100)
- self.assertEqual(non_max_suppressor.keywords['max_total_size'], 300)
- self.assertAlmostEqual(non_max_suppressor.keywords['score_thresh'], 0.7)
- self.assertAlmostEqual(non_max_suppressor.keywords['iou_thresh'], 0.6)
-
- def test_build_identity_score_converter(self):
- post_processing_text_proto = """
- score_converter: IDENTITY
- """
- post_processing_config = post_processing_pb2.PostProcessing()
- text_format.Merge(post_processing_text_proto, post_processing_config)
- _, score_converter = post_processing_builder.build(post_processing_config)
- self.assertEqual(score_converter.__name__, 'identity_with_logit_scale')
-
- inputs = tf.constant([1, 1], tf.float32)
- outputs = score_converter(inputs)
- with self.test_session() as sess:
- converted_scores = sess.run(outputs)
- expected_converted_scores = sess.run(inputs)
- self.assertAllClose(converted_scores, expected_converted_scores)
-
- def test_build_identity_score_converter_with_logit_scale(self):
- post_processing_text_proto = """
- score_converter: IDENTITY
- logit_scale: 2.0
- """
- post_processing_config = post_processing_pb2.PostProcessing()
- text_format.Merge(post_processing_text_proto, post_processing_config)
- _, score_converter = post_processing_builder.build(post_processing_config)
- self.assertEqual(score_converter.__name__, 'identity_with_logit_scale')
-
- inputs = tf.constant([1, 1], tf.float32)
- outputs = score_converter(inputs)
- with self.test_session() as sess:
- converted_scores = sess.run(outputs)
- expected_converted_scores = sess.run(tf.constant([.5, .5], tf.float32))
- self.assertAllClose(converted_scores, expected_converted_scores)
-
- def test_build_sigmoid_score_converter(self):
- post_processing_text_proto = """
- score_converter: SIGMOID
- """
- post_processing_config = post_processing_pb2.PostProcessing()
- text_format.Merge(post_processing_text_proto, post_processing_config)
- _, score_converter = post_processing_builder.build(post_processing_config)
- self.assertEqual(score_converter.__name__, 'sigmoid_with_logit_scale')
-
- def test_build_softmax_score_converter(self):
- post_processing_text_proto = """
- score_converter: SOFTMAX
- """
- post_processing_config = post_processing_pb2.PostProcessing()
- text_format.Merge(post_processing_text_proto, post_processing_config)
- _, score_converter = post_processing_builder.build(post_processing_config)
- self.assertEqual(score_converter.__name__, 'softmax_with_logit_scale')
-
- def test_build_softmax_score_converter_with_temperature(self):
- post_processing_text_proto = """
- score_converter: SOFTMAX
- logit_scale: 2.0
- """
- post_processing_config = post_processing_pb2.PostProcessing()
- text_format.Merge(post_processing_text_proto, post_processing_config)
- _, score_converter = post_processing_builder.build(post_processing_config)
- self.assertEqual(score_converter.__name__, 'softmax_with_logit_scale')
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/builders/preprocessor_builder.py b/object_detection/builders/preprocessor_builder.py
deleted file mode 100644
index 9263925e..00000000
--- a/object_detection/builders/preprocessor_builder.py
+++ /dev/null
@@ -1,324 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Builder for preprocessing steps."""
-
-import tensorflow as tf
-
-from object_detection.core import preprocessor
-from object_detection.protos import preprocessor_pb2
-
-
-def _get_step_config_from_proto(preprocessor_step_config, step_name):
- """Returns the value of a field named step_name from proto.
-
- Args:
- preprocessor_step_config: A preprocessor_pb2.PreprocessingStep object.
- step_name: Name of the field to get value from.
-
- Returns:
- result_dict: a sub proto message from preprocessor_step_config which will be
- later converted to a dictionary.
-
- Raises:
- ValueError: If field does not exist in proto.
- """
- for field, value in preprocessor_step_config.ListFields():
- if field.name == step_name:
- return value
-
- raise ValueError('Could not get field %s from proto!', step_name)
-
-
-def _get_dict_from_proto(config):
- """Helper function to put all proto fields into a dictionary.
-
- For many preprocessing steps, there's an trivial 1-1 mapping from proto fields
- to function arguments. This function automatically populates a dictionary with
- the arguments from the proto.
-
- Protos that CANNOT be trivially populated include:
- * nested messages.
- * steps that check if an optional field is set (ie. where None != 0).
- * protos that don't map 1-1 to arguments (ie. list should be reshaped).
- * fields requiring additional validation (ie. repeated field has n elements).
-
- Args:
- config: A protobuf object that does not violate the conditions above.
-
- Returns:
- result_dict: |config| converted into a python dictionary.
- """
- result_dict = {}
- for field, value in config.ListFields():
- result_dict[field.name] = value
- return result_dict
-
-
-# A map from a PreprocessingStep proto config field name to the preprocessing
-# function that should be used. The PreprocessingStep proto should be parsable
-# with _get_dict_from_proto.
-PREPROCESSING_FUNCTION_MAP = {
- 'normalize_image': preprocessor.normalize_image,
- 'random_pixel_value_scale': preprocessor.random_pixel_value_scale,
- 'random_image_scale': preprocessor.random_image_scale,
- 'random_rgb_to_gray': preprocessor.random_rgb_to_gray,
- 'random_adjust_brightness': preprocessor.random_adjust_brightness,
- 'random_adjust_contrast': preprocessor.random_adjust_contrast,
- 'random_adjust_hue': preprocessor.random_adjust_hue,
- 'random_adjust_saturation': preprocessor.random_adjust_saturation,
- 'random_distort_color': preprocessor.random_distort_color,
- 'random_jitter_boxes': preprocessor.random_jitter_boxes,
- 'random_crop_to_aspect_ratio': preprocessor.random_crop_to_aspect_ratio,
- 'random_black_patches': preprocessor.random_black_patches,
- 'scale_boxes_to_pixel_coordinates': (
- preprocessor.scale_boxes_to_pixel_coordinates),
- 'subtract_channel_mean': preprocessor.subtract_channel_mean,
-}
-
-
-# A map to convert from preprocessor_pb2.ResizeImage.Method enum to
-# tf.image.ResizeMethod.
-RESIZE_METHOD_MAP = {
- preprocessor_pb2.ResizeImage.AREA: tf.image.ResizeMethod.AREA,
- preprocessor_pb2.ResizeImage.BICUBIC: tf.image.ResizeMethod.BICUBIC,
- preprocessor_pb2.ResizeImage.BILINEAR: tf.image.ResizeMethod.BILINEAR,
- preprocessor_pb2.ResizeImage.NEAREST_NEIGHBOR: (
- tf.image.ResizeMethod.NEAREST_NEIGHBOR),
-}
-
-
-def build(preprocessor_step_config):
- """Builds preprocessing step based on the configuration.
-
- Args:
- preprocessor_step_config: PreprocessingStep configuration proto.
-
- Returns:
- function, argmap: A callable function and an argument map to call function
- with.
-
- Raises:
- ValueError: On invalid configuration.
- """
- step_type = preprocessor_step_config.WhichOneof('preprocessing_step')
-
- if step_type in PREPROCESSING_FUNCTION_MAP:
- preprocessing_function = PREPROCESSING_FUNCTION_MAP[step_type]
- step_config = _get_step_config_from_proto(preprocessor_step_config,
- step_type)
- function_args = _get_dict_from_proto(step_config)
- return (preprocessing_function, function_args)
-
- if step_type == 'random_horizontal_flip':
- config = preprocessor_step_config.random_horizontal_flip
- return (preprocessor.random_horizontal_flip,
- {
- 'keypoint_flip_permutation': tuple(
- config.keypoint_flip_permutation),
- })
-
- if step_type == 'random_vertical_flip':
- config = preprocessor_step_config.random_vertical_flip
- return (preprocessor.random_vertical_flip,
- {
- 'keypoint_flip_permutation': tuple(
- config.keypoint_flip_permutation),
- })
-
- if step_type == 'random_rotation90':
- return (preprocessor.random_rotation90, {})
-
- if step_type == 'random_crop_image':
- config = preprocessor_step_config.random_crop_image
- return (preprocessor.random_crop_image,
- {
- 'min_object_covered': config.min_object_covered,
- 'aspect_ratio_range': (config.min_aspect_ratio,
- config.max_aspect_ratio),
- 'area_range': (config.min_area, config.max_area),
- 'overlap_thresh': config.overlap_thresh,
- 'random_coef': config.random_coef,
- })
-
- if step_type == 'random_pad_image':
- config = preprocessor_step_config.random_pad_image
- min_image_size = None
- if (config.HasField('min_image_height') !=
- config.HasField('min_image_width')):
- raise ValueError('min_image_height and min_image_width should be either '
- 'both set or both unset.')
- if config.HasField('min_image_height'):
- min_image_size = (config.min_image_height, config.min_image_width)
-
- max_image_size = None
- if (config.HasField('max_image_height') !=
- config.HasField('max_image_width')):
- raise ValueError('max_image_height and max_image_width should be either '
- 'both set or both unset.')
- if config.HasField('max_image_height'):
- max_image_size = (config.max_image_height, config.max_image_width)
-
- pad_color = config.pad_color
- if pad_color and len(pad_color) != 3:
- raise ValueError('pad_color should have 3 elements (RGB) if set!')
- if not pad_color:
- pad_color = None
- return (preprocessor.random_pad_image,
- {
- 'min_image_size': min_image_size,
- 'max_image_size': max_image_size,
- 'pad_color': pad_color,
- })
-
- if step_type == 'random_crop_pad_image':
- config = preprocessor_step_config.random_crop_pad_image
- min_padded_size_ratio = config.min_padded_size_ratio
- if min_padded_size_ratio and len(min_padded_size_ratio) != 2:
- raise ValueError('min_padded_size_ratio should have 2 elements if set!')
- max_padded_size_ratio = config.max_padded_size_ratio
- if max_padded_size_ratio and len(max_padded_size_ratio) != 2:
- raise ValueError('max_padded_size_ratio should have 2 elements if set!')
- pad_color = config.pad_color
- if pad_color and len(pad_color) != 3:
- raise ValueError('pad_color should have 3 elements if set!')
- kwargs = {
- 'min_object_covered': config.min_object_covered,
- 'aspect_ratio_range': (config.min_aspect_ratio,
- config.max_aspect_ratio),
- 'area_range': (config.min_area, config.max_area),
- 'overlap_thresh': config.overlap_thresh,
- 'random_coef': config.random_coef,
- }
- if min_padded_size_ratio:
- kwargs['min_padded_size_ratio'] = tuple(min_padded_size_ratio)
- if max_padded_size_ratio:
- kwargs['max_padded_size_ratio'] = tuple(max_padded_size_ratio)
- if pad_color:
- kwargs['pad_color'] = tuple(pad_color)
- return (preprocessor.random_crop_pad_image, kwargs)
-
- if step_type == 'random_resize_method':
- config = preprocessor_step_config.random_resize_method
- return (preprocessor.random_resize_method,
- {
- 'target_size': [config.target_height, config.target_width],
- })
-
- if step_type == 'resize_image':
- config = preprocessor_step_config.resize_image
- method = RESIZE_METHOD_MAP[config.method]
- return (preprocessor.resize_image,
- {
- 'new_height': config.new_height,
- 'new_width': config.new_width,
- 'method': method
- })
-
- if step_type == 'ssd_random_crop':
- config = preprocessor_step_config.ssd_random_crop
- if config.operations:
- min_object_covered = [op.min_object_covered for op in config.operations]
- aspect_ratio_range = [(op.min_aspect_ratio, op.max_aspect_ratio)
- for op in config.operations]
- area_range = [(op.min_area, op.max_area) for op in config.operations]
- overlap_thresh = [op.overlap_thresh for op in config.operations]
- random_coef = [op.random_coef for op in config.operations]
- return (preprocessor.ssd_random_crop,
- {
- 'min_object_covered': min_object_covered,
- 'aspect_ratio_range': aspect_ratio_range,
- 'area_range': area_range,
- 'overlap_thresh': overlap_thresh,
- 'random_coef': random_coef,
- })
- return (preprocessor.ssd_random_crop, {})
-
- if step_type == 'ssd_random_crop_pad':
- config = preprocessor_step_config.ssd_random_crop_pad
- if config.operations:
- min_object_covered = [op.min_object_covered for op in config.operations]
- aspect_ratio_range = [(op.min_aspect_ratio, op.max_aspect_ratio)
- for op in config.operations]
- area_range = [(op.min_area, op.max_area) for op in config.operations]
- overlap_thresh = [op.overlap_thresh for op in config.operations]
- random_coef = [op.random_coef for op in config.operations]
- min_padded_size_ratio = [
- (op.min_padded_size_ratio[0], op.min_padded_size_ratio[1])
- for op in config.operations]
- max_padded_size_ratio = [
- (op.max_padded_size_ratio[0], op.max_padded_size_ratio[1])
- for op in config.operations]
- pad_color = [(op.pad_color_r, op.pad_color_g, op.pad_color_b)
- for op in config.operations]
- return (preprocessor.ssd_random_crop_pad,
- {
- 'min_object_covered': min_object_covered,
- 'aspect_ratio_range': aspect_ratio_range,
- 'area_range': area_range,
- 'overlap_thresh': overlap_thresh,
- 'random_coef': random_coef,
- 'min_padded_size_ratio': min_padded_size_ratio,
- 'max_padded_size_ratio': max_padded_size_ratio,
- 'pad_color': pad_color,
- })
- return (preprocessor.ssd_random_crop_pad, {})
-
- if step_type == 'ssd_random_crop_fixed_aspect_ratio':
- config = preprocessor_step_config.ssd_random_crop_fixed_aspect_ratio
- if config.operations:
- min_object_covered = [op.min_object_covered for op in config.operations]
- area_range = [(op.min_area, op.max_area) for op in config.operations]
- overlap_thresh = [op.overlap_thresh for op in config.operations]
- random_coef = [op.random_coef for op in config.operations]
- return (preprocessor.ssd_random_crop_fixed_aspect_ratio,
- {
- 'min_object_covered': min_object_covered,
- 'aspect_ratio': config.aspect_ratio,
- 'area_range': area_range,
- 'overlap_thresh': overlap_thresh,
- 'random_coef': random_coef,
- })
- return (preprocessor.ssd_random_crop_fixed_aspect_ratio, {})
-
- if step_type == 'ssd_random_crop_pad_fixed_aspect_ratio':
- config = preprocessor_step_config.ssd_random_crop_pad_fixed_aspect_ratio
- if config.operations:
- min_object_covered = [op.min_object_covered for op in config.operations]
- aspect_ratio_range = [(op.min_aspect_ratio, op.max_aspect_ratio)
- for op in config.operations]
- area_range = [(op.min_area, op.max_area) for op in config.operations]
- overlap_thresh = [op.overlap_thresh for op in config.operations]
- random_coef = [op.random_coef for op in config.operations]
- min_padded_size_ratio = [
- (op.min_padded_size_ratio[0], op.min_padded_size_ratio[1])
- for op in config.operations]
- max_padded_size_ratio = [
- (op.max_padded_size_ratio[0], op.max_padded_size_ratio[1])
- for op in config.operations]
- return (preprocessor.ssd_random_crop_pad_fixed_aspect_ratio,
- {
- 'min_object_covered': min_object_covered,
- 'aspect_ratio': config.aspect_ratio,
- 'aspect_ratio_range': aspect_ratio_range,
- 'area_range': area_range,
- 'overlap_thresh': overlap_thresh,
- 'random_coef': random_coef,
- 'min_padded_size_ratio': min_padded_size_ratio,
- 'max_padded_size_ratio': max_padded_size_ratio,
- })
- return (preprocessor.ssd_random_crop_pad_fixed_aspect_ratio, {})
-
- raise ValueError('Unknown preprocessing step.')
diff --git a/object_detection/builders/preprocessor_builder_test.py b/object_detection/builders/preprocessor_builder_test.py
deleted file mode 100644
index cc2789aa..00000000
--- a/object_detection/builders/preprocessor_builder_test.py
+++ /dev/null
@@ -1,558 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for preprocessor_builder."""
-
-import tensorflow as tf
-
-from google.protobuf import text_format
-
-from object_detection.builders import preprocessor_builder
-from object_detection.core import preprocessor
-from object_detection.protos import preprocessor_pb2
-
-
-class PreprocessorBuilderTest(tf.test.TestCase):
-
- def assert_dictionary_close(self, dict1, dict2):
- """Helper to check if two dicts with floatst or integers are close."""
- self.assertEqual(sorted(dict1.keys()), sorted(dict2.keys()))
- for key in dict1:
- value = dict1[key]
- if isinstance(value, float):
- self.assertAlmostEqual(value, dict2[key])
- else:
- self.assertEqual(value, dict2[key])
-
- def test_build_normalize_image(self):
- preprocessor_text_proto = """
- normalize_image {
- original_minval: 0.0
- original_maxval: 255.0
- target_minval: -1.0
- target_maxval: 1.0
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.normalize_image)
- self.assertEqual(args, {
- 'original_minval': 0.0,
- 'original_maxval': 255.0,
- 'target_minval': -1.0,
- 'target_maxval': 1.0,
- })
-
- def test_build_random_horizontal_flip(self):
- preprocessor_text_proto = """
- random_horizontal_flip {
- keypoint_flip_permutation: 1
- keypoint_flip_permutation: 0
- keypoint_flip_permutation: 2
- keypoint_flip_permutation: 3
- keypoint_flip_permutation: 5
- keypoint_flip_permutation: 4
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_horizontal_flip)
- self.assertEqual(args, {'keypoint_flip_permutation': (1, 0, 2, 3, 5, 4)})
-
- def test_build_random_vertical_flip(self):
- preprocessor_text_proto = """
- random_vertical_flip {
- keypoint_flip_permutation: 1
- keypoint_flip_permutation: 0
- keypoint_flip_permutation: 2
- keypoint_flip_permutation: 3
- keypoint_flip_permutation: 5
- keypoint_flip_permutation: 4
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_vertical_flip)
- self.assertEqual(args, {'keypoint_flip_permutation': (1, 0, 2, 3, 5, 4)})
-
- def test_build_random_rotation90(self):
- preprocessor_text_proto = """
- random_rotation90 {}
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_rotation90)
- self.assertEqual(args, {})
-
- def test_build_random_pixel_value_scale(self):
- preprocessor_text_proto = """
- random_pixel_value_scale {
- minval: 0.8
- maxval: 1.2
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_pixel_value_scale)
- self.assert_dictionary_close(args, {'minval': 0.8, 'maxval': 1.2})
-
- def test_build_random_image_scale(self):
- preprocessor_text_proto = """
- random_image_scale {
- min_scale_ratio: 0.8
- max_scale_ratio: 2.2
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_image_scale)
- self.assert_dictionary_close(args, {'min_scale_ratio': 0.8,
- 'max_scale_ratio': 2.2})
-
- def test_build_random_rgb_to_gray(self):
- preprocessor_text_proto = """
- random_rgb_to_gray {
- probability: 0.8
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_rgb_to_gray)
- self.assert_dictionary_close(args, {'probability': 0.8})
-
- def test_build_random_adjust_brightness(self):
- preprocessor_text_proto = """
- random_adjust_brightness {
- max_delta: 0.2
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_adjust_brightness)
- self.assert_dictionary_close(args, {'max_delta': 0.2})
-
- def test_build_random_adjust_contrast(self):
- preprocessor_text_proto = """
- random_adjust_contrast {
- min_delta: 0.7
- max_delta: 1.1
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_adjust_contrast)
- self.assert_dictionary_close(args, {'min_delta': 0.7, 'max_delta': 1.1})
-
- def test_build_random_adjust_hue(self):
- preprocessor_text_proto = """
- random_adjust_hue {
- max_delta: 0.01
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_adjust_hue)
- self.assert_dictionary_close(args, {'max_delta': 0.01})
-
- def test_build_random_adjust_saturation(self):
- preprocessor_text_proto = """
- random_adjust_saturation {
- min_delta: 0.75
- max_delta: 1.15
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_adjust_saturation)
- self.assert_dictionary_close(args, {'min_delta': 0.75, 'max_delta': 1.15})
-
- def test_build_random_distort_color(self):
- preprocessor_text_proto = """
- random_distort_color {
- color_ordering: 1
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_distort_color)
- self.assertEqual(args, {'color_ordering': 1})
-
- def test_build_random_jitter_boxes(self):
- preprocessor_text_proto = """
- random_jitter_boxes {
- ratio: 0.1
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_jitter_boxes)
- self.assert_dictionary_close(args, {'ratio': 0.1})
-
- def test_build_random_crop_image(self):
- preprocessor_text_proto = """
- random_crop_image {
- min_object_covered: 0.75
- min_aspect_ratio: 0.75
- max_aspect_ratio: 1.5
- min_area: 0.25
- max_area: 0.875
- overlap_thresh: 0.5
- random_coef: 0.125
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_crop_image)
- self.assertEqual(args, {
- 'min_object_covered': 0.75,
- 'aspect_ratio_range': (0.75, 1.5),
- 'area_range': (0.25, 0.875),
- 'overlap_thresh': 0.5,
- 'random_coef': 0.125,
- })
-
- def test_build_random_pad_image(self):
- preprocessor_text_proto = """
- random_pad_image {
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_pad_image)
- self.assertEqual(args, {
- 'min_image_size': None,
- 'max_image_size': None,
- 'pad_color': None,
- })
-
- def test_build_random_crop_pad_image(self):
- preprocessor_text_proto = """
- random_crop_pad_image {
- min_object_covered: 0.75
- min_aspect_ratio: 0.75
- max_aspect_ratio: 1.5
- min_area: 0.25
- max_area: 0.875
- overlap_thresh: 0.5
- random_coef: 0.125
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_crop_pad_image)
- self.assertEqual(args, {
- 'min_object_covered': 0.75,
- 'aspect_ratio_range': (0.75, 1.5),
- 'area_range': (0.25, 0.875),
- 'overlap_thresh': 0.5,
- 'random_coef': 0.125,
- })
-
- def test_build_random_crop_pad_image_with_optional_parameters(self):
- preprocessor_text_proto = """
- random_crop_pad_image {
- min_object_covered: 0.75
- min_aspect_ratio: 0.75
- max_aspect_ratio: 1.5
- min_area: 0.25
- max_area: 0.875
- overlap_thresh: 0.5
- random_coef: 0.125
- min_padded_size_ratio: 0.5
- min_padded_size_ratio: 0.75
- max_padded_size_ratio: 0.5
- max_padded_size_ratio: 0.75
- pad_color: 0.5
- pad_color: 0.5
- pad_color: 1.0
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_crop_pad_image)
- self.assertEqual(args, {
- 'min_object_covered': 0.75,
- 'aspect_ratio_range': (0.75, 1.5),
- 'area_range': (0.25, 0.875),
- 'overlap_thresh': 0.5,
- 'random_coef': 0.125,
- 'min_padded_size_ratio': (0.5, 0.75),
- 'max_padded_size_ratio': (0.5, 0.75),
- 'pad_color': (0.5, 0.5, 1.0)
- })
-
- def test_build_random_crop_to_aspect_ratio(self):
- preprocessor_text_proto = """
- random_crop_to_aspect_ratio {
- aspect_ratio: 0.85
- overlap_thresh: 0.35
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_crop_to_aspect_ratio)
- self.assert_dictionary_close(args, {'aspect_ratio': 0.85,
- 'overlap_thresh': 0.35})
-
- def test_build_random_black_patches(self):
- preprocessor_text_proto = """
- random_black_patches {
- max_black_patches: 20
- probability: 0.95
- size_to_image_ratio: 0.12
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_black_patches)
- self.assert_dictionary_close(args, {'max_black_patches': 20,
- 'probability': 0.95,
- 'size_to_image_ratio': 0.12})
-
- def test_build_random_resize_method(self):
- preprocessor_text_proto = """
- random_resize_method {
- target_height: 75
- target_width: 100
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.random_resize_method)
- self.assert_dictionary_close(args, {'target_size': [75, 100]})
-
- def test_build_scale_boxes_to_pixel_coordinates(self):
- preprocessor_text_proto = """
- scale_boxes_to_pixel_coordinates {}
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.scale_boxes_to_pixel_coordinates)
- self.assertEqual(args, {})
-
- def test_build_resize_image(self):
- preprocessor_text_proto = """
- resize_image {
- new_height: 75
- new_width: 100
- method: BICUBIC
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.resize_image)
- self.assertEqual(args, {'new_height': 75,
- 'new_width': 100,
- 'method': tf.image.ResizeMethod.BICUBIC})
-
- def test_build_subtract_channel_mean(self):
- preprocessor_text_proto = """
- subtract_channel_mean {
- means: [1.0, 2.0, 3.0]
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.subtract_channel_mean)
- self.assertEqual(args, {'means': [1.0, 2.0, 3.0]})
-
- def test_build_ssd_random_crop(self):
- preprocessor_text_proto = """
- ssd_random_crop {
- operations {
- min_object_covered: 0.0
- min_aspect_ratio: 0.875
- max_aspect_ratio: 1.125
- min_area: 0.5
- max_area: 1.0
- overlap_thresh: 0.0
- random_coef: 0.375
- }
- operations {
- min_object_covered: 0.25
- min_aspect_ratio: 0.75
- max_aspect_ratio: 1.5
- min_area: 0.5
- max_area: 1.0
- overlap_thresh: 0.25
- random_coef: 0.375
- }
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.ssd_random_crop)
- self.assertEqual(args, {'min_object_covered': [0.0, 0.25],
- 'aspect_ratio_range': [(0.875, 1.125), (0.75, 1.5)],
- 'area_range': [(0.5, 1.0), (0.5, 1.0)],
- 'overlap_thresh': [0.0, 0.25],
- 'random_coef': [0.375, 0.375]})
-
- def test_build_ssd_random_crop_empty_operations(self):
- preprocessor_text_proto = """
- ssd_random_crop {
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.ssd_random_crop)
- self.assertEqual(args, {})
-
- def test_build_ssd_random_crop_pad(self):
- preprocessor_text_proto = """
- ssd_random_crop_pad {
- operations {
- min_object_covered: 0.0
- min_aspect_ratio: 0.875
- max_aspect_ratio: 1.125
- min_area: 0.5
- max_area: 1.0
- overlap_thresh: 0.0
- random_coef: 0.375
- min_padded_size_ratio: [1.0, 1.0]
- max_padded_size_ratio: [2.0, 2.0]
- pad_color_r: 0.5
- pad_color_g: 0.5
- pad_color_b: 0.5
- }
- operations {
- min_object_covered: 0.25
- min_aspect_ratio: 0.75
- max_aspect_ratio: 1.5
- min_area: 0.5
- max_area: 1.0
- overlap_thresh: 0.25
- random_coef: 0.375
- min_padded_size_ratio: [1.0, 1.0]
- max_padded_size_ratio: [2.0, 2.0]
- pad_color_r: 0.5
- pad_color_g: 0.5
- pad_color_b: 0.5
- }
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.ssd_random_crop_pad)
- self.assertEqual(args, {'min_object_covered': [0.0, 0.25],
- 'aspect_ratio_range': [(0.875, 1.125), (0.75, 1.5)],
- 'area_range': [(0.5, 1.0), (0.5, 1.0)],
- 'overlap_thresh': [0.0, 0.25],
- 'random_coef': [0.375, 0.375],
- 'min_padded_size_ratio': [(1.0, 1.0), (1.0, 1.0)],
- 'max_padded_size_ratio': [(2.0, 2.0), (2.0, 2.0)],
- 'pad_color': [(0.5, 0.5, 0.5), (0.5, 0.5, 0.5)]})
-
- def test_build_ssd_random_crop_fixed_aspect_ratio(self):
- preprocessor_text_proto = """
- ssd_random_crop_fixed_aspect_ratio {
- operations {
- min_object_covered: 0.0
- min_area: 0.5
- max_area: 1.0
- overlap_thresh: 0.0
- random_coef: 0.375
- }
- operations {
- min_object_covered: 0.25
- min_area: 0.5
- max_area: 1.0
- overlap_thresh: 0.25
- random_coef: 0.375
- }
- aspect_ratio: 0.875
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function, preprocessor.ssd_random_crop_fixed_aspect_ratio)
- self.assertEqual(args, {'min_object_covered': [0.0, 0.25],
- 'aspect_ratio': 0.875,
- 'area_range': [(0.5, 1.0), (0.5, 1.0)],
- 'overlap_thresh': [0.0, 0.25],
- 'random_coef': [0.375, 0.375]})
-
- def test_build_ssd_random_crop_pad_fixed_aspect_ratio(self):
- preprocessor_text_proto = """
- ssd_random_crop_pad_fixed_aspect_ratio {
- operations {
- min_object_covered: 0.0
- min_aspect_ratio: 0.875
- max_aspect_ratio: 1.125
- min_area: 0.5
- max_area: 1.0
- overlap_thresh: 0.0
- random_coef: 0.375
- min_padded_size_ratio: [1.0, 1.0]
- max_padded_size_ratio: [2.0, 2.0]
- }
- operations {
- min_object_covered: 0.25
- min_aspect_ratio: 0.75
- max_aspect_ratio: 1.5
- min_area: 0.5
- max_area: 1.0
- overlap_thresh: 0.25
- random_coef: 0.375
- min_padded_size_ratio: [1.0, 1.0]
- max_padded_size_ratio: [2.0, 2.0]
- }
- aspect_ratio: 0.875
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- function, args = preprocessor_builder.build(preprocessor_proto)
- self.assertEqual(function,
- preprocessor.ssd_random_crop_pad_fixed_aspect_ratio)
- self.assertEqual(args, {'min_object_covered': [0.0, 0.25],
- 'aspect_ratio': 0.875,
- 'aspect_ratio_range': [(0.875, 1.125), (0.75, 1.5)],
- 'area_range': [(0.5, 1.0), (0.5, 1.0)],
- 'overlap_thresh': [0.0, 0.25],
- 'random_coef': [0.375, 0.375],
- 'min_padded_size_ratio': [(1.0, 1.0), (1.0, 1.0)],
- 'max_padded_size_ratio': [(2.0, 2.0), (2.0, 2.0)]})
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/builders/region_similarity_calculator_builder.py b/object_detection/builders/region_similarity_calculator_builder.py
deleted file mode 100644
index fa1d6717..00000000
--- a/object_detection/builders/region_similarity_calculator_builder.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Builder for region similarity calculators."""
-
-from object_detection.core import region_similarity_calculator
-from object_detection.protos import region_similarity_calculator_pb2
-
-
-def build(region_similarity_calculator_config):
- """Builds region similarity calculator based on the configuration.
-
- Builds one of [IouSimilarity, IoaSimilarity, NegSqDistSimilarity] objects. See
- core/region_similarity_calculator.proto for details.
-
- Args:
- region_similarity_calculator_config: RegionSimilarityCalculator
- configuration proto.
-
- Returns:
- region_similarity_calculator: RegionSimilarityCalculator object.
-
- Raises:
- ValueError: On unknown region similarity calculator.
- """
-
- if not isinstance(
- region_similarity_calculator_config,
- region_similarity_calculator_pb2.RegionSimilarityCalculator):
- raise ValueError(
- 'region_similarity_calculator_config not of type '
- 'region_similarity_calculator_pb2.RegionsSimilarityCalculator')
-
- similarity_calculator = region_similarity_calculator_config.WhichOneof(
- 'region_similarity')
- if similarity_calculator == 'iou_similarity':
- return region_similarity_calculator.IouSimilarity()
- if similarity_calculator == 'ioa_similarity':
- return region_similarity_calculator.IoaSimilarity()
- if similarity_calculator == 'neg_sq_dist_similarity':
- return region_similarity_calculator.NegSqDistSimilarity()
-
- raise ValueError('Unknown region similarity calculator.')
-
diff --git a/object_detection/builders/region_similarity_calculator_builder_test.py b/object_detection/builders/region_similarity_calculator_builder_test.py
deleted file mode 100644
index ca3a5512..00000000
--- a/object_detection/builders/region_similarity_calculator_builder_test.py
+++ /dev/null
@@ -1,67 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for region_similarity_calculator_builder."""
-
-import tensorflow as tf
-
-from google.protobuf import text_format
-from object_detection.builders import region_similarity_calculator_builder
-from object_detection.core import region_similarity_calculator
-from object_detection.protos import region_similarity_calculator_pb2 as sim_calc_pb2
-
-
-class RegionSimilarityCalculatorBuilderTest(tf.test.TestCase):
-
- def testBuildIoaSimilarityCalculator(self):
- similarity_calc_text_proto = """
- ioa_similarity {
- }
- """
- similarity_calc_proto = sim_calc_pb2.RegionSimilarityCalculator()
- text_format.Merge(similarity_calc_text_proto, similarity_calc_proto)
- similarity_calc = region_similarity_calculator_builder.build(
- similarity_calc_proto)
- self.assertTrue(isinstance(similarity_calc,
- region_similarity_calculator.IoaSimilarity))
-
- def testBuildIouSimilarityCalculator(self):
- similarity_calc_text_proto = """
- iou_similarity {
- }
- """
- similarity_calc_proto = sim_calc_pb2.RegionSimilarityCalculator()
- text_format.Merge(similarity_calc_text_proto, similarity_calc_proto)
- similarity_calc = region_similarity_calculator_builder.build(
- similarity_calc_proto)
- self.assertTrue(isinstance(similarity_calc,
- region_similarity_calculator.IouSimilarity))
-
- def testBuildNegSqDistSimilarityCalculator(self):
- similarity_calc_text_proto = """
- neg_sq_dist_similarity {
- }
- """
- similarity_calc_proto = sim_calc_pb2.RegionSimilarityCalculator()
- text_format.Merge(similarity_calc_text_proto, similarity_calc_proto)
- similarity_calc = region_similarity_calculator_builder.build(
- similarity_calc_proto)
- self.assertTrue(isinstance(similarity_calc,
- region_similarity_calculator.
- NegSqDistSimilarity))
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/core/BUILD b/object_detection/core/BUILD
deleted file mode 100644
index 5d8aaad7..00000000
--- a/object_detection/core/BUILD
+++ /dev/null
@@ -1,368 +0,0 @@
-# Tensorflow Object Detection API: Core.
-
-package(
- default_visibility = ["//visibility:public"],
-)
-
-licenses(["notice"])
-# Apache 2.0
-
-py_library(
- name = "batcher",
- srcs = ["batcher.py"],
- deps = [
- ":prefetcher",
- ":preprocessor",
- ":standard_fields",
- "//tensorflow",
- ],
-)
-
-py_test(
- name = "batcher_test",
- srcs = ["batcher_test.py"],
- deps = [
- ":batcher",
- "//tensorflow",
- ],
-)
-
-py_library(
- name = "box_list",
- srcs = [
- "box_list.py",
- ],
- deps = [
- "//tensorflow",
- ],
-)
-
-py_test(
- name = "box_list_test",
- srcs = ["box_list_test.py"],
- deps = [
- ":box_list",
- ],
-)
-
-py_library(
- name = "box_list_ops",
- srcs = [
- "box_list_ops.py",
- ],
- deps = [
- ":box_list",
- "//tensorflow",
- "//tensorflow_models/object_detection/utils:shape_utils",
- ],
-)
-
-py_test(
- name = "box_list_ops_test",
- srcs = ["box_list_ops_test.py"],
- deps = [
- ":box_list",
- ":box_list_ops",
- ],
-)
-
-py_library(
- name = "box_coder",
- srcs = [
- "box_coder.py",
- ],
- deps = [
- "//tensorflow",
- ],
-)
-
-py_test(
- name = "box_coder_test",
- srcs = [
- "box_coder_test.py",
- ],
- deps = [
- ":box_coder",
- ":box_list",
- "//tensorflow",
- ],
-)
-
-py_library(
- name = "keypoint_ops",
- srcs = [
- "keypoint_ops.py",
- ],
- deps = [
- "//tensorflow",
- ],
-)
-
-py_test(
- name = "keypoint_ops_test",
- srcs = ["keypoint_ops_test.py"],
- deps = [
- ":keypoint_ops",
- ],
-)
-
-py_library(
- name = "losses",
- srcs = ["losses.py"],
- deps = [
- ":box_list",
- ":box_list_ops",
- "//tensorflow",
- "//tensorflow_models/object_detection/utils:ops",
- ],
-)
-
-py_library(
- name = "matcher",
- srcs = [
- "matcher.py",
- ],
- deps = [
- ],
-)
-
-py_library(
- name = "model",
- srcs = ["model.py"],
- deps = [
- ":standard_fields",
- ],
-)
-
-py_test(
- name = "matcher_test",
- srcs = [
- "matcher_test.py",
- ],
- deps = [
- ":matcher",
- "//tensorflow",
- ],
-)
-
-py_library(
- name = "prefetcher",
- srcs = ["prefetcher.py"],
- deps = ["//tensorflow"],
-)
-
-py_library(
- name = "preprocessor",
- srcs = [
- "preprocessor.py",
- ],
- deps = [
- ":box_list",
- ":box_list_ops",
- ":keypoint_ops",
- ":standard_fields",
- "//tensorflow",
- ],
-)
-
-py_test(
- name = "preprocessor_test",
- srcs = [
- "preprocessor_test.py",
- ],
- deps = [
- ":preprocessor",
- "//tensorflow",
- ],
-)
-
-py_test(
- name = "losses_test",
- srcs = ["losses_test.py"],
- deps = [
- ":box_list",
- ":losses",
- ":matcher",
- "//tensorflow",
- ],
-)
-
-py_test(
- name = "prefetcher_test",
- srcs = ["prefetcher_test.py"],
- deps = [
- ":prefetcher",
- "//tensorflow",
- ],
-)
-
-py_library(
- name = "standard_fields",
- srcs = [
- "standard_fields.py",
- ],
-)
-
-py_library(
- name = "post_processing",
- srcs = ["post_processing.py"],
- deps = [
- ":box_list",
- ":box_list_ops",
- ":standard_fields",
- "//tensorflow",
- ],
-)
-
-py_test(
- name = "post_processing_test",
- srcs = ["post_processing_test.py"],
- deps = [
- ":box_list",
- ":box_list_ops",
- ":post_processing",
- "//tensorflow",
- ],
-)
-
-py_library(
- name = "target_assigner",
- srcs = [
- "target_assigner.py",
- ],
- deps = [
- ":box_list",
- ":box_list_ops",
- ":matcher",
- ":region_similarity_calculator",
- "//tensorflow",
- "//tensorflow_models/object_detection/box_coders:faster_rcnn_box_coder",
- "//tensorflow_models/object_detection/box_coders:mean_stddev_box_coder",
- "//tensorflow_models/object_detection/core:box_coder",
- "//tensorflow_models/object_detection/matchers:argmax_matcher",
- "//tensorflow_models/object_detection/matchers:bipartite_matcher",
- ],
-)
-
-py_test(
- name = "target_assigner_test",
- size = "large",
- timeout = "long",
- srcs = ["target_assigner_test.py"],
- deps = [
- ":box_list",
- ":region_similarity_calculator",
- ":target_assigner",
- "//tensorflow",
- "//tensorflow_models/object_detection/box_coders:mean_stddev_box_coder",
- "//tensorflow_models/object_detection/matchers:bipartite_matcher",
- ],
-)
-
-py_library(
- name = "data_decoder",
- srcs = ["data_decoder.py"],
-)
-
-py_library(
- name = "data_parser",
- srcs = ["data_parser.py"],
-)
-
-py_library(
- name = "box_predictor",
- srcs = ["box_predictor.py"],
- deps = [
- "//tensorflow",
- "//tensorflow_models/object_detection/utils:ops",
- "//tensorflow_models/object_detection/utils:shape_utils",
- "//tensorflow_models/object_detection/utils:static_shape",
- ],
-)
-
-py_test(
- name = "box_predictor_test",
- srcs = ["box_predictor_test.py"],
- deps = [
- ":box_predictor",
- "//tensorflow",
- "//tensorflow_models/object_detection/builders:hyperparams_builder",
- "//tensorflow_models/object_detection/protos:hyperparams_py_pb2",
- ],
-)
-
-py_library(
- name = "region_similarity_calculator",
- srcs = [
- "region_similarity_calculator.py",
- ],
- deps = [
- "//tensorflow",
- "//tensorflow_models/object_detection/core:box_list_ops",
- ],
-)
-
-py_test(
- name = "region_similarity_calculator_test",
- srcs = [
- "region_similarity_calculator_test.py",
- ],
- deps = [
- ":region_similarity_calculator",
- "//tensorflow_models/object_detection/core:box_list",
- ],
-)
-
-py_library(
- name = "anchor_generator",
- srcs = [
- "anchor_generator.py",
- ],
- deps = [
- "//tensorflow",
- ],
-)
-
-py_library(
- name = "minibatch_sampler",
- srcs = [
- "minibatch_sampler.py",
- ],
- deps = [
- "//tensorflow",
- "//tensorflow_models/object_detection/utils:ops",
- ],
-)
-
-py_test(
- name = "minibatch_sampler_test",
- srcs = [
- "minibatch_sampler_test.py",
- ],
- deps = [
- ":minibatch_sampler",
- "//tensorflow",
- ],
-)
-
-py_library(
- name = "balanced_positive_negative_sampler",
- srcs = [
- "balanced_positive_negative_sampler.py",
- ],
- deps = [
- ":minibatch_sampler",
- "//tensorflow",
- ],
-)
-
-py_test(
- name = "balanced_positive_negative_sampler_test",
- srcs = [
- "balanced_positive_negative_sampler_test.py",
- ],
- deps = [
- ":balanced_positive_negative_sampler",
- "//tensorflow",
- ],
-)
diff --git a/object_detection/core/__init__.py b/object_detection/core/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/object_detection/core/__pycache__/__init__.cpython-35.pyc b/object_detection/core/__pycache__/__init__.cpython-35.pyc
deleted file mode 100644
index a42c6d32..00000000
Binary files a/object_detection/core/__pycache__/__init__.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/core/__pycache__/anchor_generator.cpython-35.pyc b/object_detection/core/__pycache__/anchor_generator.cpython-35.pyc
deleted file mode 100644
index c4e9bddb..00000000
Binary files a/object_detection/core/__pycache__/anchor_generator.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/core/__pycache__/balanced_positive_negative_sampler.cpython-35.pyc b/object_detection/core/__pycache__/balanced_positive_negative_sampler.cpython-35.pyc
deleted file mode 100644
index a259a66b..00000000
Binary files a/object_detection/core/__pycache__/balanced_positive_negative_sampler.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/core/__pycache__/box_coder.cpython-35.pyc b/object_detection/core/__pycache__/box_coder.cpython-35.pyc
deleted file mode 100644
index f18d49fb..00000000
Binary files a/object_detection/core/__pycache__/box_coder.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/core/__pycache__/box_list.cpython-35.pyc b/object_detection/core/__pycache__/box_list.cpython-35.pyc
deleted file mode 100644
index f1cb18db..00000000
Binary files a/object_detection/core/__pycache__/box_list.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/core/__pycache__/box_list_ops.cpython-35.pyc b/object_detection/core/__pycache__/box_list_ops.cpython-35.pyc
deleted file mode 100644
index 5a106696..00000000
Binary files a/object_detection/core/__pycache__/box_list_ops.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/core/__pycache__/box_predictor.cpython-35.pyc b/object_detection/core/__pycache__/box_predictor.cpython-35.pyc
deleted file mode 100644
index e419bbbf..00000000
Binary files a/object_detection/core/__pycache__/box_predictor.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/core/__pycache__/data_decoder.cpython-35.pyc b/object_detection/core/__pycache__/data_decoder.cpython-35.pyc
deleted file mode 100644
index 65b7de17..00000000
Binary files a/object_detection/core/__pycache__/data_decoder.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/core/__pycache__/keypoint_ops.cpython-35.pyc b/object_detection/core/__pycache__/keypoint_ops.cpython-35.pyc
deleted file mode 100644
index 7ca49b1a..00000000
Binary files a/object_detection/core/__pycache__/keypoint_ops.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/core/__pycache__/losses.cpython-35.pyc b/object_detection/core/__pycache__/losses.cpython-35.pyc
deleted file mode 100644
index 4ffe5697..00000000
Binary files a/object_detection/core/__pycache__/losses.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/core/__pycache__/matcher.cpython-35.pyc b/object_detection/core/__pycache__/matcher.cpython-35.pyc
deleted file mode 100644
index 0f331fe1..00000000
Binary files a/object_detection/core/__pycache__/matcher.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/core/__pycache__/minibatch_sampler.cpython-35.pyc b/object_detection/core/__pycache__/minibatch_sampler.cpython-35.pyc
deleted file mode 100644
index 7fcdb236..00000000
Binary files a/object_detection/core/__pycache__/minibatch_sampler.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/core/__pycache__/model.cpython-35.pyc b/object_detection/core/__pycache__/model.cpython-35.pyc
deleted file mode 100644
index f162341c..00000000
Binary files a/object_detection/core/__pycache__/model.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/core/__pycache__/post_processing.cpython-35.pyc b/object_detection/core/__pycache__/post_processing.cpython-35.pyc
deleted file mode 100644
index 6192014d..00000000
Binary files a/object_detection/core/__pycache__/post_processing.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/core/__pycache__/preprocessor.cpython-35.pyc b/object_detection/core/__pycache__/preprocessor.cpython-35.pyc
deleted file mode 100644
index e0b16b7f..00000000
Binary files a/object_detection/core/__pycache__/preprocessor.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/core/__pycache__/region_similarity_calculator.cpython-35.pyc b/object_detection/core/__pycache__/region_similarity_calculator.cpython-35.pyc
deleted file mode 100644
index 953ac19d..00000000
Binary files a/object_detection/core/__pycache__/region_similarity_calculator.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/core/__pycache__/standard_fields.cpython-35.pyc b/object_detection/core/__pycache__/standard_fields.cpython-35.pyc
deleted file mode 100644
index 31f74c6d..00000000
Binary files a/object_detection/core/__pycache__/standard_fields.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/core/__pycache__/target_assigner.cpython-35.pyc b/object_detection/core/__pycache__/target_assigner.cpython-35.pyc
deleted file mode 100644
index 039d78c4..00000000
Binary files a/object_detection/core/__pycache__/target_assigner.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/core/anchor_generator.py b/object_detection/core/anchor_generator.py
deleted file mode 100644
index ed6a2bc5..00000000
--- a/object_detection/core/anchor_generator.py
+++ /dev/null
@@ -1,142 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Base anchor generator.
-
-The job of the anchor generator is to create (or load) a collection
-of bounding boxes to be used as anchors.
-
-Generated anchors are assumed to match some convolutional grid or list of grid
-shapes. For example, we might want to generate anchors matching an 8x8
-feature map and a 4x4 feature map. If we place 3 anchors per grid location
-on the first feature map and 6 anchors per grid location on the second feature
-map, then 3*8*8 + 6*4*4 = 288 anchors are generated in total.
-
-To support fully convolutional settings, feature map shapes are passed
-dynamically at generation time. The number of anchors to place at each location
-is static --- implementations of AnchorGenerator must always be able return
-the number of anchors that it uses per location for each feature map.
-"""
-from abc import ABCMeta
-from abc import abstractmethod
-
-import tensorflow as tf
-
-
-class AnchorGenerator(object):
- """Abstract base class for anchor generators."""
- __metaclass__ = ABCMeta
-
- @abstractmethod
- def name_scope(self):
- """Name scope.
-
- Must be defined by implementations.
-
- Returns:
- a string representing the name scope of the anchor generation operation.
- """
- pass
-
- @property
- def check_num_anchors(self):
- """Whether to dynamically check the number of anchors generated.
-
- Can be overridden by implementations that would like to disable this
- behavior.
-
- Returns:
- a boolean controlling whether the Generate function should dynamically
- check the number of anchors generated against the mathematically
- expected number of anchors.
- """
- return True
-
- @abstractmethod
- def num_anchors_per_location(self):
- """Returns the number of anchors per spatial location.
-
- Returns:
- a list of integers, one for each expected feature map to be passed to
- the `generate` function.
- """
- pass
-
- def generate(self, feature_map_shape_list, **params):
- """Generates a collection of bounding boxes to be used as anchors.
-
- TODO: remove **params from argument list and make stride and offsets (for
- multiple_grid_anchor_generator) constructor arguments.
-
- Args:
- feature_map_shape_list: list of (height, width) pairs in the format
- [(height_0, width_0), (height_1, width_1), ...] that the generated
- anchors must align with. Pairs can be provided as 1-dimensional
- integer tensors of length 2 or simply as tuples of integers.
- **params: parameters for anchor generation op
-
- Returns:
- boxes: a BoxList holding a collection of N anchor boxes
- Raises:
- ValueError: if the number of feature map shapes does not match the length
- of NumAnchorsPerLocation.
- """
- if self.check_num_anchors and (
- len(feature_map_shape_list) != len(self.num_anchors_per_location())):
- raise ValueError('Number of feature maps is expected to equal the length '
- 'of `num_anchors_per_location`.')
- with tf.name_scope(self.name_scope()):
- anchors = self._generate(feature_map_shape_list, **params)
- if self.check_num_anchors:
- with tf.control_dependencies([
- self._assert_correct_number_of_anchors(
- anchors, feature_map_shape_list)]):
- anchors.set(tf.identity(anchors.get()))
- return anchors
-
- @abstractmethod
- def _generate(self, feature_map_shape_list, **params):
- """To be overridden by implementations.
-
- Args:
- feature_map_shape_list: list of (height, width) pairs in the format
- [(height_0, width_0), (height_1, width_1), ...] that the generated
- anchors must align with.
- **params: parameters for anchor generation op
-
- Returns:
- boxes: a BoxList holding a collection of N anchor boxes
- """
- pass
-
- def _assert_correct_number_of_anchors(self, anchors, feature_map_shape_list):
- """Assert that correct number of anchors was generated.
-
- Args:
- anchors: box_list.BoxList object holding anchors generated
- feature_map_shape_list: list of (height, width) pairs in the format
- [(height_0, width_0), (height_1, width_1), ...] that the generated
- anchors must align with.
- Returns:
- Op that raises InvalidArgumentError if the number of anchors does not
- match the number of expected anchors.
- """
- expected_num_anchors = 0
- for num_anchors_per_location, feature_map_shape in zip(
- self.num_anchors_per_location(), feature_map_shape_list):
- expected_num_anchors += (num_anchors_per_location
- * feature_map_shape[0]
- * feature_map_shape[1])
- return tf.assert_equal(expected_num_anchors, anchors.num_boxes())
diff --git a/object_detection/core/balanced_positive_negative_sampler.py b/object_detection/core/balanced_positive_negative_sampler.py
deleted file mode 100644
index 68844c4f..00000000
--- a/object_detection/core/balanced_positive_negative_sampler.py
+++ /dev/null
@@ -1,92 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Class to subsample minibatches by balancing positives and negatives.
-
-Subsamples minibatches based on a pre-specified positive fraction in range
-[0,1]. The class presumes there are many more negatives than positive examples:
-if the desired batch_size cannot be achieved with the pre-specified positive
-fraction, it fills the rest with negative examples. If this is not sufficient
-for obtaining the desired batch_size, it returns fewer examples.
-
-The main function to call is Subsample(self, indicator, labels). For convenience
-one can also call SubsampleWeights(self, weights, labels) which is defined in
-the minibatch_sampler base class.
-"""
-
-import tensorflow as tf
-
-from object_detection.core import minibatch_sampler
-
-
-class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
- """Subsamples minibatches to a desired balance of positives and negatives."""
-
- def __init__(self, positive_fraction=0.5):
- """Constructs a minibatch sampler.
-
- Args:
- positive_fraction: desired fraction of positive examples (scalar in [0,1])
-
- Raises:
- ValueError: if positive_fraction < 0, or positive_fraction > 1
- """
- if positive_fraction < 0 or positive_fraction > 1:
- raise ValueError('positive_fraction should be in range [0,1]. '
- 'Received: %s.' % positive_fraction)
- self._positive_fraction = positive_fraction
-
- def subsample(self, indicator, batch_size, labels):
- """Returns subsampled minibatch.
-
- Args:
- indicator: boolean tensor of shape [N] whose True entries can be sampled.
- batch_size: desired batch size.
- labels: boolean tensor of shape [N] denoting positive(=True) and negative
- (=False) examples.
-
- Returns:
- is_sampled: boolean tensor of shape [N], True for entries which are
- sampled.
-
- Raises:
- ValueError: if labels and indicator are not 1D boolean tensors.
- """
- if len(indicator.get_shape().as_list()) != 1:
- raise ValueError('indicator must be 1 dimensional, got a tensor of '
- 'shape %s' % indicator.get_shape())
- if len(labels.get_shape().as_list()) != 1:
- raise ValueError('labels must be 1 dimensional, got a tensor of '
- 'shape %s' % labels.get_shape())
- if labels.dtype != tf.bool:
- raise ValueError('labels should be of type bool. Received: %s' %
- labels.dtype)
- if indicator.dtype != tf.bool:
- raise ValueError('indicator should be of type bool. Received: %s' %
- indicator.dtype)
-
- # Only sample from indicated samples
- negative_idx = tf.logical_not(labels)
- positive_idx = tf.logical_and(labels, indicator)
- negative_idx = tf.logical_and(negative_idx, indicator)
-
- # Sample positive and negative samples separately
- max_num_pos = int(self._positive_fraction * batch_size)
- sampled_pos_idx = self.subsample_indicator(positive_idx, max_num_pos)
- max_num_neg = batch_size - tf.reduce_sum(tf.cast(sampled_pos_idx, tf.int32))
- sampled_neg_idx = self.subsample_indicator(negative_idx, max_num_neg)
-
- sampled_idx = tf.logical_or(sampled_pos_idx, sampled_neg_idx)
- return sampled_idx
diff --git a/object_detection/core/balanced_positive_negative_sampler_test.py b/object_detection/core/balanced_positive_negative_sampler_test.py
deleted file mode 100644
index 23991cf5..00000000
--- a/object_detection/core/balanced_positive_negative_sampler_test.py
+++ /dev/null
@@ -1,83 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.core.balanced_positive_negative_sampler."""
-
-import numpy as np
-import tensorflow as tf
-
-from object_detection.core import balanced_positive_negative_sampler
-
-
-class BalancedPositiveNegativeSamplerTest(tf.test.TestCase):
-
- def test_subsample_all_examples(self):
- numpy_labels = np.random.permutation(300)
- indicator = tf.constant(np.ones(300) == 1)
- numpy_labels = (numpy_labels - 200) > 0
-
- labels = tf.constant(numpy_labels)
-
- sampler = (balanced_positive_negative_sampler.
- BalancedPositiveNegativeSampler())
- is_sampled = sampler.subsample(indicator, 64, labels)
- with self.test_session() as sess:
- is_sampled = sess.run(is_sampled)
- self.assertTrue(sum(is_sampled) == 64)
- self.assertTrue(sum(np.logical_and(numpy_labels, is_sampled)) == 32)
- self.assertTrue(sum(np.logical_and(
- np.logical_not(numpy_labels), is_sampled)) == 32)
-
- def test_subsample_selection(self):
- # Test random sampling when only some examples can be sampled:
- # 100 samples, 20 positives, 10 positives cannot be sampled
- numpy_labels = np.arange(100)
- numpy_indicator = numpy_labels < 90
- indicator = tf.constant(numpy_indicator)
- numpy_labels = (numpy_labels - 80) >= 0
-
- labels = tf.constant(numpy_labels)
-
- sampler = (balanced_positive_negative_sampler.
- BalancedPositiveNegativeSampler())
- is_sampled = sampler.subsample(indicator, 64, labels)
- with self.test_session() as sess:
- is_sampled = sess.run(is_sampled)
- self.assertTrue(sum(is_sampled) == 64)
- self.assertTrue(sum(np.logical_and(numpy_labels, is_sampled)) == 10)
- self.assertTrue(sum(np.logical_and(
- np.logical_not(numpy_labels), is_sampled)) == 54)
- self.assertAllEqual(is_sampled, np.logical_and(is_sampled,
- numpy_indicator))
-
- def test_raises_error_with_incorrect_label_shape(self):
- labels = tf.constant([[True, False, False]])
- indicator = tf.constant([True, False, True])
- sampler = (balanced_positive_negative_sampler.
- BalancedPositiveNegativeSampler())
- with self.assertRaises(ValueError):
- sampler.subsample(indicator, 64, labels)
-
- def test_raises_error_with_incorrect_indicator_shape(self):
- labels = tf.constant([True, False, False])
- indicator = tf.constant([[True, False, True]])
- sampler = (balanced_positive_negative_sampler.
- BalancedPositiveNegativeSampler())
- with self.assertRaises(ValueError):
- sampler.subsample(indicator, 64, labels)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/core/batcher.py b/object_detection/core/batcher.py
deleted file mode 100644
index c5dfb712..00000000
--- a/object_detection/core/batcher.py
+++ /dev/null
@@ -1,136 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Provides functions to batch a dictionary of input tensors."""
-import collections
-
-import tensorflow as tf
-
-from object_detection.core import prefetcher
-
-rt_shape_str = '_runtime_shapes'
-
-
-class BatchQueue(object):
- """BatchQueue class.
-
- This class creates a batch queue to asynchronously enqueue tensors_dict.
- It also adds a FIFO prefetcher so that the batches are readily available
- for the consumers. Dequeue ops for a BatchQueue object can be created via
- the Dequeue method which evaluates to a batch of tensor_dict.
-
- Example input pipeline with batching:
- ------------------------------------
- key, string_tensor = slim.parallel_reader.parallel_read(...)
- tensor_dict = decoder.decode(string_tensor)
- tensor_dict = preprocessor.preprocess(tensor_dict, ...)
- batch_queue = batcher.BatchQueue(tensor_dict,
- batch_size=32,
- batch_queue_capacity=2000,
- num_batch_queue_threads=8,
- prefetch_queue_capacity=20)
- tensor_dict = batch_queue.dequeue()
- outputs = Model(tensor_dict)
- ...
- -----------------------------------
-
- Notes:
- -----
- This class batches tensors of unequal sizes by zero padding and unpadding
- them after generating a batch. This can be computationally expensive when
- batching tensors (such as images) that are of vastly different sizes. So it is
- recommended that the shapes of such tensors be fully defined in tensor_dict
- while other lightweight tensors such as bounding box corners and class labels
- can be of varying sizes. Use either crop or resize operations to fully define
- the shape of an image in tensor_dict.
-
- It is also recommended to perform any preprocessing operations on tensors
- before passing to BatchQueue and subsequently calling the Dequeue method.
-
- Another caveat is that this class does not read the last batch if it is not
- full. The current implementation makes it hard to support that use case. So,
- for evaluation, when it is critical to run all the examples through your
- network use the input pipeline example mentioned in core/prefetcher.py.
- """
-
- def __init__(self, tensor_dict, batch_size, batch_queue_capacity,
- num_batch_queue_threads, prefetch_queue_capacity):
- """Constructs a batch queue holding tensor_dict.
-
- Args:
- tensor_dict: dictionary of tensors to batch.
- batch_size: batch size.
- batch_queue_capacity: max capacity of the queue from which the tensors are
- batched.
- num_batch_queue_threads: number of threads to use for batching.
- prefetch_queue_capacity: max capacity of the queue used to prefetch
- assembled batches.
- """
- # Remember static shapes to set shapes of batched tensors.
- static_shapes = collections.OrderedDict(
- {key: tensor.get_shape() for key, tensor in tensor_dict.items()})
- # Remember runtime shapes to unpad tensors after batching.
- runtime_shapes = collections.OrderedDict(
- {(key + rt_shape_str): tf.shape(tensor)
- for key, tensor in tensor_dict.items()})
-
- all_tensors = tensor_dict
- all_tensors.update(runtime_shapes)
- batched_tensors = tf.train.batch(
- all_tensors,
- capacity=batch_queue_capacity,
- batch_size=batch_size,
- dynamic_pad=True,
- num_threads=num_batch_queue_threads)
-
- self._queue = prefetcher.prefetch(batched_tensors,
- prefetch_queue_capacity)
- self._static_shapes = static_shapes
- self._batch_size = batch_size
-
- def dequeue(self):
- """Dequeues a batch of tensor_dict from the BatchQueue.
-
- TODO: use allow_smaller_final_batch to allow running over the whole eval set
-
- Returns:
- A list of tensor_dicts of the requested batch_size.
- """
- batched_tensors = self._queue.dequeue()
- # Separate input tensors from tensors containing their runtime shapes.
- tensors = {}
- shapes = {}
- for key, batched_tensor in batched_tensors.items():
- unbatched_tensor_list = tf.unstack(batched_tensor)
- for i, unbatched_tensor in enumerate(unbatched_tensor_list):
- if rt_shape_str in key:
- shapes[(key[:-len(rt_shape_str)], i)] = unbatched_tensor
- else:
- tensors[(key, i)] = unbatched_tensor
-
- # Undo that padding using shapes and create a list of size `batch_size` that
- # contains tensor dictionaries.
- tensor_dict_list = []
- batch_size = self._batch_size
- for batch_id in range(batch_size):
- tensor_dict = {}
- for key in self._static_shapes:
- tensor_dict[key] = tf.slice(tensors[(key, batch_id)],
- tf.zeros_like(shapes[(key, batch_id)]),
- shapes[(key, batch_id)])
- tensor_dict[key].set_shape(self._static_shapes[key])
- tensor_dict_list.append(tensor_dict)
-
- return tensor_dict_list
diff --git a/object_detection/core/batcher_test.py b/object_detection/core/batcher_test.py
deleted file mode 100644
index 61b4390b..00000000
--- a/object_detection/core/batcher_test.py
+++ /dev/null
@@ -1,158 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.core.batcher."""
-
-import numpy as np
-import tensorflow as tf
-
-from object_detection.core import batcher
-
-slim = tf.contrib.slim
-
-
-class BatcherTest(tf.test.TestCase):
-
- def test_batch_and_unpad_2d_tensors_of_different_sizes_in_1st_dimension(self):
- with self.test_session() as sess:
- batch_size = 3
- num_batches = 2
- examples = tf.Variable(tf.constant(2, dtype=tf.int32))
- counter = examples.count_up_to(num_batches * batch_size + 2)
- boxes = tf.tile(
- tf.reshape(tf.range(4), [1, 4]), tf.stack([counter, tf.constant(1)]))
- batch_queue = batcher.BatchQueue(
- tensor_dict={'boxes': boxes},
- batch_size=batch_size,
- batch_queue_capacity=100,
- num_batch_queue_threads=1,
- prefetch_queue_capacity=100)
- batch = batch_queue.dequeue()
-
- for tensor_dict in batch:
- for tensor in tensor_dict.values():
- self.assertAllEqual([None, 4], tensor.get_shape().as_list())
-
- tf.initialize_all_variables().run()
- with slim.queues.QueueRunners(sess):
- i = 2
- for _ in range(num_batches):
- batch_np = sess.run(batch)
- for tensor_dict in batch_np:
- for tensor in tensor_dict.values():
- self.assertAllEqual(tensor, np.tile(np.arange(4), (i, 1)))
- i += 1
- with self.assertRaises(tf.errors.OutOfRangeError):
- sess.run(batch)
-
- def test_batch_and_unpad_2d_tensors_of_different_sizes_in_all_dimensions(
- self):
- with self.test_session() as sess:
- batch_size = 3
- num_batches = 2
- examples = tf.Variable(tf.constant(2, dtype=tf.int32))
- counter = examples.count_up_to(num_batches * batch_size + 2)
- image = tf.reshape(
- tf.range(counter * counter), tf.stack([counter, counter]))
- batch_queue = batcher.BatchQueue(
- tensor_dict={'image': image},
- batch_size=batch_size,
- batch_queue_capacity=100,
- num_batch_queue_threads=1,
- prefetch_queue_capacity=100)
- batch = batch_queue.dequeue()
-
- for tensor_dict in batch:
- for tensor in tensor_dict.values():
- self.assertAllEqual([None, None], tensor.get_shape().as_list())
-
- tf.initialize_all_variables().run()
- with slim.queues.QueueRunners(sess):
- i = 2
- for _ in range(num_batches):
- batch_np = sess.run(batch)
- for tensor_dict in batch_np:
- for tensor in tensor_dict.values():
- self.assertAllEqual(tensor, np.arange(i * i).reshape((i, i)))
- i += 1
- with self.assertRaises(tf.errors.OutOfRangeError):
- sess.run(batch)
-
- def test_batch_and_unpad_2d_tensors_of_same_size_in_all_dimensions(self):
- with self.test_session() as sess:
- batch_size = 3
- num_batches = 2
- examples = tf.Variable(tf.constant(1, dtype=tf.int32))
- counter = examples.count_up_to(num_batches * batch_size + 1)
- image = tf.reshape(tf.range(1, 13), [4, 3]) * counter
- batch_queue = batcher.BatchQueue(
- tensor_dict={'image': image},
- batch_size=batch_size,
- batch_queue_capacity=100,
- num_batch_queue_threads=1,
- prefetch_queue_capacity=100)
- batch = batch_queue.dequeue()
-
- for tensor_dict in batch:
- for tensor in tensor_dict.values():
- self.assertAllEqual([4, 3], tensor.get_shape().as_list())
-
- tf.initialize_all_variables().run()
- with slim.queues.QueueRunners(sess):
- i = 1
- for _ in range(num_batches):
- batch_np = sess.run(batch)
- for tensor_dict in batch_np:
- for tensor in tensor_dict.values():
- self.assertAllEqual(tensor, np.arange(1, 13).reshape((4, 3)) * i)
- i += 1
- with self.assertRaises(tf.errors.OutOfRangeError):
- sess.run(batch)
-
- def test_batcher_when_batch_size_is_one(self):
- with self.test_session() as sess:
- batch_size = 1
- num_batches = 2
- examples = tf.Variable(tf.constant(2, dtype=tf.int32))
- counter = examples.count_up_to(num_batches * batch_size + 2)
- image = tf.reshape(
- tf.range(counter * counter), tf.stack([counter, counter]))
- batch_queue = batcher.BatchQueue(
- tensor_dict={'image': image},
- batch_size=batch_size,
- batch_queue_capacity=100,
- num_batch_queue_threads=1,
- prefetch_queue_capacity=100)
- batch = batch_queue.dequeue()
-
- for tensor_dict in batch:
- for tensor in tensor_dict.values():
- self.assertAllEqual([None, None], tensor.get_shape().as_list())
-
- tf.initialize_all_variables().run()
- with slim.queues.QueueRunners(sess):
- i = 2
- for _ in range(num_batches):
- batch_np = sess.run(batch)
- for tensor_dict in batch_np:
- for tensor in tensor_dict.values():
- self.assertAllEqual(tensor, np.arange(i * i).reshape((i, i)))
- i += 1
- with self.assertRaises(tf.errors.OutOfRangeError):
- sess.run(batch)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/core/box_coder.py b/object_detection/core/box_coder.py
deleted file mode 100644
index f20ac956..00000000
--- a/object_detection/core/box_coder.py
+++ /dev/null
@@ -1,151 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Base box coder.
-
-Box coders convert between coordinate frames, namely image-centric
-(with (0,0) on the top left of image) and anchor-centric (with (0,0) being
-defined by a specific anchor).
-
-Users of a BoxCoder can call two methods:
- encode: which encodes a box with respect to a given anchor
- (or rather, a tensor of boxes wrt a corresponding tensor of anchors) and
- decode: which inverts this encoding with a decode operation.
-In both cases, the arguments are assumed to be in 1-1 correspondence already;
-it is not the job of a BoxCoder to perform matching.
-"""
-from abc import ABCMeta
-from abc import abstractmethod
-from abc import abstractproperty
-
-import tensorflow as tf
-
-
-# Box coder types.
-FASTER_RCNN = 'faster_rcnn'
-KEYPOINT = 'keypoint'
-MEAN_STDDEV = 'mean_stddev'
-SQUARE = 'square'
-
-
-class BoxCoder(object):
- """Abstract base class for box coder."""
- __metaclass__ = ABCMeta
-
- @abstractproperty
- def code_size(self):
- """Return the size of each code.
-
- This number is a constant and should agree with the output of the `encode`
- op (e.g. if rel_codes is the output of self.encode(...), then it should have
- shape [N, code_size()]). This abstractproperty should be overridden by
- implementations.
-
- Returns:
- an integer constant
- """
- pass
-
- def encode(self, boxes, anchors):
- """Encode a box list relative to an anchor collection.
-
- Args:
- boxes: BoxList holding N boxes to be encoded
- anchors: BoxList of N anchors
-
- Returns:
- a tensor representing N relative-encoded boxes
- """
- with tf.name_scope('Encode'):
- return self._encode(boxes, anchors)
-
- def decode(self, rel_codes, anchors):
- """Decode boxes that are encoded relative to an anchor collection.
-
- Args:
- rel_codes: a tensor representing N relative-encoded boxes
- anchors: BoxList of anchors
-
- Returns:
- boxlist: BoxList holding N boxes encoded in the ordinary way (i.e.,
- with corners y_min, x_min, y_max, x_max)
- """
- with tf.name_scope('Decode'):
- return self._decode(rel_codes, anchors)
-
- @abstractmethod
- def _encode(self, boxes, anchors):
- """Method to be overriden by implementations.
-
- Args:
- boxes: BoxList holding N boxes to be encoded
- anchors: BoxList of N anchors
-
- Returns:
- a tensor representing N relative-encoded boxes
- """
- pass
-
- @abstractmethod
- def _decode(self, rel_codes, anchors):
- """Method to be overriden by implementations.
-
- Args:
- rel_codes: a tensor representing N relative-encoded boxes
- anchors: BoxList of anchors
-
- Returns:
- boxlist: BoxList holding N boxes encoded in the ordinary way (i.e.,
- with corners y_min, x_min, y_max, x_max)
- """
- pass
-
-
-def batch_decode(encoded_boxes, box_coder, anchors):
- """Decode a batch of encoded boxes.
-
- This op takes a batch of encoded bounding boxes and transforms
- them to a batch of bounding boxes specified by their corners in
- the order of [y_min, x_min, y_max, x_max].
-
- Args:
- encoded_boxes: a float32 tensor of shape [batch_size, num_anchors,
- code_size] representing the location of the objects.
- box_coder: a BoxCoder object.
- anchors: a BoxList of anchors used to encode `encoded_boxes`.
-
- Returns:
- decoded_boxes: a float32 tensor of shape [batch_size, num_anchors,
- coder_size] representing the corners of the objects in the order
- of [y_min, x_min, y_max, x_max].
-
- Raises:
- ValueError: if batch sizes of the inputs are inconsistent, or if
- the number of anchors inferred from encoded_boxes and anchors are
- inconsistent.
- """
- encoded_boxes.get_shape().assert_has_rank(3)
- if encoded_boxes.get_shape()[1].value != anchors.num_boxes_static():
- raise ValueError('The number of anchors inferred from encoded_boxes'
- ' and anchors are inconsistent: shape[1] of encoded_boxes'
- ' %s should be equal to the number of anchors: %s.' %
- (encoded_boxes.get_shape()[1].value,
- anchors.num_boxes_static()))
-
- decoded_boxes = tf.stack([
- box_coder.decode(boxes, anchors).get()
- for boxes in tf.unstack(encoded_boxes)
- ])
- return decoded_boxes
diff --git a/object_detection/core/box_coder_test.py b/object_detection/core/box_coder_test.py
deleted file mode 100644
index c087a325..00000000
--- a/object_detection/core/box_coder_test.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.core.box_coder."""
-
-import tensorflow as tf
-
-from object_detection.core import box_coder
-from object_detection.core import box_list
-
-
-class MockBoxCoder(box_coder.BoxCoder):
- """Test BoxCoder that encodes/decodes using the multiply-by-two function."""
-
- def code_size(self):
- return 4
-
- def _encode(self, boxes, anchors):
- return 2.0 * boxes.get()
-
- def _decode(self, rel_codes, anchors):
- return box_list.BoxList(rel_codes / 2.0)
-
-
-class BoxCoderTest(tf.test.TestCase):
-
- def test_batch_decode(self):
- mock_anchor_corners = tf.constant(
- [[0, 0.1, 0.2, 0.3], [0.2, 0.4, 0.4, 0.6]], tf.float32)
- mock_anchors = box_list.BoxList(mock_anchor_corners)
- mock_box_coder = MockBoxCoder()
-
- expected_boxes = [[[0.0, 0.1, 0.5, 0.6], [0.5, 0.6, 0.7, 0.8]],
- [[0.1, 0.2, 0.3, 0.4], [0.7, 0.8, 0.9, 1.0]]]
-
- encoded_boxes_list = [mock_box_coder.encode(
- box_list.BoxList(tf.constant(boxes)), mock_anchors)
- for boxes in expected_boxes]
- encoded_boxes = tf.stack(encoded_boxes_list)
- decoded_boxes = box_coder.batch_decode(
- encoded_boxes, mock_box_coder, mock_anchors)
-
- with self.test_session() as sess:
- decoded_boxes_result = sess.run(decoded_boxes)
- self.assertAllClose(expected_boxes, decoded_boxes_result)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/core/box_list.py b/object_detection/core/box_list.py
deleted file mode 100644
index c0196f05..00000000
--- a/object_detection/core/box_list.py
+++ /dev/null
@@ -1,207 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Bounding Box List definition.
-
-BoxList represents a list of bounding boxes as tensorflow
-tensors, where each bounding box is represented as a row of 4 numbers,
-[y_min, x_min, y_max, x_max]. It is assumed that all bounding boxes
-within a given list correspond to a single image. See also
-box_list_ops.py for common box related operations (such as area, iou, etc).
-
-Optionally, users can add additional related fields (such as weights).
-We assume the following things to be true about fields:
-* they correspond to boxes in the box_list along the 0th dimension
-* they have inferrable rank at graph construction time
-* all dimensions except for possibly the 0th can be inferred
- (i.e., not None) at graph construction time.
-
-Some other notes:
- * Following tensorflow conventions, we use height, width ordering,
- and correspondingly, y,x (or ymin, xmin, ymax, xmax) ordering
- * Tensors are always provided as (flat) [N, 4] tensors.
-"""
-
-import tensorflow as tf
-
-
-class BoxList(object):
- """Box collection."""
-
- def __init__(self, boxes):
- """Constructs box collection.
-
- Args:
- boxes: a tensor of shape [N, 4] representing box corners
-
- Raises:
- ValueError: if invalid dimensions for bbox data or if bbox data is not in
- float32 format.
- """
- if len(boxes.get_shape()) != 2 or boxes.get_shape()[-1] != 4:
- raise ValueError('Invalid dimensions for box data.')
- if boxes.dtype != tf.float32:
- raise ValueError('Invalid tensor type: should be tf.float32')
- self.data = {'boxes': boxes}
-
- def num_boxes(self):
- """Returns number of boxes held in collection.
-
- Returns:
- a tensor representing the number of boxes held in the collection.
- """
- return tf.shape(self.data['boxes'])[0]
-
- def num_boxes_static(self):
- """Returns number of boxes held in collection.
-
- This number is inferred at graph construction time rather than run-time.
-
- Returns:
- Number of boxes held in collection (integer) or None if this is not
- inferrable at graph construction time.
- """
- return self.data['boxes'].get_shape()[0].value
-
- def get_all_fields(self):
- """Returns all fields."""
- return self.data.keys()
-
- def get_extra_fields(self):
- """Returns all non-box fields (i.e., everything not named 'boxes')."""
- return [k for k in self.data.keys() if k != 'boxes']
-
- def add_field(self, field, field_data):
- """Add field to box list.
-
- This method can be used to add related box data such as
- weights/labels, etc.
-
- Args:
- field: a string key to access the data via `get`
- field_data: a tensor containing the data to store in the BoxList
- """
- self.data[field] = field_data
-
- def has_field(self, field):
- return field in self.data
-
- def get(self):
- """Convenience function for accessing box coordinates.
-
- Returns:
- a tensor with shape [N, 4] representing box coordinates.
- """
- return self.get_field('boxes')
-
- def set(self, boxes):
- """Convenience function for setting box coordinates.
-
- Args:
- boxes: a tensor of shape [N, 4] representing box corners
-
- Raises:
- ValueError: if invalid dimensions for bbox data
- """
- if len(boxes.get_shape()) != 2 or boxes.get_shape()[-1] != 4:
- raise ValueError('Invalid dimensions for box data.')
- self.data['boxes'] = boxes
-
- def get_field(self, field):
- """Accesses a box collection and associated fields.
-
- This function returns specified field with object; if no field is specified,
- it returns the box coordinates.
-
- Args:
- field: this optional string parameter can be used to specify
- a related field to be accessed.
-
- Returns:
- a tensor representing the box collection or an associated field.
-
- Raises:
- ValueError: if invalid field
- """
- if not self.has_field(field):
- raise ValueError('field ' + str(field) + ' does not exist')
- return self.data[field]
-
- def set_field(self, field, value):
- """Sets the value of a field.
-
- Updates the field of a box_list with a given value.
-
- Args:
- field: (string) name of the field to set value.
- value: the value to assign to the field.
-
- Raises:
- ValueError: if the box_list does not have specified field.
- """
- if not self.has_field(field):
- raise ValueError('field %s does not exist' % field)
- self.data[field] = value
-
- def get_center_coordinates_and_sizes(self, scope=None):
- """Computes the center coordinates, height and width of the boxes.
-
- Args:
- scope: name scope of the function.
-
- Returns:
- a list of 4 1-D tensors [ycenter, xcenter, height, width].
- """
- with tf.name_scope(scope, 'get_center_coordinates_and_sizes'):
- box_corners = self.get()
- ymin, xmin, ymax, xmax = tf.unstack(tf.transpose(box_corners))
- width = xmax - xmin
- height = ymax - ymin
- ycenter = ymin + height / 2.
- xcenter = xmin + width / 2.
- return [ycenter, xcenter, height, width]
-
- def transpose_coordinates(self, scope=None):
- """Transpose the coordinate representation in a boxlist.
-
- Args:
- scope: name scope of the function.
- """
- with tf.name_scope(scope, 'transpose_coordinates'):
- y_min, x_min, y_max, x_max = tf.split(
- value=self.get(), num_or_size_splits=4, axis=1)
- self.set(tf.concat([x_min, y_min, x_max, y_max], 1))
-
- def as_tensor_dict(self, fields=None):
- """Retrieves specified fields as a dictionary of tensors.
-
- Args:
- fields: (optional) list of fields to return in the dictionary.
- If None (default), all fields are returned.
-
- Returns:
- tensor_dict: A dictionary of tensors specified by fields.
-
- Raises:
- ValueError: if specified field is not contained in boxlist.
- """
- tensor_dict = {}
- if fields is None:
- fields = self.get_all_fields()
- for field in fields:
- if not self.has_field(field):
- raise ValueError('boxlist must contain all specified fields')
- tensor_dict[field] = self.get_field(field)
- return tensor_dict
diff --git a/object_detection/core/box_list_ops.py b/object_detection/core/box_list_ops.py
deleted file mode 100644
index c98048d5..00000000
--- a/object_detection/core/box_list_ops.py
+++ /dev/null
@@ -1,984 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Bounding Box List operations.
-
-Example box operations that are supported:
- * areas: compute bounding box areas
- * iou: pairwise intersection-over-union scores
- * sq_dist: pairwise distances between bounding boxes
-
-Whenever box_list_ops functions output a BoxList, the fields of the incoming
-BoxList are retained unless documented otherwise.
-"""
-import tensorflow as tf
-
-from object_detection.core import box_list
-from object_detection.utils import shape_utils
-
-
-class SortOrder(object):
- """Enum class for sort order.
-
- Attributes:
- ascend: ascend order.
- descend: descend order.
- """
- ascend = 1
- descend = 2
-
-
-def area(boxlist, scope=None):
- """Computes area of boxes.
-
- Args:
- boxlist: BoxList holding N boxes
- scope: name scope.
-
- Returns:
- a tensor with shape [N] representing box areas.
- """
- with tf.name_scope(scope, 'Area'):
- y_min, x_min, y_max, x_max = tf.split(
- value=boxlist.get(), num_or_size_splits=4, axis=1)
- return tf.squeeze((y_max - y_min) * (x_max - x_min), [1])
-
-
-def height_width(boxlist, scope=None):
- """Computes height and width of boxes in boxlist.
-
- Args:
- boxlist: BoxList holding N boxes
- scope: name scope.
-
- Returns:
- Height: A tensor with shape [N] representing box heights.
- Width: A tensor with shape [N] representing box widths.
- """
- with tf.name_scope(scope, 'HeightWidth'):
- y_min, x_min, y_max, x_max = tf.split(
- value=boxlist.get(), num_or_size_splits=4, axis=1)
- return tf.squeeze(y_max - y_min, [1]), tf.squeeze(x_max - x_min, [1])
-
-
-def scale(boxlist, y_scale, x_scale, scope=None):
- """scale box coordinates in x and y dimensions.
-
- Args:
- boxlist: BoxList holding N boxes
- y_scale: (float) scalar tensor
- x_scale: (float) scalar tensor
- scope: name scope.
-
- Returns:
- boxlist: BoxList holding N boxes
- """
- with tf.name_scope(scope, 'Scale'):
- y_scale = tf.cast(y_scale, tf.float32)
- x_scale = tf.cast(x_scale, tf.float32)
- y_min, x_min, y_max, x_max = tf.split(
- value=boxlist.get(), num_or_size_splits=4, axis=1)
- y_min = y_scale * y_min
- y_max = y_scale * y_max
- x_min = x_scale * x_min
- x_max = x_scale * x_max
- scaled_boxlist = box_list.BoxList(
- tf.concat([y_min, x_min, y_max, x_max], 1))
- return _copy_extra_fields(scaled_boxlist, boxlist)
-
-
-def clip_to_window(boxlist, window, filter_nonoverlapping=True, scope=None):
- """Clip bounding boxes to a window.
-
- This op clips any input bounding boxes (represented by bounding box
- corners) to a window, optionally filtering out boxes that do not
- overlap at all with the window.
-
- Args:
- boxlist: BoxList holding M_in boxes
- window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
- window to which the op should clip boxes.
- filter_nonoverlapping: whether to filter out boxes that do not overlap at
- all with the window.
- scope: name scope.
-
- Returns:
- a BoxList holding M_out boxes where M_out <= M_in
- """
- with tf.name_scope(scope, 'ClipToWindow'):
- y_min, x_min, y_max, x_max = tf.split(
- value=boxlist.get(), num_or_size_splits=4, axis=1)
- win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
- y_min_clipped = tf.maximum(tf.minimum(y_min, win_y_max), win_y_min)
- y_max_clipped = tf.maximum(tf.minimum(y_max, win_y_max), win_y_min)
- x_min_clipped = tf.maximum(tf.minimum(x_min, win_x_max), win_x_min)
- x_max_clipped = tf.maximum(tf.minimum(x_max, win_x_max), win_x_min)
- clipped = box_list.BoxList(
- tf.concat([y_min_clipped, x_min_clipped, y_max_clipped, x_max_clipped],
- 1))
- clipped = _copy_extra_fields(clipped, boxlist)
- if filter_nonoverlapping:
- areas = area(clipped)
- nonzero_area_indices = tf.cast(
- tf.reshape(tf.where(tf.greater(areas, 0.0)), [-1]), tf.int32)
- clipped = gather(clipped, nonzero_area_indices)
- return clipped
-
-
-def prune_outside_window(boxlist, window, scope=None):
- """Prunes bounding boxes that fall outside a given window.
-
- This function prunes bounding boxes that even partially fall outside the given
- window. See also clip_to_window which only prunes bounding boxes that fall
- completely outside the window, and clips any bounding boxes that partially
- overflow.
-
- Args:
- boxlist: a BoxList holding M_in boxes.
- window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax]
- of the window
- scope: name scope.
-
- Returns:
- pruned_corners: a tensor with shape [M_out, 4] where M_out <= M_in
- valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes
- in the input tensor.
- """
- with tf.name_scope(scope, 'PruneOutsideWindow'):
- y_min, x_min, y_max, x_max = tf.split(
- value=boxlist.get(), num_or_size_splits=4, axis=1)
- win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
- coordinate_violations = tf.concat([
- tf.less(y_min, win_y_min), tf.less(x_min, win_x_min),
- tf.greater(y_max, win_y_max), tf.greater(x_max, win_x_max)
- ], 1)
- valid_indices = tf.reshape(
- tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))), [-1])
- return gather(boxlist, valid_indices), valid_indices
-
-
-def prune_completely_outside_window(boxlist, window, scope=None):
- """Prunes bounding boxes that fall completely outside of the given window.
-
- The function clip_to_window prunes bounding boxes that fall
- completely outside the window, but also clips any bounding boxes that
- partially overflow. This function does not clip partially overflowing boxes.
-
- Args:
- boxlist: a BoxList holding M_in boxes.
- window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax]
- of the window
- scope: name scope.
-
- Returns:
- pruned_corners: a tensor with shape [M_out, 4] where M_out <= M_in
- valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes
- in the input tensor.
- """
- with tf.name_scope(scope, 'PruneCompleteleyOutsideWindow'):
- y_min, x_min, y_max, x_max = tf.split(
- value=boxlist.get(), num_or_size_splits=4, axis=1)
- win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
- coordinate_violations = tf.concat([
- tf.greater_equal(y_min, win_y_max), tf.greater_equal(x_min, win_x_max),
- tf.less_equal(y_max, win_y_min), tf.less_equal(x_max, win_x_min)
- ], 1)
- valid_indices = tf.reshape(
- tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))), [-1])
- return gather(boxlist, valid_indices), valid_indices
-
-
-def intersection(boxlist1, boxlist2, scope=None):
- """Compute pairwise intersection areas between boxes.
-
- Args:
- boxlist1: BoxList holding N boxes
- boxlist2: BoxList holding M boxes
- scope: name scope.
-
- Returns:
- a tensor with shape [N, M] representing pairwise intersections
- """
- with tf.name_scope(scope, 'Intersection'):
- y_min1, x_min1, y_max1, x_max1 = tf.split(
- value=boxlist1.get(), num_or_size_splits=4, axis=1)
- y_min2, x_min2, y_max2, x_max2 = tf.split(
- value=boxlist2.get(), num_or_size_splits=4, axis=1)
- all_pairs_min_ymax = tf.minimum(y_max1, tf.transpose(y_max2))
- all_pairs_max_ymin = tf.maximum(y_min1, tf.transpose(y_min2))
- intersect_heights = tf.maximum(0.0, all_pairs_min_ymax - all_pairs_max_ymin)
- all_pairs_min_xmax = tf.minimum(x_max1, tf.transpose(x_max2))
- all_pairs_max_xmin = tf.maximum(x_min1, tf.transpose(x_min2))
- intersect_widths = tf.maximum(0.0, all_pairs_min_xmax - all_pairs_max_xmin)
- return intersect_heights * intersect_widths
-
-
-def matched_intersection(boxlist1, boxlist2, scope=None):
- """Compute intersection areas between corresponding boxes in two boxlists.
-
- Args:
- boxlist1: BoxList holding N boxes
- boxlist2: BoxList holding N boxes
- scope: name scope.
-
- Returns:
- a tensor with shape [N] representing pairwise intersections
- """
- with tf.name_scope(scope, 'MatchedIntersection'):
- y_min1, x_min1, y_max1, x_max1 = tf.split(
- value=boxlist1.get(), num_or_size_splits=4, axis=1)
- y_min2, x_min2, y_max2, x_max2 = tf.split(
- value=boxlist2.get(), num_or_size_splits=4, axis=1)
- min_ymax = tf.minimum(y_max1, y_max2)
- max_ymin = tf.maximum(y_min1, y_min2)
- intersect_heights = tf.maximum(0.0, min_ymax - max_ymin)
- min_xmax = tf.minimum(x_max1, x_max2)
- max_xmin = tf.maximum(x_min1, x_min2)
- intersect_widths = tf.maximum(0.0, min_xmax - max_xmin)
- return tf.reshape(intersect_heights * intersect_widths, [-1])
-
-
-def iou(boxlist1, boxlist2, scope=None):
- """Computes pairwise intersection-over-union between box collections.
-
- Args:
- boxlist1: BoxList holding N boxes
- boxlist2: BoxList holding M boxes
- scope: name scope.
-
- Returns:
- a tensor with shape [N, M] representing pairwise iou scores.
- """
- with tf.name_scope(scope, 'IOU'):
- intersections = intersection(boxlist1, boxlist2)
- areas1 = area(boxlist1)
- areas2 = area(boxlist2)
- unions = (
- tf.expand_dims(areas1, 1) + tf.expand_dims(areas2, 0) - intersections)
- return tf.where(
- tf.equal(intersections, 0.0),
- tf.zeros_like(intersections), tf.truediv(intersections, unions))
-
-
-def matched_iou(boxlist1, boxlist2, scope=None):
- """Compute intersection-over-union between corresponding boxes in boxlists.
-
- Args:
- boxlist1: BoxList holding N boxes
- boxlist2: BoxList holding N boxes
- scope: name scope.
-
- Returns:
- a tensor with shape [N] representing pairwise iou scores.
- """
- with tf.name_scope(scope, 'MatchedIOU'):
- intersections = matched_intersection(boxlist1, boxlist2)
- areas1 = area(boxlist1)
- areas2 = area(boxlist2)
- unions = areas1 + areas2 - intersections
- return tf.where(
- tf.equal(intersections, 0.0),
- tf.zeros_like(intersections), tf.truediv(intersections, unions))
-
-
-def ioa(boxlist1, boxlist2, scope=None):
- """Computes pairwise intersection-over-area between box collections.
-
- intersection-over-area (IOA) between two boxes box1 and box2 is defined as
- their intersection area over box2's area. Note that ioa is not symmetric,
- that is, ioa(box1, box2) != ioa(box2, box1).
-
- Args:
- boxlist1: BoxList holding N boxes
- boxlist2: BoxList holding M boxes
- scope: name scope.
-
- Returns:
- a tensor with shape [N, M] representing pairwise ioa scores.
- """
- with tf.name_scope(scope, 'IOA'):
- intersections = intersection(boxlist1, boxlist2)
- areas = tf.expand_dims(area(boxlist2), 0)
- return tf.truediv(intersections, areas)
-
-
-def prune_non_overlapping_boxes(
- boxlist1, boxlist2, min_overlap=0.0, scope=None):
- """Prunes the boxes in boxlist1 that overlap less than thresh with boxlist2.
-
- For each box in boxlist1, we want its IOA to be more than minoverlap with
- at least one of the boxes in boxlist2. If it does not, we remove it.
-
- Args:
- boxlist1: BoxList holding N boxes.
- boxlist2: BoxList holding M boxes.
- min_overlap: Minimum required overlap between boxes, to count them as
- overlapping.
- scope: name scope.
-
- Returns:
- new_boxlist1: A pruned boxlist with size [N', 4].
- keep_inds: A tensor with shape [N'] indexing kept bounding boxes in the
- first input BoxList `boxlist1`.
- """
- with tf.name_scope(scope, 'PruneNonOverlappingBoxes'):
- ioa_ = ioa(boxlist2, boxlist1) # [M, N] tensor
- ioa_ = tf.reduce_max(ioa_, reduction_indices=[0]) # [N] tensor
- keep_bool = tf.greater_equal(ioa_, tf.constant(min_overlap))
- keep_inds = tf.squeeze(tf.where(keep_bool), squeeze_dims=[1])
- new_boxlist1 = gather(boxlist1, keep_inds)
- return new_boxlist1, keep_inds
-
-
-def prune_small_boxes(boxlist, min_side, scope=None):
- """Prunes small boxes in the boxlist which have a side smaller than min_side.
-
- Args:
- boxlist: BoxList holding N boxes.
- min_side: Minimum width AND height of box to survive pruning.
- scope: name scope.
-
- Returns:
- A pruned boxlist.
- """
- with tf.name_scope(scope, 'PruneSmallBoxes'):
- height, width = height_width(boxlist)
- is_valid = tf.logical_and(tf.greater_equal(width, min_side),
- tf.greater_equal(height, min_side))
- return gather(boxlist, tf.reshape(tf.where(is_valid), [-1]))
-
-
-def change_coordinate_frame(boxlist, window, scope=None):
- """Change coordinate frame of the boxlist to be relative to window's frame.
-
- Given a window of the form [ymin, xmin, ymax, xmax],
- changes bounding box coordinates from boxlist to be relative to this window
- (e.g., the min corner maps to (0,0) and the max corner maps to (1,1)).
-
- An example use case is data augmentation: where we are given groundtruth
- boxes (boxlist) and would like to randomly crop the image to some
- window (window). In this case we need to change the coordinate frame of
- each groundtruth box to be relative to this new window.
-
- Args:
- boxlist: A BoxList object holding N boxes.
- window: A rank 1 tensor [4].
- scope: name scope.
-
- Returns:
- Returns a BoxList object with N boxes.
- """
- with tf.name_scope(scope, 'ChangeCoordinateFrame'):
- win_height = window[2] - window[0]
- win_width = window[3] - window[1]
- boxlist_new = scale(box_list.BoxList(
- boxlist.get() - [window[0], window[1], window[0], window[1]]),
- 1.0 / win_height, 1.0 / win_width)
- boxlist_new = _copy_extra_fields(boxlist_new, boxlist)
- return boxlist_new
-
-
-def sq_dist(boxlist1, boxlist2, scope=None):
- """Computes the pairwise squared distances between box corners.
-
- This op treats each box as if it were a point in a 4d Euclidean space and
- computes pairwise squared distances.
-
- Mathematically, we are given two matrices of box coordinates X and Y,
- where X(i,:) is the i'th row of X, containing the 4 numbers defining the
- corners of the i'th box in boxlist1. Similarly Y(j,:) corresponds to
- boxlist2. We compute
- Z(i,j) = ||X(i,:) - Y(j,:)||^2
- = ||X(i,:)||^2 + ||Y(j,:)||^2 - 2 X(i,:)' * Y(j,:),
-
- Args:
- boxlist1: BoxList holding N boxes
- boxlist2: BoxList holding M boxes
- scope: name scope.
-
- Returns:
- a tensor with shape [N, M] representing pairwise distances
- """
- with tf.name_scope(scope, 'SqDist'):
- sqnorm1 = tf.reduce_sum(tf.square(boxlist1.get()), 1, keep_dims=True)
- sqnorm2 = tf.reduce_sum(tf.square(boxlist2.get()), 1, keep_dims=True)
- innerprod = tf.matmul(boxlist1.get(), boxlist2.get(),
- transpose_a=False, transpose_b=True)
- return sqnorm1 + tf.transpose(sqnorm2) - 2.0 * innerprod
-
-
-def boolean_mask(boxlist, indicator, fields=None, scope=None):
- """Select boxes from BoxList according to indicator and return new BoxList.
-
- `boolean_mask` returns the subset of boxes that are marked as "True" by the
- indicator tensor. By default, `boolean_mask` returns boxes corresponding to
- the input index list, as well as all additional fields stored in the boxlist
- (indexing into the first dimension). However one can optionally only draw
- from a subset of fields.
-
- Args:
- boxlist: BoxList holding N boxes
- indicator: a rank-1 boolean tensor
- fields: (optional) list of fields to also gather from. If None (default),
- all fields are gathered from. Pass an empty fields list to only gather
- the box coordinates.
- scope: name scope.
-
- Returns:
- subboxlist: a BoxList corresponding to the subset of the input BoxList
- specified by indicator
- Raises:
- ValueError: if `indicator` is not a rank-1 boolean tensor.
- """
- with tf.name_scope(scope, 'BooleanMask'):
- if indicator.shape.ndims != 1:
- raise ValueError('indicator should have rank 1')
- if indicator.dtype != tf.bool:
- raise ValueError('indicator should be a boolean tensor')
- subboxlist = box_list.BoxList(tf.boolean_mask(boxlist.get(), indicator))
- if fields is None:
- fields = boxlist.get_extra_fields()
- for field in fields:
- if not boxlist.has_field(field):
- raise ValueError('boxlist must contain all specified fields')
- subfieldlist = tf.boolean_mask(boxlist.get_field(field), indicator)
- subboxlist.add_field(field, subfieldlist)
- return subboxlist
-
-
-def gather(boxlist, indices, fields=None, scope=None):
- """Gather boxes from BoxList according to indices and return new BoxList.
-
- By default, `gather` returns boxes corresponding to the input index list, as
- well as all additional fields stored in the boxlist (indexing into the
- first dimension). However one can optionally only gather from a
- subset of fields.
-
- Args:
- boxlist: BoxList holding N boxes
- indices: a rank-1 tensor of type int32 / int64
- fields: (optional) list of fields to also gather from. If None (default),
- all fields are gathered from. Pass an empty fields list to only gather
- the box coordinates.
- scope: name scope.
-
- Returns:
- subboxlist: a BoxList corresponding to the subset of the input BoxList
- specified by indices
- Raises:
- ValueError: if specified field is not contained in boxlist or if the
- indices are not of type int32
- """
- with tf.name_scope(scope, 'Gather'):
- if len(indices.shape.as_list()) != 1:
- raise ValueError('indices should have rank 1')
- if indices.dtype != tf.int32 and indices.dtype != tf.int64:
- raise ValueError('indices should be an int32 / int64 tensor')
- subboxlist = box_list.BoxList(tf.gather(boxlist.get(), indices))
- if fields is None:
- fields = boxlist.get_extra_fields()
- for field in fields:
- if not boxlist.has_field(field):
- raise ValueError('boxlist must contain all specified fields')
- subfieldlist = tf.gather(boxlist.get_field(field), indices)
- subboxlist.add_field(field, subfieldlist)
- return subboxlist
-
-
-def concatenate(boxlists, fields=None, scope=None):
- """Concatenate list of BoxLists.
-
- This op concatenates a list of input BoxLists into a larger BoxList. It also
- handles concatenation of BoxList fields as long as the field tensor shapes
- are equal except for the first dimension.
-
- Args:
- boxlists: list of BoxList objects
- fields: optional list of fields to also concatenate. By default, all
- fields from the first BoxList in the list are included in the
- concatenation.
- scope: name scope.
-
- Returns:
- a BoxList with number of boxes equal to
- sum([boxlist.num_boxes() for boxlist in BoxList])
- Raises:
- ValueError: if boxlists is invalid (i.e., is not a list, is empty, or
- contains non BoxList objects), or if requested fields are not contained in
- all boxlists
- """
- with tf.name_scope(scope, 'Concatenate'):
- if not isinstance(boxlists, list):
- raise ValueError('boxlists should be a list')
- if not boxlists:
- raise ValueError('boxlists should have nonzero length')
- for boxlist in boxlists:
- if not isinstance(boxlist, box_list.BoxList):
- raise ValueError('all elements of boxlists should be BoxList objects')
- concatenated = box_list.BoxList(
- tf.concat([boxlist.get() for boxlist in boxlists], 0))
- if fields is None:
- fields = boxlists[0].get_extra_fields()
- for field in fields:
- first_field_shape = boxlists[0].get_field(field).get_shape().as_list()
- first_field_shape[0] = -1
- if None in first_field_shape:
- raise ValueError('field %s must have fully defined shape except for the'
- ' 0th dimension.' % field)
- for boxlist in boxlists:
- if not boxlist.has_field(field):
- raise ValueError('boxlist must contain all requested fields')
- field_shape = boxlist.get_field(field).get_shape().as_list()
- field_shape[0] = -1
- if field_shape != first_field_shape:
- raise ValueError('field %s must have same shape for all boxlists '
- 'except for the 0th dimension.' % field)
- concatenated_field = tf.concat(
- [boxlist.get_field(field) for boxlist in boxlists], 0)
- concatenated.add_field(field, concatenated_field)
- return concatenated
-
-
-def sort_by_field(boxlist, field, order=SortOrder.descend, scope=None):
- """Sort boxes and associated fields according to a scalar field.
-
- A common use case is reordering the boxes according to descending scores.
-
- Args:
- boxlist: BoxList holding N boxes.
- field: A BoxList field for sorting and reordering the BoxList.
- order: (Optional) descend or ascend. Default is descend.
- scope: name scope.
-
- Returns:
- sorted_boxlist: A sorted BoxList with the field in the specified order.
-
- Raises:
- ValueError: if specified field does not exist
- ValueError: if the order is not either descend or ascend
- """
- with tf.name_scope(scope, 'SortByField'):
- if order != SortOrder.descend and order != SortOrder.ascend:
- raise ValueError('Invalid sort order')
-
- field_to_sort = boxlist.get_field(field)
- if len(field_to_sort.shape.as_list()) != 1:
- raise ValueError('Field should have rank 1')
-
- num_boxes = boxlist.num_boxes()
- num_entries = tf.size(field_to_sort)
- length_assert = tf.Assert(
- tf.equal(num_boxes, num_entries),
- ['Incorrect field size: actual vs expected.', num_entries, num_boxes])
-
- with tf.control_dependencies([length_assert]):
- # TODO: Remove with tf.device when top_k operation runs
- # correctly on GPU.
- with tf.device('/cpu:0'):
- _, sorted_indices = tf.nn.top_k(field_to_sort, num_boxes, sorted=True)
-
- if order == SortOrder.ascend:
- sorted_indices = tf.reverse_v2(sorted_indices, [0])
-
- return gather(boxlist, sorted_indices)
-
-
-def visualize_boxes_in_image(image, boxlist, normalized=False, scope=None):
- """Overlay bounding box list on image.
-
- Currently this visualization plots a 1 pixel thick red bounding box on top
- of the image. Note that tf.image.draw_bounding_boxes essentially is
- 1 indexed.
-
- Args:
- image: an image tensor with shape [height, width, 3]
- boxlist: a BoxList
- normalized: (boolean) specify whether corners are to be interpreted
- as absolute coordinates in image space or normalized with respect to the
- image size.
- scope: name scope.
-
- Returns:
- image_and_boxes: an image tensor with shape [height, width, 3]
- """
- with tf.name_scope(scope, 'VisualizeBoxesInImage'):
- if not normalized:
- height, width, _ = tf.unstack(tf.shape(image))
- boxlist = scale(boxlist,
- 1.0 / tf.cast(height, tf.float32),
- 1.0 / tf.cast(width, tf.float32))
- corners = tf.expand_dims(boxlist.get(), 0)
- image = tf.expand_dims(image, 0)
- return tf.squeeze(tf.image.draw_bounding_boxes(image, corners), [0])
-
-
-def filter_field_value_equals(boxlist, field, value, scope=None):
- """Filter to keep only boxes with field entries equal to the given value.
-
- Args:
- boxlist: BoxList holding N boxes.
- field: field name for filtering.
- value: scalar value.
- scope: name scope.
-
- Returns:
- a BoxList holding M boxes where M <= N
-
- Raises:
- ValueError: if boxlist not a BoxList object or if it does not have
- the specified field.
- """
- with tf.name_scope(scope, 'FilterFieldValueEquals'):
- if not isinstance(boxlist, box_list.BoxList):
- raise ValueError('boxlist must be a BoxList')
- if not boxlist.has_field(field):
- raise ValueError('boxlist must contain the specified field')
- filter_field = boxlist.get_field(field)
- gather_index = tf.reshape(tf.where(tf.equal(filter_field, value)), [-1])
- return gather(boxlist, gather_index)
-
-
-def filter_greater_than(boxlist, thresh, scope=None):
- """Filter to keep only boxes with score exceeding a given threshold.
-
- This op keeps the collection of boxes whose corresponding scores are
- greater than the input threshold.
-
- TODO: Change function name to filter_scores_greater_than
-
- Args:
- boxlist: BoxList holding N boxes. Must contain a 'scores' field
- representing detection scores.
- thresh: scalar threshold
- scope: name scope.
-
- Returns:
- a BoxList holding M boxes where M <= N
-
- Raises:
- ValueError: if boxlist not a BoxList object or if it does not
- have a scores field
- """
- with tf.name_scope(scope, 'FilterGreaterThan'):
- if not isinstance(boxlist, box_list.BoxList):
- raise ValueError('boxlist must be a BoxList')
- if not boxlist.has_field('scores'):
- raise ValueError('input boxlist must have \'scores\' field')
- scores = boxlist.get_field('scores')
- if len(scores.shape.as_list()) > 2:
- raise ValueError('Scores should have rank 1 or 2')
- if len(scores.shape.as_list()) == 2 and scores.shape.as_list()[1] != 1:
- raise ValueError('Scores should have rank 1 or have shape '
- 'consistent with [None, 1]')
- high_score_indices = tf.cast(tf.reshape(
- tf.where(tf.greater(scores, thresh)),
- [-1]), tf.int32)
- return gather(boxlist, high_score_indices)
-
-
-def non_max_suppression(boxlist, thresh, max_output_size, scope=None):
- """Non maximum suppression.
-
- This op greedily selects a subset of detection bounding boxes, pruning
- away boxes that have high IOU (intersection over union) overlap (> thresh)
- with already selected boxes. Note that this only works for a single class ---
- to apply NMS to multi-class predictions, use MultiClassNonMaxSuppression.
-
- Args:
- boxlist: BoxList holding N boxes. Must contain a 'scores' field
- representing detection scores.
- thresh: scalar threshold
- max_output_size: maximum number of retained boxes
- scope: name scope.
-
- Returns:
- a BoxList holding M boxes where M <= max_output_size
- Raises:
- ValueError: if thresh is not in [0, 1]
- """
- with tf.name_scope(scope, 'NonMaxSuppression'):
- if not 0 <= thresh <= 1.0:
- raise ValueError('thresh must be between 0 and 1')
- if not isinstance(boxlist, box_list.BoxList):
- raise ValueError('boxlist must be a BoxList')
- if not boxlist.has_field('scores'):
- raise ValueError('input boxlist must have \'scores\' field')
- selected_indices = tf.image.non_max_suppression(
- boxlist.get(), boxlist.get_field('scores'),
- max_output_size, iou_threshold=thresh)
- return gather(boxlist, selected_indices)
-
-
-def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from):
- """Copies the extra fields of boxlist_to_copy_from to boxlist_to_copy_to.
-
- Args:
- boxlist_to_copy_to: BoxList to which extra fields are copied.
- boxlist_to_copy_from: BoxList from which fields are copied.
-
- Returns:
- boxlist_to_copy_to with extra fields.
- """
- for field in boxlist_to_copy_from.get_extra_fields():
- boxlist_to_copy_to.add_field(field, boxlist_to_copy_from.get_field(field))
- return boxlist_to_copy_to
-
-
-def to_normalized_coordinates(boxlist, height, width,
- check_range=True, scope=None):
- """Converts absolute box coordinates to normalized coordinates in [0, 1].
-
- Usually one uses the dynamic shape of the image or conv-layer tensor:
- boxlist = box_list_ops.to_normalized_coordinates(boxlist,
- tf.shape(images)[1],
- tf.shape(images)[2]),
-
- This function raises an assertion failed error at graph execution time when
- the maximum coordinate is smaller than 1.01 (which means that coordinates are
- already normalized). The value 1.01 is to deal with small rounding errors.
-
- Args:
- boxlist: BoxList with coordinates in terms of pixel-locations.
- height: Maximum value for height of absolute box coordinates.
- width: Maximum value for width of absolute box coordinates.
- check_range: If True, checks if the coordinates are normalized or not.
- scope: name scope.
-
- Returns:
- boxlist with normalized coordinates in [0, 1].
- """
- with tf.name_scope(scope, 'ToNormalizedCoordinates'):
- height = tf.cast(height, tf.float32)
- width = tf.cast(width, tf.float32)
-
- if check_range:
- max_val = tf.reduce_max(boxlist.get())
- max_assert = tf.Assert(tf.greater(max_val, 1.01),
- ['max value is lower than 1.01: ', max_val])
- with tf.control_dependencies([max_assert]):
- width = tf.identity(width)
-
- return scale(boxlist, 1 / height, 1 / width)
-
-
-def to_absolute_coordinates(boxlist,
- height,
- width,
- check_range=False,
- maximum_normalized_coordinate=1.01,
- scope=None):
- """Converts normalized box coordinates to absolute pixel coordinates.
-
- This function raises an assertion failed error when the maximum box coordinate
- value is larger than maximum_normalized_coordinate (in which case coordinates
- are already absolute).
-
- Args:
- boxlist: BoxList with coordinates in range [0, 1].
- height: Maximum value for height of absolute box coordinates.
- width: Maximum value for width of absolute box coordinates.
- check_range: If True, checks if the coordinates are normalized or not.
- maximum_normalized_coordinate: Maximum coordinate value to be considered
- as normalized, default to 1.01.
- scope: name scope.
-
- Returns:
- boxlist with absolute coordinates in terms of the image size.
-
- """
- with tf.name_scope(scope, 'ToAbsoluteCoordinates'):
- height = tf.cast(height, tf.float32)
- width = tf.cast(width, tf.float32)
-
- # Ensure range of input boxes is correct.
- if check_range:
- box_maximum = tf.reduce_max(boxlist.get())
- max_assert = tf.Assert(
- tf.greater_equal(1.1, box_maximum),
- ['maximum box coordinate value is larger '
- 'than 1.1: ', box_maximum])
- with tf.control_dependencies([max_assert]):
- width = tf.identity(width)
-
- return scale(boxlist, height, width)
-
-
-def refine_boxes_multi_class(pool_boxes,
- num_classes,
- nms_iou_thresh,
- nms_max_detections,
- voting_iou_thresh=0.5):
- """Refines a pool of boxes using non max suppression and box voting.
-
- Box refinement is done independently for each class.
-
- Args:
- pool_boxes: (BoxList) A collection of boxes to be refined. pool_boxes must
- have a rank 1 'scores' field and a rank 1 'classes' field.
- num_classes: (int scalar) Number of classes.
- nms_iou_thresh: (float scalar) iou threshold for non max suppression (NMS).
- nms_max_detections: (int scalar) maximum output size for NMS.
- voting_iou_thresh: (float scalar) iou threshold for box voting.
-
- Returns:
- BoxList of refined boxes.
-
- Raises:
- ValueError: if
- a) nms_iou_thresh or voting_iou_thresh is not in [0, 1].
- b) pool_boxes is not a BoxList.
- c) pool_boxes does not have a scores and classes field.
- """
- if not 0.0 <= nms_iou_thresh <= 1.0:
- raise ValueError('nms_iou_thresh must be between 0 and 1')
- if not 0.0 <= voting_iou_thresh <= 1.0:
- raise ValueError('voting_iou_thresh must be between 0 and 1')
- if not isinstance(pool_boxes, box_list.BoxList):
- raise ValueError('pool_boxes must be a BoxList')
- if not pool_boxes.has_field('scores'):
- raise ValueError('pool_boxes must have a \'scores\' field')
- if not pool_boxes.has_field('classes'):
- raise ValueError('pool_boxes must have a \'classes\' field')
-
- refined_boxes = []
- for i in range(num_classes):
- boxes_class = filter_field_value_equals(pool_boxes, 'classes', i)
- refined_boxes_class = refine_boxes(boxes_class, nms_iou_thresh,
- nms_max_detections, voting_iou_thresh)
- refined_boxes.append(refined_boxes_class)
- return sort_by_field(concatenate(refined_boxes), 'scores')
-
-
-def refine_boxes(pool_boxes,
- nms_iou_thresh,
- nms_max_detections,
- voting_iou_thresh=0.5):
- """Refines a pool of boxes using non max suppression and box voting.
-
- Args:
- pool_boxes: (BoxList) A collection of boxes to be refined. pool_boxes must
- have a rank 1 'scores' field.
- nms_iou_thresh: (float scalar) iou threshold for non max suppression (NMS).
- nms_max_detections: (int scalar) maximum output size for NMS.
- voting_iou_thresh: (float scalar) iou threshold for box voting.
-
- Returns:
- BoxList of refined boxes.
-
- Raises:
- ValueError: if
- a) nms_iou_thresh or voting_iou_thresh is not in [0, 1].
- b) pool_boxes is not a BoxList.
- c) pool_boxes does not have a scores field.
- """
- if not 0.0 <= nms_iou_thresh <= 1.0:
- raise ValueError('nms_iou_thresh must be between 0 and 1')
- if not 0.0 <= voting_iou_thresh <= 1.0:
- raise ValueError('voting_iou_thresh must be between 0 and 1')
- if not isinstance(pool_boxes, box_list.BoxList):
- raise ValueError('pool_boxes must be a BoxList')
- if not pool_boxes.has_field('scores'):
- raise ValueError('pool_boxes must have a \'scores\' field')
-
- nms_boxes = non_max_suppression(
- pool_boxes, nms_iou_thresh, nms_max_detections)
- return box_voting(nms_boxes, pool_boxes, voting_iou_thresh)
-
-
-def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
- """Performs box voting as described in S. Gidaris and N. Komodakis, ICCV 2015.
-
- Performs box voting as described in 'Object detection via a multi-region &
- semantic segmentation-aware CNN model', Gidaris and Komodakis, ICCV 2015. For
- each box 'B' in selected_boxes, we find the set 'S' of boxes in pool_boxes
- with iou overlap >= iou_thresh. The location of B is set to the weighted
- average location of boxes in S (scores are used for weighting). And the score
- of B is set to the average score of boxes in S.
-
- Args:
- selected_boxes: BoxList containing a subset of boxes in pool_boxes. These
- boxes are usually selected from pool_boxes using non max suppression.
- pool_boxes: BoxList containing a set of (possibly redundant) boxes.
- iou_thresh: (float scalar) iou threshold for matching boxes in
- selected_boxes and pool_boxes.
-
- Returns:
- BoxList containing averaged locations and scores for each box in
- selected_boxes.
-
- Raises:
- ValueError: if
- a) selected_boxes or pool_boxes is not a BoxList.
- b) if iou_thresh is not in [0, 1].
- c) pool_boxes does not have a scores field.
- """
- if not 0.0 <= iou_thresh <= 1.0:
- raise ValueError('iou_thresh must be between 0 and 1')
- if not isinstance(selected_boxes, box_list.BoxList):
- raise ValueError('selected_boxes must be a BoxList')
- if not isinstance(pool_boxes, box_list.BoxList):
- raise ValueError('pool_boxes must be a BoxList')
- if not pool_boxes.has_field('scores'):
- raise ValueError('pool_boxes must have a \'scores\' field')
-
- iou_ = iou(selected_boxes, pool_boxes)
- match_indicator = tf.to_float(tf.greater(iou_, iou_thresh))
- num_matches = tf.reduce_sum(match_indicator, 1)
- # TODO: Handle the case where some boxes in selected_boxes do not
- # match to any boxes in pool_boxes. For such boxes without any matches, we
- # should return the original boxes without voting.
- match_assert = tf.Assert(
- tf.reduce_all(tf.greater(num_matches, 0)),
- ['Each box in selected_boxes must match with at least one box '
- 'in pool_boxes.'])
-
- scores = tf.expand_dims(pool_boxes.get_field('scores'), 1)
- scores_assert = tf.Assert(
- tf.reduce_all(tf.greater_equal(scores, 0)),
- ['Scores must be non negative.'])
-
- with tf.control_dependencies([scores_assert, match_assert]):
- sum_scores = tf.matmul(match_indicator, scores)
- averaged_scores = tf.reshape(sum_scores, [-1]) / num_matches
-
- box_locations = tf.matmul(match_indicator,
- pool_boxes.get() * scores) / sum_scores
- averaged_boxes = box_list.BoxList(box_locations)
- _copy_extra_fields(averaged_boxes, selected_boxes)
- averaged_boxes.add_field('scores', averaged_scores)
- return averaged_boxes
-
-
-def pad_or_clip_box_list(boxlist, num_boxes, scope=None):
- """Pads or clips all fields of a BoxList.
-
- Args:
- boxlist: A BoxList with arbitrary of number of boxes.
- num_boxes: First num_boxes in boxlist are kept.
- The fields are zero-padded if num_boxes is bigger than the
- actual number of boxes.
- scope: name scope.
-
- Returns:
- BoxList with all fields padded or clipped.
- """
- with tf.name_scope(scope, 'PadOrClipBoxList'):
- subboxlist = box_list.BoxList(shape_utils.pad_or_clip_tensor(
- boxlist.get(), num_boxes))
- for field in boxlist.get_extra_fields():
- subfield = shape_utils.pad_or_clip_tensor(
- boxlist.get_field(field), num_boxes)
- subboxlist.add_field(field, subfield)
- return subboxlist
diff --git a/object_detection/core/box_list_ops_test.py b/object_detection/core/box_list_ops_test.py
deleted file mode 100644
index 467bb3c6..00000000
--- a/object_detection/core/box_list_ops_test.py
+++ /dev/null
@@ -1,962 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.core.box_list_ops."""
-import numpy as np
-import tensorflow as tf
-from tensorflow.python.framework import errors
-
-from object_detection.core import box_list
-from object_detection.core import box_list_ops
-
-
-class BoxListOpsTest(tf.test.TestCase):
- """Tests for common bounding box operations."""
-
- def test_area(self):
- corners = tf.constant([[0.0, 0.0, 10.0, 20.0], [1.0, 2.0, 3.0, 4.0]])
- exp_output = [200.0, 4.0]
- boxes = box_list.BoxList(corners)
- areas = box_list_ops.area(boxes)
- with self.test_session() as sess:
- areas_output = sess.run(areas)
- self.assertAllClose(areas_output, exp_output)
-
- def test_height_width(self):
- corners = tf.constant([[0.0, 0.0, 10.0, 20.0], [1.0, 2.0, 3.0, 4.0]])
- exp_output_heights = [10., 2.]
- exp_output_widths = [20., 2.]
- boxes = box_list.BoxList(corners)
- heights, widths = box_list_ops.height_width(boxes)
- with self.test_session() as sess:
- output_heights, output_widths = sess.run([heights, widths])
- self.assertAllClose(output_heights, exp_output_heights)
- self.assertAllClose(output_widths, exp_output_widths)
-
- def test_scale(self):
- corners = tf.constant([[0, 0, 100, 200], [50, 120, 100, 140]],
- dtype=tf.float32)
- boxes = box_list.BoxList(corners)
- boxes.add_field('extra_data', tf.constant([[1], [2]]))
-
- y_scale = tf.constant(1.0/100)
- x_scale = tf.constant(1.0/200)
- scaled_boxes = box_list_ops.scale(boxes, y_scale, x_scale)
- exp_output = [[0, 0, 1, 1], [0.5, 0.6, 1.0, 0.7]]
- with self.test_session() as sess:
- scaled_corners_out = sess.run(scaled_boxes.get())
- self.assertAllClose(scaled_corners_out, exp_output)
- extra_data_out = sess.run(scaled_boxes.get_field('extra_data'))
- self.assertAllEqual(extra_data_out, [[1], [2]])
-
- def test_clip_to_window_filter_boxes_which_fall_outside_the_window(
- self):
- window = tf.constant([0, 0, 9, 14], tf.float32)
- corners = tf.constant([[5.0, 5.0, 6.0, 6.0],
- [-1.0, -2.0, 4.0, 5.0],
- [2.0, 3.0, 5.0, 9.0],
- [0.0, 0.0, 9.0, 14.0],
- [-100.0, -100.0, 300.0, 600.0],
- [-10.0, -10.0, -9.0, -9.0]])
- boxes = box_list.BoxList(corners)
- boxes.add_field('extra_data', tf.constant([[1], [2], [3], [4], [5], [6]]))
- exp_output = [[5.0, 5.0, 6.0, 6.0], [0.0, 0.0, 4.0, 5.0],
- [2.0, 3.0, 5.0, 9.0], [0.0, 0.0, 9.0, 14.0],
- [0.0, 0.0, 9.0, 14.0]]
- pruned = box_list_ops.clip_to_window(
- boxes, window, filter_nonoverlapping=True)
- with self.test_session() as sess:
- pruned_output = sess.run(pruned.get())
- self.assertAllClose(pruned_output, exp_output)
- extra_data_out = sess.run(pruned.get_field('extra_data'))
- self.assertAllEqual(extra_data_out, [[1], [2], [3], [4], [5]])
-
- def test_clip_to_window_without_filtering_boxes_which_fall_outside_the_window(
- self):
- window = tf.constant([0, 0, 9, 14], tf.float32)
- corners = tf.constant([[5.0, 5.0, 6.0, 6.0],
- [-1.0, -2.0, 4.0, 5.0],
- [2.0, 3.0, 5.0, 9.0],
- [0.0, 0.0, 9.0, 14.0],
- [-100.0, -100.0, 300.0, 600.0],
- [-10.0, -10.0, -9.0, -9.0]])
- boxes = box_list.BoxList(corners)
- boxes.add_field('extra_data', tf.constant([[1], [2], [3], [4], [5], [6]]))
- exp_output = [[5.0, 5.0, 6.0, 6.0], [0.0, 0.0, 4.0, 5.0],
- [2.0, 3.0, 5.0, 9.0], [0.0, 0.0, 9.0, 14.0],
- [0.0, 0.0, 9.0, 14.0], [0.0, 0.0, 0.0, 0.0]]
- pruned = box_list_ops.clip_to_window(
- boxes, window, filter_nonoverlapping=False)
- with self.test_session() as sess:
- pruned_output = sess.run(pruned.get())
- self.assertAllClose(pruned_output, exp_output)
- extra_data_out = sess.run(pruned.get_field('extra_data'))
- self.assertAllEqual(extra_data_out, [[1], [2], [3], [4], [5], [6]])
-
- def test_prune_outside_window_filters_boxes_which_fall_outside_the_window(
- self):
- window = tf.constant([0, 0, 9, 14], tf.float32)
- corners = tf.constant([[5.0, 5.0, 6.0, 6.0],
- [-1.0, -2.0, 4.0, 5.0],
- [2.0, 3.0, 5.0, 9.0],
- [0.0, 0.0, 9.0, 14.0],
- [-10.0, -10.0, -9.0, -9.0],
- [-100.0, -100.0, 300.0, 600.0]])
- boxes = box_list.BoxList(corners)
- boxes.add_field('extra_data', tf.constant([[1], [2], [3], [4], [5], [6]]))
- exp_output = [[5.0, 5.0, 6.0, 6.0],
- [2.0, 3.0, 5.0, 9.0],
- [0.0, 0.0, 9.0, 14.0]]
- pruned, keep_indices = box_list_ops.prune_outside_window(boxes, window)
- with self.test_session() as sess:
- pruned_output = sess.run(pruned.get())
- self.assertAllClose(pruned_output, exp_output)
- keep_indices_out = sess.run(keep_indices)
- self.assertAllEqual(keep_indices_out, [0, 2, 3])
- extra_data_out = sess.run(pruned.get_field('extra_data'))
- self.assertAllEqual(extra_data_out, [[1], [3], [4]])
-
- def test_prune_completely_outside_window(self):
- window = tf.constant([0, 0, 9, 14], tf.float32)
- corners = tf.constant([[5.0, 5.0, 6.0, 6.0],
- [-1.0, -2.0, 4.0, 5.0],
- [2.0, 3.0, 5.0, 9.0],
- [0.0, 0.0, 9.0, 14.0],
- [-10.0, -10.0, -9.0, -9.0],
- [-100.0, -100.0, 300.0, 600.0]])
- boxes = box_list.BoxList(corners)
- boxes.add_field('extra_data', tf.constant([[1], [2], [3], [4], [5], [6]]))
- exp_output = [[5.0, 5.0, 6.0, 6.0],
- [-1.0, -2.0, 4.0, 5.0],
- [2.0, 3.0, 5.0, 9.0],
- [0.0, 0.0, 9.0, 14.0],
- [-100.0, -100.0, 300.0, 600.0]]
- pruned, keep_indices = box_list_ops.prune_completely_outside_window(boxes,
- window)
- with self.test_session() as sess:
- pruned_output = sess.run(pruned.get())
- self.assertAllClose(pruned_output, exp_output)
- keep_indices_out = sess.run(keep_indices)
- self.assertAllEqual(keep_indices_out, [0, 1, 2, 3, 5])
- extra_data_out = sess.run(pruned.get_field('extra_data'))
- self.assertAllEqual(extra_data_out, [[1], [2], [3], [4], [6]])
-
- def test_intersection(self):
- corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
- corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
- [0.0, 0.0, 20.0, 20.0]])
- exp_output = [[2.0, 0.0, 6.0], [1.0, 0.0, 5.0]]
- boxes1 = box_list.BoxList(corners1)
- boxes2 = box_list.BoxList(corners2)
- intersect = box_list_ops.intersection(boxes1, boxes2)
- with self.test_session() as sess:
- intersect_output = sess.run(intersect)
- self.assertAllClose(intersect_output, exp_output)
-
- def test_matched_intersection(self):
- corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
- corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0]])
- exp_output = [2.0, 0.0]
- boxes1 = box_list.BoxList(corners1)
- boxes2 = box_list.BoxList(corners2)
- intersect = box_list_ops.matched_intersection(boxes1, boxes2)
- with self.test_session() as sess:
- intersect_output = sess.run(intersect)
- self.assertAllClose(intersect_output, exp_output)
-
- def test_iou(self):
- corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
- corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
- [0.0, 0.0, 20.0, 20.0]])
- exp_output = [[2.0 / 16.0, 0, 6.0 / 400.0], [1.0 / 16.0, 0.0, 5.0 / 400.0]]
- boxes1 = box_list.BoxList(corners1)
- boxes2 = box_list.BoxList(corners2)
- iou = box_list_ops.iou(boxes1, boxes2)
- with self.test_session() as sess:
- iou_output = sess.run(iou)
- self.assertAllClose(iou_output, exp_output)
-
- def test_matched_iou(self):
- corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
- corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0]])
- exp_output = [2.0 / 16.0, 0]
- boxes1 = box_list.BoxList(corners1)
- boxes2 = box_list.BoxList(corners2)
- iou = box_list_ops.matched_iou(boxes1, boxes2)
- with self.test_session() as sess:
- iou_output = sess.run(iou)
- self.assertAllClose(iou_output, exp_output)
-
- def test_iouworks_on_empty_inputs(self):
- corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
- corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
- [0.0, 0.0, 20.0, 20.0]])
- boxes1 = box_list.BoxList(corners1)
- boxes2 = box_list.BoxList(corners2)
- boxes_empty = box_list.BoxList(tf.zeros((0, 4)))
- iou_empty_1 = box_list_ops.iou(boxes1, boxes_empty)
- iou_empty_2 = box_list_ops.iou(boxes_empty, boxes2)
- iou_empty_3 = box_list_ops.iou(boxes_empty, boxes_empty)
- with self.test_session() as sess:
- iou_output_1, iou_output_2, iou_output_3 = sess.run(
- [iou_empty_1, iou_empty_2, iou_empty_3])
- self.assertAllEqual(iou_output_1.shape, (2, 0))
- self.assertAllEqual(iou_output_2.shape, (0, 3))
- self.assertAllEqual(iou_output_3.shape, (0, 0))
-
- def test_ioa(self):
- corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
- corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
- [0.0, 0.0, 20.0, 20.0]])
- exp_output_1 = [[2.0 / 12.0, 0, 6.0 / 400.0],
- [1.0 / 12.0, 0.0, 5.0 / 400.0]]
- exp_output_2 = [[2.0 / 6.0, 1.0 / 5.0],
- [0, 0],
- [6.0 / 6.0, 5.0 / 5.0]]
- boxes1 = box_list.BoxList(corners1)
- boxes2 = box_list.BoxList(corners2)
- ioa_1 = box_list_ops.ioa(boxes1, boxes2)
- ioa_2 = box_list_ops.ioa(boxes2, boxes1)
- with self.test_session() as sess:
- ioa_output_1, ioa_output_2 = sess.run([ioa_1, ioa_2])
- self.assertAllClose(ioa_output_1, exp_output_1)
- self.assertAllClose(ioa_output_2, exp_output_2)
-
- def test_prune_non_overlapping_boxes(self):
- corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
- corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
- [0.0, 0.0, 20.0, 20.0]])
- boxes1 = box_list.BoxList(corners1)
- boxes2 = box_list.BoxList(corners2)
- minoverlap = 0.5
-
- exp_output_1 = boxes1
- exp_output_2 = box_list.BoxList(tf.constant(0.0, shape=[0, 4]))
- output_1, keep_indices_1 = box_list_ops.prune_non_overlapping_boxes(
- boxes1, boxes2, min_overlap=minoverlap)
- output_2, keep_indices_2 = box_list_ops.prune_non_overlapping_boxes(
- boxes2, boxes1, min_overlap=minoverlap)
- with self.test_session() as sess:
- (output_1_, keep_indices_1_, output_2_, keep_indices_2_, exp_output_1_,
- exp_output_2_) = sess.run(
- [output_1.get(), keep_indices_1,
- output_2.get(), keep_indices_2,
- exp_output_1.get(), exp_output_2.get()])
- self.assertAllClose(output_1_, exp_output_1_)
- self.assertAllClose(output_2_, exp_output_2_)
- self.assertAllEqual(keep_indices_1_, [0, 1])
- self.assertAllEqual(keep_indices_2_, [])
-
- def test_prune_small_boxes(self):
- boxes = tf.constant([[4.0, 3.0, 7.0, 5.0],
- [5.0, 6.0, 10.0, 7.0],
- [3.0, 4.0, 6.0, 8.0],
- [14.0, 14.0, 15.0, 15.0],
- [0.0, 0.0, 20.0, 20.0]])
- exp_boxes = [[3.0, 4.0, 6.0, 8.0],
- [0.0, 0.0, 20.0, 20.0]]
- boxes = box_list.BoxList(boxes)
- pruned_boxes = box_list_ops.prune_small_boxes(boxes, 3)
- with self.test_session() as sess:
- pruned_boxes = sess.run(pruned_boxes.get())
- self.assertAllEqual(pruned_boxes, exp_boxes)
-
- def test_prune_small_boxes_prunes_boxes_with_negative_side(self):
- boxes = tf.constant([[4.0, 3.0, 7.0, 5.0],
- [5.0, 6.0, 10.0, 7.0],
- [3.0, 4.0, 6.0, 8.0],
- [14.0, 14.0, 15.0, 15.0],
- [0.0, 0.0, 20.0, 20.0],
- [2.0, 3.0, 1.5, 7.0], # negative height
- [2.0, 3.0, 5.0, 1.7]]) # negative width
- exp_boxes = [[3.0, 4.0, 6.0, 8.0],
- [0.0, 0.0, 20.0, 20.0]]
- boxes = box_list.BoxList(boxes)
- pruned_boxes = box_list_ops.prune_small_boxes(boxes, 3)
- with self.test_session() as sess:
- pruned_boxes = sess.run(pruned_boxes.get())
- self.assertAllEqual(pruned_boxes, exp_boxes)
-
- def test_change_coordinate_frame(self):
- corners = tf.constant([[0.25, 0.5, 0.75, 0.75], [0.5, 0.0, 1.0, 1.0]])
- window = tf.constant([0.25, 0.25, 0.75, 0.75])
- boxes = box_list.BoxList(corners)
-
- expected_corners = tf.constant([[0, 0.5, 1.0, 1.0], [0.5, -0.5, 1.5, 1.5]])
- expected_boxes = box_list.BoxList(expected_corners)
- output = box_list_ops.change_coordinate_frame(boxes, window)
-
- with self.test_session() as sess:
- output_, expected_boxes_ = sess.run([output.get(), expected_boxes.get()])
- self.assertAllClose(output_, expected_boxes_)
-
- def test_ioaworks_on_empty_inputs(self):
- corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
- corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
- [0.0, 0.0, 20.0, 20.0]])
- boxes1 = box_list.BoxList(corners1)
- boxes2 = box_list.BoxList(corners2)
- boxes_empty = box_list.BoxList(tf.zeros((0, 4)))
- ioa_empty_1 = box_list_ops.ioa(boxes1, boxes_empty)
- ioa_empty_2 = box_list_ops.ioa(boxes_empty, boxes2)
- ioa_empty_3 = box_list_ops.ioa(boxes_empty, boxes_empty)
- with self.test_session() as sess:
- ioa_output_1, ioa_output_2, ioa_output_3 = sess.run(
- [ioa_empty_1, ioa_empty_2, ioa_empty_3])
- self.assertAllEqual(ioa_output_1.shape, (2, 0))
- self.assertAllEqual(ioa_output_2.shape, (0, 3))
- self.assertAllEqual(ioa_output_3.shape, (0, 0))
-
- def test_pairwise_distances(self):
- corners1 = tf.constant([[0.0, 0.0, 0.0, 0.0],
- [1.0, 1.0, 0.0, 2.0]])
- corners2 = tf.constant([[3.0, 4.0, 1.0, 0.0],
- [-4.0, 0.0, 0.0, 3.0],
- [0.0, 0.0, 0.0, 0.0]])
- exp_output = [[26, 25, 0], [18, 27, 6]]
- boxes1 = box_list.BoxList(corners1)
- boxes2 = box_list.BoxList(corners2)
- dist_matrix = box_list_ops.sq_dist(boxes1, boxes2)
- with self.test_session() as sess:
- dist_output = sess.run(dist_matrix)
- self.assertAllClose(dist_output, exp_output)
-
- def test_boolean_mask(self):
- corners = tf.constant(
- [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]])
- indicator = tf.constant([True, False, True, False, True], tf.bool)
- expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
- boxes = box_list.BoxList(corners)
- subset = box_list_ops.boolean_mask(boxes, indicator)
- with self.test_session() as sess:
- subset_output = sess.run(subset.get())
- self.assertAllClose(subset_output, expected_subset)
-
- def test_boolean_mask_with_field(self):
- corners = tf.constant(
- [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]])
- indicator = tf.constant([True, False, True, False, True], tf.bool)
- weights = tf.constant([[.1], [.3], [.5], [.7], [.9]], tf.float32)
- expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
- expected_weights = [[.1], [.5], [.9]]
-
- boxes = box_list.BoxList(corners)
- boxes.add_field('weights', weights)
- subset = box_list_ops.boolean_mask(boxes, indicator, ['weights'])
- with self.test_session() as sess:
- subset_output, weights_output = sess.run(
- [subset.get(), subset.get_field('weights')])
- self.assertAllClose(subset_output, expected_subset)
- self.assertAllClose(weights_output, expected_weights)
-
- def test_gather(self):
- corners = tf.constant(
- [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]])
- indices = tf.constant([0, 2, 4], tf.int32)
- expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
- boxes = box_list.BoxList(corners)
- subset = box_list_ops.gather(boxes, indices)
- with self.test_session() as sess:
- subset_output = sess.run(subset.get())
- self.assertAllClose(subset_output, expected_subset)
-
- def test_gather_with_field(self):
- corners = tf.constant([4*[0.0], 4*[1.0], 4*[2.0], 4*[3.0], 4*[4.0]])
- indices = tf.constant([0, 2, 4], tf.int32)
- weights = tf.constant([[.1], [.3], [.5], [.7], [.9]], tf.float32)
- expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
- expected_weights = [[.1], [.5], [.9]]
-
- boxes = box_list.BoxList(corners)
- boxes.add_field('weights', weights)
- subset = box_list_ops.gather(boxes, indices, ['weights'])
- with self.test_session() as sess:
- subset_output, weights_output = sess.run(
- [subset.get(), subset.get_field('weights')])
- self.assertAllClose(subset_output, expected_subset)
- self.assertAllClose(weights_output, expected_weights)
-
- def test_gather_with_invalid_field(self):
- corners = tf.constant([4 * [0.0], 4 * [1.0]])
- indices = tf.constant([0, 1], tf.int32)
- weights = tf.constant([[.1], [.3]], tf.float32)
-
- boxes = box_list.BoxList(corners)
- boxes.add_field('weights', weights)
- with self.assertRaises(ValueError):
- box_list_ops.gather(boxes, indices, ['foo', 'bar'])
-
- def test_gather_with_invalid_inputs(self):
- corners = tf.constant(
- [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]])
- indices_float32 = tf.constant([0, 2, 4], tf.float32)
- boxes = box_list.BoxList(corners)
- with self.assertRaises(ValueError):
- _ = box_list_ops.gather(boxes, indices_float32)
- indices_2d = tf.constant([[0, 2, 4]], tf.int32)
- boxes = box_list.BoxList(corners)
- with self.assertRaises(ValueError):
- _ = box_list_ops.gather(boxes, indices_2d)
-
- def test_gather_with_dynamic_indexing(self):
- corners = tf.constant([4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]
- ])
- weights = tf.constant([.5, .3, .7, .1, .9], tf.float32)
- indices = tf.reshape(tf.where(tf.greater(weights, 0.4)), [-1])
- expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
- expected_weights = [.5, .7, .9]
-
- boxes = box_list.BoxList(corners)
- boxes.add_field('weights', weights)
- subset = box_list_ops.gather(boxes, indices, ['weights'])
- with self.test_session() as sess:
- subset_output, weights_output = sess.run([subset.get(), subset.get_field(
- 'weights')])
- self.assertAllClose(subset_output, expected_subset)
- self.assertAllClose(weights_output, expected_weights)
-
- def test_sort_by_field_ascending_order(self):
- exp_corners = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9],
- [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]]
- exp_scores = [.95, .9, .75, .6, .5, .3]
- exp_weights = [.2, .45, .6, .75, .8, .92]
- shuffle = [2, 4, 0, 5, 1, 3]
- corners = tf.constant([exp_corners[i] for i in shuffle], tf.float32)
- boxes = box_list.BoxList(corners)
- boxes.add_field('scores', tf.constant(
- [exp_scores[i] for i in shuffle], tf.float32))
- boxes.add_field('weights', tf.constant(
- [exp_weights[i] for i in shuffle], tf.float32))
- sort_by_weight = box_list_ops.sort_by_field(
- boxes,
- 'weights',
- order=box_list_ops.SortOrder.ascend)
- with self.test_session() as sess:
- corners_out, scores_out, weights_out = sess.run([
- sort_by_weight.get(),
- sort_by_weight.get_field('scores'),
- sort_by_weight.get_field('weights')])
- self.assertAllClose(corners_out, exp_corners)
- self.assertAllClose(scores_out, exp_scores)
- self.assertAllClose(weights_out, exp_weights)
-
- def test_sort_by_field_descending_order(self):
- exp_corners = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9],
- [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]]
- exp_scores = [.95, .9, .75, .6, .5, .3]
- exp_weights = [.2, .45, .6, .75, .8, .92]
- shuffle = [2, 4, 0, 5, 1, 3]
-
- corners = tf.constant([exp_corners[i] for i in shuffle], tf.float32)
- boxes = box_list.BoxList(corners)
- boxes.add_field('scores', tf.constant(
- [exp_scores[i] for i in shuffle], tf.float32))
- boxes.add_field('weights', tf.constant(
- [exp_weights[i] for i in shuffle], tf.float32))
-
- sort_by_score = box_list_ops.sort_by_field(boxes, 'scores')
- with self.test_session() as sess:
- corners_out, scores_out, weights_out = sess.run([sort_by_score.get(
- ), sort_by_score.get_field('scores'), sort_by_score.get_field('weights')])
- self.assertAllClose(corners_out, exp_corners)
- self.assertAllClose(scores_out, exp_scores)
- self.assertAllClose(weights_out, exp_weights)
-
- def test_sort_by_field_invalid_inputs(self):
- corners = tf.constant([4 * [0.0], 4 * [0.5], 4 * [1.0], 4 * [2.0], 4 *
- [3.0], 4 * [4.0]])
- misc = tf.constant([[.95, .9], [.5, .3]], tf.float32)
- weights = tf.constant([.1, .2], tf.float32)
- boxes = box_list.BoxList(corners)
- boxes.add_field('misc', misc)
- boxes.add_field('weights', weights)
-
- with self.test_session() as sess:
- with self.assertRaises(ValueError):
- box_list_ops.sort_by_field(boxes, 'area')
-
- with self.assertRaises(ValueError):
- box_list_ops.sort_by_field(boxes, 'misc')
-
- with self.assertRaisesWithPredicateMatch(errors.InvalidArgumentError,
- 'Incorrect field size'):
- sess.run(box_list_ops.sort_by_field(boxes, 'weights').get())
-
- def test_visualize_boxes_in_image(self):
- image = tf.zeros((6, 4, 3))
- corners = tf.constant([[0, 0, 5, 3],
- [0, 0, 3, 2]], tf.float32)
- boxes = box_list.BoxList(corners)
- image_and_boxes = box_list_ops.visualize_boxes_in_image(image, boxes)
- image_and_boxes_bw = tf.to_float(
- tf.greater(tf.reduce_sum(image_and_boxes, 2), 0.0))
- exp_result = [[1, 1, 1, 0],
- [1, 1, 1, 0],
- [1, 1, 1, 0],
- [1, 0, 1, 0],
- [1, 1, 1, 0],
- [0, 0, 0, 0]]
- with self.test_session() as sess:
- output = sess.run(image_and_boxes_bw)
- self.assertAllEqual(output.astype(int), exp_result)
-
- def test_filter_field_value_equals(self):
- corners = tf.constant([[0, 0, 1, 1],
- [0, 0.1, 1, 1.1],
- [0, -0.1, 1, 0.9],
- [0, 10, 1, 11],
- [0, 10.1, 1, 11.1],
- [0, 100, 1, 101]], tf.float32)
- boxes = box_list.BoxList(corners)
- boxes.add_field('classes', tf.constant([1, 2, 1, 2, 2, 1]))
- exp_output1 = [[0, 0, 1, 1], [0, -0.1, 1, 0.9], [0, 100, 1, 101]]
- exp_output2 = [[0, 0.1, 1, 1.1], [0, 10, 1, 11], [0, 10.1, 1, 11.1]]
-
- filtered_boxes1 = box_list_ops.filter_field_value_equals(
- boxes, 'classes', 1)
- filtered_boxes2 = box_list_ops.filter_field_value_equals(
- boxes, 'classes', 2)
- with self.test_session() as sess:
- filtered_output1, filtered_output2 = sess.run([filtered_boxes1.get(),
- filtered_boxes2.get()])
- self.assertAllClose(filtered_output1, exp_output1)
- self.assertAllClose(filtered_output2, exp_output2)
-
- def test_filter_greater_than(self):
- corners = tf.constant([[0, 0, 1, 1],
- [0, 0.1, 1, 1.1],
- [0, -0.1, 1, 0.9],
- [0, 10, 1, 11],
- [0, 10.1, 1, 11.1],
- [0, 100, 1, 101]], tf.float32)
- boxes = box_list.BoxList(corners)
- boxes.add_field('scores', tf.constant([.1, .75, .9, .5, .5, .8]))
- thresh = .6
- exp_output = [[0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9], [0, 100, 1, 101]]
-
- filtered_boxes = box_list_ops.filter_greater_than(boxes, thresh)
- with self.test_session() as sess:
- filtered_output = sess.run(filtered_boxes.get())
- self.assertAllClose(filtered_output, exp_output)
-
- def test_clip_box_list(self):
- boxlist = box_list.BoxList(
- tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
- [0.6, 0.6, 0.8, 0.8], [0.2, 0.2, 0.3, 0.3]], tf.float32))
- boxlist.add_field('classes', tf.constant([0, 0, 1, 1]))
- boxlist.add_field('scores', tf.constant([0.75, 0.65, 0.3, 0.2]))
- num_boxes = 2
- clipped_boxlist = box_list_ops.pad_or_clip_box_list(boxlist, num_boxes)
-
- expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]]
- expected_classes = [0, 0]
- expected_scores = [0.75, 0.65]
- with self.test_session() as sess:
- boxes_out, classes_out, scores_out = sess.run(
- [clipped_boxlist.get(), clipped_boxlist.get_field('classes'),
- clipped_boxlist.get_field('scores')])
-
- self.assertAllClose(expected_boxes, boxes_out)
- self.assertAllEqual(expected_classes, classes_out)
- self.assertAllClose(expected_scores, scores_out)
-
- def test_pad_box_list(self):
- boxlist = box_list.BoxList(
- tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32))
- boxlist.add_field('classes', tf.constant([0, 1]))
- boxlist.add_field('scores', tf.constant([0.75, 0.2]))
- num_boxes = 4
- padded_boxlist = box_list_ops.pad_or_clip_box_list(boxlist, num_boxes)
-
- expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
- [0, 0, 0, 0], [0, 0, 0, 0]]
- expected_classes = [0, 1, 0, 0]
- expected_scores = [0.75, 0.2, 0, 0]
- with self.test_session() as sess:
- boxes_out, classes_out, scores_out = sess.run(
- [padded_boxlist.get(), padded_boxlist.get_field('classes'),
- padded_boxlist.get_field('scores')])
-
- self.assertAllClose(expected_boxes, boxes_out)
- self.assertAllEqual(expected_classes, classes_out)
- self.assertAllClose(expected_scores, scores_out)
-
-
-class ConcatenateTest(tf.test.TestCase):
-
- def test_invalid_input_box_list_list(self):
- with self.assertRaises(ValueError):
- box_list_ops.concatenate(None)
- with self.assertRaises(ValueError):
- box_list_ops.concatenate([])
- with self.assertRaises(ValueError):
- corners = tf.constant([[0, 0, 0, 0]], tf.float32)
- boxlist = box_list.BoxList(corners)
- box_list_ops.concatenate([boxlist, 2])
-
- def test_concatenate_with_missing_fields(self):
- corners1 = tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32)
- scores1 = tf.constant([1.0, 2.1])
- corners2 = tf.constant([[0, 3, 1, 6], [2, 4, 3, 8]], tf.float32)
- boxlist1 = box_list.BoxList(corners1)
- boxlist1.add_field('scores', scores1)
- boxlist2 = box_list.BoxList(corners2)
- with self.assertRaises(ValueError):
- box_list_ops.concatenate([boxlist1, boxlist2])
-
- def test_concatenate_with_incompatible_field_shapes(self):
- corners1 = tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32)
- scores1 = tf.constant([1.0, 2.1])
- corners2 = tf.constant([[0, 3, 1, 6], [2, 4, 3, 8]], tf.float32)
- scores2 = tf.constant([[1.0, 1.0], [2.1, 3.2]])
- boxlist1 = box_list.BoxList(corners1)
- boxlist1.add_field('scores', scores1)
- boxlist2 = box_list.BoxList(corners2)
- boxlist2.add_field('scores', scores2)
- with self.assertRaises(ValueError):
- box_list_ops.concatenate([boxlist1, boxlist2])
-
- def test_concatenate_is_correct(self):
- corners1 = tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32)
- scores1 = tf.constant([1.0, 2.1])
- corners2 = tf.constant([[0, 3, 1, 6], [2, 4, 3, 8], [1, 0, 5, 10]],
- tf.float32)
- scores2 = tf.constant([1.0, 2.1, 5.6])
-
- exp_corners = [[0, 0, 0, 0],
- [1, 2, 3, 4],
- [0, 3, 1, 6],
- [2, 4, 3, 8],
- [1, 0, 5, 10]]
- exp_scores = [1.0, 2.1, 1.0, 2.1, 5.6]
-
- boxlist1 = box_list.BoxList(corners1)
- boxlist1.add_field('scores', scores1)
- boxlist2 = box_list.BoxList(corners2)
- boxlist2.add_field('scores', scores2)
- result = box_list_ops.concatenate([boxlist1, boxlist2])
- with self.test_session() as sess:
- corners_output, scores_output = sess.run(
- [result.get(), result.get_field('scores')])
- self.assertAllClose(corners_output, exp_corners)
- self.assertAllClose(scores_output, exp_scores)
-
-
-class NonMaxSuppressionTest(tf.test.TestCase):
-
- def test_with_invalid_scores_field(self):
- corners = tf.constant([[0, 0, 1, 1],
- [0, 0.1, 1, 1.1],
- [0, -0.1, 1, 0.9],
- [0, 10, 1, 11],
- [0, 10.1, 1, 11.1],
- [0, 100, 1, 101]], tf.float32)
- boxes = box_list.BoxList(corners)
- boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5]))
- iou_thresh = .5
- max_output_size = 3
- nms = box_list_ops.non_max_suppression(
- boxes, iou_thresh, max_output_size)
- with self.test_session() as sess:
- with self.assertRaisesWithPredicateMatch(
- errors.InvalidArgumentError, 'scores has incompatible shape'):
- sess.run(nms.get())
-
- def test_select_from_three_clusters(self):
- corners = tf.constant([[0, 0, 1, 1],
- [0, 0.1, 1, 1.1],
- [0, -0.1, 1, 0.9],
- [0, 10, 1, 11],
- [0, 10.1, 1, 11.1],
- [0, 100, 1, 101]], tf.float32)
- boxes = box_list.BoxList(corners)
- boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3]))
- iou_thresh = .5
- max_output_size = 3
-
- exp_nms = [[0, 10, 1, 11],
- [0, 0, 1, 1],
- [0, 100, 1, 101]]
- nms = box_list_ops.non_max_suppression(
- boxes, iou_thresh, max_output_size)
- with self.test_session() as sess:
- nms_output = sess.run(nms.get())
- self.assertAllClose(nms_output, exp_nms)
-
- def test_select_at_most_two_boxes_from_three_clusters(self):
- corners = tf.constant([[0, 0, 1, 1],
- [0, 0.1, 1, 1.1],
- [0, -0.1, 1, 0.9],
- [0, 10, 1, 11],
- [0, 10.1, 1, 11.1],
- [0, 100, 1, 101]], tf.float32)
- boxes = box_list.BoxList(corners)
- boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3]))
- iou_thresh = .5
- max_output_size = 2
-
- exp_nms = [[0, 10, 1, 11],
- [0, 0, 1, 1]]
- nms = box_list_ops.non_max_suppression(
- boxes, iou_thresh, max_output_size)
- with self.test_session() as sess:
- nms_output = sess.run(nms.get())
- self.assertAllClose(nms_output, exp_nms)
-
- def test_select_at_most_thirty_boxes_from_three_clusters(self):
- corners = tf.constant([[0, 0, 1, 1],
- [0, 0.1, 1, 1.1],
- [0, -0.1, 1, 0.9],
- [0, 10, 1, 11],
- [0, 10.1, 1, 11.1],
- [0, 100, 1, 101]], tf.float32)
- boxes = box_list.BoxList(corners)
- boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3]))
- iou_thresh = .5
- max_output_size = 30
-
- exp_nms = [[0, 10, 1, 11],
- [0, 0, 1, 1],
- [0, 100, 1, 101]]
- nms = box_list_ops.non_max_suppression(
- boxes, iou_thresh, max_output_size)
- with self.test_session() as sess:
- nms_output = sess.run(nms.get())
- self.assertAllClose(nms_output, exp_nms)
-
- def test_select_single_box(self):
- corners = tf.constant([[0, 0, 1, 1]], tf.float32)
- boxes = box_list.BoxList(corners)
- boxes.add_field('scores', tf.constant([.9]))
- iou_thresh = .5
- max_output_size = 3
-
- exp_nms = [[0, 0, 1, 1]]
- nms = box_list_ops.non_max_suppression(
- boxes, iou_thresh, max_output_size)
- with self.test_session() as sess:
- nms_output = sess.run(nms.get())
- self.assertAllClose(nms_output, exp_nms)
-
- def test_select_from_ten_identical_boxes(self):
- corners = tf.constant(10 * [[0, 0, 1, 1]], tf.float32)
- boxes = box_list.BoxList(corners)
- boxes.add_field('scores', tf.constant(10 * [.9]))
- iou_thresh = .5
- max_output_size = 3
-
- exp_nms = [[0, 0, 1, 1]]
- nms = box_list_ops.non_max_suppression(
- boxes, iou_thresh, max_output_size)
- with self.test_session() as sess:
- nms_output = sess.run(nms.get())
- self.assertAllClose(nms_output, exp_nms)
-
- def test_copy_extra_fields(self):
- corners = tf.constant([[0, 0, 1, 1],
- [0, 0.1, 1, 1.1]], tf.float32)
- boxes = box_list.BoxList(corners)
- tensor1 = np.array([[1], [4]])
- tensor2 = np.array([[1, 1], [2, 2]])
- boxes.add_field('tensor1', tf.constant(tensor1))
- boxes.add_field('tensor2', tf.constant(tensor2))
- new_boxes = box_list.BoxList(tf.constant([[0, 0, 10, 10],
- [1, 3, 5, 5]], tf.float32))
- new_boxes = box_list_ops._copy_extra_fields(new_boxes, boxes)
- with self.test_session() as sess:
- self.assertAllClose(tensor1, sess.run(new_boxes.get_field('tensor1')))
- self.assertAllClose(tensor2, sess.run(new_boxes.get_field('tensor2')))
-
-
-class CoordinatesConversionTest(tf.test.TestCase):
-
- def test_to_normalized_coordinates(self):
- coordinates = tf.constant([[0, 0, 100, 100],
- [25, 25, 75, 75]], tf.float32)
- img = tf.ones((128, 100, 100, 3))
- boxlist = box_list.BoxList(coordinates)
- normalized_boxlist = box_list_ops.to_normalized_coordinates(
- boxlist, tf.shape(img)[1], tf.shape(img)[2])
- expected_boxes = [[0, 0, 1, 1],
- [0.25, 0.25, 0.75, 0.75]]
-
- with self.test_session() as sess:
- normalized_boxes = sess.run(normalized_boxlist.get())
- self.assertAllClose(normalized_boxes, expected_boxes)
-
- def test_to_normalized_coordinates_already_normalized(self):
- coordinates = tf.constant([[0, 0, 1, 1],
- [0.25, 0.25, 0.75, 0.75]], tf.float32)
- img = tf.ones((128, 100, 100, 3))
- boxlist = box_list.BoxList(coordinates)
- normalized_boxlist = box_list_ops.to_normalized_coordinates(
- boxlist, tf.shape(img)[1], tf.shape(img)[2])
-
- with self.test_session() as sess:
- with self.assertRaisesOpError('assertion failed'):
- sess.run(normalized_boxlist.get())
-
- def test_to_absolute_coordinates(self):
- coordinates = tf.constant([[0, 0, 1, 1],
- [0.25, 0.25, 0.75, 0.75]], tf.float32)
- img = tf.ones((128, 100, 100, 3))
- boxlist = box_list.BoxList(coordinates)
- absolute_boxlist = box_list_ops.to_absolute_coordinates(boxlist,
- tf.shape(img)[1],
- tf.shape(img)[2])
- expected_boxes = [[0, 0, 100, 100],
- [25, 25, 75, 75]]
-
- with self.test_session() as sess:
- absolute_boxes = sess.run(absolute_boxlist.get())
- self.assertAllClose(absolute_boxes, expected_boxes)
-
- def test_to_absolute_coordinates_already_abolute(self):
- coordinates = tf.constant([[0, 0, 100, 100],
- [25, 25, 75, 75]], tf.float32)
- img = tf.ones((128, 100, 100, 3))
- boxlist = box_list.BoxList(coordinates)
- absolute_boxlist = box_list_ops.to_absolute_coordinates(boxlist,
- tf.shape(img)[1],
- tf.shape(img)[2])
-
- with self.test_session() as sess:
- with self.assertRaisesOpError('assertion failed'):
- sess.run(absolute_boxlist.get())
-
- def test_convert_to_normalized_and_back(self):
- coordinates = np.random.uniform(size=(100, 4))
- coordinates = np.round(np.sort(coordinates) * 200)
- coordinates[:, 2:4] += 1
- coordinates[99, :] = [0, 0, 201, 201]
- img = tf.ones((128, 202, 202, 3))
-
- boxlist = box_list.BoxList(tf.constant(coordinates, tf.float32))
- boxlist = box_list_ops.to_normalized_coordinates(boxlist,
- tf.shape(img)[1],
- tf.shape(img)[2])
- boxlist = box_list_ops.to_absolute_coordinates(boxlist,
- tf.shape(img)[1],
- tf.shape(img)[2])
-
- with self.test_session() as sess:
- out = sess.run(boxlist.get())
- self.assertAllClose(out, coordinates)
-
- def test_convert_to_absolute_and_back(self):
- coordinates = np.random.uniform(size=(100, 4))
- coordinates = np.sort(coordinates)
- coordinates[99, :] = [0, 0, 1, 1]
- img = tf.ones((128, 202, 202, 3))
-
- boxlist = box_list.BoxList(tf.constant(coordinates, tf.float32))
- boxlist = box_list_ops.to_absolute_coordinates(boxlist,
- tf.shape(img)[1],
- tf.shape(img)[2])
- boxlist = box_list_ops.to_normalized_coordinates(boxlist,
- tf.shape(img)[1],
- tf.shape(img)[2])
-
- with self.test_session() as sess:
- out = sess.run(boxlist.get())
- self.assertAllClose(out, coordinates)
-
-
-class BoxRefinementTest(tf.test.TestCase):
-
- def test_box_voting(self):
- candidates = box_list.BoxList(
- tf.constant([[0.1, 0.1, 0.4, 0.4], [0.6, 0.6, 0.8, 0.8]], tf.float32))
- candidates.add_field('ExtraField', tf.constant([1, 2]))
- pool = box_list.BoxList(
- tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
- [0.6, 0.6, 0.8, 0.8]], tf.float32))
- pool.add_field('scores', tf.constant([0.75, 0.25, 0.3]))
- averaged_boxes = box_list_ops.box_voting(candidates, pool)
- expected_boxes = [[0.1, 0.1, 0.425, 0.425], [0.6, 0.6, 0.8, 0.8]]
- expected_scores = [0.5, 0.3]
- with self.test_session() as sess:
- boxes_out, scores_out, extra_field_out = sess.run(
- [averaged_boxes.get(), averaged_boxes.get_field('scores'),
- averaged_boxes.get_field('ExtraField')])
-
- self.assertAllClose(expected_boxes, boxes_out)
- self.assertAllClose(expected_scores, scores_out)
- self.assertAllEqual(extra_field_out, [1, 2])
-
- def test_box_voting_fails_with_negative_scores(self):
- candidates = box_list.BoxList(
- tf.constant([[0.1, 0.1, 0.4, 0.4]], tf.float32))
- pool = box_list.BoxList(tf.constant([[0.1, 0.1, 0.4, 0.4]], tf.float32))
- pool.add_field('scores', tf.constant([-0.2]))
- averaged_boxes = box_list_ops.box_voting(candidates, pool)
-
- with self.test_session() as sess:
- with self.assertRaisesOpError('Scores must be non negative'):
- sess.run([averaged_boxes.get()])
-
- def test_box_voting_fails_when_unmatched(self):
- candidates = box_list.BoxList(
- tf.constant([[0.1, 0.1, 0.4, 0.4]], tf.float32))
- pool = box_list.BoxList(tf.constant([[0.6, 0.6, 0.8, 0.8]], tf.float32))
- pool.add_field('scores', tf.constant([0.2]))
- averaged_boxes = box_list_ops.box_voting(candidates, pool)
-
- with self.test_session() as sess:
- with self.assertRaisesOpError('Each box in selected_boxes must match '
- 'with at least one box in pool_boxes.'):
- sess.run([averaged_boxes.get()])
-
- def test_refine_boxes(self):
- pool = box_list.BoxList(
- tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
- [0.6, 0.6, 0.8, 0.8]], tf.float32))
- pool.add_field('ExtraField', tf.constant([1, 2, 3]))
- pool.add_field('scores', tf.constant([0.75, 0.25, 0.3]))
- refined_boxes = box_list_ops.refine_boxes(pool, 0.5, 10)
-
- expected_boxes = [[0.1, 0.1, 0.425, 0.425], [0.6, 0.6, 0.8, 0.8]]
- expected_scores = [0.5, 0.3]
- with self.test_session() as sess:
- boxes_out, scores_out, extra_field_out = sess.run(
- [refined_boxes.get(), refined_boxes.get_field('scores'),
- refined_boxes.get_field('ExtraField')])
-
- self.assertAllClose(expected_boxes, boxes_out)
- self.assertAllClose(expected_scores, scores_out)
- self.assertAllEqual(extra_field_out, [1, 3])
-
- def test_refine_boxes_multi_class(self):
- pool = box_list.BoxList(
- tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
- [0.6, 0.6, 0.8, 0.8], [0.2, 0.2, 0.3, 0.3]], tf.float32))
- pool.add_field('classes', tf.constant([0, 0, 1, 1]))
- pool.add_field('scores', tf.constant([0.75, 0.25, 0.3, 0.2]))
- refined_boxes = box_list_ops.refine_boxes_multi_class(pool, 3, 0.5, 10)
-
- expected_boxes = [[0.1, 0.1, 0.425, 0.425], [0.6, 0.6, 0.8, 0.8],
- [0.2, 0.2, 0.3, 0.3]]
- expected_scores = [0.5, 0.3, 0.2]
- with self.test_session() as sess:
- boxes_out, scores_out, extra_field_out = sess.run(
- [refined_boxes.get(), refined_boxes.get_field('scores'),
- refined_boxes.get_field('classes')])
-
- self.assertAllClose(expected_boxes, boxes_out)
- self.assertAllClose(expected_scores, scores_out)
- self.assertAllEqual(extra_field_out, [0, 1, 1])
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/core/box_list_test.py b/object_detection/core/box_list_test.py
deleted file mode 100644
index edc00ebb..00000000
--- a/object_detection/core/box_list_test.py
+++ /dev/null
@@ -1,134 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.core.box_list."""
-
-import tensorflow as tf
-
-from object_detection.core import box_list
-
-
-class BoxListTest(tf.test.TestCase):
- """Tests for BoxList class."""
-
- def test_num_boxes(self):
- data = tf.constant([[0, 0, 1, 1], [1, 1, 2, 3], [3, 4, 5, 5]], tf.float32)
- expected_num_boxes = 3
-
- boxes = box_list.BoxList(data)
- with self.test_session() as sess:
- num_boxes_output = sess.run(boxes.num_boxes())
- self.assertEquals(num_boxes_output, expected_num_boxes)
-
- def test_get_correct_center_coordinates_and_sizes(self):
- boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
- boxes = box_list.BoxList(tf.constant(boxes))
- centers_sizes = boxes.get_center_coordinates_and_sizes()
- expected_centers_sizes = [[15, 0.35], [12.5, 0.25], [10, 0.3], [5, 0.3]]
- with self.test_session() as sess:
- centers_sizes_out = sess.run(centers_sizes)
- self.assertAllClose(centers_sizes_out, expected_centers_sizes)
-
- def test_create_box_list_with_dynamic_shape(self):
- data = tf.constant([[0, 0, 1, 1], [1, 1, 2, 3], [3, 4, 5, 5]], tf.float32)
- indices = tf.reshape(tf.where(tf.greater([1, 0, 1], 0)), [-1])
- data = tf.gather(data, indices)
- assert data.get_shape().as_list() == [None, 4]
- expected_num_boxes = 2
-
- boxes = box_list.BoxList(data)
- with self.test_session() as sess:
- num_boxes_output = sess.run(boxes.num_boxes())
- self.assertEquals(num_boxes_output, expected_num_boxes)
-
- def test_transpose_coordinates(self):
- boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
- boxes = box_list.BoxList(tf.constant(boxes))
- boxes.transpose_coordinates()
- expected_corners = [[10.0, 10.0, 15.0, 20.0], [0.1, 0.2, 0.4, 0.5]]
- with self.test_session() as sess:
- corners_out = sess.run(boxes.get())
- self.assertAllClose(corners_out, expected_corners)
-
- def test_box_list_invalid_inputs(self):
- data0 = tf.constant([[[0, 0, 1, 1], [3, 4, 5, 5]]], tf.float32)
- data1 = tf.constant([[0, 0, 1], [1, 1, 2], [3, 4, 5]], tf.float32)
- data2 = tf.constant([[0, 0, 1], [1, 1, 2], [3, 4, 5]], tf.int32)
-
- with self.assertRaises(ValueError):
- _ = box_list.BoxList(data0)
- with self.assertRaises(ValueError):
- _ = box_list.BoxList(data1)
- with self.assertRaises(ValueError):
- _ = box_list.BoxList(data2)
-
- def test_num_boxes_static(self):
- box_corners = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
- boxes = box_list.BoxList(tf.constant(box_corners))
- self.assertEquals(boxes.num_boxes_static(), 2)
- self.assertEquals(type(boxes.num_boxes_static()), int)
-
- def test_num_boxes_static_for_uninferrable_shape(self):
- placeholder = tf.placeholder(tf.float32, shape=[None, 4])
- boxes = box_list.BoxList(placeholder)
- self.assertEquals(boxes.num_boxes_static(), None)
-
- def test_as_tensor_dict(self):
- boxlist = box_list.BoxList(
- tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32))
- boxlist.add_field('classes', tf.constant([0, 1]))
- boxlist.add_field('scores', tf.constant([0.75, 0.2]))
- tensor_dict = boxlist.as_tensor_dict()
-
- expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]]
- expected_classes = [0, 1]
- expected_scores = [0.75, 0.2]
-
- with self.test_session() as sess:
- tensor_dict_out = sess.run(tensor_dict)
- self.assertAllEqual(3, len(tensor_dict_out))
- self.assertAllClose(expected_boxes, tensor_dict_out['boxes'])
- self.assertAllEqual(expected_classes, tensor_dict_out['classes'])
- self.assertAllClose(expected_scores, tensor_dict_out['scores'])
-
- def test_as_tensor_dict_with_features(self):
- boxlist = box_list.BoxList(
- tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32))
- boxlist.add_field('classes', tf.constant([0, 1]))
- boxlist.add_field('scores', tf.constant([0.75, 0.2]))
- tensor_dict = boxlist.as_tensor_dict(['boxes', 'classes', 'scores'])
-
- expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]]
- expected_classes = [0, 1]
- expected_scores = [0.75, 0.2]
-
- with self.test_session() as sess:
- tensor_dict_out = sess.run(tensor_dict)
- self.assertAllEqual(3, len(tensor_dict_out))
- self.assertAllClose(expected_boxes, tensor_dict_out['boxes'])
- self.assertAllEqual(expected_classes, tensor_dict_out['classes'])
- self.assertAllClose(expected_scores, tensor_dict_out['scores'])
-
- def test_as_tensor_dict_missing_field(self):
- boxlist = box_list.BoxList(
- tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32))
- boxlist.add_field('classes', tf.constant([0, 1]))
- boxlist.add_field('scores', tf.constant([0.75, 0.2]))
- with self.assertRaises(ValueError):
- boxlist.as_tensor_dict(['foo', 'bar'])
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/core/box_predictor.py b/object_detection/core/box_predictor.py
deleted file mode 100644
index 8378a8ea..00000000
--- a/object_detection/core/box_predictor.py
+++ /dev/null
@@ -1,566 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Box predictor for object detectors.
-
-Box predictors are classes that take a high level
-image feature map as input and produce two predictions,
-(1) a tensor encoding box locations, and
-(2) a tensor encoding classes for each box.
-
-These components are passed directly to loss functions
-in our detection models.
-
-These modules are separated from the main model since the same
-few box predictor architectures are shared across many models.
-"""
-from abc import abstractmethod
-import tensorflow as tf
-from object_detection.utils import ops
-from object_detection.utils import shape_utils
-from object_detection.utils import static_shape
-
-slim = tf.contrib.slim
-
-BOX_ENCODINGS = 'box_encodings'
-CLASS_PREDICTIONS_WITH_BACKGROUND = 'class_predictions_with_background'
-MASK_PREDICTIONS = 'mask_predictions'
-
-
-class BoxPredictor(object):
- """BoxPredictor."""
-
- def __init__(self, is_training, num_classes):
- """Constructor.
-
- Args:
- is_training: Indicates whether the BoxPredictor is in training mode.
- num_classes: number of classes. Note that num_classes *does not*
- include the background category, so if groundtruth labels take values
- in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
- assigned classification targets can range from {0,... K}).
- """
- self._is_training = is_training
- self._num_classes = num_classes
-
- @property
- def num_classes(self):
- return self._num_classes
-
- def predict(self, image_features, num_predictions_per_location, scope,
- **params):
- """Computes encoded object locations and corresponding confidences.
-
- Takes a high level image feature map as input and produce two predictions,
- (1) a tensor encoding box locations, and
- (2) a tensor encoding class scores for each corresponding box.
- In this interface, we only assume that two tensors are returned as output
- and do not assume anything about their shapes.
-
- Args:
- image_features: A float tensor of shape [batch_size, height, width,
- channels] containing features for a batch of images.
- num_predictions_per_location: an integer representing the number of box
- predictions to be made per spatial location in the feature map.
- scope: Variable and Op scope name.
- **params: Additional keyword arguments for specific implementations of
- BoxPredictor.
-
- Returns:
- A dictionary containing at least the following tensors.
- box_encodings: A float tensor of shape
- [batch_size, num_anchors, q, code_size] representing the location of
- the objects, where q is 1 or the number of classes.
- class_predictions_with_background: A float tensor of shape
- [batch_size, num_anchors, num_classes + 1] representing the class
- predictions for the proposals.
- """
- with tf.variable_scope(scope):
- return self._predict(image_features, num_predictions_per_location,
- **params)
-
- # TODO: num_predictions_per_location could be moved to constructor.
- # This is currently only used by ConvolutionalBoxPredictor.
- @abstractmethod
- def _predict(self, image_features, num_predictions_per_location, **params):
- """Implementations must override this method.
-
- Args:
- image_features: A float tensor of shape [batch_size, height, width,
- channels] containing features for a batch of images.
- num_predictions_per_location: an integer representing the number of box
- predictions to be made per spatial location in the feature map.
- **params: Additional keyword arguments for specific implementations of
- BoxPredictor.
-
- Returns:
- A dictionary containing at least the following tensors.
- box_encodings: A float tensor of shape
- [batch_size, num_anchors, q, code_size] representing the location of
- the objects, where q is 1 or the number of classes.
- class_predictions_with_background: A float tensor of shape
- [batch_size, num_anchors, num_classes + 1] representing the class
- predictions for the proposals.
- """
- pass
-
-
-class RfcnBoxPredictor(BoxPredictor):
- """RFCN Box Predictor.
-
- Applies a position sensitve ROI pooling on position sensitive feature maps to
- predict classes and refined locations. See https://arxiv.org/abs/1605.06409
- for details.
-
- This is used for the second stage of the RFCN meta architecture. Notice that
- locations are *not* shared across classes, thus for each anchor, a separate
- prediction is made for each class.
- """
-
- def __init__(self,
- is_training,
- num_classes,
- conv_hyperparams,
- num_spatial_bins,
- depth,
- crop_size,
- box_code_size):
- """Constructor.
-
- Args:
- is_training: Indicates whether the BoxPredictor is in training mode.
- num_classes: number of classes. Note that num_classes *does not*
- include the background category, so if groundtruth labels take values
- in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
- assigned classification targets can range from {0,... K}).
- conv_hyperparams: Slim arg_scope with hyperparameters for conolutional
- layers.
- num_spatial_bins: A list of two integers `[spatial_bins_y,
- spatial_bins_x]`.
- depth: Target depth to reduce the input feature maps to.
- crop_size: A list of two integers `[crop_height, crop_width]`.
- box_code_size: Size of encoding for each box.
- """
- super(RfcnBoxPredictor, self).__init__(is_training, num_classes)
- self._conv_hyperparams = conv_hyperparams
- self._num_spatial_bins = num_spatial_bins
- self._depth = depth
- self._crop_size = crop_size
- self._box_code_size = box_code_size
-
- @property
- def num_classes(self):
- return self._num_classes
-
- def _predict(self, image_features, num_predictions_per_location,
- proposal_boxes):
- """Computes encoded object locations and corresponding confidences.
-
- Args:
- image_features: A float tensor of shape [batch_size, height, width,
- channels] containing features for a batch of images.
- num_predictions_per_location: an integer representing the number of box
- predictions to be made per spatial location in the feature map.
- Currently, this must be set to 1, or an error will be raised.
- proposal_boxes: A float tensor of shape [batch_size, num_proposals,
- box_code_size].
-
- Returns:
- box_encodings: A float tensor of shape
- [batch_size, 1, num_classes, code_size] representing the
- location of the objects.
- class_predictions_with_background: A float tensor of shape
- [batch_size, 1, num_classes + 1] representing the class
- predictions for the proposals.
- Raises:
- ValueError: if num_predictions_per_location is not 1.
- """
- if num_predictions_per_location != 1:
- raise ValueError('Currently RfcnBoxPredictor only supports '
- 'predicting a single box per class per location.')
-
- batch_size = tf.shape(proposal_boxes)[0]
- num_boxes = tf.shape(proposal_boxes)[1]
- def get_box_indices(proposals):
- proposals_shape = proposals.get_shape().as_list()
- if any(dim is None for dim in proposals_shape):
- proposals_shape = tf.shape(proposals)
- ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
- multiplier = tf.expand_dims(
- tf.range(start=0, limit=proposals_shape[0]), 1)
- return tf.reshape(ones_mat * multiplier, [-1])
-
- net = image_features
- with slim.arg_scope(self._conv_hyperparams):
- net = slim.conv2d(net, self._depth, [1, 1], scope='reduce_depth')
- # Location predictions.
- location_feature_map_depth = (self._num_spatial_bins[0] *
- self._num_spatial_bins[1] *
- self.num_classes *
- self._box_code_size)
- location_feature_map = slim.conv2d(net, location_feature_map_depth,
- [1, 1], activation_fn=None,
- scope='refined_locations')
- box_encodings = ops.position_sensitive_crop_regions(
- location_feature_map,
- boxes=tf.reshape(proposal_boxes, [-1, self._box_code_size]),
- box_ind=get_box_indices(proposal_boxes),
- crop_size=self._crop_size,
- num_spatial_bins=self._num_spatial_bins,
- global_pool=True)
- box_encodings = tf.squeeze(box_encodings, squeeze_dims=[1, 2])
- box_encodings = tf.reshape(box_encodings,
- [batch_size * num_boxes, 1, self.num_classes,
- self._box_code_size])
-
- # Class predictions.
- total_classes = self.num_classes + 1 # Account for background class.
- class_feature_map_depth = (self._num_spatial_bins[0] *
- self._num_spatial_bins[1] *
- total_classes)
- class_feature_map = slim.conv2d(net, class_feature_map_depth, [1, 1],
- activation_fn=None,
- scope='class_predictions')
- class_predictions_with_background = ops.position_sensitive_crop_regions(
- class_feature_map,
- boxes=tf.reshape(proposal_boxes, [-1, self._box_code_size]),
- box_ind=get_box_indices(proposal_boxes),
- crop_size=self._crop_size,
- num_spatial_bins=self._num_spatial_bins,
- global_pool=True)
- class_predictions_with_background = tf.squeeze(
- class_predictions_with_background, squeeze_dims=[1, 2])
- class_predictions_with_background = tf.reshape(
- class_predictions_with_background,
- [batch_size * num_boxes, 1, total_classes])
-
- return {BOX_ENCODINGS: box_encodings,
- CLASS_PREDICTIONS_WITH_BACKGROUND:
- class_predictions_with_background}
-
-
-class MaskRCNNBoxPredictor(BoxPredictor):
- """Mask R-CNN Box Predictor.
-
- See Mask R-CNN: He, K., Gkioxari, G., Dollar, P., & Girshick, R. (2017).
- Mask R-CNN. arXiv preprint arXiv:1703.06870.
-
- This is used for the second stage of the Mask R-CNN detector where proposals
- cropped from an image are arranged along the batch dimension of the input
- image_features tensor. Notice that locations are *not* shared across classes,
- thus for each anchor, a separate prediction is made for each class.
-
- In addition to predicting boxes and classes, optionally this class allows
- predicting masks and/or keypoints inside detection boxes.
-
- Currently this box predictor makes per-class predictions; that is, each
- anchor makes a separate box prediction for each class.
- """
-
- def __init__(self,
- is_training,
- num_classes,
- fc_hyperparams,
- use_dropout,
- dropout_keep_prob,
- box_code_size,
- conv_hyperparams=None,
- predict_instance_masks=False,
- mask_height=14,
- mask_width=14,
- mask_prediction_conv_depth=256,
- predict_keypoints=False):
- """Constructor.
-
- Args:
- is_training: Indicates whether the BoxPredictor is in training mode.
- num_classes: number of classes. Note that num_classes *does not*
- include the background category, so if groundtruth labels take values
- in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
- assigned classification targets can range from {0,... K}).
- fc_hyperparams: Slim arg_scope with hyperparameters for fully
- connected ops.
- use_dropout: Option to use dropout or not. Note that a single dropout
- op is applied here prior to both box and class predictions, which stands
- in contrast to the ConvolutionalBoxPredictor below.
- dropout_keep_prob: Keep probability for dropout.
- This is only used if use_dropout is True.
- box_code_size: Size of encoding for each box.
- conv_hyperparams: Slim arg_scope with hyperparameters for convolution
- ops.
- predict_instance_masks: Whether to predict object masks inside detection
- boxes.
- mask_height: Desired output mask height. The default value is 14.
- mask_width: Desired output mask width. The default value is 14.
- mask_prediction_conv_depth: The depth for the first conv2d_transpose op
- applied to the image_features in the mask prediciton branch.
- predict_keypoints: Whether to predict keypoints insde detection boxes.
-
-
- Raises:
- ValueError: If predict_instance_masks or predict_keypoints is true.
- """
- super(MaskRCNNBoxPredictor, self).__init__(is_training, num_classes)
- self._fc_hyperparams = fc_hyperparams
- self._use_dropout = use_dropout
- self._box_code_size = box_code_size
- self._dropout_keep_prob = dropout_keep_prob
- self._conv_hyperparams = conv_hyperparams
- self._predict_instance_masks = predict_instance_masks
- self._mask_height = mask_height
- self._mask_width = mask_width
- self._mask_prediction_conv_depth = mask_prediction_conv_depth
- self._predict_keypoints = predict_keypoints
- if self._predict_keypoints:
- raise ValueError('Keypoint prediction is unimplemented.')
- if ((self._predict_instance_masks or self._predict_keypoints) and
- self._conv_hyperparams is None):
- raise ValueError('`conv_hyperparams` must be provided when predicting '
- 'masks.')
-
- @property
- def num_classes(self):
- return self._num_classes
-
- def _predict(self, image_features, num_predictions_per_location):
- """Computes encoded object locations and corresponding confidences.
-
- Flattens image_features and applies fully connected ops (with no
- non-linearity) to predict box encodings and class predictions. In this
- setting, anchors are not spatially arranged in any way and are assumed to
- have been folded into the batch dimension. Thus we output 1 for the
- anchors dimension.
-
- Also optionally predicts instance masks.
- The mask prediction head is based on the Mask RCNN paper with the following
- modifications: We replace the deconvolution layer with a bilinear resize
- and a convolution.
-
- Args:
- image_features: A float tensor of shape [batch_size, height, width,
- channels] containing features for a batch of images.
- num_predictions_per_location: an integer representing the number of box
- predictions to be made per spatial location in the feature map.
- Currently, this must be set to 1, or an error will be raised.
-
- Returns:
- A dictionary containing the following tensors.
- box_encodings: A float tensor of shape
- [batch_size, 1, num_classes, code_size] representing the
- location of the objects.
- class_predictions_with_background: A float tensor of shape
- [batch_size, 1, num_classes + 1] representing the class
- predictions for the proposals.
- If predict_masks is True the dictionary also contains:
- instance_masks: A float tensor of shape
- [batch_size, 1, num_classes, image_height, image_width]
- If predict_keypoints is True the dictionary also contains:
- keypoints: [batch_size, 1, num_keypoints, 2]
-
- Raises:
- ValueError: if num_predictions_per_location is not 1.
- """
- if num_predictions_per_location != 1:
- raise ValueError('Currently FullyConnectedBoxPredictor only supports '
- 'predicting a single box per class per location.')
- spatial_averaged_image_features = tf.reduce_mean(image_features, [1, 2],
- keep_dims=True,
- name='AvgPool')
- flattened_image_features = slim.flatten(spatial_averaged_image_features)
- if self._use_dropout:
- flattened_image_features = slim.dropout(flattened_image_features,
- keep_prob=self._dropout_keep_prob,
- is_training=self._is_training)
- with slim.arg_scope(self._fc_hyperparams):
- box_encodings = slim.fully_connected(
- flattened_image_features,
- self._num_classes * self._box_code_size,
- activation_fn=None,
- scope='BoxEncodingPredictor')
- class_predictions_with_background = slim.fully_connected(
- flattened_image_features,
- self._num_classes + 1,
- activation_fn=None,
- scope='ClassPredictor')
- box_encodings = tf.reshape(
- box_encodings, [-1, 1, self._num_classes, self._box_code_size])
- class_predictions_with_background = tf.reshape(
- class_predictions_with_background, [-1, 1, self._num_classes + 1])
-
- predictions_dict = {
- BOX_ENCODINGS: box_encodings,
- CLASS_PREDICTIONS_WITH_BACKGROUND: class_predictions_with_background
- }
-
- if self._predict_instance_masks:
- with slim.arg_scope(self._conv_hyperparams):
- upsampled_features = tf.image.resize_bilinear(
- image_features,
- [self._mask_height, self._mask_width],
- align_corners=True)
- upsampled_features = slim.conv2d(
- upsampled_features,
- num_outputs=self._mask_prediction_conv_depth,
- kernel_size=[2, 2])
- mask_predictions = slim.conv2d(upsampled_features,
- num_outputs=self.num_classes,
- activation_fn=None,
- kernel_size=[3, 3])
- instance_masks = tf.expand_dims(tf.transpose(mask_predictions,
- perm=[0, 3, 1, 2]),
- axis=1,
- name='MaskPredictor')
- predictions_dict[MASK_PREDICTIONS] = instance_masks
- return predictions_dict
-
-
-class ConvolutionalBoxPredictor(BoxPredictor):
- """Convolutional Box Predictor.
-
- Optionally add an intermediate 1x1 convolutional layer after features and
- predict in parallel branches box_encodings and
- class_predictions_with_background.
-
- Currently this box predictor assumes that predictions are "shared" across
- classes --- that is each anchor makes box predictions which do not depend
- on class.
- """
-
- def __init__(self,
- is_training,
- num_classes,
- conv_hyperparams,
- min_depth,
- max_depth,
- num_layers_before_predictor,
- use_dropout,
- dropout_keep_prob,
- kernel_size,
- box_code_size,
- apply_sigmoid_to_scores=False,
- class_prediction_bias_init=0.0):
- """Constructor.
-
- Args:
- is_training: Indicates whether the BoxPredictor is in training mode.
- num_classes: number of classes. Note that num_classes *does not*
- include the background category, so if groundtruth labels take values
- in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
- assigned classification targets can range from {0,... K}).
- conv_hyperparams: Slim arg_scope with hyperparameters for convolution ops.
- min_depth: Minumum feature depth prior to predicting box encodings
- and class predictions.
- max_depth: Maximum feature depth prior to predicting box encodings
- and class predictions. If max_depth is set to 0, no additional
- feature map will be inserted before location and class predictions.
- num_layers_before_predictor: Number of the additional conv layers before
- the predictor.
- use_dropout: Option to use dropout for class prediction or not.
- dropout_keep_prob: Keep probability for dropout.
- This is only used if use_dropout is True.
- kernel_size: Size of final convolution kernel. If the
- spatial resolution of the feature map is smaller than the kernel size,
- then the kernel size is automatically set to be
- min(feature_width, feature_height).
- box_code_size: Size of encoding for each box.
- apply_sigmoid_to_scores: if True, apply the sigmoid on the output
- class_predictions.
- class_prediction_bias_init: constant value to initialize bias of the last
- conv2d layer before class prediction.
-
- Raises:
- ValueError: if min_depth > max_depth.
- """
- super(ConvolutionalBoxPredictor, self).__init__(is_training, num_classes)
- if min_depth > max_depth:
- raise ValueError('min_depth should be less than or equal to max_depth')
- self._conv_hyperparams = conv_hyperparams
- self._min_depth = min_depth
- self._max_depth = max_depth
- self._num_layers_before_predictor = num_layers_before_predictor
- self._use_dropout = use_dropout
- self._kernel_size = kernel_size
- self._box_code_size = box_code_size
- self._dropout_keep_prob = dropout_keep_prob
- self._apply_sigmoid_to_scores = apply_sigmoid_to_scores
- self._class_prediction_bias_init = class_prediction_bias_init
-
- def _predict(self, image_features, num_predictions_per_location):
- """Computes encoded object locations and corresponding confidences.
-
- Args:
- image_features: A float tensor of shape [batch_size, height, width,
- channels] containing features for a batch of images.
- num_predictions_per_location: an integer representing the number of box
- predictions to be made per spatial location in the feature map.
-
- Returns:
- A dictionary containing the following tensors.
- box_encodings: A float tensor of shape [batch_size, num_anchors, 1,
- code_size] representing the location of the objects, where
- num_anchors = feat_height * feat_width * num_predictions_per_location
- class_predictions_with_background: A float tensor of shape
- [batch_size, num_anchors, num_classes + 1] representing the class
- predictions for the proposals.
- """
- # Add a slot for the background class.
- num_class_slots = self.num_classes + 1
- net = image_features
- with slim.arg_scope(self._conv_hyperparams), \
- slim.arg_scope([slim.dropout], is_training=self._is_training):
- # Add additional conv layers before the class predictor.
- features_depth = static_shape.get_depth(image_features.get_shape())
- depth = max(min(features_depth, self._max_depth), self._min_depth)
- tf.logging.info('depth of additional conv before box predictor: {}'.
- format(depth))
- if depth > 0 and self._num_layers_before_predictor > 0:
- for i in range(self._num_layers_before_predictor):
- net = slim.conv2d(
- net, depth, [1, 1], scope='Conv2d_%d_1x1_%d' % (i, depth))
- with slim.arg_scope([slim.conv2d], activation_fn=None,
- normalizer_fn=None, normalizer_params=None):
- box_encodings = slim.conv2d(
- net, num_predictions_per_location * self._box_code_size,
- [self._kernel_size, self._kernel_size],
- scope='BoxEncodingPredictor')
- if self._use_dropout:
- net = slim.dropout(net, keep_prob=self._dropout_keep_prob)
- class_predictions_with_background = slim.conv2d(
- net, num_predictions_per_location * num_class_slots,
- [self._kernel_size, self._kernel_size], scope='ClassPredictor',
- biases_initializer=tf.constant_initializer(
- self._class_prediction_bias_init))
- if self._apply_sigmoid_to_scores:
- class_predictions_with_background = tf.sigmoid(
- class_predictions_with_background)
-
- combined_feature_map_shape = shape_utils.combined_static_and_dynamic_shape(
- image_features)
- box_encodings = tf.reshape(
- box_encodings, tf.stack([combined_feature_map_shape[0],
- combined_feature_map_shape[1] *
- combined_feature_map_shape[2] *
- num_predictions_per_location,
- 1, self._box_code_size]))
- class_predictions_with_background = tf.reshape(
- class_predictions_with_background,
- tf.stack([combined_feature_map_shape[0],
- combined_feature_map_shape[1] *
- combined_feature_map_shape[2] *
- num_predictions_per_location,
- num_class_slots]))
- return {BOX_ENCODINGS: box_encodings,
- CLASS_PREDICTIONS_WITH_BACKGROUND:
- class_predictions_with_background}
diff --git a/object_detection/core/box_predictor_test.py b/object_detection/core/box_predictor_test.py
deleted file mode 100644
index e5e5a3c9..00000000
--- a/object_detection/core/box_predictor_test.py
+++ /dev/null
@@ -1,323 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.core.box_predictor."""
-
-import numpy as np
-import tensorflow as tf
-
-from google.protobuf import text_format
-from object_detection.builders import hyperparams_builder
-from object_detection.core import box_predictor
-from object_detection.protos import hyperparams_pb2
-
-
-class MaskRCNNBoxPredictorTest(tf.test.TestCase):
-
- def _build_arg_scope_with_hyperparams(self,
- op_type=hyperparams_pb2.Hyperparams.FC):
- hyperparams = hyperparams_pb2.Hyperparams()
- hyperparams_text_proto = """
- activation: NONE
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- """
- text_format.Merge(hyperparams_text_proto, hyperparams)
- hyperparams.op = op_type
- return hyperparams_builder.build(hyperparams, is_training=True)
-
- def test_get_boxes_with_five_classes(self):
- image_features = tf.random_uniform([2, 7, 7, 3], dtype=tf.float32)
- mask_box_predictor = box_predictor.MaskRCNNBoxPredictor(
- is_training=False,
- num_classes=5,
- fc_hyperparams=self._build_arg_scope_with_hyperparams(),
- use_dropout=False,
- dropout_keep_prob=0.5,
- box_code_size=4,
- )
- box_predictions = mask_box_predictor.predict(
- image_features, num_predictions_per_location=1, scope='BoxPredictor')
- box_encodings = box_predictions[box_predictor.BOX_ENCODINGS]
- class_predictions_with_background = box_predictions[
- box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- (box_encodings_shape,
- class_predictions_with_background_shape) = sess.run(
- [tf.shape(box_encodings),
- tf.shape(class_predictions_with_background)])
- self.assertAllEqual(box_encodings_shape, [2, 1, 5, 4])
- self.assertAllEqual(class_predictions_with_background_shape, [2, 1, 6])
-
- def test_value_error_on_predict_instance_masks_with_no_conv_hyperparms(self):
- with self.assertRaises(ValueError):
- box_predictor.MaskRCNNBoxPredictor(
- is_training=False,
- num_classes=5,
- fc_hyperparams=self._build_arg_scope_with_hyperparams(),
- use_dropout=False,
- dropout_keep_prob=0.5,
- box_code_size=4,
- predict_instance_masks=True)
-
- def test_get_instance_masks(self):
- image_features = tf.random_uniform([2, 7, 7, 3], dtype=tf.float32)
- mask_box_predictor = box_predictor.MaskRCNNBoxPredictor(
- is_training=False,
- num_classes=5,
- fc_hyperparams=self._build_arg_scope_with_hyperparams(),
- use_dropout=False,
- dropout_keep_prob=0.5,
- box_code_size=4,
- conv_hyperparams=self._build_arg_scope_with_hyperparams(
- op_type=hyperparams_pb2.Hyperparams.CONV),
- predict_instance_masks=True)
- box_predictions = mask_box_predictor.predict(
- image_features, num_predictions_per_location=1, scope='BoxPredictor')
- mask_predictions = box_predictions[box_predictor.MASK_PREDICTIONS]
- self.assertListEqual([2, 1, 5, 14, 14],
- mask_predictions.get_shape().as_list())
-
- def test_do_not_return_instance_masks_and_keypoints_without_request(self):
- image_features = tf.random_uniform([2, 7, 7, 3], dtype=tf.float32)
- mask_box_predictor = box_predictor.MaskRCNNBoxPredictor(
- is_training=False,
- num_classes=5,
- fc_hyperparams=self._build_arg_scope_with_hyperparams(),
- use_dropout=False,
- dropout_keep_prob=0.5,
- box_code_size=4)
- box_predictions = mask_box_predictor.predict(
- image_features, num_predictions_per_location=1, scope='BoxPredictor')
- self.assertEqual(len(box_predictions), 2)
- self.assertTrue(box_predictor.BOX_ENCODINGS in box_predictions)
- self.assertTrue(box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND
- in box_predictions)
-
- def test_value_error_on_predict_keypoints(self):
- with self.assertRaises(ValueError):
- box_predictor.MaskRCNNBoxPredictor(
- is_training=False,
- num_classes=5,
- fc_hyperparams=self._build_arg_scope_with_hyperparams(),
- use_dropout=False,
- dropout_keep_prob=0.5,
- box_code_size=4,
- predict_keypoints=True)
-
-
-class RfcnBoxPredictorTest(tf.test.TestCase):
-
- def _build_arg_scope_with_conv_hyperparams(self):
- conv_hyperparams = hyperparams_pb2.Hyperparams()
- conv_hyperparams_text_proto = """
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- """
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
- return hyperparams_builder.build(conv_hyperparams, is_training=True)
-
- def test_get_correct_box_encoding_and_class_prediction_shapes(self):
- image_features = tf.random_uniform([4, 8, 8, 64], dtype=tf.float32)
- proposal_boxes = tf.random_normal([4, 2, 4], dtype=tf.float32)
- rfcn_box_predictor = box_predictor.RfcnBoxPredictor(
- is_training=False,
- num_classes=2,
- conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
- num_spatial_bins=[3, 3],
- depth=4,
- crop_size=[12, 12],
- box_code_size=4
- )
- box_predictions = rfcn_box_predictor.predict(
- image_features, num_predictions_per_location=1, scope='BoxPredictor',
- proposal_boxes=proposal_boxes)
- box_encodings = box_predictions[box_predictor.BOX_ENCODINGS]
- class_predictions_with_background = box_predictions[
- box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- (box_encodings_shape,
- class_predictions_shape) = sess.run(
- [tf.shape(box_encodings),
- tf.shape(class_predictions_with_background)])
- self.assertAllEqual(box_encodings_shape, [8, 1, 2, 4])
- self.assertAllEqual(class_predictions_shape, [8, 1, 3])
-
-
-class ConvolutionalBoxPredictorTest(tf.test.TestCase):
-
- def _build_arg_scope_with_conv_hyperparams(self):
- conv_hyperparams = hyperparams_pb2.Hyperparams()
- conv_hyperparams_text_proto = """
- activation: RELU_6
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- """
- text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
- return hyperparams_builder.build(conv_hyperparams, is_training=True)
-
- def test_get_boxes_for_five_aspect_ratios_per_location(self):
- image_features = tf.random_uniform([4, 8, 8, 64], dtype=tf.float32)
- conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
- is_training=False,
- num_classes=0,
- conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
- min_depth=0,
- max_depth=32,
- num_layers_before_predictor=1,
- use_dropout=True,
- dropout_keep_prob=0.8,
- kernel_size=1,
- box_code_size=4
- )
- box_predictions = conv_box_predictor.predict(
- image_features, num_predictions_per_location=5, scope='BoxPredictor')
- box_encodings = box_predictions[box_predictor.BOX_ENCODINGS]
- objectness_predictions = box_predictions[
- box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- (box_encodings_shape,
- objectness_predictions_shape) = sess.run(
- [tf.shape(box_encodings), tf.shape(objectness_predictions)])
- self.assertAllEqual(box_encodings_shape, [4, 320, 1, 4])
- self.assertAllEqual(objectness_predictions_shape, [4, 320, 1])
-
- def test_get_boxes_for_one_aspect_ratio_per_location(self):
- image_features = tf.random_uniform([4, 8, 8, 64], dtype=tf.float32)
- conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
- is_training=False,
- num_classes=0,
- conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
- min_depth=0,
- max_depth=32,
- num_layers_before_predictor=1,
- use_dropout=True,
- dropout_keep_prob=0.8,
- kernel_size=1,
- box_code_size=4
- )
- box_predictions = conv_box_predictor.predict(
- image_features, num_predictions_per_location=1, scope='BoxPredictor')
- box_encodings = box_predictions[box_predictor.BOX_ENCODINGS]
- objectness_predictions = box_predictions[
- box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- (box_encodings_shape,
- objectness_predictions_shape) = sess.run(
- [tf.shape(box_encodings), tf.shape(objectness_predictions)])
- self.assertAllEqual(box_encodings_shape, [4, 64, 1, 4])
- self.assertAllEqual(objectness_predictions_shape, [4, 64, 1])
-
- def test_get_multi_class_predictions_for_five_aspect_ratios_per_location(
- self):
- num_classes_without_background = 6
- image_features = tf.random_uniform([4, 8, 8, 64], dtype=tf.float32)
- conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
- is_training=False,
- num_classes=num_classes_without_background,
- conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
- min_depth=0,
- max_depth=32,
- num_layers_before_predictor=1,
- use_dropout=True,
- dropout_keep_prob=0.8,
- kernel_size=1,
- box_code_size=4
- )
- box_predictions = conv_box_predictor.predict(
- image_features,
- num_predictions_per_location=5,
- scope='BoxPredictor')
- box_encodings = box_predictions[box_predictor.BOX_ENCODINGS]
- class_predictions_with_background = box_predictions[
- box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- (box_encodings_shape, class_predictions_with_background_shape
- ) = sess.run([
- tf.shape(box_encodings), tf.shape(class_predictions_with_background)])
- self.assertAllEqual(box_encodings_shape, [4, 320, 1, 4])
- self.assertAllEqual(class_predictions_with_background_shape,
- [4, 320, num_classes_without_background+1])
-
- def test_get_boxes_for_five_aspect_ratios_per_location_fully_convolutional(
- self):
- image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64])
- conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
- is_training=False,
- num_classes=0,
- conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
- min_depth=0,
- max_depth=32,
- num_layers_before_predictor=1,
- use_dropout=True,
- dropout_keep_prob=0.8,
- kernel_size=1,
- box_code_size=4
- )
- box_predictions = conv_box_predictor.predict(
- image_features, num_predictions_per_location=5, scope='BoxPredictor')
- box_encodings = box_predictions[box_predictor.BOX_ENCODINGS]
- objectness_predictions = box_predictions[
- box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
- init_op = tf.global_variables_initializer()
-
- resolution = 32
- expected_num_anchors = resolution*resolution*5
- with self.test_session() as sess:
- sess.run(init_op)
- (box_encodings_shape,
- objectness_predictions_shape) = sess.run(
- [tf.shape(box_encodings), tf.shape(objectness_predictions)],
- feed_dict={image_features:
- np.random.rand(4, resolution, resolution, 64)})
- self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 1, 4])
- self.assertAllEqual(objectness_predictions_shape,
- [4, expected_num_anchors, 1])
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/core/data_decoder.py b/object_detection/core/data_decoder.py
deleted file mode 100644
index 9ae18c1f..00000000
--- a/object_detection/core/data_decoder.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Interface for data decoders.
-
-Data decoders decode the input data and return a dictionary of tensors keyed by
-the entries in core.reader.Fields.
-"""
-from abc import ABCMeta
-from abc import abstractmethod
-
-
-class DataDecoder(object):
- """Interface for data decoders."""
- __metaclass__ = ABCMeta
-
- @abstractmethod
- def decode(self, data):
- """Return a single image and associated labels.
-
- Args:
- data: a string tensor holding a serialized protocol buffer corresponding
- to data for a single image.
-
- Returns:
- tensor_dict: a dictionary containing tensors. Possible keys are defined in
- reader.Fields.
- """
- pass
diff --git a/object_detection/core/data_parser.py b/object_detection/core/data_parser.py
deleted file mode 100644
index 3dac4de2..00000000
--- a/object_detection/core/data_parser.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Interface for data parsers.
-
-Data parser parses input data and returns a dictionary of numpy arrays
-keyed by the entries in standard_fields.py. Since the parser parses records
-to numpy arrays (materialized tensors) directly, it is used to read data for
-evaluation/visualization; to parse the data during training, DataDecoder should
-be used.
-"""
-from abc import ABCMeta
-from abc import abstractmethod
-
-
-class DataToNumpyParser(object):
- __metaclass__ = ABCMeta
-
- @abstractmethod
- def parse(self, input_data):
- """Parses input and returns a numpy array or a dictionary of numpy arrays.
-
- Args:
- input_data: an input data
-
- Returns:
- A numpy array or a dictionary of numpy arrays or None, if input
- cannot be parsed.
- """
- pass
diff --git a/object_detection/core/keypoint_ops.py b/object_detection/core/keypoint_ops.py
deleted file mode 100644
index e520845f..00000000
--- a/object_detection/core/keypoint_ops.py
+++ /dev/null
@@ -1,282 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Keypoint operations.
-
-Keypoints are represented as tensors of shape [num_instances, num_keypoints, 2],
-where the last dimension holds rank 2 tensors of the form [y, x] representing
-the coordinates of the keypoint.
-"""
-import numpy as np
-import tensorflow as tf
-
-
-def scale(keypoints, y_scale, x_scale, scope=None):
- """Scales keypoint coordinates in x and y dimensions.
-
- Args:
- keypoints: a tensor of shape [num_instances, num_keypoints, 2]
- y_scale: (float) scalar tensor
- x_scale: (float) scalar tensor
- scope: name scope.
-
- Returns:
- new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
- """
- with tf.name_scope(scope, 'Scale'):
- y_scale = tf.cast(y_scale, tf.float32)
- x_scale = tf.cast(x_scale, tf.float32)
- new_keypoints = keypoints * [[[y_scale, x_scale]]]
- return new_keypoints
-
-
-def clip_to_window(keypoints, window, scope=None):
- """Clips keypoints to a window.
-
- This op clips any input keypoints to a window.
-
- Args:
- keypoints: a tensor of shape [num_instances, num_keypoints, 2]
- window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
- window to which the op should clip the keypoints.
- scope: name scope.
-
- Returns:
- new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
- """
- with tf.name_scope(scope, 'ClipToWindow'):
- y, x = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
- win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
- y = tf.maximum(tf.minimum(y, win_y_max), win_y_min)
- x = tf.maximum(tf.minimum(x, win_x_max), win_x_min)
- new_keypoints = tf.concat([y, x], 2)
- return new_keypoints
-
-
-def prune_outside_window(keypoints, window, scope=None):
- """Prunes keypoints that fall outside a given window.
-
- This function replaces keypoints that fall outside the given window with nan.
- See also clip_to_window which clips any keypoints that fall outside the given
- window.
-
- Args:
- keypoints: a tensor of shape [num_instances, num_keypoints, 2]
- window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
- window outside of which the op should prune the keypoints.
- scope: name scope.
-
- Returns:
- new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
- """
- with tf.name_scope(scope, 'PruneOutsideWindow'):
- y, x = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
- win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
-
- valid_indices = tf.logical_and(
- tf.logical_and(y >= win_y_min, y <= win_y_max),
- tf.logical_and(x >= win_x_min, x <= win_x_max))
-
- new_y = tf.where(valid_indices, y, np.nan * tf.ones_like(y))
- new_x = tf.where(valid_indices, x, np.nan * tf.ones_like(x))
- new_keypoints = tf.concat([new_y, new_x], 2)
-
- return new_keypoints
-
-
-def change_coordinate_frame(keypoints, window, scope=None):
- """Changes coordinate frame of the keypoints to be relative to window's frame.
-
- Given a window of the form [y_min, x_min, y_max, x_max], changes keypoint
- coordinates from keypoints of shape [num_instances, num_keypoints, 2]
- to be relative to this window.
-
- An example use case is data augmentation: where we are given groundtruth
- keypoints and would like to randomly crop the image to some window. In this
- case we need to change the coordinate frame of each groundtruth keypoint to be
- relative to this new window.
-
- Args:
- keypoints: a tensor of shape [num_instances, num_keypoints, 2]
- window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
- window we should change the coordinate frame to.
- scope: name scope.
-
- Returns:
- new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
- """
- with tf.name_scope(scope, 'ChangeCoordinateFrame'):
- win_height = window[2] - window[0]
- win_width = window[3] - window[1]
- new_keypoints = scale(keypoints - [window[0], window[1]], 1.0 / win_height,
- 1.0 / win_width)
- return new_keypoints
-
-
-def to_normalized_coordinates(keypoints, height, width,
- check_range=True, scope=None):
- """Converts absolute keypoint coordinates to normalized coordinates in [0, 1].
-
- Usually one uses the dynamic shape of the image or conv-layer tensor:
- keypoints = keypoint_ops.to_normalized_coordinates(keypoints,
- tf.shape(images)[1],
- tf.shape(images)[2]),
-
- This function raises an assertion failed error at graph execution time when
- the maximum coordinate is smaller than 1.01 (which means that coordinates are
- already normalized). The value 1.01 is to deal with small rounding errors.
-
- Args:
- keypoints: A tensor of shape [num_instances, num_keypoints, 2].
- height: Maximum value for y coordinate of absolute keypoint coordinates.
- width: Maximum value for x coordinate of absolute keypoint coordinates.
- check_range: If True, checks if the coordinates are normalized.
- scope: name scope.
-
- Returns:
- tensor of shape [num_instances, num_keypoints, 2] with normalized
- coordinates in [0, 1].
- """
- with tf.name_scope(scope, 'ToNormalizedCoordinates'):
- height = tf.cast(height, tf.float32)
- width = tf.cast(width, tf.float32)
-
- if check_range:
- max_val = tf.reduce_max(keypoints)
- max_assert = tf.Assert(tf.greater(max_val, 1.01),
- ['max value is lower than 1.01: ', max_val])
- with tf.control_dependencies([max_assert]):
- width = tf.identity(width)
-
- return scale(keypoints, 1.0 / height, 1.0 / width)
-
-
-def to_absolute_coordinates(keypoints, height, width,
- check_range=True, scope=None):
- """Converts normalized keypoint coordinates to absolute pixel coordinates.
-
- This function raises an assertion failed error when the maximum keypoint
- coordinate value is larger than 1.01 (in which case coordinates are already
- absolute).
-
- Args:
- keypoints: A tensor of shape [num_instances, num_keypoints, 2]
- height: Maximum value for y coordinate of absolute keypoint coordinates.
- width: Maximum value for x coordinate of absolute keypoint coordinates.
- check_range: If True, checks if the coordinates are normalized or not.
- scope: name scope.
-
- Returns:
- tensor of shape [num_instances, num_keypoints, 2] with absolute coordinates
- in terms of the image size.
-
- """
- with tf.name_scope(scope, 'ToAbsoluteCoordinates'):
- height = tf.cast(height, tf.float32)
- width = tf.cast(width, tf.float32)
-
- # Ensure range of input keypoints is correct.
- if check_range:
- max_val = tf.reduce_max(keypoints)
- max_assert = tf.Assert(tf.greater_equal(1.01, max_val),
- ['maximum keypoint coordinate value is larger '
- 'than 1.01: ', max_val])
- with tf.control_dependencies([max_assert]):
- width = tf.identity(width)
-
- return scale(keypoints, height, width)
-
-
-def flip_horizontal(keypoints, flip_point, flip_permutation, scope=None):
- """Flips the keypoints horizontally around the flip_point.
-
- This operation flips the x coordinate for each keypoint around the flip_point
- and also permutes the keypoints in a manner specified by flip_permutation.
-
- Args:
- keypoints: a tensor of shape [num_instances, num_keypoints, 2]
- flip_point: (float) scalar tensor representing the x coordinate to flip the
- keypoints around.
- flip_permutation: rank 1 int32 tensor containing the keypoint flip
- permutation. This specifies the mapping from original keypoint indices
- to the flipped keypoint indices. This is used primarily for keypoints
- that are not reflection invariant. E.g. Suppose there are 3 keypoints
- representing ['head', 'right_eye', 'left_eye'], then a logical choice for
- flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye'
- and 'right_eye' after a horizontal flip.
- scope: name scope.
-
- Returns:
- new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
- """
- with tf.name_scope(scope, 'FlipHorizontal'):
- keypoints = tf.transpose(keypoints, [1, 0, 2])
- keypoints = tf.gather(keypoints, flip_permutation)
- v, u = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
- u = flip_point * 2.0 - u
- new_keypoints = tf.concat([v, u], 2)
- new_keypoints = tf.transpose(new_keypoints, [1, 0, 2])
- return new_keypoints
-
-
-def flip_vertical(keypoints, flip_point, flip_permutation, scope=None):
- """Flips the keypoints vertically around the flip_point.
-
- This operation flips the y coordinate for each keypoint around the flip_point
- and also permutes the keypoints in a manner specified by flip_permutation.
-
- Args:
- keypoints: a tensor of shape [num_instances, num_keypoints, 2]
- flip_point: (float) scalar tensor representing the y coordinate to flip the
- keypoints around.
- flip_permutation: rank 1 int32 tensor containing the keypoint flip
- permutation. This specifies the mapping from original keypoint indices
- to the flipped keypoint indices. This is used primarily for keypoints
- that are not reflection invariant. E.g. Suppose there are 3 keypoints
- representing ['head', 'right_eye', 'left_eye'], then a logical choice for
- flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye'
- and 'right_eye' after a horizontal flip.
- scope: name scope.
-
- Returns:
- new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
- """
- with tf.name_scope(scope, 'FlipVertical'):
- keypoints = tf.transpose(keypoints, [1, 0, 2])
- keypoints = tf.gather(keypoints, flip_permutation)
- v, u = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
- v = flip_point * 2.0 - v
- new_keypoints = tf.concat([v, u], 2)
- new_keypoints = tf.transpose(new_keypoints, [1, 0, 2])
- return new_keypoints
-
-
-def rot90(keypoints, scope=None):
- """Rotates the keypoints counter-clockwise by 90 degrees.
-
- Args:
- keypoints: a tensor of shape [num_instances, num_keypoints, 2]
- scope: name scope.
-
- Returns:
- new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
- """
- with tf.name_scope(scope, 'Rot90'):
- keypoints = tf.transpose(keypoints, [1, 0, 2])
- v, u = tf.split(value=keypoints[:, :, ::-1], num_or_size_splits=2, axis=2)
- v = 1.0 - v
- new_keypoints = tf.concat([v, u], 2)
- new_keypoints = tf.transpose(new_keypoints, [1, 0, 2])
- return new_keypoints
diff --git a/object_detection/core/keypoint_ops_test.py b/object_detection/core/keypoint_ops_test.py
deleted file mode 100644
index 1c09c55a..00000000
--- a/object_detection/core/keypoint_ops_test.py
+++ /dev/null
@@ -1,200 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.core.keypoint_ops."""
-import numpy as np
-import tensorflow as tf
-
-from object_detection.core import keypoint_ops
-
-
-class KeypointOpsTest(tf.test.TestCase):
- """Tests for common keypoint operations."""
-
- def test_scale(self):
- keypoints = tf.constant([
- [[0.0, 0.0], [100.0, 200.0]],
- [[50.0, 120.0], [100.0, 140.0]]
- ])
- y_scale = tf.constant(1.0 / 100)
- x_scale = tf.constant(1.0 / 200)
-
- expected_keypoints = tf.constant([
- [[0., 0.], [1.0, 1.0]],
- [[0.5, 0.6], [1.0, 0.7]]
- ])
- output = keypoint_ops.scale(keypoints, y_scale, x_scale)
-
- with self.test_session() as sess:
- output_, expected_keypoints_ = sess.run([output, expected_keypoints])
- self.assertAllClose(output_, expected_keypoints_)
-
- def test_clip_to_window(self):
- keypoints = tf.constant([
- [[0.25, 0.5], [0.75, 0.75]],
- [[0.5, 0.0], [1.0, 1.0]]
- ])
- window = tf.constant([0.25, 0.25, 0.75, 0.75])
-
- expected_keypoints = tf.constant([
- [[0.25, 0.5], [0.75, 0.75]],
- [[0.5, 0.25], [0.75, 0.75]]
- ])
- output = keypoint_ops.clip_to_window(keypoints, window)
-
- with self.test_session() as sess:
- output_, expected_keypoints_ = sess.run([output, expected_keypoints])
- self.assertAllClose(output_, expected_keypoints_)
-
- def test_prune_outside_window(self):
- keypoints = tf.constant([
- [[0.25, 0.5], [0.75, 0.75]],
- [[0.5, 0.0], [1.0, 1.0]]
- ])
- window = tf.constant([0.25, 0.25, 0.75, 0.75])
-
- expected_keypoints = tf.constant([[[0.25, 0.5], [0.75, 0.75]],
- [[np.nan, np.nan], [np.nan, np.nan]]])
- output = keypoint_ops.prune_outside_window(keypoints, window)
-
- with self.test_session() as sess:
- output_, expected_keypoints_ = sess.run([output, expected_keypoints])
- self.assertAllClose(output_, expected_keypoints_)
-
- def test_change_coordinate_frame(self):
- keypoints = tf.constant([
- [[0.25, 0.5], [0.75, 0.75]],
- [[0.5, 0.0], [1.0, 1.0]]
- ])
- window = tf.constant([0.25, 0.25, 0.75, 0.75])
-
- expected_keypoints = tf.constant([
- [[0, 0.5], [1.0, 1.0]],
- [[0.5, -0.5], [1.5, 1.5]]
- ])
- output = keypoint_ops.change_coordinate_frame(keypoints, window)
-
- with self.test_session() as sess:
- output_, expected_keypoints_ = sess.run([output, expected_keypoints])
- self.assertAllClose(output_, expected_keypoints_)
-
- def test_to_normalized_coordinates(self):
- keypoints = tf.constant([
- [[10., 30.], [30., 45.]],
- [[20., 0.], [40., 60.]]
- ])
- output = keypoint_ops.to_normalized_coordinates(
- keypoints, 40, 60)
- expected_keypoints = tf.constant([
- [[0.25, 0.5], [0.75, 0.75]],
- [[0.5, 0.0], [1.0, 1.0]]
- ])
-
- with self.test_session() as sess:
- output_, expected_keypoints_ = sess.run([output, expected_keypoints])
- self.assertAllClose(output_, expected_keypoints_)
-
- def test_to_normalized_coordinates_already_normalized(self):
- keypoints = tf.constant([
- [[0.25, 0.5], [0.75, 0.75]],
- [[0.5, 0.0], [1.0, 1.0]]
- ])
- output = keypoint_ops.to_normalized_coordinates(
- keypoints, 40, 60)
-
- with self.test_session() as sess:
- with self.assertRaisesOpError('assertion failed'):
- sess.run(output)
-
- def test_to_absolute_coordinates(self):
- keypoints = tf.constant([
- [[0.25, 0.5], [0.75, 0.75]],
- [[0.5, 0.0], [1.0, 1.0]]
- ])
- output = keypoint_ops.to_absolute_coordinates(
- keypoints, 40, 60)
- expected_keypoints = tf.constant([
- [[10., 30.], [30., 45.]],
- [[20., 0.], [40., 60.]]
- ])
-
- with self.test_session() as sess:
- output_, expected_keypoints_ = sess.run([output, expected_keypoints])
- self.assertAllClose(output_, expected_keypoints_)
-
- def test_to_absolute_coordinates_already_absolute(self):
- keypoints = tf.constant([
- [[10., 30.], [30., 45.]],
- [[20., 0.], [40., 60.]]
- ])
- output = keypoint_ops.to_absolute_coordinates(
- keypoints, 40, 60)
-
- with self.test_session() as sess:
- with self.assertRaisesOpError('assertion failed'):
- sess.run(output)
-
- def test_flip_horizontal(self):
- keypoints = tf.constant([
- [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
- [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]]
- ])
- flip_permutation = [0, 2, 1]
-
- expected_keypoints = tf.constant([
- [[0.1, 0.9], [0.3, 0.7], [0.2, 0.8]],
- [[0.4, 0.6], [0.6, 0.4], [0.5, 0.5]],
- ])
- output = keypoint_ops.flip_horizontal(keypoints, 0.5, flip_permutation)
-
- with self.test_session() as sess:
- output_, expected_keypoints_ = sess.run([output, expected_keypoints])
- self.assertAllClose(output_, expected_keypoints_)
-
- def test_flip_vertical(self):
- keypoints = tf.constant([
- [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
- [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]]
- ])
- flip_permutation = [0, 2, 1]
-
- expected_keypoints = tf.constant([
- [[0.9, 0.1], [0.7, 0.3], [0.8, 0.2]],
- [[0.6, 0.4], [0.4, 0.6], [0.5, 0.5]],
- ])
- output = keypoint_ops.flip_vertical(keypoints, 0.5, flip_permutation)
-
- with self.test_session() as sess:
- output_, expected_keypoints_ = sess.run([output, expected_keypoints])
- self.assertAllClose(output_, expected_keypoints_)
-
- def test_rot90(self):
- keypoints = tf.constant([
- [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
- [[0.4, 0.6], [0.5, 0.6], [0.6, 0.7]]
- ])
- expected_keypoints = tf.constant([
- [[0.9, 0.1], [0.8, 0.2], [0.7, 0.3]],
- [[0.4, 0.4], [0.4, 0.5], [0.3, 0.6]],
- ])
- output = keypoint_ops.rot90(keypoints)
-
- with self.test_session() as sess:
- output_, expected_keypoints_ = sess.run([output, expected_keypoints])
- self.assertAllClose(output_, expected_keypoints_)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/core/losses.py b/object_detection/core/losses.py
deleted file mode 100644
index b8478c15..00000000
--- a/object_detection/core/losses.py
+++ /dev/null
@@ -1,621 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Classification and regression loss functions for object detection.
-
-Localization losses:
- * WeightedL2LocalizationLoss
- * WeightedSmoothL1LocalizationLoss
- * WeightedIOULocalizationLoss
-
-Classification losses:
- * WeightedSigmoidClassificationLoss
- * WeightedSoftmaxClassificationLoss
- * BootstrappedSigmoidClassificationLoss
-"""
-from abc import ABCMeta
-from abc import abstractmethod
-
-import tensorflow as tf
-
-from object_detection.core import box_list
-from object_detection.core import box_list_ops
-from object_detection.utils import ops
-
-slim = tf.contrib.slim
-
-
-class Loss(object):
- """Abstract base class for loss functions."""
- __metaclass__ = ABCMeta
-
- def __call__(self,
- prediction_tensor,
- target_tensor,
- ignore_nan_targets=False,
- scope=None,
- **params):
- """Call the loss function.
-
- Args:
- prediction_tensor: a tensor representing predicted quantities.
- target_tensor: a tensor representing regression or classification targets.
- ignore_nan_targets: whether to ignore nan targets in the loss computation.
- E.g. can be used if the target tensor is missing groundtruth data that
- shouldn't be factored into the loss.
- scope: Op scope name. Defaults to 'Loss' if None.
- **params: Additional keyword arguments for specific implementations of
- the Loss.
-
- Returns:
- loss: a tensor representing the value of the loss function.
- """
- with tf.name_scope(scope, 'Loss',
- [prediction_tensor, target_tensor, params]) as scope:
- if ignore_nan_targets:
- target_tensor = tf.where(tf.is_nan(target_tensor),
- prediction_tensor,
- target_tensor)
- return self._compute_loss(prediction_tensor, target_tensor, **params)
-
- @abstractmethod
- def _compute_loss(self, prediction_tensor, target_tensor, **params):
- """Method to be overridden by implementations.
-
- Args:
- prediction_tensor: a tensor representing predicted quantities
- target_tensor: a tensor representing regression or classification targets
- **params: Additional keyword arguments for specific implementations of
- the Loss.
-
- Returns:
- loss: a tensor representing the value of the loss function
- """
- pass
-
-
-class WeightedL2LocalizationLoss(Loss):
- """L2 localization loss function with anchorwise output support.
-
- Loss[b,a] = .5 * ||weights[b,a] * (prediction[b,a,:] - target[b,a,:])||^2
- """
-
- def __init__(self, anchorwise_output=False):
- """Constructor.
-
- Args:
- anchorwise_output: Outputs loss per anchor. (default False)
-
- """
- self._anchorwise_output = anchorwise_output
-
- def _compute_loss(self, prediction_tensor, target_tensor, weights):
- """Compute loss function.
-
- Args:
- prediction_tensor: A float tensor of shape [batch_size, num_anchors,
- code_size] representing the (encoded) predicted locations of objects.
- target_tensor: A float tensor of shape [batch_size, num_anchors,
- code_size] representing the regression targets
- weights: a float tensor of shape [batch_size, num_anchors]
-
- Returns:
- loss: a (scalar) tensor representing the value of the loss function
- or a float tensor of shape [batch_size, num_anchors]
- """
- weighted_diff = (prediction_tensor - target_tensor) * tf.expand_dims(
- weights, 2)
- square_diff = 0.5 * tf.square(weighted_diff)
- if self._anchorwise_output:
- return tf.reduce_sum(square_diff, 2)
- return tf.reduce_sum(square_diff)
-
-
-class WeightedSmoothL1LocalizationLoss(Loss):
- """Smooth L1 localization loss function.
-
- The smooth L1_loss is defined elementwise as .5 x^2 if |x|<1 and |x|-.5
- otherwise, where x is the difference between predictions and target.
-
- See also Equation (3) in the Fast R-CNN paper by Ross Girshick (ICCV 2015)
- """
-
- def __init__(self, anchorwise_output=False):
- """Constructor.
-
- Args:
- anchorwise_output: Outputs loss per anchor. (default False)
-
- """
- self._anchorwise_output = anchorwise_output
-
- def _compute_loss(self, prediction_tensor, target_tensor, weights):
- """Compute loss function.
-
- Args:
- prediction_tensor: A float tensor of shape [batch_size, num_anchors,
- code_size] representing the (encoded) predicted locations of objects.
- target_tensor: A float tensor of shape [batch_size, num_anchors,
- code_size] representing the regression targets
- weights: a float tensor of shape [batch_size, num_anchors]
-
- Returns:
- loss: a (scalar) tensor representing the value of the loss function
- """
- diff = prediction_tensor - target_tensor
- abs_diff = tf.abs(diff)
- abs_diff_lt_1 = tf.less(abs_diff, 1)
- anchorwise_smooth_l1norm = tf.reduce_sum(
- tf.where(abs_diff_lt_1, 0.5 * tf.square(abs_diff), abs_diff - 0.5),
- 2) * weights
- if self._anchorwise_output:
- return anchorwise_smooth_l1norm
- return tf.reduce_sum(anchorwise_smooth_l1norm)
-
-
-class WeightedIOULocalizationLoss(Loss):
- """IOU localization loss function.
-
- Sums the IOU for corresponding pairs of predicted/groundtruth boxes
- and for each pair assign a loss of 1 - IOU. We then compute a weighted
- sum over all pairs which is returned as the total loss.
- """
-
- def _compute_loss(self, prediction_tensor, target_tensor, weights):
- """Compute loss function.
-
- Args:
- prediction_tensor: A float tensor of shape [batch_size, num_anchors, 4]
- representing the decoded predicted boxes
- target_tensor: A float tensor of shape [batch_size, num_anchors, 4]
- representing the decoded target boxes
- weights: a float tensor of shape [batch_size, num_anchors]
-
- Returns:
- loss: a (scalar) tensor representing the value of the loss function
- """
- predicted_boxes = box_list.BoxList(tf.reshape(prediction_tensor, [-1, 4]))
- target_boxes = box_list.BoxList(tf.reshape(target_tensor, [-1, 4]))
- per_anchor_iou_loss = 1.0 - box_list_ops.matched_iou(predicted_boxes,
- target_boxes)
- return tf.reduce_sum(tf.reshape(weights, [-1]) * per_anchor_iou_loss)
-
-
-class WeightedSigmoidClassificationLoss(Loss):
- """Sigmoid cross entropy classification loss function."""
-
- def __init__(self, anchorwise_output=False):
- """Constructor.
-
- Args:
- anchorwise_output: Outputs loss per anchor. (default False)
-
- """
- self._anchorwise_output = anchorwise_output
-
- def _compute_loss(self,
- prediction_tensor,
- target_tensor,
- weights,
- class_indices=None):
- """Compute loss function.
-
- Args:
- prediction_tensor: A float tensor of shape [batch_size, num_anchors,
- num_classes] representing the predicted logits for each class
- target_tensor: A float tensor of shape [batch_size, num_anchors,
- num_classes] representing one-hot encoded classification targets
- weights: a float tensor of shape [batch_size, num_anchors]
- class_indices: (Optional) A 1-D integer tensor of class indices.
- If provided, computes loss only for the specified class indices.
-
- Returns:
- loss: a (scalar) tensor representing the value of the loss function
- or a float tensor of shape [batch_size, num_anchors]
- """
- weights = tf.expand_dims(weights, 2)
- if class_indices is not None:
- weights *= tf.reshape(
- ops.indices_to_dense_vector(class_indices,
- tf.shape(prediction_tensor)[2]),
- [1, 1, -1])
- per_entry_cross_ent = (tf.nn.sigmoid_cross_entropy_with_logits(
- labels=target_tensor, logits=prediction_tensor))
- if self._anchorwise_output:
- return tf.reduce_sum(per_entry_cross_ent * weights, 2)
- return tf.reduce_sum(per_entry_cross_ent * weights)
-
-
-class SigmoidFocalClassificationLoss(Loss):
- """Sigmoid focal cross entropy loss.
-
- Focal loss down-weights well classified examples and focusses on the hard
- examples. See https://arxiv.org/pdf/1708.02002.pdf for the loss definition.
- """
-
- def __init__(self, anchorwise_output=False, gamma=2.0, alpha=0.25):
- """Constructor.
-
- Args:
- anchorwise_output: Outputs loss per anchor. (default False)
- gamma: exponent of the modulating factor (1 - p_t) ^ gamma.
- alpha: optional alpha weighting factor to balance positives vs negatives.
- """
- self._anchorwise_output = anchorwise_output
- self._alpha = alpha
- self._gamma = gamma
-
- def _compute_loss(self,
- prediction_tensor,
- target_tensor,
- weights,
- class_indices=None):
- """Compute loss function.
-
- Args:
- prediction_tensor: A float tensor of shape [batch_size, num_anchors,
- num_classes] representing the predicted logits for each class
- target_tensor: A float tensor of shape [batch_size, num_anchors,
- num_classes] representing one-hot encoded classification targets
- weights: a float tensor of shape [batch_size, num_anchors]
- class_indices: (Optional) A 1-D integer tensor of class indices.
- If provided, computes loss only for the specified class indices.
-
- Returns:
- loss: a (scalar) tensor representing the value of the loss function
- or a float tensor of shape [batch_size, num_anchors]
- """
- weights = tf.expand_dims(weights, 2)
- if class_indices is not None:
- weights *= tf.reshape(
- ops.indices_to_dense_vector(class_indices,
- tf.shape(prediction_tensor)[2]),
- [1, 1, -1])
- per_entry_cross_ent = (tf.nn.sigmoid_cross_entropy_with_logits(
- labels=target_tensor, logits=prediction_tensor))
- prediction_probabilities = tf.sigmoid(prediction_tensor)
- p_t = ((target_tensor * prediction_probabilities) +
- ((1 - target_tensor) * (1 - prediction_probabilities)))
- modulating_factor = 1.0
- if self._gamma:
- modulating_factor = tf.pow(1.0 - p_t, self._gamma)
- alpha_weight_factor = 1.0
- if self._alpha is not None:
- alpha_weight_factor = (target_tensor * self._alpha +
- (1 - target_tensor) * (1 - self._alpha))
- focal_cross_entropy_loss = (modulating_factor * alpha_weight_factor *
- per_entry_cross_ent)
- if self._anchorwise_output:
- return tf.reduce_sum(focal_cross_entropy_loss * weights, 2)
- return tf.reduce_sum(focal_cross_entropy_loss * weights)
-
-
-class WeightedSoftmaxClassificationLoss(Loss):
- """Softmax loss function."""
-
- def __init__(self, anchorwise_output=False, logit_scale=1.0):
- """Constructor.
-
- Args:
- anchorwise_output: Whether to output loss per anchor (default False)
- logit_scale: When this value is high, the prediction is "diffused" and
- when this value is low, the prediction is made peakier.
- (default 1.0)
-
- """
- self._anchorwise_output = anchorwise_output
- self._logit_scale = logit_scale
-
- def _compute_loss(self, prediction_tensor, target_tensor, weights):
- """Compute loss function.
-
- Args:
- prediction_tensor: A float tensor of shape [batch_size, num_anchors,
- num_classes] representing the predicted logits for each class
- target_tensor: A float tensor of shape [batch_size, num_anchors,
- num_classes] representing one-hot encoded classification targets
- weights: a float tensor of shape [batch_size, num_anchors]
-
- Returns:
- loss: a (scalar) tensor representing the value of the loss function
- """
- num_classes = prediction_tensor.get_shape().as_list()[-1]
- prediction_tensor = tf.divide(
- prediction_tensor, self._logit_scale, name='scale_logit')
- per_row_cross_ent = (tf.nn.softmax_cross_entropy_with_logits(
- labels=tf.reshape(target_tensor, [-1, num_classes]),
- logits=tf.reshape(prediction_tensor, [-1, num_classes])))
- if self._anchorwise_output:
- return tf.reshape(per_row_cross_ent, tf.shape(weights)) * weights
- return tf.reduce_sum(per_row_cross_ent * tf.reshape(weights, [-1]))
-
-
-class BootstrappedSigmoidClassificationLoss(Loss):
- """Bootstrapped sigmoid cross entropy classification loss function.
-
- This loss uses a convex combination of training labels and the current model's
- predictions as training targets in the classification loss. The idea is that
- as the model improves over time, its predictions can be trusted more and we
- can use these predictions to mitigate the damage of noisy/incorrect labels,
- because incorrect labels are likely to be eventually highly inconsistent with
- other stimuli predicted to have the same label by the model.
-
- In "soft" bootstrapping, we use all predicted class probabilities, whereas in
- "hard" bootstrapping, we use the single class favored by the model.
-
- See also Training Deep Neural Networks On Noisy Labels with Bootstrapping by
- Reed et al. (ICLR 2015).
- """
-
- def __init__(self, alpha, bootstrap_type='soft', anchorwise_output=False):
- """Constructor.
-
- Args:
- alpha: a float32 scalar tensor between 0 and 1 representing interpolation
- weight
- bootstrap_type: set to either 'hard' or 'soft' (default)
- anchorwise_output: Outputs loss per anchor. (default False)
-
- Raises:
- ValueError: if bootstrap_type is not either 'hard' or 'soft'
- """
- if bootstrap_type != 'hard' and bootstrap_type != 'soft':
- raise ValueError('Unrecognized bootstrap_type: must be one of '
- '\'hard\' or \'soft.\'')
- self._alpha = alpha
- self._bootstrap_type = bootstrap_type
- self._anchorwise_output = anchorwise_output
-
- def _compute_loss(self, prediction_tensor, target_tensor, weights):
- """Compute loss function.
-
- Args:
- prediction_tensor: A float tensor of shape [batch_size, num_anchors,
- num_classes] representing the predicted logits for each class
- target_tensor: A float tensor of shape [batch_size, num_anchors,
- num_classes] representing one-hot encoded classification targets
- weights: a float tensor of shape [batch_size, num_anchors]
-
- Returns:
- loss: a (scalar) tensor representing the value of the loss function
- or a float tensor of shape [batch_size, num_anchors]
- """
- if self._bootstrap_type == 'soft':
- bootstrap_target_tensor = self._alpha * target_tensor + (
- 1.0 - self._alpha) * tf.sigmoid(prediction_tensor)
- else:
- bootstrap_target_tensor = self._alpha * target_tensor + (
- 1.0 - self._alpha) * tf.cast(
- tf.sigmoid(prediction_tensor) > 0.5, tf.float32)
- per_entry_cross_ent = (tf.nn.sigmoid_cross_entropy_with_logits(
- labels=bootstrap_target_tensor, logits=prediction_tensor))
- if self._anchorwise_output:
- return tf.reduce_sum(per_entry_cross_ent * tf.expand_dims(weights, 2), 2)
- return tf.reduce_sum(per_entry_cross_ent * tf.expand_dims(weights, 2))
-
-
-class HardExampleMiner(object):
- """Hard example mining for regions in a list of images.
-
- Implements hard example mining to select a subset of regions to be
- back-propagated. For each image, selects the regions with highest losses,
- subject to the condition that a newly selected region cannot have
- an IOU > iou_threshold with any of the previously selected regions.
- This can be achieved by re-using a greedy non-maximum suppression algorithm.
- A constraint on the number of negatives mined per positive region can also be
- enforced.
-
- Reference papers: "Training Region-based Object Detectors with Online
- Hard Example Mining" (CVPR 2016) by Srivastava et al., and
- "SSD: Single Shot MultiBox Detector" (ECCV 2016) by Liu et al.
- """
-
- def __init__(self,
- num_hard_examples=64,
- iou_threshold=0.7,
- loss_type='both',
- cls_loss_weight=0.05,
- loc_loss_weight=0.06,
- max_negatives_per_positive=None,
- min_negatives_per_image=0):
- """Constructor.
-
- The hard example mining implemented by this class can replicate the behavior
- in the two aforementioned papers (Srivastava et al., and Liu et al).
- To replicate the A2 paper (Srivastava et al), num_hard_examples is set
- to a fixed parameter (64 by default) and iou_threshold is set to .7 for
- running non-max-suppression the predicted boxes prior to hard mining.
- In order to replicate the SSD paper (Liu et al), num_hard_examples should
- be set to None, max_negatives_per_positive should be 3 and iou_threshold
- should be 1.0 (in order to effectively turn off NMS).
-
- Args:
- num_hard_examples: maximum number of hard examples to be
- selected per image (prior to enforcing max negative to positive ratio
- constraint). If set to None, all examples obtained after NMS are
- considered.
- iou_threshold: minimum intersection over union for an example
- to be discarded during NMS.
- loss_type: use only classification losses ('cls', default),
- localization losses ('loc') or both losses ('both').
- In the last case, cls_loss_weight and loc_loss_weight are used to
- compute weighted sum of the two losses.
- cls_loss_weight: weight for classification loss.
- loc_loss_weight: weight for location loss.
- max_negatives_per_positive: maximum number of negatives to retain for
- each positive anchor. By default, num_negatives_per_positive is None,
- which means that we do not enforce a prespecified negative:positive
- ratio. Note also that num_negatives_per_positives can be a float
- (and will be converted to be a float even if it is passed in otherwise).
- min_negatives_per_image: minimum number of negative anchors to sample for
- a given image. Setting this to a positive number allows sampling
- negatives in an image without any positive anchors and thus not biased
- towards at least one detection per image.
- """
- self._num_hard_examples = num_hard_examples
- self._iou_threshold = iou_threshold
- self._loss_type = loss_type
- self._cls_loss_weight = cls_loss_weight
- self._loc_loss_weight = loc_loss_weight
- self._max_negatives_per_positive = max_negatives_per_positive
- self._min_negatives_per_image = min_negatives_per_image
- if self._max_negatives_per_positive is not None:
- self._max_negatives_per_positive = float(self._max_negatives_per_positive)
- self._num_positives_list = None
- self._num_negatives_list = None
-
- def __call__(self,
- location_losses,
- cls_losses,
- decoded_boxlist_list,
- match_list=None):
- """Computes localization and classification losses after hard mining.
-
- Args:
- location_losses: a float tensor of shape [num_images, num_anchors]
- representing anchorwise localization losses.
- cls_losses: a float tensor of shape [num_images, num_anchors]
- representing anchorwise classification losses.
- decoded_boxlist_list: a list of decoded BoxList representing location
- predictions for each image.
- match_list: an optional list of matcher.Match objects encoding the match
- between anchors and groundtruth boxes for each image of the batch,
- with rows of the Match objects corresponding to groundtruth boxes
- and columns corresponding to anchors. Match objects in match_list are
- used to reference which anchors are positive, negative or ignored. If
- self._max_negatives_per_positive exists, these are then used to enforce
- a prespecified negative to positive ratio.
-
- Returns:
- mined_location_loss: a float scalar with sum of localization losses from
- selected hard examples.
- mined_cls_loss: a float scalar with sum of classification losses from
- selected hard examples.
- Raises:
- ValueError: if location_losses, cls_losses and decoded_boxlist_list do
- not have compatible shapes (i.e., they must correspond to the same
- number of images).
- ValueError: if match_list is specified but its length does not match
- len(decoded_boxlist_list).
- """
- mined_location_losses = []
- mined_cls_losses = []
- location_losses = tf.unstack(location_losses)
- cls_losses = tf.unstack(cls_losses)
- num_images = len(decoded_boxlist_list)
- if not match_list:
- match_list = num_images * [None]
- if not len(location_losses) == len(decoded_boxlist_list) == len(cls_losses):
- raise ValueError('location_losses, cls_losses and decoded_boxlist_list '
- 'do not have compatible shapes.')
- if not isinstance(match_list, list):
- raise ValueError('match_list must be a list.')
- if len(match_list) != len(decoded_boxlist_list):
- raise ValueError('match_list must either be None or have '
- 'length=len(decoded_boxlist_list).')
- num_positives_list = []
- num_negatives_list = []
- for ind, detection_boxlist in enumerate(decoded_boxlist_list):
- box_locations = detection_boxlist.get()
- match = match_list[ind]
- image_losses = cls_losses[ind]
- if self._loss_type == 'loc':
- image_losses = location_losses[ind]
- elif self._loss_type == 'both':
- image_losses *= self._cls_loss_weight
- image_losses += location_losses[ind] * self._loc_loss_weight
- if self._num_hard_examples is not None:
- num_hard_examples = self._num_hard_examples
- else:
- num_hard_examples = detection_boxlist.num_boxes()
- selected_indices = tf.image.non_max_suppression(
- box_locations, image_losses, num_hard_examples, self._iou_threshold)
- if self._max_negatives_per_positive is not None and match:
- (selected_indices, num_positives,
- num_negatives) = self._subsample_selection_to_desired_neg_pos_ratio(
- selected_indices, match, self._max_negatives_per_positive,
- self._min_negatives_per_image)
- num_positives_list.append(num_positives)
- num_negatives_list.append(num_negatives)
- mined_location_losses.append(
- tf.reduce_sum(tf.gather(location_losses[ind], selected_indices)))
- mined_cls_losses.append(
- tf.reduce_sum(tf.gather(cls_losses[ind], selected_indices)))
- location_loss = tf.reduce_sum(tf.stack(mined_location_losses))
- cls_loss = tf.reduce_sum(tf.stack(mined_cls_losses))
- if match and self._max_negatives_per_positive:
- self._num_positives_list = num_positives_list
- self._num_negatives_list = num_negatives_list
- return (location_loss, cls_loss)
-
- def summarize(self):
- """Summarize the number of positives and negatives after mining."""
- if self._num_positives_list and self._num_negatives_list:
- avg_num_positives = tf.reduce_mean(tf.to_float(self._num_positives_list))
- avg_num_negatives = tf.reduce_mean(tf.to_float(self._num_negatives_list))
- tf.summary.scalar('HardExampleMiner/NumPositives', avg_num_positives)
- tf.summary.scalar('HardExampleMiner/NumNegatives', avg_num_negatives)
-
- def _subsample_selection_to_desired_neg_pos_ratio(self,
- indices,
- match,
- max_negatives_per_positive,
- min_negatives_per_image=0):
- """Subsample a collection of selected indices to a desired neg:pos ratio.
-
- This function takes a subset of M indices (indexing into a large anchor
- collection of N anchors where M=0,
- meaning that column i is matched with row match_results[i].
- (2) match_results[i]=-1, meaning that column i is not matched.
- (3) match_results[i]=-2, meaning that column i is ignored.
-
- Raises:
- ValueError: if match_results does not have rank 1 or is not an
- integer int32 scalar tensor
- """
- if match_results.shape.ndims != 1:
- raise ValueError('match_results should have rank 1')
- if match_results.dtype != tf.int32:
- raise ValueError('match_results should be an int32 or int64 scalar '
- 'tensor')
- self._match_results = match_results
-
- @property
- def match_results(self):
- """The accessor for match results.
-
- Returns:
- the tensor which encodes the match results.
- """
- return self._match_results
-
- def matched_column_indices(self):
- """Returns column indices that match to some row.
-
- The indices returned by this op are always sorted in increasing order.
-
- Returns:
- column_indices: int32 tensor of shape [K] with column indices.
- """
- return self._reshape_and_cast(tf.where(tf.greater(self._match_results, -1)))
-
- def matched_column_indicator(self):
- """Returns column indices that are matched.
-
- Returns:
- column_indices: int32 tensor of shape [K] with column indices.
- """
- return tf.greater_equal(self._match_results, 0)
-
- def num_matched_columns(self):
- """Returns number (int32 scalar tensor) of matched columns."""
- return tf.size(self.matched_column_indices())
-
- def unmatched_column_indices(self):
- """Returns column indices that do not match any row.
-
- The indices returned by this op are always sorted in increasing order.
-
- Returns:
- column_indices: int32 tensor of shape [K] with column indices.
- """
- return self._reshape_and_cast(tf.where(tf.equal(self._match_results, -1)))
-
- def unmatched_column_indicator(self):
- """Returns column indices that are unmatched.
-
- Returns:
- column_indices: int32 tensor of shape [K] with column indices.
- """
- return tf.equal(self._match_results, -1)
-
- def num_unmatched_columns(self):
- """Returns number (int32 scalar tensor) of unmatched columns."""
- return tf.size(self.unmatched_column_indices())
-
- def ignored_column_indices(self):
- """Returns column indices that are ignored (neither Matched nor Unmatched).
-
- The indices returned by this op are always sorted in increasing order.
-
- Returns:
- column_indices: int32 tensor of shape [K] with column indices.
- """
- return self._reshape_and_cast(tf.where(self.ignored_column_indicator()))
-
- def ignored_column_indicator(self):
- """Returns boolean column indicator where True means the colum is ignored.
-
- Returns:
- column_indicator: boolean vector which is True for all ignored column
- indices.
- """
- return tf.equal(self._match_results, -2)
-
- def num_ignored_columns(self):
- """Returns number (int32 scalar tensor) of matched columns."""
- return tf.size(self.ignored_column_indices())
-
- def unmatched_or_ignored_column_indices(self):
- """Returns column indices that are unmatched or ignored.
-
- The indices returned by this op are always sorted in increasing order.
-
- Returns:
- column_indices: int32 tensor of shape [K] with column indices.
- """
- return self._reshape_and_cast(tf.where(tf.greater(0, self._match_results)))
-
- def matched_row_indices(self):
- """Returns row indices that match some column.
-
- The indices returned by this op are ordered so as to be in correspondence
- with the output of matched_column_indicator(). For example if
- self.matched_column_indicator() is [0,2], and self.matched_row_indices() is
- [7, 3], then we know that column 0 was matched to row 7 and column 2 was
- matched to row 3.
-
- Returns:
- row_indices: int32 tensor of shape [K] with row indices.
- """
- return self._reshape_and_cast(
- tf.gather(self._match_results, self.matched_column_indices()))
-
- def _reshape_and_cast(self, t):
- return tf.cast(tf.reshape(t, [-1]), tf.int32)
-
-
-class Matcher(object):
- """Abstract base class for matcher.
- """
- __metaclass__ = ABCMeta
-
- def match(self, similarity_matrix, scope=None, **params):
- """Computes matches among row and column indices and returns the result.
-
- Computes matches among the row and column indices based on the similarity
- matrix and optional arguments.
-
- Args:
- similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
- where higher value means more similar.
- scope: Op scope name. Defaults to 'Match' if None.
- **params: Additional keyword arguments for specific implementations of
- the Matcher.
-
- Returns:
- A Match object with the results of matching.
- """
- with tf.name_scope(scope, 'Match', [similarity_matrix, params]) as scope:
- return Match(self._match(similarity_matrix, **params))
-
- @abstractmethod
- def _match(self, similarity_matrix, **params):
- """Method to be overriden by implementations.
-
- Args:
- similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
- where higher value means more similar.
- **params: Additional keyword arguments for specific implementations of
- the Matcher.
-
- Returns:
- match_results: Integer tensor of shape [M]: match_results[i]>=0 means
- that column i is matched to row match_results[i], match_results[i]=-1
- means that the column is not matched. match_results[i]=-2 means that
- the column is ignored (usually this happens when there is a very weak
- match which one neither wants as positive nor negative example).
- """
- pass
diff --git a/object_detection/core/matcher_test.py b/object_detection/core/matcher_test.py
deleted file mode 100644
index 7054015f..00000000
--- a/object_detection/core/matcher_test.py
+++ /dev/null
@@ -1,150 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.core.matcher."""
-import numpy as np
-import tensorflow as tf
-
-from object_detection.core import matcher
-
-
-class AnchorMatcherTest(tf.test.TestCase):
-
- def test_get_correct_matched_columnIndices(self):
- match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
- match = matcher.Match(match_results)
- expected_column_indices = [0, 1, 3, 5]
- matched_column_indices = match.matched_column_indices()
- self.assertEquals(matched_column_indices.dtype, tf.int32)
- with self.test_session() as sess:
- matched_column_indices = sess.run(matched_column_indices)
- self.assertAllEqual(matched_column_indices, expected_column_indices)
-
- def test_get_correct_counts(self):
- match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
- match = matcher.Match(match_results)
- exp_num_matched_columns = 4
- exp_num_unmatched_columns = 2
- exp_num_ignored_columns = 1
- num_matched_columns = match.num_matched_columns()
- num_unmatched_columns = match.num_unmatched_columns()
- num_ignored_columns = match.num_ignored_columns()
- self.assertEquals(num_matched_columns.dtype, tf.int32)
- self.assertEquals(num_unmatched_columns.dtype, tf.int32)
- self.assertEquals(num_ignored_columns.dtype, tf.int32)
- with self.test_session() as sess:
- (num_matched_columns_out, num_unmatched_columns_out,
- num_ignored_columns_out) = sess.run(
- [num_matched_columns, num_unmatched_columns, num_ignored_columns])
- self.assertAllEqual(num_matched_columns_out, exp_num_matched_columns)
- self.assertAllEqual(num_unmatched_columns_out, exp_num_unmatched_columns)
- self.assertAllEqual(num_ignored_columns_out, exp_num_ignored_columns)
-
- def testGetCorrectUnmatchedColumnIndices(self):
- match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
- match = matcher.Match(match_results)
- expected_column_indices = [2, 4]
- unmatched_column_indices = match.unmatched_column_indices()
- self.assertEquals(unmatched_column_indices.dtype, tf.int32)
- with self.test_session() as sess:
- unmatched_column_indices = sess.run(unmatched_column_indices)
- self.assertAllEqual(unmatched_column_indices, expected_column_indices)
-
- def testGetCorrectMatchedRowIndices(self):
- match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
- match = matcher.Match(match_results)
- expected_row_indices = [3, 1, 0, 5]
- matched_row_indices = match.matched_row_indices()
- self.assertEquals(matched_row_indices.dtype, tf.int32)
- with self.test_session() as sess:
- matched_row_inds = sess.run(matched_row_indices)
- self.assertAllEqual(matched_row_inds, expected_row_indices)
-
- def test_get_correct_ignored_column_indices(self):
- match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
- match = matcher.Match(match_results)
- expected_column_indices = [6]
- ignored_column_indices = match.ignored_column_indices()
- self.assertEquals(ignored_column_indices.dtype, tf.int32)
- with self.test_session() as sess:
- ignored_column_indices = sess.run(ignored_column_indices)
- self.assertAllEqual(ignored_column_indices, expected_column_indices)
-
- def test_get_correct_matched_column_indicator(self):
- match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
- match = matcher.Match(match_results)
- expected_column_indicator = [True, True, False, True, False, True, False]
- matched_column_indicator = match.matched_column_indicator()
- self.assertEquals(matched_column_indicator.dtype, tf.bool)
- with self.test_session() as sess:
- matched_column_indicator = sess.run(matched_column_indicator)
- self.assertAllEqual(matched_column_indicator, expected_column_indicator)
-
- def test_get_correct_unmatched_column_indicator(self):
- match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
- match = matcher.Match(match_results)
- expected_column_indicator = [False, False, True, False, True, False, False]
- unmatched_column_indicator = match.unmatched_column_indicator()
- self.assertEquals(unmatched_column_indicator.dtype, tf.bool)
- with self.test_session() as sess:
- unmatched_column_indicator = sess.run(unmatched_column_indicator)
- self.assertAllEqual(unmatched_column_indicator, expected_column_indicator)
-
- def test_get_correct_ignored_column_indicator(self):
- match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
- match = matcher.Match(match_results)
- expected_column_indicator = [False, False, False, False, False, False, True]
- ignored_column_indicator = match.ignored_column_indicator()
- self.assertEquals(ignored_column_indicator.dtype, tf.bool)
- with self.test_session() as sess:
- ignored_column_indicator = sess.run(ignored_column_indicator)
- self.assertAllEqual(ignored_column_indicator, expected_column_indicator)
-
- def test_get_correct_unmatched_ignored_column_indices(self):
- match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
- match = matcher.Match(match_results)
- expected_column_indices = [2, 4, 6]
- unmatched_ignored_column_indices = (match.
- unmatched_or_ignored_column_indices())
- self.assertEquals(unmatched_ignored_column_indices.dtype, tf.int32)
- with self.test_session() as sess:
- unmatched_ignored_column_indices = sess.run(
- unmatched_ignored_column_indices)
- self.assertAllEqual(unmatched_ignored_column_indices,
- expected_column_indices)
-
- def test_all_columns_accounted_for(self):
- # Note: deliberately setting to small number so not always
- # all possibilities appear (matched, unmatched, ignored)
- num_matches = 10
- match_results = tf.random_uniform(
- [num_matches], minval=-2, maxval=5, dtype=tf.int32)
- match = matcher.Match(match_results)
- matched_column_indices = match.matched_column_indices()
- unmatched_column_indices = match.unmatched_column_indices()
- ignored_column_indices = match.ignored_column_indices()
- with self.test_session() as sess:
- matched, unmatched, ignored = sess.run([
- matched_column_indices, unmatched_column_indices,
- ignored_column_indices
- ])
- all_indices = np.hstack((matched, unmatched, ignored))
- all_indices_sorted = np.sort(all_indices)
- self.assertAllEqual(all_indices_sorted,
- np.arange(num_matches, dtype=np.int32))
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/core/minibatch_sampler.py b/object_detection/core/minibatch_sampler.py
deleted file mode 100644
index dc622221..00000000
--- a/object_detection/core/minibatch_sampler.py
+++ /dev/null
@@ -1,90 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Base minibatch sampler module.
-
-The job of the minibatch_sampler is to subsample a minibatch based on some
-criterion.
-
-The main function call is:
- subsample(indicator, batch_size, **params).
-Indicator is a 1d boolean tensor where True denotes which examples can be
-sampled. It returns a boolean indicator where True denotes an example has been
-sampled..
-
-Subclasses should implement the Subsample function and can make use of the
-@staticmethod SubsampleIndicator.
-"""
-
-from abc import ABCMeta
-from abc import abstractmethod
-
-import tensorflow as tf
-
-from object_detection.utils import ops
-
-
-class MinibatchSampler(object):
- """Abstract base class for subsampling minibatches."""
- __metaclass__ = ABCMeta
-
- def __init__(self):
- """Constructs a minibatch sampler."""
- pass
-
- @abstractmethod
- def subsample(self, indicator, batch_size, **params):
- """Returns subsample of entries in indicator.
-
- Args:
- indicator: boolean tensor of shape [N] whose True entries can be sampled.
- batch_size: desired batch size.
- **params: additional keyword arguments for specific implementations of
- the MinibatchSampler.
-
- Returns:
- sample_indicator: boolean tensor of shape [N] whose True entries have been
- sampled. If sum(indicator) >= batch_size, sum(is_sampled) = batch_size
- """
- pass
-
- @staticmethod
- def subsample_indicator(indicator, num_samples):
- """Subsample indicator vector.
-
- Given a boolean indicator vector with M elements set to `True`, the function
- assigns all but `num_samples` of these previously `True` elements to
- `False`. If `num_samples` is greater than M, the original indicator vector
- is returned.
-
- Args:
- indicator: a 1-dimensional boolean tensor indicating which elements
- are allowed to be sampled and which are not.
- num_samples: int32 scalar tensor
-
- Returns:
- a boolean tensor with the same shape as input (indicator) tensor
- """
- indices = tf.where(indicator)
- indices = tf.random_shuffle(indices)
- indices = tf.reshape(indices, [-1])
-
- num_samples = tf.minimum(tf.size(indices), num_samples)
- selected_indices = tf.slice(indices, [0], tf.reshape(num_samples, [1]))
-
- selected_indicator = ops.indices_to_dense_vector(selected_indices,
- tf.shape(indicator)[0])
-
- return tf.equal(selected_indicator, 1)
diff --git a/object_detection/core/minibatch_sampler_test.py b/object_detection/core/minibatch_sampler_test.py
deleted file mode 100644
index 7420ae5d..00000000
--- a/object_detection/core/minibatch_sampler_test.py
+++ /dev/null
@@ -1,82 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for google3.research.vale.object_detection.minibatch_sampler."""
-
-import numpy as np
-import tensorflow as tf
-
-from object_detection.core import minibatch_sampler
-
-
-class MinibatchSamplerTest(tf.test.TestCase):
-
- def test_subsample_indicator_when_more_true_elements_than_num_samples(self):
- np_indicator = [True, False, True, False, True, True, False]
- indicator = tf.constant(np_indicator)
- samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
- indicator, 3)
- with self.test_session() as sess:
- samples_out = sess.run(samples)
- self.assertTrue(np.sum(samples_out), 3)
- self.assertAllEqual(samples_out,
- np.logical_and(samples_out, np_indicator))
-
- def test_subsample_when_more_true_elements_than_num_samples_no_shape(self):
- np_indicator = [True, False, True, False, True, True, False]
- indicator = tf.placeholder(tf.bool)
- feed_dict = {indicator: np_indicator}
-
- samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
- indicator, 3)
- with self.test_session() as sess:
- samples_out = sess.run(samples, feed_dict=feed_dict)
- self.assertTrue(np.sum(samples_out), 3)
- self.assertAllEqual(samples_out,
- np.logical_and(samples_out, np_indicator))
-
- def test_subsample_indicator_when_less_true_elements_than_num_samples(self):
- np_indicator = [True, False, True, False, True, True, False]
- indicator = tf.constant(np_indicator)
- samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
- indicator, 5)
- with self.test_session() as sess:
- samples_out = sess.run(samples)
- self.assertTrue(np.sum(samples_out), 4)
- self.assertAllEqual(samples_out,
- np.logical_and(samples_out, np_indicator))
-
- def test_subsample_indicator_when_num_samples_is_zero(self):
- np_indicator = [True, False, True, False, True, True, False]
- indicator = tf.constant(np_indicator)
- samples_none = minibatch_sampler.MinibatchSampler.subsample_indicator(
- indicator, 0)
- with self.test_session() as sess:
- samples_none_out = sess.run(samples_none)
- self.assertAllEqual(
- np.zeros_like(samples_none_out, dtype=bool),
- samples_none_out)
-
- def test_subsample_indicator_when_indicator_all_false(self):
- indicator_empty = tf.zeros([0], dtype=tf.bool)
- samples_empty = minibatch_sampler.MinibatchSampler.subsample_indicator(
- indicator_empty, 4)
- with self.test_session() as sess:
- samples_empty_out = sess.run(samples_empty)
- self.assertEqual(0, samples_empty_out.size)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/core/model.py b/object_detection/core/model.py
deleted file mode 100644
index 08843944..00000000
--- a/object_detection/core/model.py
+++ /dev/null
@@ -1,265 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Abstract detection model.
-
-This file defines a generic base class for detection models. Programs that are
-designed to work with arbitrary detection models should only depend on this
-class. We intend for the functions in this class to follow tensor-in/tensor-out
-design, thus all functions have tensors or lists/dictionaries holding tensors as
-inputs and outputs.
-
-Abstractly, detection models predict output tensors given input images
-which can be passed to a loss function at training time or passed to a
-postprocessing function at eval time. The computation graphs at a high level
-consequently look as follows:
-
-Training time:
-inputs (images tensor) -> preprocess -> predict -> loss -> outputs (loss tensor)
-
-Evaluation time:
-inputs (images tensor) -> preprocess -> predict -> postprocess
- -> outputs (boxes tensor, scores tensor, classes tensor, num_detections tensor)
-
-DetectionModels must thus implement four functions (1) preprocess, (2) predict,
-(3) postprocess and (4) loss. DetectionModels should make no assumptions about
-the input size or aspect ratio --- they are responsible for doing any
-resize/reshaping necessary (see docstring for the preprocess function).
-Output classes are always integers in the range [0, num_classes). Any mapping
-of these integers to semantic labels is to be handled outside of this class.
-
-By default, DetectionModels produce bounding box detections; However, we support
-a handful of auxiliary annotations associated with each bounding box, namely,
-instance masks and keypoints.
-"""
-from abc import ABCMeta
-from abc import abstractmethod
-
-from object_detection.core import standard_fields as fields
-
-
-class DetectionModel(object):
- """Abstract base class for detection models."""
- __metaclass__ = ABCMeta
-
- def __init__(self, num_classes):
- """Constructor.
-
- Args:
- num_classes: number of classes. Note that num_classes *does not* include
- background categories that might be implicitly be predicted in various
- implementations.
- """
- self._num_classes = num_classes
- self._groundtruth_lists = {}
-
- @property
- def num_classes(self):
- return self._num_classes
-
- def groundtruth_lists(self, field):
- """Access list of groundtruth tensors.
-
- Args:
- field: a string key, options are
- fields.BoxListFields.{boxes,classes,masks,keypoints}
-
- Returns:
- a list of tensors holding groundtruth information (see also
- provide_groundtruth function below), with one entry for each image in the
- batch.
- Raises:
- RuntimeError: if the field has not been provided via provide_groundtruth.
- """
- if field not in self._groundtruth_lists:
- raise RuntimeError('Groundtruth tensor %s has not been provided', field)
- return self._groundtruth_lists[field]
-
- def groundtruth_has_field(self, field):
- """Determines whether the groundtruth includes the given field.
-
- Args:
- field: a string key, options are
- fields.BoxListFields.{boxes,classes,masks,keypoints}
-
- Returns:
- True if the groundtruth includes the given field, False otherwise.
- """
- return field in self._groundtruth_lists
-
- @abstractmethod
- def preprocess(self, inputs):
- """Input preprocessing.
-
- To be overridden by implementations.
-
- This function is responsible for any scaling/shifting of input values that
- is necessary prior to running the detector on an input image.
- It is also responsible for any resizing that might be necessary as images
- are assumed to arrive in arbitrary sizes. While this function could
- conceivably be part of the predict method (below), it is often convenient
- to keep these separate --- for example, we may want to preprocess on one
- device, place onto a queue, and let another device (e.g., the GPU) handle
- prediction.
-
- A few important notes about the preprocess function:
- + We assume that this operation does not have any trainable variables nor
- does it affect the groundtruth annotations in any way (thus data
- augmentation operations such as random cropping should be performed
- externally).
- + There is no assumption that the batchsize in this function is the same as
- the batch size in the predict function. In fact, we recommend calling the
- preprocess function prior to calling any batching operations (which should
- happen outside of the model) and thus assuming that batch sizes are equal
- to 1 in the preprocess function.
- + There is also no explicit assumption that the output resolutions
- must be fixed across inputs --- this is to support "fully convolutional"
- settings in which input images can have different shapes/resolutions.
-
- Args:
- inputs: a [batch, height_in, width_in, channels] float32 tensor
- representing a batch of images with values between 0 and 255.0.
-
- Returns:
- preprocessed_inputs: a [batch, height_out, width_out, channels] float32
- tensor representing a batch of images.
- """
- pass
-
- @abstractmethod
- def predict(self, preprocessed_inputs):
- """Predict prediction tensors from inputs tensor.
-
- Outputs of this function can be passed to loss or postprocess functions.
-
- Args:
- preprocessed_inputs: a [batch, height, width, channels] float32 tensor
- representing a batch of images.
-
- Returns:
- prediction_dict: a dictionary holding prediction tensors to be
- passed to the Loss or Postprocess functions.
- """
- pass
-
- @abstractmethod
- def postprocess(self, prediction_dict, **params):
- """Convert predicted output tensors to final detections.
-
- Outputs adhere to the following conventions:
- * Classes are integers in [0, num_classes); background classes are removed
- and the first non-background class is mapped to 0. If the model produces
- class-agnostic detections, then no output is produced for classes.
- * Boxes are to be interpreted as being in [y_min, x_min, y_max, x_max]
- format and normalized relative to the image window.
- * `num_detections` is provided for settings where detections are padded to a
- fixed number of boxes.
- * We do not specifically assume any kind of probabilistic interpretation
- of the scores --- the only important thing is their relative ordering.
- Thus implementations of the postprocess function are free to output
- logits, probabilities, calibrated probabilities, or anything else.
-
- Args:
- prediction_dict: a dictionary holding prediction tensors.
- **params: Additional keyword arguments for specific implementations of
- DetectionModel.
-
- Returns:
- detections: a dictionary containing the following fields
- detection_boxes: [batch, max_detections, 4]
- detection_scores: [batch, max_detections]
- detection_classes: [batch, max_detections]
- (If a model is producing class-agnostic detections, this field may be
- missing)
- instance_masks: [batch, max_detections, image_height, image_width]
- (optional)
- keypoints: [batch, max_detections, num_keypoints, 2] (optional)
- num_detections: [batch]
- """
- pass
-
- @abstractmethod
- def loss(self, prediction_dict):
- """Compute scalar loss tensors with respect to provided groundtruth.
-
- Calling this function requires that groundtruth tensors have been
- provided via the provide_groundtruth function.
-
- Args:
- prediction_dict: a dictionary holding predicted tensors
-
- Returns:
- a dictionary mapping strings (loss names) to scalar tensors representing
- loss values.
- """
- pass
-
- def provide_groundtruth(self,
- groundtruth_boxes_list,
- groundtruth_classes_list,
- groundtruth_masks_list=None,
- groundtruth_keypoints_list=None):
- """Provide groundtruth tensors.
-
- Args:
- groundtruth_boxes_list: a list of 2-D tf.float32 tensors of shape
- [num_boxes, 4] containing coordinates of the groundtruth boxes.
- Groundtruth boxes are provided in [y_min, x_min, y_max, x_max]
- format and assumed to be normalized and clipped
- relative to the image window with y_min <= y_max and x_min <= x_max.
- groundtruth_classes_list: a list of 2-D tf.float32 one-hot (or k-hot)
- tensors of shape [num_boxes, num_classes] containing the class targets
- with the 0th index assumed to map to the first non-background class.
- groundtruth_masks_list: a list of 3-D tf.float32 tensors of
- shape [num_boxes, height_in, width_in] containing instance
- masks with values in {0, 1}. If None, no masks are provided.
- Mask resolution `height_in`x`width_in` must agree with the resolution
- of the input image tensor provided to the `preprocess` function.
- groundtruth_keypoints_list: a list of 3-D tf.float32 tensors of
- shape [num_boxes, num_keypoints, 2] containing keypoints.
- Keypoints are assumed to be provided in normalized coordinates and
- missing keypoints should be encoded as NaN.
- """
- self._groundtruth_lists[fields.BoxListFields.boxes] = groundtruth_boxes_list
- self._groundtruth_lists[
- fields.BoxListFields.classes] = groundtruth_classes_list
- if groundtruth_masks_list:
- self._groundtruth_lists[
- fields.BoxListFields.masks] = groundtruth_masks_list
- if groundtruth_keypoints_list:
- self._groundtruth_lists[
- fields.BoxListFields.keypoints] = groundtruth_keypoints_list
-
- @abstractmethod
- def restore_map(self, from_detection_checkpoint=True):
- """Returns a map of variables to load from a foreign checkpoint.
-
- Returns a map of variable names to load from a checkpoint to variables in
- the model graph. This enables the model to initialize based on weights from
- another task. For example, the feature extractor variables from a
- classification model can be used to bootstrap training of an object
- detector. When loading from an object detection model, the checkpoint model
- should have the same parameters as this detection model with exception of
- the num_classes parameter.
-
- Args:
- from_detection_checkpoint: whether to restore from a full detection
- checkpoint (with compatible variable names) or to restore from a
- classification checkpoint for initialization prior to training.
-
- Returns:
- A dict mapping variable names (to load from a checkpoint) to variables in
- the model graph.
- """
- pass
diff --git a/object_detection/core/post_processing.py b/object_detection/core/post_processing.py
deleted file mode 100644
index d34f0683..00000000
--- a/object_detection/core/post_processing.py
+++ /dev/null
@@ -1,395 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Post-processing operations on detected boxes."""
-
-import tensorflow as tf
-
-from object_detection.core import box_list
-from object_detection.core import box_list_ops
-from object_detection.core import standard_fields as fields
-
-
-def multiclass_non_max_suppression(boxes,
- scores,
- score_thresh,
- iou_thresh,
- max_size_per_class,
- max_total_size=0,
- clip_window=None,
- change_coordinate_frame=False,
- masks=None,
- additional_fields=None,
- scope=None):
- """Multi-class version of non maximum suppression.
-
- This op greedily selects a subset of detection bounding boxes, pruning
- away boxes that have high IOU (intersection over union) overlap (> thresh)
- with already selected boxes. It operates independently for each class for
- which scores are provided (via the scores field of the input box_list),
- pruning boxes with score less than a provided threshold prior to
- applying NMS.
-
- Please note that this operation is performed on *all* classes, therefore any
- background classes should be removed prior to calling this function.
-
- Args:
- boxes: A [k, q, 4] float32 tensor containing k detections. `q` can be either
- number of classes or 1 depending on whether a separate box is predicted
- per class.
- scores: A [k, num_classes] float32 tensor containing the scores for each of
- the k detections.
- score_thresh: scalar threshold for score (low scoring boxes are removed).
- iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
- with previously selected boxes are removed).
- max_size_per_class: maximum number of retained boxes per class.
- max_total_size: maximum number of boxes retained over all classes. By
- default returns all boxes retained after capping boxes per class.
- clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
- representing the window to clip and normalize boxes to before performing
- non-max suppression.
- change_coordinate_frame: Whether to normalize coordinates after clipping
- relative to clip_window (this can only be set to True if a clip_window
- is provided)
- masks: (optional) a [k, q, mask_height, mask_width] float32 tensor
- containing box masks. `q` can be either number of classes or 1 depending
- on whether a separate mask is predicted per class.
- additional_fields: (optional) If not None, a dictionary that maps keys to
- tensors whose first dimensions are all of size `k`. After non-maximum
- suppression, all tensors corresponding to the selected boxes will be
- added to resulting BoxList.
- scope: name scope.
-
- Returns:
- a BoxList holding M boxes with a rank-1 scores field representing
- corresponding scores for each box with scores sorted in decreasing order
- and a rank-1 classes field representing a class label for each box.
-
- Raises:
- ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have
- a valid scores field.
- """
- if not 0 <= iou_thresh <= 1.0:
- raise ValueError('iou_thresh must be between 0 and 1')
- if scores.shape.ndims != 2:
- raise ValueError('scores field must be of rank 2')
- if scores.shape[1].value is None:
- raise ValueError('scores must have statically defined second '
- 'dimension')
- if boxes.shape.ndims != 3:
- raise ValueError('boxes must be of rank 3.')
- if not (boxes.shape[1].value == scores.shape[1].value or
- boxes.shape[1].value == 1):
- raise ValueError('second dimension of boxes must be either 1 or equal '
- 'to the second dimension of scores')
- if boxes.shape[2].value != 4:
- raise ValueError('last dimension of boxes must be of size 4.')
- if change_coordinate_frame and clip_window is None:
- raise ValueError('if change_coordinate_frame is True, then a clip_window'
- 'must be specified.')
-
- with tf.name_scope(scope, 'MultiClassNonMaxSuppression'):
- num_boxes = tf.shape(boxes)[0]
- num_scores = tf.shape(scores)[0]
- num_classes = scores.get_shape()[1]
-
- length_assert = tf.Assert(
- tf.equal(num_boxes, num_scores),
- ['Incorrect scores field length: actual vs expected.',
- num_scores, num_boxes])
-
- selected_boxes_list = []
- per_class_boxes_list = tf.unstack(boxes, axis=1)
- if masks is not None:
- per_class_masks_list = tf.unstack(masks, axis=1)
- boxes_ids = (range(num_classes) if len(per_class_boxes_list) > 1
- else [0] * num_classes)
- for class_idx, boxes_idx in zip(range(num_classes), boxes_ids):
- per_class_boxes = per_class_boxes_list[boxes_idx]
- boxlist_and_class_scores = box_list.BoxList(per_class_boxes)
- with tf.control_dependencies([length_assert]):
- class_scores = tf.reshape(
- tf.slice(scores, [0, class_idx], tf.stack([num_scores, 1])), [-1])
- boxlist_and_class_scores.add_field(fields.BoxListFields.scores,
- class_scores)
- if masks is not None:
- per_class_masks = per_class_masks_list[boxes_idx]
- boxlist_and_class_scores.add_field(fields.BoxListFields.masks,
- per_class_masks)
- if additional_fields is not None:
- for key, tensor in additional_fields.items():
- boxlist_and_class_scores.add_field(key, tensor)
- boxlist_filtered = box_list_ops.filter_greater_than(
- boxlist_and_class_scores, score_thresh)
- if clip_window is not None:
- boxlist_filtered = box_list_ops.clip_to_window(
- boxlist_filtered, clip_window)
- if change_coordinate_frame:
- boxlist_filtered = box_list_ops.change_coordinate_frame(
- boxlist_filtered, clip_window)
- max_selection_size = tf.minimum(max_size_per_class,
- boxlist_filtered.num_boxes())
- selected_indices = tf.image.non_max_suppression(
- boxlist_filtered.get(),
- boxlist_filtered.get_field(fields.BoxListFields.scores),
- max_selection_size,
- iou_threshold=iou_thresh)
- nms_result = box_list_ops.gather(boxlist_filtered, selected_indices)
- nms_result.add_field(
- fields.BoxListFields.classes, (tf.zeros_like(
- nms_result.get_field(fields.BoxListFields.scores)) + class_idx))
- selected_boxes_list.append(nms_result)
- selected_boxes = box_list_ops.concatenate(selected_boxes_list)
- sorted_boxes = box_list_ops.sort_by_field(selected_boxes,
- fields.BoxListFields.scores)
- if max_total_size:
- max_total_size = tf.minimum(max_total_size,
- sorted_boxes.num_boxes())
- sorted_boxes = box_list_ops.gather(sorted_boxes,
- tf.range(max_total_size))
- return sorted_boxes
-
-
-def batch_multiclass_non_max_suppression(boxes,
- scores,
- score_thresh,
- iou_thresh,
- max_size_per_class,
- max_total_size=0,
- clip_window=None,
- change_coordinate_frame=False,
- num_valid_boxes=None,
- masks=None,
- additional_fields=None,
- scope=None,
- parallel_iterations=32):
- """Multi-class version of non maximum suppression that operates on a batch.
-
- This op is similar to `multiclass_non_max_suppression` but operates on a batch
- of boxes and scores. See documentation for `multiclass_non_max_suppression`
- for details.
-
- Args:
- boxes: A [batch_size, num_anchors, q, 4] float32 tensor containing
- detections. If `q` is 1 then same boxes are used for all classes
- otherwise, if `q` is equal to number of classes, class-specific boxes
- are used.
- scores: A [batch_size, num_anchors, num_classes] float32 tensor containing
- the scores for each of the `num_anchors` detections.
- score_thresh: scalar threshold for score (low scoring boxes are removed).
- iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
- with previously selected boxes are removed).
- max_size_per_class: maximum number of retained boxes per class.
- max_total_size: maximum number of boxes retained over all classes. By
- default returns all boxes retained after capping boxes per class.
- clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
- representing the window to clip boxes to before performing non-max
- suppression.
- change_coordinate_frame: Whether to normalize coordinates after clipping
- relative to clip_window (this can only be set to True if a clip_window
- is provided)
- num_valid_boxes: (optional) a Tensor of type `int32`. A 1-D tensor of shape
- [batch_size] representing the number of valid boxes to be considered
- for each image in the batch. This parameter allows for ignoring zero
- paddings.
- masks: (optional) a [batch_size, num_anchors, q, mask_height, mask_width]
- float32 tensor containing box masks. `q` can be either number of classes
- or 1 depending on whether a separate mask is predicted per class.
- additional_fields: (optional) If not None, a dictionary that maps keys to
- tensors whose dimensions are [batch_size, num_anchors, ...].
- scope: tf scope name.
- parallel_iterations: (optional) number of batch items to process in
- parallel.
-
- Returns:
- 'nmsed_boxes': A [batch_size, max_detections, 4] float32 tensor
- containing the non-max suppressed boxes.
- 'nmsed_scores': A [batch_size, max_detections] float32 tensor containing
- the scores for the boxes.
- 'nmsed_classes': A [batch_size, max_detections] float32 tensor
- containing the class for boxes.
- 'nmsed_masks': (optional) a
- [batch_size, max_detections, mask_height, mask_width] float32 tensor
- containing masks for each selected box. This is set to None if input
- `masks` is None.
- 'nmsed_additional_fields': (optional) a dictionary of
- [batch_size, max_detections, ...] float32 tensors corresponding to the
- tensors specified in the input `additional_fields`. This is not returned
- if input `additional_fields` is None.
- 'num_detections': A [batch_size] int32 tensor indicating the number of
- valid detections per batch item. Only the top num_detections[i] entries in
- nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The rest of the
- entries are zero paddings.
-
- Raises:
- ValueError: if `q` in boxes.shape is not 1 or not equal to number of
- classes as inferred from scores.shape.
- """
- q = boxes.shape[2].value
- num_classes = scores.shape[2].value
- if q != 1 and q != num_classes:
- raise ValueError('third dimension of boxes must be either 1 or equal '
- 'to the third dimension of scores')
-
- original_masks = masks
- original_additional_fields = additional_fields
- with tf.name_scope(scope, 'BatchMultiClassNonMaxSuppression'):
- boxes_shape = boxes.shape
- batch_size = boxes_shape[0].value
- num_anchors = boxes_shape[1].value
-
- if batch_size is None:
- batch_size = tf.shape(boxes)[0]
- if num_anchors is None:
- num_anchors = tf.shape(boxes)[1]
-
- # If num valid boxes aren't provided, create one and mark all boxes as
- # valid.
- if num_valid_boxes is None:
- num_valid_boxes = tf.ones([batch_size], dtype=tf.int32) * num_anchors
-
- # If masks aren't provided, create dummy masks so we can only have one copy
- # of _single_image_nms_fn and discard the dummy masks after map_fn.
- if masks is None:
- masks_shape = tf.stack([batch_size, num_anchors, 1, 0, 0])
- masks = tf.zeros(masks_shape)
-
- if additional_fields is None:
- additional_fields = {}
-
- def _single_image_nms_fn(args):
- """Runs NMS on a single image and returns padded output.
-
- Args:
- args: A list of tensors consisting of the following:
- per_image_boxes - A [num_anchors, q, 4] float32 tensor containing
- detections. If `q` is 1 then same boxes are used for all classes
- otherwise, if `q` is equal to number of classes, class-specific
- boxes are used.
- per_image_scores - A [num_anchors, num_classes] float32 tensor
- containing the scores for each of the `num_anchors` detections.
- per_image_masks - A [num_anchors, q, mask_height, mask_width] float32
- tensor containing box masks. `q` can be either number of classes
- or 1 depending on whether a separate mask is predicted per class.
- per_image_additional_fields - (optional) A variable number of float32
- tensors each with size [num_anchors, ...].
- per_image_num_valid_boxes - A tensor of type `int32`. A 1-D tensor of
- shape [batch_size] representing the number of valid boxes to be
- considered for each image in the batch. This parameter allows for
- ignoring zero paddings.
-
- Returns:
- 'nmsed_boxes': A [max_detections, 4] float32 tensor containing the
- non-max suppressed boxes.
- 'nmsed_scores': A [max_detections] float32 tensor containing the scores
- for the boxes.
- 'nmsed_classes': A [max_detections] float32 tensor containing the class
- for boxes.
- 'nmsed_masks': (optional) a [max_detections, mask_height, mask_width]
- float32 tensor containing masks for each selected box. This is set to
- None if input `masks` is None.
- 'nmsed_additional_fields': (optional) A variable number of float32
- tensors each with size [max_detections, ...] corresponding to the
- input `per_image_additional_fields`.
- 'num_detections': A [batch_size] int32 tensor indicating the number of
- valid detections per batch item. Only the top num_detections[i]
- entries in nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The
- rest of the entries are zero paddings.
- """
- per_image_boxes = args[0]
- per_image_scores = args[1]
- per_image_masks = args[2]
- per_image_additional_fields = {
- key: value
- for key, value in zip(additional_fields, args[3:-1])
- }
- per_image_num_valid_boxes = args[-1]
- per_image_boxes = tf.reshape(
- tf.slice(per_image_boxes, 3 * [0],
- tf.stack([per_image_num_valid_boxes, -1, -1])), [-1, q, 4])
- per_image_scores = tf.reshape(
- tf.slice(per_image_scores, [0, 0],
- tf.stack([per_image_num_valid_boxes, -1])),
- [-1, num_classes])
- per_image_masks = tf.reshape(
- tf.slice(per_image_masks, 4 * [0],
- tf.stack([per_image_num_valid_boxes, -1, -1, -1])),
- [-1, q, per_image_masks.shape[2].value,
- per_image_masks.shape[3].value])
- if per_image_additional_fields is not None:
- for key, tensor in per_image_additional_fields.items():
- additional_field_shape = tensor.get_shape()
- additional_field_dim = len(additional_field_shape)
- per_image_additional_fields[key] = tf.reshape(
- tf.slice(per_image_additional_fields[key],
- additional_field_dim * [0],
- tf.stack([per_image_num_valid_boxes] +
- (additional_field_dim - 1) * [-1])),
- [-1] + [dim.value for dim in additional_field_shape[1:]])
- nmsed_boxlist = multiclass_non_max_suppression(
- per_image_boxes,
- per_image_scores,
- score_thresh,
- iou_thresh,
- max_size_per_class,
- max_total_size,
- clip_window=clip_window,
- change_coordinate_frame=change_coordinate_frame,
- masks=per_image_masks,
- additional_fields=per_image_additional_fields)
- padded_boxlist = box_list_ops.pad_or_clip_box_list(nmsed_boxlist,
- max_total_size)
- num_detections = nmsed_boxlist.num_boxes()
- nmsed_boxes = padded_boxlist.get()
- nmsed_scores = padded_boxlist.get_field(fields.BoxListFields.scores)
- nmsed_classes = padded_boxlist.get_field(fields.BoxListFields.classes)
- nmsed_masks = padded_boxlist.get_field(fields.BoxListFields.masks)
- nmsed_additional_fields = [
- padded_boxlist.get_field(key) for key in per_image_additional_fields
- ]
- return ([nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks] +
- nmsed_additional_fields + [num_detections])
-
- num_additional_fields = 0
- if additional_fields is not None:
- num_additional_fields = len(additional_fields)
- num_nmsed_outputs = 4 + num_additional_fields
-
- batch_outputs = tf.map_fn(
- _single_image_nms_fn,
- elems=([boxes, scores, masks] + list(additional_fields.values()) +
- [num_valid_boxes]),
- dtype=(num_nmsed_outputs * [tf.float32] + [tf.int32]),
- parallel_iterations=parallel_iterations)
-
- batch_nmsed_boxes = batch_outputs[0]
- batch_nmsed_scores = batch_outputs[1]
- batch_nmsed_classes = batch_outputs[2]
- batch_nmsed_masks = batch_outputs[3]
- batch_nmsed_additional_fields = {
- key: value
- for key, value in zip(additional_fields, batch_outputs[4:-1])
- }
- batch_num_detections = batch_outputs[-1]
-
- if original_masks is None:
- batch_nmsed_masks = None
-
- if original_additional_fields is None:
- batch_nmsed_additional_fields = None
-
- return (batch_nmsed_boxes, batch_nmsed_scores, batch_nmsed_classes,
- batch_nmsed_masks, batch_nmsed_additional_fields,
- batch_num_detections)
diff --git a/object_detection/core/post_processing_test.py b/object_detection/core/post_processing_test.py
deleted file mode 100644
index 542f8e18..00000000
--- a/object_detection/core/post_processing_test.py
+++ /dev/null
@@ -1,959 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for tensorflow_models.object_detection.core.post_processing."""
-import numpy as np
-import tensorflow as tf
-from object_detection.core import post_processing
-from object_detection.core import standard_fields as fields
-
-
-class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
-
- def test_with_invalid_scores_size(self):
- boxes = tf.constant([[[0, 0, 1, 1]],
- [[0, 0.1, 1, 1.1]],
- [[0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11]],
- [[0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101]]], tf.float32)
- scores = tf.constant([[.9], [.75], [.6], [.95], [.5]])
- iou_thresh = .5
- score_thresh = 0.6
- max_output_size = 3
- nms = post_processing.multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh, max_output_size)
- with self.test_session() as sess:
- with self.assertRaisesWithPredicateMatch(
- tf.errors.InvalidArgumentError, 'Incorrect scores field length'):
- sess.run(nms.get())
-
- def test_multiclass_nms_select_with_shared_boxes(self):
- boxes = tf.constant([[[0, 0, 1, 1]],
- [[0, 0.1, 1, 1.1]],
- [[0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11]],
- [[0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101]],
- [[0, 1000, 1, 1002]],
- [[0, 1000, 1, 1002.1]]], tf.float32)
- scores = tf.constant([[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0],
- [.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]])
- score_thresh = 0.1
- iou_thresh = .5
- max_output_size = 4
-
- exp_nms_corners = [[0, 10, 1, 11],
- [0, 0, 1, 1],
- [0, 1000, 1, 1002],
- [0, 100, 1, 101]]
- exp_nms_scores = [.95, .9, .85, .3]
- exp_nms_classes = [0, 0, 1, 0]
-
- nms = post_processing.multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh, max_output_size)
- with self.test_session() as sess:
- nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
- [nms.get(), nms.get_field(fields.BoxListFields.scores),
- nms.get_field(fields.BoxListFields.classes)])
- self.assertAllClose(nms_corners_output, exp_nms_corners)
- self.assertAllClose(nms_scores_output, exp_nms_scores)
- self.assertAllClose(nms_classes_output, exp_nms_classes)
-
- def test_multiclass_nms_select_with_shared_boxes_given_keypoints(self):
- boxes = tf.constant([[[0, 0, 1, 1]],
- [[0, 0.1, 1, 1.1]],
- [[0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11]],
- [[0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101]],
- [[0, 1000, 1, 1002]],
- [[0, 1000, 1, 1002.1]]], tf.float32)
- scores = tf.constant([[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0],
- [.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]])
- num_keypoints = 6
- keypoints = tf.tile(
- tf.reshape(tf.range(8), [8, 1, 1]),
- [1, num_keypoints, 2])
- score_thresh = 0.1
- iou_thresh = .5
- max_output_size = 4
-
- exp_nms_corners = [[0, 10, 1, 11],
- [0, 0, 1, 1],
- [0, 1000, 1, 1002],
- [0, 100, 1, 101]]
- exp_nms_scores = [.95, .9, .85, .3]
- exp_nms_classes = [0, 0, 1, 0]
- exp_nms_keypoints_tensor = tf.tile(
- tf.reshape(tf.constant([3, 0, 6, 5], dtype=tf.float32), [4, 1, 1]),
- [1, num_keypoints, 2])
-
- nms = post_processing.multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh, max_output_size,
- additional_fields={
- fields.BoxListFields.keypoints: keypoints})
-
- with self.test_session() as sess:
- (nms_corners_output,
- nms_scores_output,
- nms_classes_output,
- nms_keypoints,
- exp_nms_keypoints) = sess.run([
- nms.get(),
- nms.get_field(fields.BoxListFields.scores),
- nms.get_field(fields.BoxListFields.classes),
- nms.get_field(fields.BoxListFields.keypoints),
- exp_nms_keypoints_tensor
- ])
- self.assertAllClose(nms_corners_output, exp_nms_corners)
- self.assertAllClose(nms_scores_output, exp_nms_scores)
- self.assertAllClose(nms_classes_output, exp_nms_classes)
- self.assertAllEqual(nms_keypoints, exp_nms_keypoints)
-
- def test_multiclass_nms_with_shared_boxes_given_keypoint_heatmaps(self):
- boxes = tf.constant([[[0, 0, 1, 1]],
- [[0, 0.1, 1, 1.1]],
- [[0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11]],
- [[0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101]],
- [[0, 1000, 1, 1002]],
- [[0, 1000, 1, 1002.1]]], tf.float32)
-
- scores = tf.constant([[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0],
- [.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]])
-
- num_boxes = tf.shape(boxes)[0]
- heatmap_height = 5
- heatmap_width = 5
- num_keypoints = 17
- keypoint_heatmaps = tf.ones(
- [num_boxes, heatmap_height, heatmap_width, num_keypoints],
- dtype=tf.float32)
-
- score_thresh = 0.1
- iou_thresh = .5
- max_output_size = 4
- exp_nms_corners = [[0, 10, 1, 11],
- [0, 0, 1, 1],
- [0, 1000, 1, 1002],
- [0, 100, 1, 101]]
-
- exp_nms_scores = [.95, .9, .85, .3]
- exp_nms_classes = [0, 0, 1, 0]
- exp_nms_keypoint_heatmaps = np.ones(
- (4, heatmap_height, heatmap_width, num_keypoints), dtype=np.float32)
-
- nms = post_processing.multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh, max_output_size,
- additional_fields={
- fields.BoxListFields.keypoint_heatmaps: keypoint_heatmaps})
-
- with self.test_session() as sess:
- (nms_corners_output,
- nms_scores_output,
- nms_classes_output,
- nms_keypoint_heatmaps) = sess.run(
- [nms.get(),
- nms.get_field(fields.BoxListFields.scores),
- nms.get_field(fields.BoxListFields.classes),
- nms.get_field(fields.BoxListFields.keypoint_heatmaps)])
-
- self.assertAllClose(nms_corners_output, exp_nms_corners)
- self.assertAllClose(nms_scores_output, exp_nms_scores)
- self.assertAllClose(nms_classes_output, exp_nms_classes)
- self.assertAllEqual(nms_keypoint_heatmaps, exp_nms_keypoint_heatmaps)
-
- def test_multiclass_nms_with_additional_fields(self):
- boxes = tf.constant([[[0, 0, 1, 1]],
- [[0, 0.1, 1, 1.1]],
- [[0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11]],
- [[0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101]],
- [[0, 1000, 1, 1002]],
- [[0, 1000, 1, 1002.1]]], tf.float32)
-
- scores = tf.constant([[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0],
- [.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]])
-
- coarse_boxes_key = 'coarse_boxes'
- coarse_boxes = tf.constant([[0.1, 0.1, 1.1, 1.1],
- [0.1, 0.2, 1.1, 1.2],
- [0.1, -0.2, 1.1, 1.0],
- [0.1, 10.1, 1.1, 11.1],
- [0.1, 10.2, 1.1, 11.2],
- [0.1, 100.1, 1.1, 101.1],
- [0.1, 1000.1, 1.1, 1002.1],
- [0.1, 1000.1, 1.1, 1002.2]], tf.float32)
-
- score_thresh = 0.1
- iou_thresh = .5
- max_output_size = 4
-
- exp_nms_corners = np.array([[0, 10, 1, 11],
- [0, 0, 1, 1],
- [0, 1000, 1, 1002],
- [0, 100, 1, 101]], dtype=np.float32)
-
- exp_nms_coarse_corners = np.array([[0.1, 10.1, 1.1, 11.1],
- [0.1, 0.1, 1.1, 1.1],
- [0.1, 1000.1, 1.1, 1002.1],
- [0.1, 100.1, 1.1, 101.1]],
- dtype=np.float32)
-
- exp_nms_scores = [.95, .9, .85, .3]
- exp_nms_classes = [0, 0, 1, 0]
-
- nms = post_processing.multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh, max_output_size,
- additional_fields={coarse_boxes_key: coarse_boxes})
-
- with self.test_session() as sess:
- (nms_corners_output,
- nms_scores_output,
- nms_classes_output,
- nms_coarse_corners) = sess.run(
- [nms.get(),
- nms.get_field(fields.BoxListFields.scores),
- nms.get_field(fields.BoxListFields.classes),
- nms.get_field(coarse_boxes_key)])
-
- self.assertAllClose(nms_corners_output, exp_nms_corners)
- self.assertAllClose(nms_scores_output, exp_nms_scores)
- self.assertAllClose(nms_classes_output, exp_nms_classes)
- self.assertAllEqual(nms_coarse_corners, exp_nms_coarse_corners)
-
- def test_multiclass_nms_select_with_shared_boxes_given_masks(self):
- boxes = tf.constant([[[0, 0, 1, 1]],
- [[0, 0.1, 1, 1.1]],
- [[0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11]],
- [[0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101]],
- [[0, 1000, 1, 1002]],
- [[0, 1000, 1, 1002.1]]], tf.float32)
- scores = tf.constant([[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0],
- [.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]])
- num_classes = 2
- mask_height = 3
- mask_width = 3
- masks = tf.tile(
- tf.reshape(tf.range(8), [8, 1, 1, 1]),
- [1, num_classes, mask_height, mask_width])
- score_thresh = 0.1
- iou_thresh = .5
- max_output_size = 4
-
- exp_nms_corners = [[0, 10, 1, 11],
- [0, 0, 1, 1],
- [0, 1000, 1, 1002],
- [0, 100, 1, 101]]
- exp_nms_scores = [.95, .9, .85, .3]
- exp_nms_classes = [0, 0, 1, 0]
- exp_nms_masks_tensor = tf.tile(
- tf.reshape(tf.constant([3, 0, 6, 5], dtype=tf.float32), [4, 1, 1]),
- [1, mask_height, mask_width])
-
- nms = post_processing.multiclass_non_max_suppression(boxes, scores,
- score_thresh,
- iou_thresh,
- max_output_size,
- masks=masks)
- with self.test_session() as sess:
- (nms_corners_output,
- nms_scores_output,
- nms_classes_output,
- nms_masks,
- exp_nms_masks) = sess.run([nms.get(),
- nms.get_field(fields.BoxListFields.scores),
- nms.get_field(fields.BoxListFields.classes),
- nms.get_field(fields.BoxListFields.masks),
- exp_nms_masks_tensor])
- self.assertAllClose(nms_corners_output, exp_nms_corners)
- self.assertAllClose(nms_scores_output, exp_nms_scores)
- self.assertAllClose(nms_classes_output, exp_nms_classes)
- self.assertAllEqual(nms_masks, exp_nms_masks)
-
- def test_multiclass_nms_select_with_clip_window(self):
- boxes = tf.constant([[[0, 0, 10, 10]],
- [[1, 1, 11, 11]]], tf.float32)
- scores = tf.constant([[.9], [.75]])
- clip_window = tf.constant([5, 4, 8, 7], tf.float32)
- score_thresh = 0.0
- iou_thresh = 0.5
- max_output_size = 100
-
- exp_nms_corners = [[5, 4, 8, 7]]
- exp_nms_scores = [.9]
- exp_nms_classes = [0]
-
- nms = post_processing.multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh, max_output_size,
- clip_window=clip_window)
- with self.test_session() as sess:
- nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
- [nms.get(), nms.get_field(fields.BoxListFields.scores),
- nms.get_field(fields.BoxListFields.classes)])
- self.assertAllClose(nms_corners_output, exp_nms_corners)
- self.assertAllClose(nms_scores_output, exp_nms_scores)
- self.assertAllClose(nms_classes_output, exp_nms_classes)
-
- def test_multiclass_nms_select_with_clip_window_change_coordinate_frame(self):
- boxes = tf.constant([[[0, 0, 10, 10]],
- [[1, 1, 11, 11]]], tf.float32)
- scores = tf.constant([[.9], [.75]])
- clip_window = tf.constant([5, 4, 8, 7], tf.float32)
- score_thresh = 0.0
- iou_thresh = 0.5
- max_output_size = 100
-
- exp_nms_corners = [[0, 0, 1, 1]]
- exp_nms_scores = [.9]
- exp_nms_classes = [0]
-
- nms = post_processing.multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh, max_output_size,
- clip_window=clip_window, change_coordinate_frame=True)
- with self.test_session() as sess:
- nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
- [nms.get(), nms.get_field(fields.BoxListFields.scores),
- nms.get_field(fields.BoxListFields.classes)])
- self.assertAllClose(nms_corners_output, exp_nms_corners)
- self.assertAllClose(nms_scores_output, exp_nms_scores)
- self.assertAllClose(nms_classes_output, exp_nms_classes)
-
- def test_multiclass_nms_select_with_per_class_cap(self):
- boxes = tf.constant([[[0, 0, 1, 1]],
- [[0, 0.1, 1, 1.1]],
- [[0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11]],
- [[0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101]],
- [[0, 1000, 1, 1002]],
- [[0, 1000, 1, 1002.1]]], tf.float32)
- scores = tf.constant([[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0],
- [.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]])
- score_thresh = 0.1
- iou_thresh = .5
- max_size_per_class = 2
-
- exp_nms_corners = [[0, 10, 1, 11],
- [0, 0, 1, 1],
- [0, 1000, 1, 1002]]
- exp_nms_scores = [.95, .9, .85]
- exp_nms_classes = [0, 0, 1]
-
- nms = post_processing.multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh, max_size_per_class)
- with self.test_session() as sess:
- nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
- [nms.get(), nms.get_field(fields.BoxListFields.scores),
- nms.get_field(fields.BoxListFields.classes)])
- self.assertAllClose(nms_corners_output, exp_nms_corners)
- self.assertAllClose(nms_scores_output, exp_nms_scores)
- self.assertAllClose(nms_classes_output, exp_nms_classes)
-
- def test_multiclass_nms_select_with_total_cap(self):
- boxes = tf.constant([[[0, 0, 1, 1]],
- [[0, 0.1, 1, 1.1]],
- [[0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11]],
- [[0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101]],
- [[0, 1000, 1, 1002]],
- [[0, 1000, 1, 1002.1]]], tf.float32)
- scores = tf.constant([[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0],
- [.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]])
- score_thresh = 0.1
- iou_thresh = .5
- max_size_per_class = 4
- max_total_size = 2
-
- exp_nms_corners = [[0, 10, 1, 11],
- [0, 0, 1, 1]]
- exp_nms_scores = [.95, .9]
- exp_nms_classes = [0, 0]
-
- nms = post_processing.multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh, max_size_per_class,
- max_total_size)
- with self.test_session() as sess:
- nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
- [nms.get(), nms.get_field(fields.BoxListFields.scores),
- nms.get_field(fields.BoxListFields.classes)])
- self.assertAllClose(nms_corners_output, exp_nms_corners)
- self.assertAllClose(nms_scores_output, exp_nms_scores)
- self.assertAllClose(nms_classes_output, exp_nms_classes)
-
- def test_multiclass_nms_threshold_then_select_with_shared_boxes(self):
- boxes = tf.constant([[[0, 0, 1, 1]],
- [[0, 0.1, 1, 1.1]],
- [[0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11]],
- [[0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101]],
- [[0, 1000, 1, 1002]],
- [[0, 1000, 1, 1002.1]]], tf.float32)
- scores = tf.constant([[.9], [.75], [.6], [.95], [.5], [.3], [.01], [.01]])
- score_thresh = 0.1
- iou_thresh = .5
- max_output_size = 3
-
- exp_nms = [[0, 10, 1, 11],
- [0, 0, 1, 1],
- [0, 100, 1, 101]]
- nms = post_processing.multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh, max_output_size)
- with self.test_session() as sess:
- nms_output = sess.run(nms.get())
- self.assertAllClose(nms_output, exp_nms)
-
- def test_multiclass_nms_select_with_separate_boxes(self):
- boxes = tf.constant([[[0, 0, 1, 1], [0, 0, 4, 5]],
- [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
- [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11], [0, 10, 1, 11]],
- [[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101], [0, 100, 1, 101]],
- [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
- [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]],
- tf.float32)
- scores = tf.constant([[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0],
- [.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]])
- score_thresh = 0.1
- iou_thresh = .5
- max_output_size = 4
-
- exp_nms_corners = [[0, 10, 1, 11],
- [0, 0, 1, 1],
- [0, 999, 2, 1004],
- [0, 100, 1, 101]]
- exp_nms_scores = [.95, .9, .85, .3]
- exp_nms_classes = [0, 0, 1, 0]
-
- nms = post_processing.multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh, max_output_size)
- with self.test_session() as sess:
- nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
- [nms.get(), nms.get_field(fields.BoxListFields.scores),
- nms.get_field(fields.BoxListFields.classes)])
- self.assertAllClose(nms_corners_output, exp_nms_corners)
- self.assertAllClose(nms_scores_output, exp_nms_scores)
- self.assertAllClose(nms_classes_output, exp_nms_classes)
-
- def test_batch_multiclass_nms_with_batch_size_1(self):
- boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
- [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
- [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11], [0, 10, 1, 11]],
- [[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101], [0, 100, 1, 101]],
- [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
- [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
- tf.float32)
- scores = tf.constant([[[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0],
- [.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]]])
- score_thresh = 0.1
- iou_thresh = .5
- max_output_size = 4
-
- exp_nms_corners = [[[0, 10, 1, 11],
- [0, 0, 1, 1],
- [0, 999, 2, 1004],
- [0, 100, 1, 101]]]
- exp_nms_scores = [[.95, .9, .85, .3]]
- exp_nms_classes = [[0, 0, 1, 0]]
-
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- nmsed_additional_fields, num_detections
- ) = post_processing.batch_multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh,
- max_size_per_class=max_output_size, max_total_size=max_output_size)
-
- self.assertIsNone(nmsed_masks)
- self.assertIsNone(nmsed_additional_fields)
-
- with self.test_session() as sess:
- (nmsed_boxes, nmsed_scores, nmsed_classes,
- num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
- num_detections])
- self.assertAllClose(nmsed_boxes, exp_nms_corners)
- self.assertAllClose(nmsed_scores, exp_nms_scores)
- self.assertAllClose(nmsed_classes, exp_nms_classes)
- self.assertEqual(num_detections, [4])
-
- def test_batch_multiclass_nms_with_batch_size_2(self):
- boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
- [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
- [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11], [0, 10, 1, 11]]],
- [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101], [0, 100, 1, 101]],
- [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
- [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
- tf.float32)
- scores = tf.constant([[[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0]],
- [[.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]]])
- score_thresh = 0.1
- iou_thresh = .5
- max_output_size = 4
-
- exp_nms_corners = np.array([[[0, 10, 1, 11],
- [0, 0, 1, 1],
- [0, 0, 0, 0],
- [0, 0, 0, 0]],
- [[0, 999, 2, 1004],
- [0, 10.1, 1, 11.1],
- [0, 100, 1, 101],
- [0, 0, 0, 0]]])
- exp_nms_scores = np.array([[.95, .9, 0, 0],
- [.85, .5, .3, 0]])
- exp_nms_classes = np.array([[0, 0, 0, 0],
- [1, 0, 0, 0]])
-
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- nmsed_additional_fields, num_detections
- ) = post_processing.batch_multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh,
- max_size_per_class=max_output_size, max_total_size=max_output_size)
-
- self.assertIsNone(nmsed_masks)
- self.assertIsNone(nmsed_additional_fields)
- # Check static shapes
- self.assertAllEqual(nmsed_boxes.shape.as_list(),
- exp_nms_corners.shape)
- self.assertAllEqual(nmsed_scores.shape.as_list(),
- exp_nms_scores.shape)
- self.assertAllEqual(nmsed_classes.shape.as_list(),
- exp_nms_classes.shape)
- self.assertEqual(num_detections.shape.as_list(), [2])
-
- with self.test_session() as sess:
- (nmsed_boxes, nmsed_scores, nmsed_classes,
- num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
- num_detections])
- self.assertAllClose(nmsed_boxes, exp_nms_corners)
- self.assertAllClose(nmsed_scores, exp_nms_scores)
- self.assertAllClose(nmsed_classes, exp_nms_classes)
- self.assertAllClose(num_detections, [2, 3])
-
- def test_batch_multiclass_nms_with_masks(self):
- boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
- [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
- [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11], [0, 10, 1, 11]]],
- [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101], [0, 100, 1, 101]],
- [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
- [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
- tf.float32)
- scores = tf.constant([[[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0]],
- [[.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]]])
- masks = tf.constant([[[[[0, 1], [2, 3]], [[1, 2], [3, 4]]],
- [[[2, 3], [4, 5]], [[3, 4], [5, 6]]],
- [[[4, 5], [6, 7]], [[5, 6], [7, 8]]],
- [[[6, 7], [8, 9]], [[7, 8], [9, 10]]]],
- [[[[8, 9], [10, 11]], [[9, 10], [11, 12]]],
- [[[10, 11], [12, 13]], [[11, 12], [13, 14]]],
- [[[12, 13], [14, 15]], [[13, 14], [15, 16]]],
- [[[14, 15], [16, 17]], [[15, 16], [17, 18]]]]],
- tf.float32)
- score_thresh = 0.1
- iou_thresh = .5
- max_output_size = 4
-
- exp_nms_corners = np.array([[[0, 10, 1, 11],
- [0, 0, 1, 1],
- [0, 0, 0, 0],
- [0, 0, 0, 0]],
- [[0, 999, 2, 1004],
- [0, 10.1, 1, 11.1],
- [0, 100, 1, 101],
- [0, 0, 0, 0]]])
- exp_nms_scores = np.array([[.95, .9, 0, 0],
- [.85, .5, .3, 0]])
- exp_nms_classes = np.array([[0, 0, 0, 0],
- [1, 0, 0, 0]])
- exp_nms_masks = np.array([[[[6, 7], [8, 9]],
- [[0, 1], [2, 3]],
- [[0, 0], [0, 0]],
- [[0, 0], [0, 0]]],
- [[[13, 14], [15, 16]],
- [[8, 9], [10, 11]],
- [[10, 11], [12, 13]],
- [[0, 0], [0, 0]]]])
-
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- nmsed_additional_fields, num_detections
- ) = post_processing.batch_multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh,
- max_size_per_class=max_output_size, max_total_size=max_output_size,
- masks=masks)
-
- self.assertIsNone(nmsed_additional_fields)
- # Check static shapes
- self.assertAllEqual(nmsed_boxes.shape.as_list(), exp_nms_corners.shape)
- self.assertAllEqual(nmsed_scores.shape.as_list(), exp_nms_scores.shape)
- self.assertAllEqual(nmsed_classes.shape.as_list(), exp_nms_classes.shape)
- self.assertAllEqual(nmsed_masks.shape.as_list(), exp_nms_masks.shape)
- self.assertEqual(num_detections.shape.as_list(), [2])
-
- with self.test_session() as sess:
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
- nmsed_masks, num_detections])
-
- self.assertAllClose(nmsed_boxes, exp_nms_corners)
- self.assertAllClose(nmsed_scores, exp_nms_scores)
- self.assertAllClose(nmsed_classes, exp_nms_classes)
- self.assertAllClose(num_detections, [2, 3])
- self.assertAllClose(nmsed_masks, exp_nms_masks)
-
- def test_batch_multiclass_nms_with_additional_fields(self):
- boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
- [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
- [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11], [0, 10, 1, 11]]],
- [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101], [0, 100, 1, 101]],
- [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
- [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
- tf.float32)
- scores = tf.constant([[[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0]],
- [[.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]]])
- additional_fields = {
- 'keypoints': tf.constant(
- [[[[6, 7], [8, 9]],
- [[0, 1], [2, 3]],
- [[0, 0], [0, 0]],
- [[0, 0], [0, 0]]],
- [[[13, 14], [15, 16]],
- [[8, 9], [10, 11]],
- [[10, 11], [12, 13]],
- [[0, 0], [0, 0]]]],
- tf.float32)
- }
- score_thresh = 0.1
- iou_thresh = .5
- max_output_size = 4
-
- exp_nms_corners = np.array([[[0, 10, 1, 11],
- [0, 0, 1, 1],
- [0, 0, 0, 0],
- [0, 0, 0, 0]],
- [[0, 999, 2, 1004],
- [0, 10.1, 1, 11.1],
- [0, 100, 1, 101],
- [0, 0, 0, 0]]])
- exp_nms_scores = np.array([[.95, .9, 0, 0],
- [.85, .5, .3, 0]])
- exp_nms_classes = np.array([[0, 0, 0, 0],
- [1, 0, 0, 0]])
- exp_nms_additional_fields = {
- 'keypoints': np.array([[[[0, 0], [0, 0]],
- [[6, 7], [8, 9]],
- [[0, 0], [0, 0]],
- [[0, 0], [0, 0]]],
- [[[10, 11], [12, 13]],
- [[13, 14], [15, 16]],
- [[8, 9], [10, 11]],
- [[0, 0], [0, 0]]]])
- }
-
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- nmsed_additional_fields, num_detections
- ) = post_processing.batch_multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh,
- max_size_per_class=max_output_size, max_total_size=max_output_size,
- additional_fields=additional_fields)
-
- self.assertIsNone(nmsed_masks)
- # Check static shapes
- self.assertAllEqual(nmsed_boxes.shape.as_list(), exp_nms_corners.shape)
- self.assertAllEqual(nmsed_scores.shape.as_list(), exp_nms_scores.shape)
- self.assertAllEqual(nmsed_classes.shape.as_list(), exp_nms_classes.shape)
- self.assertEqual(len(nmsed_additional_fields),
- len(exp_nms_additional_fields))
- for key in exp_nms_additional_fields:
- self.assertAllEqual(nmsed_additional_fields[key].shape.as_list(),
- exp_nms_additional_fields[key].shape)
- self.assertEqual(num_detections.shape.as_list(), [2])
-
- with self.test_session() as sess:
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_additional_fields,
- num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
- nmsed_additional_fields, num_detections])
-
- self.assertAllClose(nmsed_boxes, exp_nms_corners)
- self.assertAllClose(nmsed_scores, exp_nms_scores)
- self.assertAllClose(nmsed_classes, exp_nms_classes)
- for key in exp_nms_additional_fields:
- self.assertAllClose(nmsed_additional_fields[key],
- exp_nms_additional_fields[key])
- self.assertAllClose(num_detections, [2, 3])
-
- def test_batch_multiclass_nms_with_dynamic_batch_size(self):
- boxes_placeholder = tf.placeholder(tf.float32, shape=(None, None, 2, 4))
- scores_placeholder = tf.placeholder(tf.float32, shape=(None, None, 2))
- masks_placeholder = tf.placeholder(tf.float32, shape=(None, None, 2, 2, 2))
-
- boxes = np.array([[[[0, 0, 1, 1], [0, 0, 4, 5]],
- [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
- [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11], [0, 10, 1, 11]]],
- [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101], [0, 100, 1, 101]],
- [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
- [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]])
- scores = np.array([[[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0]],
- [[.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]]])
- masks = np.array([[[[[0, 1], [2, 3]], [[1, 2], [3, 4]]],
- [[[2, 3], [4, 5]], [[3, 4], [5, 6]]],
- [[[4, 5], [6, 7]], [[5, 6], [7, 8]]],
- [[[6, 7], [8, 9]], [[7, 8], [9, 10]]]],
- [[[[8, 9], [10, 11]], [[9, 10], [11, 12]]],
- [[[10, 11], [12, 13]], [[11, 12], [13, 14]]],
- [[[12, 13], [14, 15]], [[13, 14], [15, 16]]],
- [[[14, 15], [16, 17]], [[15, 16], [17, 18]]]]])
- score_thresh = 0.1
- iou_thresh = .5
- max_output_size = 4
-
- exp_nms_corners = np.array([[[0, 10, 1, 11],
- [0, 0, 1, 1],
- [0, 0, 0, 0],
- [0, 0, 0, 0]],
- [[0, 999, 2, 1004],
- [0, 10.1, 1, 11.1],
- [0, 100, 1, 101],
- [0, 0, 0, 0]]])
- exp_nms_scores = np.array([[.95, .9, 0, 0],
- [.85, .5, .3, 0]])
- exp_nms_classes = np.array([[0, 0, 0, 0],
- [1, 0, 0, 0]])
- exp_nms_masks = np.array([[[[6, 7], [8, 9]],
- [[0, 1], [2, 3]],
- [[0, 0], [0, 0]],
- [[0, 0], [0, 0]]],
- [[[13, 14], [15, 16]],
- [[8, 9], [10, 11]],
- [[10, 11], [12, 13]],
- [[0, 0], [0, 0]]]])
-
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- nmsed_additional_fields, num_detections
- ) = post_processing.batch_multiclass_non_max_suppression(
- boxes_placeholder, scores_placeholder, score_thresh, iou_thresh,
- max_size_per_class=max_output_size, max_total_size=max_output_size,
- masks=masks_placeholder)
-
- self.assertIsNone(nmsed_additional_fields)
- # Check static shapes
- self.assertAllEqual(nmsed_boxes.shape.as_list(), [None, 4, 4])
- self.assertAllEqual(nmsed_scores.shape.as_list(), [None, 4])
- self.assertAllEqual(nmsed_classes.shape.as_list(), [None, 4])
- self.assertAllEqual(nmsed_masks.shape.as_list(), [None, 4, 2, 2])
- self.assertEqual(num_detections.shape.as_list(), [None])
-
- with self.test_session() as sess:
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
- nmsed_masks, num_detections],
- feed_dict={boxes_placeholder: boxes,
- scores_placeholder: scores,
- masks_placeholder: masks})
- self.assertAllClose(nmsed_boxes, exp_nms_corners)
- self.assertAllClose(nmsed_scores, exp_nms_scores)
- self.assertAllClose(nmsed_classes, exp_nms_classes)
- self.assertAllClose(num_detections, [2, 3])
- self.assertAllClose(nmsed_masks, exp_nms_masks)
-
- def test_batch_multiclass_nms_with_masks_and_num_valid_boxes(self):
- boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
- [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
- [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11], [0, 10, 1, 11]]],
- [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101], [0, 100, 1, 101]],
- [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
- [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
- tf.float32)
- scores = tf.constant([[[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0]],
- [[.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]]])
- masks = tf.constant([[[[[0, 1], [2, 3]], [[1, 2], [3, 4]]],
- [[[2, 3], [4, 5]], [[3, 4], [5, 6]]],
- [[[4, 5], [6, 7]], [[5, 6], [7, 8]]],
- [[[6, 7], [8, 9]], [[7, 8], [9, 10]]]],
- [[[[8, 9], [10, 11]], [[9, 10], [11, 12]]],
- [[[10, 11], [12, 13]], [[11, 12], [13, 14]]],
- [[[12, 13], [14, 15]], [[13, 14], [15, 16]]],
- [[[14, 15], [16, 17]], [[15, 16], [17, 18]]]]],
- tf.float32)
- num_valid_boxes = tf.constant([1, 1], tf.int32)
- score_thresh = 0.1
- iou_thresh = .5
- max_output_size = 4
-
- exp_nms_corners = [[[0, 0, 1, 1],
- [0, 0, 0, 0],
- [0, 0, 0, 0],
- [0, 0, 0, 0]],
- [[0, 10.1, 1, 11.1],
- [0, 0, 0, 0],
- [0, 0, 0, 0],
- [0, 0, 0, 0]]]
- exp_nms_scores = [[.9, 0, 0, 0],
- [.5, 0, 0, 0]]
- exp_nms_classes = [[0, 0, 0, 0],
- [0, 0, 0, 0]]
- exp_nms_masks = [[[[0, 1], [2, 3]],
- [[0, 0], [0, 0]],
- [[0, 0], [0, 0]],
- [[0, 0], [0, 0]]],
- [[[8, 9], [10, 11]],
- [[0, 0], [0, 0]],
- [[0, 0], [0, 0]],
- [[0, 0], [0, 0]]]]
-
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- nmsed_additional_fields, num_detections
- ) = post_processing.batch_multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh,
- max_size_per_class=max_output_size, max_total_size=max_output_size,
- num_valid_boxes=num_valid_boxes, masks=masks)
-
- self.assertIsNone(nmsed_additional_fields)
-
- with self.test_session() as sess:
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
- nmsed_masks, num_detections])
- self.assertAllClose(nmsed_boxes, exp_nms_corners)
- self.assertAllClose(nmsed_scores, exp_nms_scores)
- self.assertAllClose(nmsed_classes, exp_nms_classes)
- self.assertAllClose(num_detections, [1, 1])
- self.assertAllClose(nmsed_masks, exp_nms_masks)
-
- def test_batch_multiclass_nms_with_additional_fields_and_num_valid_boxes(
- self):
- boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
- [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
- [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
- [[0, 10, 1, 11], [0, 10, 1, 11]]],
- [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
- [[0, 100, 1, 101], [0, 100, 1, 101]],
- [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
- [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
- tf.float32)
- scores = tf.constant([[[.9, 0.01], [.75, 0.05],
- [.6, 0.01], [.95, 0]],
- [[.5, 0.01], [.3, 0.01],
- [.01, .85], [.01, .5]]])
- additional_fields = {
- 'keypoints': tf.constant(
- [[[[6, 7], [8, 9]],
- [[0, 1], [2, 3]],
- [[0, 0], [0, 0]],
- [[0, 0], [0, 0]]],
- [[[13, 14], [15, 16]],
- [[8, 9], [10, 11]],
- [[10, 11], [12, 13]],
- [[0, 0], [0, 0]]]],
- tf.float32)
- }
- num_valid_boxes = tf.constant([1, 1], tf.int32)
- score_thresh = 0.1
- iou_thresh = .5
- max_output_size = 4
-
- exp_nms_corners = [[[0, 0, 1, 1],
- [0, 0, 0, 0],
- [0, 0, 0, 0],
- [0, 0, 0, 0]],
- [[0, 10.1, 1, 11.1],
- [0, 0, 0, 0],
- [0, 0, 0, 0],
- [0, 0, 0, 0]]]
- exp_nms_scores = [[.9, 0, 0, 0],
- [.5, 0, 0, 0]]
- exp_nms_classes = [[0, 0, 0, 0],
- [0, 0, 0, 0]]
- exp_nms_additional_fields = {
- 'keypoints': np.array([[[[6, 7], [8, 9]],
- [[0, 0], [0, 0]],
- [[0, 0], [0, 0]],
- [[0, 0], [0, 0]]],
- [[[13, 14], [15, 16]],
- [[0, 0], [0, 0]],
- [[0, 0], [0, 0]],
- [[0, 0], [0, 0]]]])
- }
-
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
- nmsed_additional_fields, num_detections
- ) = post_processing.batch_multiclass_non_max_suppression(
- boxes, scores, score_thresh, iou_thresh,
- max_size_per_class=max_output_size, max_total_size=max_output_size,
- num_valid_boxes=num_valid_boxes,
- additional_fields=additional_fields)
-
- self.assertIsNone(nmsed_masks)
-
- with self.test_session() as sess:
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_additional_fields,
- num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
- nmsed_additional_fields, num_detections])
-
- self.assertAllClose(nmsed_boxes, exp_nms_corners)
- self.assertAllClose(nmsed_scores, exp_nms_scores)
- self.assertAllClose(nmsed_classes, exp_nms_classes)
- for key in exp_nms_additional_fields:
- self.assertAllClose(nmsed_additional_fields[key],
- exp_nms_additional_fields[key])
- self.assertAllClose(num_detections, [1, 1])
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/core/prefetcher.py b/object_detection/core/prefetcher.py
deleted file mode 100644
index e690c599..00000000
--- a/object_detection/core/prefetcher.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Provides functions to prefetch tensors to feed into models."""
-import tensorflow as tf
-
-
-def prefetch(tensor_dict, capacity):
- """Creates a prefetch queue for tensors.
-
- Creates a FIFO queue to asynchronously enqueue tensor_dicts and returns a
- dequeue op that evaluates to a tensor_dict. This function is useful in
- prefetching preprocessed tensors so that the data is readily available for
- consumers.
-
- Example input pipeline when you don't need batching:
- ----------------------------------------------------
- key, string_tensor = slim.parallel_reader.parallel_read(...)
- tensor_dict = decoder.decode(string_tensor)
- tensor_dict = preprocessor.preprocess(tensor_dict, ...)
- prefetch_queue = prefetcher.prefetch(tensor_dict, capacity=20)
- tensor_dict = prefetch_queue.dequeue()
- outputs = Model(tensor_dict)
- ...
- ----------------------------------------------------
-
- For input pipelines with batching, refer to core/batcher.py
-
- Args:
- tensor_dict: a dictionary of tensors to prefetch.
- capacity: the size of the prefetch queue.
-
- Returns:
- a FIFO prefetcher queue
- """
- names = list(tensor_dict.keys())
- dtypes = [t.dtype for t in tensor_dict.values()]
- shapes = [t.get_shape() for t in tensor_dict.values()]
- prefetch_queue = tf.PaddingFIFOQueue(capacity, dtypes=dtypes,
- shapes=shapes,
- names=names,
- name='prefetch_queue')
- enqueue_op = prefetch_queue.enqueue(tensor_dict)
- tf.train.queue_runner.add_queue_runner(tf.train.queue_runner.QueueRunner(
- prefetch_queue, [enqueue_op]))
- tf.summary.scalar('queue/%s/fraction_of_%d_full' % (prefetch_queue.name,
- capacity),
- tf.to_float(prefetch_queue.size()) * (1. / capacity))
- return prefetch_queue
diff --git a/object_detection/core/prefetcher_test.py b/object_detection/core/prefetcher_test.py
deleted file mode 100644
index 63f557e3..00000000
--- a/object_detection/core/prefetcher_test.py
+++ /dev/null
@@ -1,101 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.core.prefetcher."""
-import tensorflow as tf
-
-from object_detection.core import prefetcher
-
-slim = tf.contrib.slim
-
-
-class PrefetcherTest(tf.test.TestCase):
-
- def test_prefetch_tensors_with_fully_defined_shapes(self):
- with self.test_session() as sess:
- batch_size = 10
- image_size = 32
- num_batches = 5
- examples = tf.Variable(tf.constant(0, dtype=tf.int64))
- counter = examples.count_up_to(num_batches)
- image = tf.random_normal([batch_size, image_size,
- image_size, 3],
- dtype=tf.float32,
- name='images')
- label = tf.random_uniform([batch_size, 1], 0, 10,
- dtype=tf.int32, name='labels')
-
- prefetch_queue = prefetcher.prefetch(tensor_dict={'counter': counter,
- 'image': image,
- 'label': label},
- capacity=100)
- tensor_dict = prefetch_queue.dequeue()
-
- self.assertAllEqual(tensor_dict['image'].get_shape().as_list(),
- [batch_size, image_size, image_size, 3])
- self.assertAllEqual(tensor_dict['label'].get_shape().as_list(),
- [batch_size, 1])
-
- tf.initialize_all_variables().run()
- with slim.queues.QueueRunners(sess):
- for _ in range(num_batches):
- results = sess.run(tensor_dict)
- self.assertEquals(results['image'].shape,
- (batch_size, image_size, image_size, 3))
- self.assertEquals(results['label'].shape, (batch_size, 1))
- with self.assertRaises(tf.errors.OutOfRangeError):
- sess.run(tensor_dict)
-
- def test_prefetch_tensors_with_partially_defined_shapes(self):
- with self.test_session() as sess:
- batch_size = 10
- image_size = 32
- num_batches = 5
- examples = tf.Variable(tf.constant(0, dtype=tf.int64))
- counter = examples.count_up_to(num_batches)
- image = tf.random_normal([batch_size,
- tf.Variable(image_size),
- tf.Variable(image_size), 3],
- dtype=tf.float32,
- name='image')
- image.set_shape([batch_size, None, None, 3])
- label = tf.random_uniform([batch_size, tf.Variable(1)], 0,
- 10, dtype=tf.int32, name='label')
- label.set_shape([batch_size, None])
-
- prefetch_queue = prefetcher.prefetch(tensor_dict={'counter': counter,
- 'image': image,
- 'label': label},
- capacity=100)
- tensor_dict = prefetch_queue.dequeue()
-
- self.assertAllEqual(tensor_dict['image'].get_shape().as_list(),
- [batch_size, None, None, 3])
- self.assertAllEqual(tensor_dict['label'].get_shape().as_list(),
- [batch_size, None])
-
- tf.initialize_all_variables().run()
- with slim.queues.QueueRunners(sess):
- for _ in range(num_batches):
- results = sess.run(tensor_dict)
- self.assertEquals(results['image'].shape,
- (batch_size, image_size, image_size, 3))
- self.assertEquals(results['label'].shape, (batch_size, 1))
- with self.assertRaises(tf.errors.OutOfRangeError):
- sess.run(tensor_dict)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/core/preprocessor.py b/object_detection/core/preprocessor.py
deleted file mode 100644
index 33435f7b..00000000
--- a/object_detection/core/preprocessor.py
+++ /dev/null
@@ -1,2562 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Preprocess images and bounding boxes for detection.
-
-We perform two sets of operations in preprocessing stage:
-(a) operations that are applied to both training and testing data,
-(b) operations that are applied only to training data for the purpose of
- data augmentation.
-
-A preprocessing function receives a set of inputs,
-e.g. an image and bounding boxes,
-performs an operation on them, and returns them.
-Some examples are: randomly cropping the image, randomly mirroring the image,
- randomly changing the brightness, contrast, hue and
- randomly jittering the bounding boxes.
-
-The preprocess function receives a tensor_dict which is a dictionary that maps
-different field names to their tensors. For example,
-tensor_dict[fields.InputDataFields.image] holds the image tensor.
-The image is a rank 4 tensor: [1, height, width, channels] with
-dtype=tf.float32. The groundtruth_boxes is a rank 2 tensor: [N, 4] where
-in each row there is a box with [ymin xmin ymax xmax].
-Boxes are in normalized coordinates meaning
-their coordinate values range in [0, 1]
-
-Important Note: In tensor_dict, images is a rank 4 tensor, but preprocessing
-functions receive a rank 3 tensor for processing the image. Thus, inside the
-preprocess function we squeeze the image to become a rank 3 tensor and then
-we pass it to the functions. At the end of the preprocess we expand the image
-back to rank 4.
-"""
-
-import sys
-import tensorflow as tf
-
-from tensorflow.python.ops import control_flow_ops
-
-from object_detection.core import box_list
-from object_detection.core import box_list_ops
-from object_detection.core import keypoint_ops
-from object_detection.core import standard_fields as fields
-
-
-def _apply_with_random_selector(x, func, num_cases):
- """Computes func(x, sel), with sel sampled from [0...num_cases-1].
-
- Args:
- x: input Tensor.
- func: Python function to apply.
- num_cases: Python int32, number of cases to sample sel from.
-
- Returns:
- The result of func(x, sel), where func receives the value of the
- selector as a python integer, but sel is sampled dynamically.
- """
- rand_sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
- # Pass the real x only to one of the func calls.
- return control_flow_ops.merge([func(
- control_flow_ops.switch(x, tf.equal(rand_sel, case))[1], case)
- for case in range(num_cases)])[0]
-
-
-def _apply_with_random_selector_tuples(x, func, num_cases):
- """Computes func(x, sel), with sel sampled from [0...num_cases-1].
-
- Args:
- x: A tuple of input tensors.
- func: Python function to apply.
- num_cases: Python int32, number of cases to sample sel from.
-
- Returns:
- The result of func(x, sel), where func receives the value of the
- selector as a python integer, but sel is sampled dynamically.
- """
- num_inputs = len(x)
- rand_sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
- # Pass the real x only to one of the func calls.
-
- tuples = [list() for t in x]
- for case in range(num_cases):
- new_x = [control_flow_ops.switch(t, tf.equal(rand_sel, case))[1] for t in x]
- output = func(tuple(new_x), case)
- for j in range(num_inputs):
- tuples[j].append(output[j])
-
- for i in range(num_inputs):
- tuples[i] = control_flow_ops.merge(tuples[i])[0]
- return tuple(tuples)
-
-
-def _random_integer(minval, maxval, seed):
- """Returns a random 0-D tensor between minval and maxval.
-
- Args:
- minval: minimum value of the random tensor.
- maxval: maximum value of the random tensor.
- seed: random seed.
-
- Returns:
- A random 0-D tensor between minval and maxval.
- """
- return tf.random_uniform(
- [], minval=minval, maxval=maxval, dtype=tf.int32, seed=seed)
-
-
-def normalize_image(image, original_minval, original_maxval, target_minval,
- target_maxval):
- """Normalizes pixel values in the image.
-
- Moves the pixel values from the current [original_minval, original_maxval]
- range to a the [target_minval, target_maxval] range.
-
- Args:
- image: rank 3 float32 tensor containing 1
- image -> [height, width, channels].
- original_minval: current image minimum value.
- original_maxval: current image maximum value.
- target_minval: target image minimum value.
- target_maxval: target image maximum value.
-
- Returns:
- image: image which is the same shape as input image.
- """
- with tf.name_scope('NormalizeImage', values=[image]):
- original_minval = float(original_minval)
- original_maxval = float(original_maxval)
- target_minval = float(target_minval)
- target_maxval = float(target_maxval)
- image = tf.to_float(image)
- image = tf.subtract(image, original_minval)
- image = tf.multiply(image, (target_maxval - target_minval) /
- (original_maxval - original_minval))
- image = tf.add(image, target_minval)
- return image
-
-
-def retain_boxes_above_threshold(boxes,
- labels,
- label_scores,
- masks=None,
- keypoints=None,
- threshold=0.0):
- """Retains boxes whose label score is above a given threshold.
-
- If the label score for a box is missing (represented by NaN), the box is
- retained. The boxes that don't pass the threshold will not appear in the
- returned tensor.
-
- Args:
- boxes: float32 tensor of shape [num_instance, 4] representing boxes
- location in normalized coordinates.
- labels: rank 1 int32 tensor of shape [num_instance] containing the object
- classes.
- label_scores: float32 tensor of shape [num_instance] representing the
- score for each box.
- masks: (optional) rank 3 float32 tensor with shape
- [num_instances, height, width] containing instance masks. The masks are of
- the same height, width as the input `image`.
- keypoints: (optional) rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]. The keypoints are in y-x normalized
- coordinates.
- threshold: scalar python float.
-
- Returns:
- retained_boxes: [num_retained_instance, 4]
- retianed_labels: [num_retained_instance]
- retained_label_scores: [num_retained_instance]
-
- If masks, or keypoints are not None, the function also returns:
-
- retained_masks: [num_retained_instance, height, width]
- retained_keypoints: [num_retained_instance, num_keypoints, 2]
- """
- with tf.name_scope('RetainBoxesAboveThreshold',
- values=[boxes, labels, label_scores]):
- indices = tf.where(
- tf.logical_or(label_scores > threshold, tf.is_nan(label_scores)))
- indices = tf.squeeze(indices, axis=1)
- retained_boxes = tf.gather(boxes, indices)
- retained_labels = tf.gather(labels, indices)
- retained_label_scores = tf.gather(label_scores, indices)
- result = [retained_boxes, retained_labels, retained_label_scores]
-
- if masks is not None:
- retained_masks = tf.gather(masks, indices)
- result.append(retained_masks)
-
- if keypoints is not None:
- retained_keypoints = tf.gather(keypoints, indices)
- result.append(retained_keypoints)
-
- return result
-
-
-def _flip_boxes_left_right(boxes):
- """Left-right flip the boxes.
-
- Args:
- boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
-
- Returns:
- Flipped boxes.
- """
- ymin, xmin, ymax, xmax = tf.split(value=boxes, num_or_size_splits=4, axis=1)
- flipped_xmin = tf.subtract(1.0, xmax)
- flipped_xmax = tf.subtract(1.0, xmin)
- flipped_boxes = tf.concat([ymin, flipped_xmin, ymax, flipped_xmax], 1)
- return flipped_boxes
-
-
-def _flip_boxes_up_down(boxes):
- """Up-down flip the boxes.
-
- Args:
- boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
-
- Returns:
- Flipped boxes.
- """
- ymin, xmin, ymax, xmax = tf.split(value=boxes, num_or_size_splits=4, axis=1)
- flipped_ymin = tf.subtract(1.0, ymax)
- flipped_ymax = tf.subtract(1.0, ymin)
- flipped_boxes = tf.concat([flipped_ymin, xmin, flipped_ymax, xmax], 1)
- return flipped_boxes
-
-
-def _rot90_boxes(boxes):
- """Rotate boxes counter-clockwise by 90 degrees.
-
- Args:
- boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
-
- Returns:
- Rotated boxes.
- """
- ymin, xmin, ymax, xmax = tf.split(value=boxes, num_or_size_splits=4, axis=1)
- rotated_ymin = tf.subtract(1.0, xmax)
- rotated_ymax = tf.subtract(1.0, xmin)
- rotated_xmin = ymin
- rotated_xmax = ymax
- rotated_boxes = tf.concat(
- [rotated_ymin, rotated_xmin, rotated_ymax, rotated_xmax], 1)
- return rotated_boxes
-
-
-def _flip_masks_left_right(masks):
- """Left-right flip masks.
-
- Args:
- masks: rank 3 float32 tensor with shape
- [num_instances, height, width] representing instance masks.
-
- Returns:
- flipped masks: rank 3 float32 tensor with shape
- [num_instances, height, width] representing instance masks.
- """
- return masks[:, :, ::-1]
-
-
-def _flip_masks_up_down(masks):
- """Up-down flip masks.
-
- Args:
- masks: rank 3 float32 tensor with shape
- [num_instances, height, width] representing instance masks.
-
- Returns:
- flipped masks: rank 3 float32 tensor with shape
- [num_instances, height, width] representing instance masks.
- """
- return masks[:, ::-1, :]
-
-
-def _rot90_masks(masks):
- """Rotate masks counter-clockwise by 90 degrees.
-
- Args:
- masks: rank 3 float32 tensor with shape
- [num_instances, height, width] representing instance masks.
-
- Returns:
- rotated masks: rank 3 float32 tensor with shape
- [num_instances, height, width] representing instance masks.
- """
- masks = tf.transpose(masks, [0, 2, 1])
- return masks[:, ::-1, :]
-
-
-def random_horizontal_flip(image,
- boxes=None,
- masks=None,
- keypoints=None,
- keypoint_flip_permutation=None,
- seed=None):
- """Randomly flips the image and detections horizontally.
-
- The probability of flipping the image is 50%.
-
- Args:
- image: rank 3 float32 tensor with shape [height, width, channels].
- boxes: (optional) rank 2 float32 tensor with shape [N, 4]
- containing the bounding boxes.
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
- masks: (optional) rank 3 float32 tensor with shape
- [num_instances, height, width] containing instance masks. The masks
- are of the same height, width as the input `image`.
- keypoints: (optional) rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]. The keypoints are in y-x
- normalized coordinates.
- keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
- permutation.
- seed: random seed
-
- Returns:
- image: image which is the same shape as input image.
-
- If boxes, masks, keypoints, and keypoint_flip_permutation are not None,
- the function also returns the following tensors.
-
- boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- masks: rank 3 float32 tensor with shape [num_instances, height, width]
- containing instance masks.
- keypoints: rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]
-
- Raises:
- ValueError: if keypoints are provided but keypoint_flip_permutation is not.
- """
-
- def _flip_image(image):
- # flip image
- image_flipped = tf.image.flip_left_right(image)
- return image_flipped
-
- if keypoints is not None and keypoint_flip_permutation is None:
- raise ValueError(
- 'keypoints are provided but keypoints_flip_permutation is not provided')
-
- with tf.name_scope('RandomHorizontalFlip', values=[image, boxes]):
- result = []
- # random variable defining whether to do flip or not
- do_a_flip_random = tf.greater(tf.random_uniform([], seed=seed), 0.5)
-
- # flip image
- image = tf.cond(do_a_flip_random, lambda: _flip_image(image), lambda: image)
- result.append(image)
-
- # flip boxes
- if boxes is not None:
- boxes = tf.cond(do_a_flip_random, lambda: _flip_boxes_left_right(boxes),
- lambda: boxes)
- result.append(boxes)
-
- # flip masks
- if masks is not None:
- masks = tf.cond(do_a_flip_random, lambda: _flip_masks_left_right(masks),
- lambda: masks)
- result.append(masks)
-
- # flip keypoints
- if keypoints is not None and keypoint_flip_permutation is not None:
- permutation = keypoint_flip_permutation
- keypoints = tf.cond(
- do_a_flip_random,
- lambda: keypoint_ops.flip_horizontal(keypoints, 0.5, permutation),
- lambda: keypoints)
- result.append(keypoints)
-
- return tuple(result)
-
-
-def random_vertical_flip(image,
- boxes=None,
- masks=None,
- keypoints=None,
- keypoint_flip_permutation=None,
- seed=None):
- """Randomly flips the image and detections vertically.
-
- The probability of flipping the image is 50%.
-
- Args:
- image: rank 3 float32 tensor with shape [height, width, channels].
- boxes: (optional) rank 2 float32 tensor with shape [N, 4]
- containing the bounding boxes.
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
- masks: (optional) rank 3 float32 tensor with shape
- [num_instances, height, width] containing instance masks. The masks
- are of the same height, width as the input `image`.
- keypoints: (optional) rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]. The keypoints are in y-x
- normalized coordinates.
- keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
- permutation.
- seed: random seed
-
- Returns:
- image: image which is the same shape as input image.
-
- If boxes, masks, keypoints, and keypoint_flip_permutation are not None,
- the function also returns the following tensors.
-
- boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- masks: rank 3 float32 tensor with shape [num_instances, height, width]
- containing instance masks.
- keypoints: rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]
-
- Raises:
- ValueError: if keypoints are provided but keypoint_flip_permutation is not.
- """
-
- def _flip_image(image):
- # flip image
- image_flipped = tf.image.flip_up_down(image)
- return image_flipped
-
- if keypoints is not None and keypoint_flip_permutation is None:
- raise ValueError(
- 'keypoints are provided but keypoints_flip_permutation is not provided')
-
- with tf.name_scope('RandomVerticalFlip', values=[image, boxes]):
- result = []
- # random variable defining whether to do flip or not
- do_a_flip_random = tf.greater(tf.random_uniform([], seed=seed), 0.5)
-
- # flip image
- image = tf.cond(do_a_flip_random, lambda: _flip_image(image), lambda: image)
- result.append(image)
-
- # flip boxes
- if boxes is not None:
- boxes = tf.cond(do_a_flip_random, lambda: _flip_boxes_up_down(boxes),
- lambda: boxes)
- result.append(boxes)
-
- # flip masks
- if masks is not None:
- masks = tf.cond(do_a_flip_random, lambda: _flip_masks_up_down(masks),
- lambda: masks)
- result.append(masks)
-
- # flip keypoints
- if keypoints is not None and keypoint_flip_permutation is not None:
- permutation = keypoint_flip_permutation
- keypoints = tf.cond(
- do_a_flip_random,
- lambda: keypoint_ops.flip_vertical(keypoints, 0.5, permutation),
- lambda: keypoints)
- result.append(keypoints)
-
- return tuple(result)
-
-
-def random_rotation90(image,
- boxes=None,
- masks=None,
- keypoints=None,
- seed=None):
- """Randomly rotates the image and detections 90 degrees counter-clockwise.
-
- The probability of rotating the image is 50%. This can be combined with
- random_horizontal_flip and random_vertical_flip to produce an output with a
- uniform distribution of the eight possible 90 degree rotation / reflection
- combinations.
-
- Args:
- image: rank 3 float32 tensor with shape [height, width, channels].
- boxes: (optional) rank 2 float32 tensor with shape [N, 4]
- containing the bounding boxes.
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
- masks: (optional) rank 3 float32 tensor with shape
- [num_instances, height, width] containing instance masks. The masks
- are of the same height, width as the input `image`.
- keypoints: (optional) rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]. The keypoints are in y-x
- normalized coordinates.
- seed: random seed
-
- Returns:
- image: image which is the same shape as input image.
-
- If boxes, masks, and keypoints, are not None,
- the function also returns the following tensors.
-
- boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- masks: rank 3 float32 tensor with shape [num_instances, height, width]
- containing instance masks.
- keypoints: rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]
- """
-
- def _rot90_image(image):
- # flip image
- image_rotated = tf.image.rot90(image)
- return image_rotated
-
- with tf.name_scope('RandomRotation90', values=[image, boxes]):
- result = []
-
- # random variable defining whether to rotate by 90 degrees or not
- do_a_rot90_random = tf.greater(tf.random_uniform([], seed=seed), 0.5)
-
- # flip image
- image = tf.cond(do_a_rot90_random, lambda: _rot90_image(image),
- lambda: image)
- result.append(image)
-
- # flip boxes
- if boxes is not None:
- boxes = tf.cond(do_a_rot90_random, lambda: _rot90_boxes(boxes),
- lambda: boxes)
- result.append(boxes)
-
- # flip masks
- if masks is not None:
- masks = tf.cond(do_a_rot90_random, lambda: _rot90_masks(masks),
- lambda: masks)
- result.append(masks)
-
- # flip keypoints
- if keypoints is not None:
- keypoints = tf.cond(
- do_a_rot90_random,
- lambda: keypoint_ops.rot90(keypoints),
- lambda: keypoints)
- result.append(keypoints)
-
- return tuple(result)
-
-
-def random_pixel_value_scale(image, minval=0.9, maxval=1.1, seed=None):
- """Scales each value in the pixels of the image.
-
- This function scales each pixel independent of the other ones.
- For each value in image tensor, draws a random number between
- minval and maxval and multiples the values with them.
-
- Args:
- image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
- with pixel values varying between [0, 1].
- minval: lower ratio of scaling pixel values.
- maxval: upper ratio of scaling pixel values.
- seed: random seed.
-
- Returns:
- image: image which is the same shape as input image.
- """
- with tf.name_scope('RandomPixelValueScale', values=[image]):
- color_coef = tf.random_uniform(
- tf.shape(image),
- minval=minval,
- maxval=maxval,
- dtype=tf.float32,
- seed=seed)
- image = tf.multiply(image, color_coef)
- image = tf.clip_by_value(image, 0.0, 1.0)
-
- return image
-
-
-def random_image_scale(image,
- masks=None,
- min_scale_ratio=0.5,
- max_scale_ratio=2.0,
- seed=None):
- """Scales the image size.
-
- Args:
- image: rank 3 float32 tensor contains 1 image -> [height, width, channels].
- masks: (optional) rank 3 float32 tensor containing masks with
- size [height, width, num_masks]. The value is set to None if there are no
- masks.
- min_scale_ratio: minimum scaling ratio.
- max_scale_ratio: maximum scaling ratio.
- seed: random seed.
-
- Returns:
- image: image which is the same rank as input image.
- masks: If masks is not none, resized masks which are the same rank as input
- masks will be returned.
- """
- with tf.name_scope('RandomImageScale', values=[image]):
- result = []
- image_shape = tf.shape(image)
- image_height = image_shape[0]
- image_width = image_shape[1]
- size_coef = tf.random_uniform([],
- minval=min_scale_ratio,
- maxval=max_scale_ratio,
- dtype=tf.float32, seed=seed)
- image_newysize = tf.to_int32(
- tf.multiply(tf.to_float(image_height), size_coef))
- image_newxsize = tf.to_int32(
- tf.multiply(tf.to_float(image_width), size_coef))
- image = tf.image.resize_images(
- image, [image_newysize, image_newxsize], align_corners=True)
- result.append(image)
- if masks:
- masks = tf.image.resize_nearest_neighbor(
- masks, [image_newysize, image_newxsize], align_corners=True)
- result.append(masks)
- return tuple(result)
-
-
-def random_rgb_to_gray(image, probability=0.1, seed=None):
- """Changes the image from RGB to Grayscale with the given probability.
-
- Args:
- image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
- with pixel values varying between [0, 1].
- probability: the probability of returning a grayscale image.
- The probability should be a number between [0, 1].
- seed: random seed.
-
- Returns:
- image: image which is the same shape as input image.
- """
- def _image_to_gray(image):
- image_gray1 = tf.image.rgb_to_grayscale(image)
- image_gray3 = tf.image.grayscale_to_rgb(image_gray1)
- return image_gray3
-
- with tf.name_scope('RandomRGBtoGray', values=[image]):
- # random variable defining whether to do flip or not
- do_gray_random = tf.random_uniform([], seed=seed)
-
- image = tf.cond(
- tf.greater(do_gray_random, probability), lambda: image,
- lambda: _image_to_gray(image))
-
- return image
-
-
-def random_adjust_brightness(image, max_delta=0.2):
- """Randomly adjusts brightness.
-
- Makes sure the output image is still between 0 and 1.
-
- Args:
- image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
- with pixel values varying between [0, 1].
- max_delta: how much to change the brightness. A value between [0, 1).
-
- Returns:
- image: image which is the same shape as input image.
- boxes: boxes which is the same shape as input boxes.
- """
- with tf.name_scope('RandomAdjustBrightness', values=[image]):
- image = tf.image.random_brightness(image, max_delta)
- image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
- return image
-
-
-def random_adjust_contrast(image, min_delta=0.8, max_delta=1.25):
- """Randomly adjusts contrast.
-
- Makes sure the output image is still between 0 and 1.
-
- Args:
- image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
- with pixel values varying between [0, 1].
- min_delta: see max_delta.
- max_delta: how much to change the contrast. Contrast will change with a
- value between min_delta and max_delta. This value will be
- multiplied to the current contrast of the image.
-
- Returns:
- image: image which is the same shape as input image.
- """
- with tf.name_scope('RandomAdjustContrast', values=[image]):
- image = tf.image.random_contrast(image, min_delta, max_delta)
- image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
- return image
-
-
-def random_adjust_hue(image, max_delta=0.02):
- """Randomly adjusts hue.
-
- Makes sure the output image is still between 0 and 1.
-
- Args:
- image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
- with pixel values varying between [0, 1].
- max_delta: change hue randomly with a value between 0 and max_delta.
-
- Returns:
- image: image which is the same shape as input image.
- """
- with tf.name_scope('RandomAdjustHue', values=[image]):
- image = tf.image.random_hue(image, max_delta)
- image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
- return image
-
-
-def random_adjust_saturation(image, min_delta=0.8, max_delta=1.25):
- """Randomly adjusts saturation.
-
- Makes sure the output image is still between 0 and 1.
-
- Args:
- image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
- with pixel values varying between [0, 1].
- min_delta: see max_delta.
- max_delta: how much to change the saturation. Saturation will change with a
- value between min_delta and max_delta. This value will be
- multiplied to the current saturation of the image.
-
- Returns:
- image: image which is the same shape as input image.
- """
- with tf.name_scope('RandomAdjustSaturation', values=[image]):
- image = tf.image.random_saturation(image, min_delta, max_delta)
- image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
- return image
-
-
-def random_distort_color(image, color_ordering=0):
- """Randomly distorts color.
-
- Randomly distorts color using a combination of brightness, hue, contrast
- and saturation changes. Makes sure the output image is still between 0 and 1.
-
- Args:
- image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
- with pixel values varying between [0, 1].
- color_ordering: Python int, a type of distortion (valid values: 0, 1).
-
- Returns:
- image: image which is the same shape as input image.
-
- Raises:
- ValueError: if color_ordering is not in {0, 1}.
- """
- with tf.name_scope('RandomDistortColor', values=[image]):
- if color_ordering == 0:
- image = tf.image.random_brightness(image, max_delta=32. / 255.)
- image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
- image = tf.image.random_hue(image, max_delta=0.2)
- image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
- elif color_ordering == 1:
- image = tf.image.random_brightness(image, max_delta=32. / 255.)
- image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
- image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
- image = tf.image.random_hue(image, max_delta=0.2)
- else:
- raise ValueError('color_ordering must be in {0, 1}')
-
- # The random_* ops do not necessarily clamp.
- image = tf.clip_by_value(image, 0.0, 1.0)
- return image
-
-
-def random_jitter_boxes(boxes, ratio=0.05, seed=None):
- """Randomly jitter boxes in image.
-
- Args:
- boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
- ratio: The ratio of the box width and height that the corners can jitter.
- For example if the width is 100 pixels and ratio is 0.05,
- the corners can jitter up to 5 pixels in the x direction.
- seed: random seed.
-
- Returns:
- boxes: boxes which is the same shape as input boxes.
- """
- def random_jitter_box(box, ratio, seed):
- """Randomly jitter box.
-
- Args:
- box: bounding box [1, 1, 4].
- ratio: max ratio between jittered box and original box,
- a number between [0, 0.5].
- seed: random seed.
-
- Returns:
- jittered_box: jittered box.
- """
- rand_numbers = tf.random_uniform(
- [1, 1, 4], minval=-ratio, maxval=ratio, dtype=tf.float32, seed=seed)
- box_width = tf.subtract(box[0, 0, 3], box[0, 0, 1])
- box_height = tf.subtract(box[0, 0, 2], box[0, 0, 0])
- hw_coefs = tf.stack([box_height, box_width, box_height, box_width])
- hw_rand_coefs = tf.multiply(hw_coefs, rand_numbers)
- jittered_box = tf.add(box, hw_rand_coefs)
- jittered_box = tf.clip_by_value(jittered_box, 0.0, 1.0)
- return jittered_box
-
- with tf.name_scope('RandomJitterBoxes', values=[boxes]):
- # boxes are [N, 4]. Lets first make them [N, 1, 1, 4]
- boxes_shape = tf.shape(boxes)
- boxes = tf.expand_dims(boxes, 1)
- boxes = tf.expand_dims(boxes, 2)
-
- distorted_boxes = tf.map_fn(
- lambda x: random_jitter_box(x, ratio, seed), boxes, dtype=tf.float32)
-
- distorted_boxes = tf.reshape(distorted_boxes, boxes_shape)
-
- return distorted_boxes
-
-
-def _strict_random_crop_image(image,
- boxes,
- labels,
- label_scores=None,
- masks=None,
- keypoints=None,
- min_object_covered=1.0,
- aspect_ratio_range=(0.75, 1.33),
- area_range=(0.1, 1.0),
- overlap_thresh=0.3):
- """Performs random crop.
-
- Note: boxes will be clipped to the crop. Keypoint coordinates that are
- outside the crop will be set to NaN, which is consistent with the original
- keypoint encoding for non-existing keypoints. This function always crops
- the image and is supposed to be used by `random_crop_image` function which
- sometimes returns image unchanged.
-
- Args:
- image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
- with pixel values varying between [0, 1].
- boxes: rank 2 float32 tensor containing the bounding boxes with shape
- [num_instances, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
- labels: rank 1 int32 tensor containing the object classes.
- label_scores: (optional) float32 tensor of shape [num_instances]
- representing the score for each box.
- masks: (optional) rank 3 float32 tensor with shape
- [num_instances, height, width] containing instance masks. The masks
- are of the same height, width as the input `image`.
- keypoints: (optional) rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]. The keypoints are in y-x
- normalized coordinates.
- min_object_covered: the cropped image must cover at least this fraction of
- at least one of the input bounding boxes.
- aspect_ratio_range: allowed range for aspect ratio of cropped image.
- area_range: allowed range for area ratio between cropped image and the
- original image.
- overlap_thresh: minimum overlap thresh with new cropped
- image to keep the box.
-
- Returns:
- image: image which is the same rank as input image.
- boxes: boxes which is the same rank as input boxes.
- Boxes are in normalized form.
- labels: new labels.
-
- If label_scores, masks, or keypoints is not None, the function also returns:
- label_scores: rank 1 float32 tensor with shape [num_instances].
- masks: rank 3 float32 tensor with shape [num_instances, height, width]
- containing instance masks.
- keypoints: rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]
- """
- with tf.name_scope('RandomCropImage', values=[image, boxes]):
- image_shape = tf.shape(image)
-
- # boxes are [N, 4]. Lets first make them [N, 1, 4].
- boxes_expanded = tf.expand_dims(
- tf.clip_by_value(
- boxes, clip_value_min=0.0, clip_value_max=1.0), 1)
-
- sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
- image_shape,
- bounding_boxes=boxes_expanded,
- min_object_covered=min_object_covered,
- aspect_ratio_range=aspect_ratio_range,
- area_range=area_range,
- max_attempts=100,
- use_image_if_no_bounding_boxes=True)
-
- im_box_begin, im_box_size, im_box = sample_distorted_bounding_box
-
- new_image = tf.slice(image, im_box_begin, im_box_size)
- new_image.set_shape([None, None, image.get_shape()[2]])
-
- # [1, 4]
- im_box_rank2 = tf.squeeze(im_box, squeeze_dims=[0])
- # [4]
- im_box_rank1 = tf.squeeze(im_box)
-
- boxlist = box_list.BoxList(boxes)
- boxlist.add_field('labels', labels)
-
- if label_scores is not None:
- boxlist.add_field('label_scores', label_scores)
-
- im_boxlist = box_list.BoxList(im_box_rank2)
-
- # remove boxes that are outside cropped image
- boxlist, inside_window_ids = box_list_ops.prune_completely_outside_window(
- boxlist, im_box_rank1)
-
- # remove boxes that are outside image
- overlapping_boxlist, keep_ids = box_list_ops.prune_non_overlapping_boxes(
- boxlist, im_boxlist, overlap_thresh)
-
- # change the coordinate of the remaining boxes
- new_labels = overlapping_boxlist.get_field('labels')
- new_boxlist = box_list_ops.change_coordinate_frame(overlapping_boxlist,
- im_box_rank1)
- new_boxes = new_boxlist.get()
- new_boxes = tf.clip_by_value(
- new_boxes, clip_value_min=0.0, clip_value_max=1.0)
-
- result = [new_image, new_boxes, new_labels]
-
- if label_scores is not None:
- new_label_scores = overlapping_boxlist.get_field('label_scores')
- result.append(new_label_scores)
-
- if masks is not None:
- masks_of_boxes_inside_window = tf.gather(masks, inside_window_ids)
- masks_of_boxes_completely_inside_window = tf.gather(
- masks_of_boxes_inside_window, keep_ids)
- masks_box_begin = [0, im_box_begin[0], im_box_begin[1]]
- masks_box_size = [-1, im_box_size[0], im_box_size[1]]
- new_masks = tf.slice(
- masks_of_boxes_completely_inside_window,
- masks_box_begin, masks_box_size)
- result.append(new_masks)
-
- if keypoints is not None:
- keypoints_of_boxes_inside_window = tf.gather(keypoints, inside_window_ids)
- keypoints_of_boxes_completely_inside_window = tf.gather(
- keypoints_of_boxes_inside_window, keep_ids)
- new_keypoints = keypoint_ops.change_coordinate_frame(
- keypoints_of_boxes_completely_inside_window, im_box_rank1)
- new_keypoints = keypoint_ops.prune_outside_window(new_keypoints,
- [0.0, 0.0, 1.0, 1.0])
- result.append(new_keypoints)
-
- return tuple(result)
-
-
-def random_crop_image(image,
- boxes,
- labels,
- label_scores=None,
- masks=None,
- keypoints=None,
- min_object_covered=1.0,
- aspect_ratio_range=(0.75, 1.33),
- area_range=(0.1, 1.0),
- overlap_thresh=0.3,
- random_coef=0.0,
- seed=None):
- """Randomly crops the image.
-
- Given the input image and its bounding boxes, this op randomly
- crops a subimage. Given a user-provided set of input constraints,
- the crop window is resampled until it satisfies these constraints.
- If within 100 trials it is unable to find a valid crop, the original
- image is returned. See the Args section for a description of the input
- constraints. Both input boxes and returned Boxes are in normalized
- form (e.g., lie in the unit square [0, 1]).
- This function will return the original image with probability random_coef.
-
- Note: boxes will be clipped to the crop. Keypoint coordinates that are
- outside the crop will be set to NaN, which is consistent with the original
- keypoint encoding for non-existing keypoints.
-
- Args:
- image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
- with pixel values varying between [0, 1].
- boxes: rank 2 float32 tensor containing the bounding boxes with shape
- [num_instances, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
- labels: rank 1 int32 tensor containing the object classes.
- label_scores: (optional) float32 tensor of shape [num_instances].
- representing the score for each box.
- masks: (optional) rank 3 float32 tensor with shape
- [num_instances, height, width] containing instance masks. The masks
- are of the same height, width as the input `image`.
- keypoints: (optional) rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]. The keypoints are in y-x
- normalized coordinates.
- min_object_covered: the cropped image must cover at least this fraction of
- at least one of the input bounding boxes.
- aspect_ratio_range: allowed range for aspect ratio of cropped image.
- area_range: allowed range for area ratio between cropped image and the
- original image.
- overlap_thresh: minimum overlap thresh with new cropped
- image to keep the box.
- random_coef: a random coefficient that defines the chance of getting the
- original image. If random_coef is 0, we will always get the
- cropped image, and if it is 1.0, we will always get the
- original image.
- seed: random seed.
-
- Returns:
- image: Image shape will be [new_height, new_width, channels].
- boxes: boxes which is the same rank as input boxes. Boxes are in normalized
- form.
- labels: new labels.
-
- If label_scores, masks, or keypoints are not None, the function also
- returns:
- label_scores: new scores.
- masks: rank 3 float32 tensor with shape [num_instances, height, width]
- containing instance masks.
- keypoints: rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]
- """
-
- def strict_random_crop_image_fn():
- return _strict_random_crop_image(
- image,
- boxes,
- labels,
- label_scores=label_scores,
- masks=masks,
- keypoints=keypoints,
- min_object_covered=min_object_covered,
- aspect_ratio_range=aspect_ratio_range,
- area_range=area_range,
- overlap_thresh=overlap_thresh)
-
- # avoids tf.cond to make faster RCNN training on borg. See b/140057645.
- if random_coef < sys.float_info.min:
- result = strict_random_crop_image_fn()
- else:
- do_a_crop_random = tf.random_uniform([], seed=seed)
- do_a_crop_random = tf.greater(do_a_crop_random, random_coef)
-
- outputs = [image, boxes, labels]
-
- if label_scores is not None:
- outputs.append(label_scores)
- if masks is not None:
- outputs.append(masks)
- if keypoints is not None:
- outputs.append(keypoints)
-
- result = tf.cond(do_a_crop_random, strict_random_crop_image_fn,
- lambda: tuple(outputs))
- return result
-
-
-def random_pad_image(image,
- boxes,
- min_image_size=None,
- max_image_size=None,
- pad_color=None,
- seed=None):
- """Randomly pads the image.
-
- This function randomly pads the image with zeros. The final size of the
- padded image will be between min_image_size and max_image_size.
- if min_image_size is smaller than the input image size, min_image_size will
- be set to the input image size. The same for max_image_size. The input image
- will be located at a uniformly random location inside the padded image.
- The relative location of the boxes to the original image will remain the same.
-
- Args:
- image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
- with pixel values varying between [0, 1].
- boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
- min_image_size: a tensor of size [min_height, min_width], type tf.int32.
- If passed as None, will be set to image size
- [height, width].
- max_image_size: a tensor of size [max_height, max_width], type tf.int32.
- If passed as None, will be set to twice the
- image [height * 2, width * 2].
- pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
- if set as None, it will be set to average color of the input
- image.
-
- seed: random seed.
-
- Returns:
- image: Image shape will be [new_height, new_width, channels].
- boxes: boxes which is the same rank as input boxes. Boxes are in normalized
- form.
- """
- if pad_color is None:
- pad_color = tf.reduce_mean(image, axis=[0, 1])
-
- image_shape = tf.shape(image)
- image_height = image_shape[0]
- image_width = image_shape[1]
-
- if max_image_size is None:
- max_image_size = tf.stack([image_height * 2, image_width * 2])
- max_image_size = tf.maximum(max_image_size,
- tf.stack([image_height, image_width]))
-
- if min_image_size is None:
- min_image_size = tf.stack([image_height, image_width])
- min_image_size = tf.maximum(min_image_size,
- tf.stack([image_height, image_width]))
-
- target_height = tf.cond(
- max_image_size[0] > min_image_size[0],
- lambda: _random_integer(min_image_size[0], max_image_size[0], seed),
- lambda: max_image_size[0])
-
- target_width = tf.cond(
- max_image_size[1] > min_image_size[1],
- lambda: _random_integer(min_image_size[1], max_image_size[1], seed),
- lambda: max_image_size[1])
-
- offset_height = tf.cond(
- target_height > image_height,
- lambda: _random_integer(0, target_height - image_height, seed),
- lambda: tf.constant(0, dtype=tf.int32))
-
- offset_width = tf.cond(
- target_width > image_width,
- lambda: _random_integer(0, target_width - image_width, seed),
- lambda: tf.constant(0, dtype=tf.int32))
-
- new_image = tf.image.pad_to_bounding_box(
- image,
- offset_height=offset_height,
- offset_width=offset_width,
- target_height=target_height,
- target_width=target_width)
-
- # Setting color of the padded pixels
- image_ones = tf.ones_like(image)
- image_ones_padded = tf.image.pad_to_bounding_box(
- image_ones,
- offset_height=offset_height,
- offset_width=offset_width,
- target_height=target_height,
- target_width=target_width)
- image_color_padded = (1.0 - image_ones_padded) * pad_color
- new_image += image_color_padded
-
- # setting boxes
- new_window = tf.to_float(
- tf.stack([
- -offset_height, -offset_width, target_height - offset_height,
- target_width - offset_width
- ]))
- new_window /= tf.to_float(
- tf.stack([image_height, image_width, image_height, image_width]))
- boxlist = box_list.BoxList(boxes)
- new_boxlist = box_list_ops.change_coordinate_frame(boxlist, new_window)
- new_boxes = new_boxlist.get()
-
- return new_image, new_boxes
-
-
-def random_crop_pad_image(image,
- boxes,
- labels,
- label_scores=None,
- min_object_covered=1.0,
- aspect_ratio_range=(0.75, 1.33),
- area_range=(0.1, 1.0),
- overlap_thresh=0.3,
- random_coef=0.0,
- min_padded_size_ratio=(1.0, 1.0),
- max_padded_size_ratio=(2.0, 2.0),
- pad_color=None,
- seed=None):
- """Randomly crops and pads the image.
-
- Given an input image and its bounding boxes, this op first randomly crops
- the image and then randomly pads the image with background values. Parameters
- min_padded_size_ratio and max_padded_size_ratio, determine the range of the
- final output image size. Specifically, the final image size will have a size
- in the range of min_padded_size_ratio * tf.shape(image) and
- max_padded_size_ratio * tf.shape(image). Note that these ratios are with
- respect to the size of the original image, so we can't capture the same
- effect easily by independently applying RandomCropImage
- followed by RandomPadImage.
-
- Args:
- image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
- with pixel values varying between [0, 1].
- boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
- labels: rank 1 int32 tensor containing the object classes.
- label_scores: rank 1 float32 containing the label scores.
- min_object_covered: the cropped image must cover at least this fraction of
- at least one of the input bounding boxes.
- aspect_ratio_range: allowed range for aspect ratio of cropped image.
- area_range: allowed range for area ratio between cropped image and the
- original image.
- overlap_thresh: minimum overlap thresh with new cropped
- image to keep the box.
- random_coef: a random coefficient that defines the chance of getting the
- original image. If random_coef is 0, we will always get the
- cropped image, and if it is 1.0, we will always get the
- original image.
- min_padded_size_ratio: min ratio of padded image height and width to the
- input image's height and width.
- max_padded_size_ratio: max ratio of padded image height and width to the
- input image's height and width.
- pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
- if set as None, it will be set to average color of the randomly
- cropped image.
- seed: random seed.
-
- Returns:
- padded_image: padded image.
- padded_boxes: boxes which is the same rank as input boxes. Boxes are in
- normalized form.
- cropped_labels: cropped labels.
- if label_scores is not None also returns:
- cropped_label_scores: cropped label scores.
- """
- image_size = tf.shape(image)
- image_height = image_size[0]
- image_width = image_size[1]
- result = random_crop_image(
- image=image,
- boxes=boxes,
- labels=labels,
- label_scores=label_scores,
- min_object_covered=min_object_covered,
- aspect_ratio_range=aspect_ratio_range,
- area_range=area_range,
- overlap_thresh=overlap_thresh,
- random_coef=random_coef,
- seed=seed)
-
- cropped_image, cropped_boxes, cropped_labels = result[:3]
-
- min_image_size = tf.to_int32(
- tf.to_float(tf.stack([image_height, image_width])) *
- min_padded_size_ratio)
- max_image_size = tf.to_int32(
- tf.to_float(tf.stack([image_height, image_width])) *
- max_padded_size_ratio)
-
- padded_image, padded_boxes = random_pad_image(
- cropped_image,
- cropped_boxes,
- min_image_size=min_image_size,
- max_image_size=max_image_size,
- pad_color=pad_color,
- seed=seed)
-
- cropped_padded_output = (padded_image, padded_boxes, cropped_labels)
-
- if label_scores is not None:
- cropped_label_scores = result[3]
- cropped_padded_output += (cropped_label_scores,)
-
- return cropped_padded_output
-
-
-def random_crop_to_aspect_ratio(image,
- boxes,
- labels,
- label_scores=None,
- masks=None,
- keypoints=None,
- aspect_ratio=1.0,
- overlap_thresh=0.3,
- seed=None):
- """Randomly crops an image to the specified aspect ratio.
-
- Randomly crops the a portion of the image such that the crop is of the
- specified aspect ratio, and the crop is as large as possible. If the specified
- aspect ratio is larger than the aspect ratio of the image, this op will
- randomly remove rows from the top and bottom of the image. If the specified
- aspect ratio is less than the aspect ratio of the image, this op will randomly
- remove cols from the left and right of the image. If the specified aspect
- ratio is the same as the aspect ratio of the image, this op will return the
- image.
-
- Args:
- image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
- with pixel values varying between [0, 1].
- boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
- labels: rank 1 int32 tensor containing the object classes.
- label_scores: (optional) float32 tensor of shape [num_instances]
- representing the score for each box.
- masks: (optional) rank 3 float32 tensor with shape
- [num_instances, height, width] containing instance masks. The masks
- are of the same height, width as the input `image`.
- keypoints: (optional) rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]. The keypoints are in y-x
- normalized coordinates.
- aspect_ratio: the aspect ratio of cropped image.
- overlap_thresh: minimum overlap thresh with new cropped
- image to keep the box.
- seed: random seed.
-
- Returns:
- image: image which is the same rank as input image.
- boxes: boxes which is the same rank as input boxes.
- Boxes are in normalized form.
- labels: new labels.
-
- If label_scores, masks, or keypoints is not None, the function also returns:
- label_scores: new label scores.
- masks: rank 3 float32 tensor with shape [num_instances, height, width]
- containing instance masks.
- keypoints: rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]
-
- Raises:
- ValueError: If image is not a 3D tensor.
- """
- if len(image.get_shape()) != 3:
- raise ValueError('Image should be 3D tensor')
-
- with tf.name_scope('RandomCropToAspectRatio', values=[image]):
- image_shape = tf.shape(image)
- orig_height = image_shape[0]
- orig_width = image_shape[1]
- orig_aspect_ratio = tf.to_float(orig_width) / tf.to_float(orig_height)
- new_aspect_ratio = tf.constant(aspect_ratio, dtype=tf.float32)
- def target_height_fn():
- return tf.to_int32(tf.round(tf.to_float(orig_width) / new_aspect_ratio))
-
- target_height = tf.cond(orig_aspect_ratio >= new_aspect_ratio,
- lambda: orig_height, target_height_fn)
-
- def target_width_fn():
- return tf.to_int32(tf.round(tf.to_float(orig_height) * new_aspect_ratio))
-
- target_width = tf.cond(orig_aspect_ratio <= new_aspect_ratio,
- lambda: orig_width, target_width_fn)
-
- # either offset_height = 0 and offset_width is randomly chosen from
- # [0, offset_width - target_width), or else offset_width = 0 and
- # offset_height is randomly chosen from [0, offset_height - target_height)
- offset_height = _random_integer(0, orig_height - target_height + 1, seed)
- offset_width = _random_integer(0, orig_width - target_width + 1, seed)
- new_image = tf.image.crop_to_bounding_box(
- image, offset_height, offset_width, target_height, target_width)
-
- im_box = tf.stack([
- tf.to_float(offset_height) / tf.to_float(orig_height),
- tf.to_float(offset_width) / tf.to_float(orig_width),
- tf.to_float(offset_height + target_height) / tf.to_float(orig_height),
- tf.to_float(offset_width + target_width) / tf.to_float(orig_width)
- ])
-
- boxlist = box_list.BoxList(boxes)
- boxlist.add_field('labels', labels)
-
- if label_scores is not None:
- boxlist.add_field('label_scores', label_scores)
-
- im_boxlist = box_list.BoxList(tf.expand_dims(im_box, 0))
-
- # remove boxes whose overlap with the image is less than overlap_thresh
- overlapping_boxlist, keep_ids = box_list_ops.prune_non_overlapping_boxes(
- boxlist, im_boxlist, overlap_thresh)
-
- # change the coordinate of the remaining boxes
- new_labels = overlapping_boxlist.get_field('labels')
- new_boxlist = box_list_ops.change_coordinate_frame(overlapping_boxlist,
- im_box)
- new_boxlist = box_list_ops.clip_to_window(new_boxlist,
- tf.constant([0.0, 0.0, 1.0, 1.0],
- tf.float32))
- new_boxes = new_boxlist.get()
-
- result = [new_image, new_boxes, new_labels]
-
- if label_scores is not None:
- new_label_scores = overlapping_boxlist.get_field('label_scores')
- result.append(new_label_scores)
-
- if masks is not None:
- masks_inside_window = tf.gather(masks, keep_ids)
- masks_box_begin = tf.stack([0, offset_height, offset_width])
- masks_box_size = tf.stack([-1, target_height, target_width])
- new_masks = tf.slice(masks_inside_window, masks_box_begin, masks_box_size)
- result.append(new_masks)
-
- if keypoints is not None:
- keypoints_inside_window = tf.gather(keypoints, keep_ids)
- new_keypoints = keypoint_ops.change_coordinate_frame(
- keypoints_inside_window, im_box)
- new_keypoints = keypoint_ops.prune_outside_window(new_keypoints,
- [0.0, 0.0, 1.0, 1.0])
- result.append(new_keypoints)
-
- return tuple(result)
-
-
-def random_pad_to_aspect_ratio(image,
- boxes,
- masks=None,
- keypoints=None,
- aspect_ratio=1.0,
- min_padded_size_ratio=(1.0, 1.0),
- max_padded_size_ratio=(2.0, 2.0),
- seed=None):
- """Randomly zero pads an image to the specified aspect ratio.
-
- Pads the image so that the resulting image will have the specified aspect
- ratio without scaling less than the min_padded_size_ratio or more than the
- max_padded_size_ratio. If the min_padded_size_ratio or max_padded_size_ratio
- is lower than what is possible to maintain the aspect ratio, then this method
- will use the least padding to achieve the specified aspect ratio.
-
- Args:
- image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
- with pixel values varying between [0, 1].
- boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
- masks: (optional) rank 3 float32 tensor with shape
- [num_instances, height, width] containing instance masks. The masks
- are of the same height, width as the input `image`.
- keypoints: (optional) rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]. The keypoints are in y-x
- normalized coordinates.
- aspect_ratio: aspect ratio of the final image.
- min_padded_size_ratio: min ratio of padded image height and width to the
- input image's height and width.
- max_padded_size_ratio: max ratio of padded image height and width to the
- input image's height and width.
- seed: random seed.
-
- Returns:
- image: image which is the same rank as input image.
- boxes: boxes which is the same rank as input boxes.
- Boxes are in normalized form.
- labels: new labels.
-
- If label_scores, masks, or keypoints is not None, the function also returns:
- label_scores: new label scores.
- masks: rank 3 float32 tensor with shape [num_instances, height, width]
- containing instance masks.
- keypoints: rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]
-
- Raises:
- ValueError: If image is not a 3D tensor.
- """
- if len(image.get_shape()) != 3:
- raise ValueError('Image should be 3D tensor')
-
- with tf.name_scope('RandomPadToAspectRatio', values=[image]):
- image_shape = tf.shape(image)
- image_height = tf.to_float(image_shape[0])
- image_width = tf.to_float(image_shape[1])
- image_aspect_ratio = image_width / image_height
- new_aspect_ratio = tf.constant(aspect_ratio, dtype=tf.float32)
- target_height = tf.cond(
- image_aspect_ratio <= new_aspect_ratio,
- lambda: image_height,
- lambda: image_width / new_aspect_ratio)
- target_width = tf.cond(
- image_aspect_ratio >= new_aspect_ratio,
- lambda: image_width,
- lambda: image_height * new_aspect_ratio)
-
- min_height = tf.maximum(
- min_padded_size_ratio[0] * image_height, target_height)
- min_width = tf.maximum(
- min_padded_size_ratio[1] * image_width, target_width)
- max_height = tf.maximum(
- max_padded_size_ratio[0] * image_height, target_height)
- max_width = tf.maximum(
- max_padded_size_ratio[1] * image_width, target_width)
-
- min_scale = tf.maximum(min_height / target_height, min_width / target_width)
- max_scale = tf.minimum(max_height / target_height, max_width / target_width)
- scale = tf.random_uniform([], min_scale, max_scale, seed=seed)
-
- target_height = scale * target_height
- target_width = scale * target_width
-
- new_image = tf.image.pad_to_bounding_box(
- image, 0, 0, tf.to_int32(target_height), tf.to_int32(target_width))
-
- im_box = tf.stack([
- 0.0,
- 0.0,
- target_height / image_height,
- target_width / image_width
- ])
- boxlist = box_list.BoxList(boxes)
- new_boxlist = box_list_ops.change_coordinate_frame(boxlist, im_box)
- new_boxes = new_boxlist.get()
-
- result = [new_image, new_boxes]
-
- if masks is not None:
- new_masks = tf.expand_dims(masks, -1)
- new_masks = tf.image.pad_to_bounding_box(new_masks, 0, 0,
- tf.to_int32(target_height),
- tf.to_int32(target_width))
- new_masks = tf.squeeze(new_masks, [-1])
- result.append(new_masks)
-
- if keypoints is not None:
- new_keypoints = keypoint_ops.change_coordinate_frame(keypoints, im_box)
- result.append(new_keypoints)
-
- return tuple(result)
-
-
-def random_black_patches(image,
- max_black_patches=10,
- probability=0.5,
- size_to_image_ratio=0.1,
- random_seed=None):
- """Randomly adds some black patches to the image.
-
- This op adds up to max_black_patches square black patches of a fixed size
- to the image where size is specified via the size_to_image_ratio parameter.
-
- Args:
- image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
- with pixel values varying between [0, 1].
- max_black_patches: number of times that the function tries to add a
- black box to the image.
- probability: at each try, what is the chance of adding a box.
- size_to_image_ratio: Determines the ratio of the size of the black patches
- to the size of the image.
- box_size = size_to_image_ratio *
- min(image_width, image_height)
- random_seed: random seed.
-
- Returns:
- image
- """
- def add_black_patch_to_image(image):
- """Function for adding one patch to the image.
-
- Args:
- image: image
-
- Returns:
- image with a randomly added black box
- """
- image_shape = tf.shape(image)
- image_height = image_shape[0]
- image_width = image_shape[1]
- box_size = tf.to_int32(
- tf.multiply(
- tf.minimum(tf.to_float(image_height), tf.to_float(image_width)),
- size_to_image_ratio))
- normalized_y_min = tf.random_uniform(
- [], minval=0.0, maxval=(1.0 - size_to_image_ratio), seed=random_seed)
- normalized_x_min = tf.random_uniform(
- [], minval=0.0, maxval=(1.0 - size_to_image_ratio), seed=random_seed)
- y_min = tf.to_int32(normalized_y_min * tf.to_float(image_height))
- x_min = tf.to_int32(normalized_x_min * tf.to_float(image_width))
- black_box = tf.ones([box_size, box_size, 3], dtype=tf.float32)
- mask = 1.0 - tf.image.pad_to_bounding_box(black_box, y_min, x_min,
- image_height, image_width)
- image = tf.multiply(image, mask)
- return image
-
- with tf.name_scope('RandomBlackPatchInImage', values=[image]):
- for _ in range(max_black_patches):
- random_prob = tf.random_uniform(
- [], minval=0.0, maxval=1.0, dtype=tf.float32, seed=random_seed)
- image = tf.cond(
- tf.greater(random_prob, probability), lambda: image,
- lambda: add_black_patch_to_image(image))
-
- return image
-
-
-def image_to_float(image):
- """Used in Faster R-CNN. Casts image pixel values to float.
-
- Args:
- image: input image which might be in tf.uint8 or sth else format
-
- Returns:
- image: image in tf.float32 format.
- """
- with tf.name_scope('ImageToFloat', values=[image]):
- image = tf.to_float(image)
- return image
-
-
-def random_resize_method(image, target_size):
- """Uses a random resize method to resize the image to target size.
-
- Args:
- image: a rank 3 tensor.
- target_size: a list of [target_height, target_width]
-
- Returns:
- resized image.
- """
-
- resized_image = _apply_with_random_selector(
- image,
- lambda x, method: tf.image.resize_images(x, target_size, method),
- num_cases=4)
-
- return resized_image
-
-
-def _compute_new_static_size(image, min_dimension, max_dimension):
- """Compute new static shape for resize_to_range method."""
- image_shape = image.get_shape().as_list()
- orig_height = image_shape[0]
- orig_width = image_shape[1]
- orig_min_dim = min(orig_height, orig_width)
- # Calculates the larger of the possible sizes
- large_scale_factor = min_dimension / float(orig_min_dim)
- # Scaling orig_(height|width) by large_scale_factor will make the smaller
- # dimension equal to min_dimension, save for floating point rounding errors.
- # For reasonably-sized images, taking the nearest integer will reliably
- # eliminate this error.
- large_height = int(round(orig_height * large_scale_factor))
- large_width = int(round(orig_width * large_scale_factor))
- large_size = [large_height, large_width]
- if max_dimension:
- # Calculates the smaller of the possible sizes, use that if the larger
- # is too big.
- orig_max_dim = max(orig_height, orig_width)
- small_scale_factor = max_dimension / float(orig_max_dim)
- # Scaling orig_(height|width) by small_scale_factor will make the larger
- # dimension equal to max_dimension, save for floating point rounding
- # errors. For reasonably-sized images, taking the nearest integer will
- # reliably eliminate this error.
- small_height = int(round(orig_height * small_scale_factor))
- small_width = int(round(orig_width * small_scale_factor))
- small_size = [small_height, small_width]
- new_size = large_size
- if max(large_size) > max_dimension:
- new_size = small_size
- else:
- new_size = large_size
- return tf.constant(new_size)
-
-
-def _compute_new_dynamic_size(image, min_dimension, max_dimension):
- """Compute new dynamic shape for resize_to_range method."""
- image_shape = tf.shape(image)
- orig_height = tf.to_float(image_shape[0])
- orig_width = tf.to_float(image_shape[1])
- orig_min_dim = tf.minimum(orig_height, orig_width)
- # Calculates the larger of the possible sizes
- min_dimension = tf.constant(min_dimension, dtype=tf.float32)
- large_scale_factor = min_dimension / orig_min_dim
- # Scaling orig_(height|width) by large_scale_factor will make the smaller
- # dimension equal to min_dimension, save for floating point rounding errors.
- # For reasonably-sized images, taking the nearest integer will reliably
- # eliminate this error.
- large_height = tf.to_int32(tf.round(orig_height * large_scale_factor))
- large_width = tf.to_int32(tf.round(orig_width * large_scale_factor))
- large_size = tf.stack([large_height, large_width])
- if max_dimension:
- # Calculates the smaller of the possible sizes, use that if the larger
- # is too big.
- orig_max_dim = tf.maximum(orig_height, orig_width)
- max_dimension = tf.constant(max_dimension, dtype=tf.float32)
- small_scale_factor = max_dimension / orig_max_dim
- # Scaling orig_(height|width) by small_scale_factor will make the larger
- # dimension equal to max_dimension, save for floating point rounding
- # errors. For reasonably-sized images, taking the nearest integer will
- # reliably eliminate this error.
- small_height = tf.to_int32(tf.round(orig_height * small_scale_factor))
- small_width = tf.to_int32(tf.round(orig_width * small_scale_factor))
- small_size = tf.stack([small_height, small_width])
- new_size = tf.cond(
- tf.to_float(tf.reduce_max(large_size)) > max_dimension,
- lambda: small_size, lambda: large_size)
- else:
- new_size = large_size
- return new_size
-
-
-def resize_to_range(image,
- masks=None,
- min_dimension=None,
- max_dimension=None,
- method=tf.image.ResizeMethod.BILINEAR,
- align_corners=False):
- """Resizes an image so its dimensions are within the provided value.
-
- The output size can be described by two cases:
- 1. If the image can be rescaled so its minimum dimension is equal to the
- provided value without the other dimension exceeding max_dimension,
- then do so.
- 2. Otherwise, resize so the largest dimension is equal to max_dimension.
-
- Args:
- image: A 3D tensor of shape [height, width, channels]
- masks: (optional) rank 3 float32 tensor with shape
- [num_instances, height, width] containing instance masks.
- min_dimension: (optional) (scalar) desired size of the smaller image
- dimension.
- max_dimension: (optional) (scalar) maximum allowed size
- of the larger image dimension.
- method: (optional) interpolation method used in resizing. Defaults to
- BILINEAR.
- align_corners: bool. If true, exactly align all 4 corners of the input
- and output. Defaults to False.
-
- Returns:
- A 3D tensor of shape [new_height, new_width, channels],
- where the image has been resized (with bilinear interpolation) so that
- min(new_height, new_width) == min_dimension or
- max(new_height, new_width) == max_dimension.
-
- If masks is not None, also outputs masks:
- A 3D tensor of shape [num_instances, new_height, new_width]
-
- Raises:
- ValueError: if the image is not a 3D tensor.
- """
- if len(image.get_shape()) != 3:
- raise ValueError('Image should be 3D tensor')
-
- with tf.name_scope('ResizeToRange', values=[image, min_dimension]):
- if image.get_shape().is_fully_defined():
- new_size = _compute_new_static_size(image, min_dimension, max_dimension)
- else:
- new_size = _compute_new_dynamic_size(image, min_dimension, max_dimension)
- new_image = tf.image.resize_images(
- image, new_size, method=method, align_corners=align_corners)
-
- result = new_image
- if masks is not None:
- new_masks = tf.expand_dims(masks, 3)
- new_masks = tf.image.resize_nearest_neighbor(
- new_masks, new_size, align_corners=align_corners)
- new_masks = tf.squeeze(new_masks, 3)
- result = [new_image, new_masks]
-
- return result
-
-
-# TODO: Make sure the static shapes are preserved.
-def resize_to_min_dimension(image, masks=None, min_dimension=600):
- """Resizes image and masks given the min size maintaining the aspect ratio.
-
- If one of the image dimensions is smaller that min_dimension, it will scale
- the image such that its smallest dimension is equal to min_dimension.
- Otherwise, will keep the image size as is.
-
- Args:
- image: a tensor of size [height, width, channels].
- masks: (optional) a tensors of size [num_instances, height, width].
- min_dimension: minimum image dimension.
-
- Returns:
- a tuple containing the following:
- Resized image. A tensor of size [new_height, new_width, channels].
- (optional) Resized masks. A tensor of
- size [num_instances, new_height, new_width].
-
- Raises:
- ValueError: if the image is not a 3D tensor.
- """
- if len(image.get_shape()) != 3:
- raise ValueError('Image should be 3D tensor')
-
- with tf.name_scope('ResizeGivenMinDimension', values=[image, min_dimension]):
- image_height = tf.shape(image)[0]
- image_width = tf.shape(image)[1]
- min_image_dimension = tf.minimum(image_height, image_width)
- min_target_dimension = tf.maximum(min_image_dimension, min_dimension)
- target_ratio = tf.to_float(min_target_dimension) / tf.to_float(
- min_image_dimension)
- target_height = tf.to_int32(tf.to_float(image_height) * target_ratio)
- target_width = tf.to_int32(tf.to_float(image_width) * target_ratio)
- image = tf.image.resize_bilinear(
- tf.expand_dims(image, axis=0),
- size=[target_height, target_width],
- align_corners=True)
- result = tf.squeeze(image, axis=0)
- if masks is not None:
- masks = tf.image.resize_nearest_neighbor(
- tf.expand_dims(masks, axis=3),
- size=[target_height, target_width],
- align_corners=True)
- result = (result, tf.squeeze(masks, axis=3))
- return result
-
-
-def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None):
- """Scales boxes from normalized to pixel coordinates.
-
- Args:
- image: A 3D float32 tensor of shape [height, width, channels].
- boxes: A 2D float32 tensor of shape [num_boxes, 4] containing the bounding
- boxes in normalized coordinates. Each row is of the form
- [ymin, xmin, ymax, xmax].
- keypoints: (optional) rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]. The keypoints are in y-x normalized
- coordinates.
-
- Returns:
- image: unchanged input image.
- scaled_boxes: a 2D float32 tensor of shape [num_boxes, 4] containing the
- bounding boxes in pixel coordinates.
- scaled_keypoints: a 3D float32 tensor with shape
- [num_instances, num_keypoints, 2] containing the keypoints in pixel
- coordinates.
- """
- boxlist = box_list.BoxList(boxes)
- image_height = tf.shape(image)[0]
- image_width = tf.shape(image)[1]
- scaled_boxes = box_list_ops.scale(boxlist, image_height, image_width).get()
- result = [image, scaled_boxes]
- if keypoints is not None:
- scaled_keypoints = keypoint_ops.scale(keypoints, image_height, image_width)
- result.append(scaled_keypoints)
- return tuple(result)
-
-
-# pylint: disable=g-doc-return-or-yield
-def resize_image(image,
- masks=None,
- new_height=600,
- new_width=1024,
- method=tf.image.ResizeMethod.BILINEAR,
- align_corners=False):
- """See `tf.image.resize_images` for detailed doc."""
- with tf.name_scope(
- 'ResizeImage',
- values=[image, new_height, new_width, method, align_corners]):
- new_image = tf.image.resize_images(
- image, [new_height, new_width],
- method=method,
- align_corners=align_corners)
- result = new_image
- if masks is not None:
- num_instances = tf.shape(masks)[0]
- new_size = tf.constant([new_height, new_width], dtype=tf.int32)
- def resize_masks_branch():
- new_masks = tf.expand_dims(masks, 3)
- new_masks = tf.image.resize_nearest_neighbor(
- new_masks, new_size, align_corners=align_corners)
- new_masks = tf.squeeze(new_masks, axis=3)
- return new_masks
-
- def reshape_masks_branch():
- new_masks = tf.reshape(masks, [0, new_size[0], new_size[1]])
- return new_masks
-
- masks = tf.cond(num_instances > 0, resize_masks_branch,
- reshape_masks_branch)
- result = [new_image, masks]
-
- return result
-
-
-def subtract_channel_mean(image, means=None):
- """Normalizes an image by subtracting a mean from each channel.
-
- Args:
- image: A 3D tensor of shape [height, width, channels]
- means: float list containing a mean for each channel
- Returns:
- normalized_images: a tensor of shape [height, width, channels]
- Raises:
- ValueError: if images is not a 4D tensor or if the number of means is not
- equal to the number of channels.
- """
- with tf.name_scope('SubtractChannelMean', values=[image, means]):
- if len(image.get_shape()) != 3:
- raise ValueError('Input must be of size [height, width, channels]')
- if len(means) != image.get_shape()[-1]:
- raise ValueError('len(means) must match the number of channels')
- return image - [[means]]
-
-
-def one_hot_encoding(labels, num_classes=None):
- """One-hot encodes the multiclass labels.
-
- Example usage:
- labels = tf.constant([1, 4], dtype=tf.int32)
- one_hot = OneHotEncoding(labels, num_classes=5)
- one_hot.eval() # evaluates to [0, 1, 0, 0, 1]
-
- Args:
- labels: A tensor of shape [None] corresponding to the labels.
- num_classes: Number of classes in the dataset.
- Returns:
- onehot_labels: a tensor of shape [num_classes] corresponding to the one hot
- encoding of the labels.
- Raises:
- ValueError: if num_classes is not specified.
- """
- with tf.name_scope('OneHotEncoding', values=[labels]):
- if num_classes is None:
- raise ValueError('num_classes must be specified')
-
- labels = tf.one_hot(labels, num_classes, 1, 0)
- return tf.reduce_max(labels, 0)
-
-
-def rgb_to_gray(image):
- """Converts a 3 channel RGB image to a 1 channel grayscale image.
-
- Args:
- image: Rank 3 float32 tensor containing 1 image -> [height, width, 3]
- with pixel values varying between [0, 1].
-
- Returns:
- image: A single channel grayscale image -> [image, height, 1].
- """
- return tf.image.rgb_to_grayscale(image)
-
-
-def ssd_random_crop(image,
- boxes,
- labels,
- label_scores=None,
- masks=None,
- keypoints=None,
- min_object_covered=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
- aspect_ratio_range=((0.5, 2.0),) * 7,
- area_range=((0.1, 1.0),) * 7,
- overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
- random_coef=(0.15,) * 7,
- seed=None):
- """Random crop preprocessing with default parameters as in SSD paper.
-
- Liu et al., SSD: Single shot multibox detector.
- For further information on random crop preprocessing refer to RandomCrop
- function above.
-
- Args:
- image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
- with pixel values varying between [0, 1].
- boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
- labels: rank 1 int32 tensor containing the object classes.
- label_scores: rank 1 float32 tensor containing the scores.
- masks: (optional) rank 3 float32 tensor with shape
- [num_instances, height, width] containing instance masks. The masks
- are of the same height, width as the input `image`.
- keypoints: (optional) rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]. The keypoints are in y-x
- normalized coordinates.
- min_object_covered: the cropped image must cover at least this fraction of
- at least one of the input bounding boxes.
- aspect_ratio_range: allowed range for aspect ratio of cropped image.
- area_range: allowed range for area ratio between cropped image and the
- original image.
- overlap_thresh: minimum overlap thresh with new cropped
- image to keep the box.
- random_coef: a random coefficient that defines the chance of getting the
- original image. If random_coef is 0, we will always get the
- cropped image, and if it is 1.0, we will always get the
- original image.
- seed: random seed.
-
- Returns:
- image: image which is the same rank as input image.
- boxes: boxes which is the same rank as input boxes.
- Boxes are in normalized form.
- labels: new labels.
-
- If label_scores, masks, or keypoints is not None, the function also returns:
- label_scores: new label scores.
- masks: rank 3 float32 tensor with shape [num_instances, height, width]
- containing instance masks.
- keypoints: rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]
- """
-
- def random_crop_selector(selected_result, index):
- """Applies random_crop_image to selected result.
-
- Args:
- selected_result: A tuple containing image, boxes, labels, keypoints (if
- not None), and masks (if not None).
- index: The index that was randomly selected.
-
- Returns: A tuple containing image, boxes, labels, keypoints (if not None),
- and masks (if not None).
- """
- i = 3
- image, boxes, labels = selected_result[:i]
- selected_label_scores = None
- selected_masks = None
- selected_keypoints = None
- if label_scores is not None:
- selected_label_scores = selected_result[i]
- i += 1
- if masks is not None:
- selected_masks = selected_result[i]
- i += 1
- if keypoints is not None:
- selected_keypoints = selected_result[i]
-
- return random_crop_image(
- image=image,
- boxes=boxes,
- labels=labels,
- label_scores=selected_label_scores,
- masks=selected_masks,
- keypoints=selected_keypoints,
- min_object_covered=min_object_covered[index],
- aspect_ratio_range=aspect_ratio_range[index],
- area_range=area_range[index],
- overlap_thresh=overlap_thresh[index],
- random_coef=random_coef[index],
- seed=seed)
-
- result = _apply_with_random_selector_tuples(
- tuple(
- t for t in (image, boxes, labels, label_scores, masks, keypoints)
- if t is not None),
- random_crop_selector,
- num_cases=len(min_object_covered))
- return result
-
-
-def ssd_random_crop_pad(image,
- boxes,
- labels,
- label_scores=None,
- min_object_covered=(0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
- aspect_ratio_range=((0.5, 2.0),) * 6,
- area_range=((0.1, 1.0),) * 6,
- overlap_thresh=(0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
- random_coef=(0.15,) * 6,
- min_padded_size_ratio=((1.0, 1.0),) * 6,
- max_padded_size_ratio=((2.0, 2.0),) * 6,
- pad_color=(None,) * 6,
- seed=None):
- """Random crop preprocessing with default parameters as in SSD paper.
-
- Liu et al., SSD: Single shot multibox detector.
- For further information on random crop preprocessing refer to RandomCrop
- function above.
-
- Args:
- image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
- with pixel values varying between [0, 1].
- boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
- labels: rank 1 int32 tensor containing the object classes.
- label_scores: float32 tensor of shape [num_instances] representing the
- score for each box.
- min_object_covered: the cropped image must cover at least this fraction of
- at least one of the input bounding boxes.
- aspect_ratio_range: allowed range for aspect ratio of cropped image.
- area_range: allowed range for area ratio between cropped image and the
- original image.
- overlap_thresh: minimum overlap thresh with new cropped
- image to keep the box.
- random_coef: a random coefficient that defines the chance of getting the
- original image. If random_coef is 0, we will always get the
- cropped image, and if it is 1.0, we will always get the
- original image.
- min_padded_size_ratio: min ratio of padded image height and width to the
- input image's height and width.
- max_padded_size_ratio: max ratio of padded image height and width to the
- input image's height and width.
- pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
- if set as None, it will be set to average color of the randomly
- cropped image.
- seed: random seed.
-
- Returns:
- image: Image shape will be [new_height, new_width, channels].
- boxes: boxes which is the same rank as input boxes. Boxes are in normalized
- form.
- new_labels: new labels.
- new_label_scores: new label scores.
- """
-
- def random_crop_pad_selector(image_boxes_labels, index):
- i = 3
- image, boxes, labels = image_boxes_labels[:i]
- selected_label_scores = None
- if label_scores is not None:
- selected_label_scores = image_boxes_labels[i]
-
- return random_crop_pad_image(
- image,
- boxes,
- labels,
- selected_label_scores,
- min_object_covered=min_object_covered[index],
- aspect_ratio_range=aspect_ratio_range[index],
- area_range=area_range[index],
- overlap_thresh=overlap_thresh[index],
- random_coef=random_coef[index],
- min_padded_size_ratio=min_padded_size_ratio[index],
- max_padded_size_ratio=max_padded_size_ratio[index],
- pad_color=pad_color[index],
- seed=seed)
-
- return _apply_with_random_selector_tuples(
- tuple(t for t in (image, boxes, labels, label_scores) if t is not None),
- random_crop_pad_selector,
- num_cases=len(min_object_covered))
-
-
-def ssd_random_crop_fixed_aspect_ratio(
- image,
- boxes,
- labels,
- label_scores=None,
- masks=None,
- keypoints=None,
- min_object_covered=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
- aspect_ratio=1.0,
- area_range=((0.1, 1.0),) * 7,
- overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
- random_coef=(0.15,) * 7,
- seed=None):
- """Random crop preprocessing with default parameters as in SSD paper.
-
- Liu et al., SSD: Single shot multibox detector.
- For further information on random crop preprocessing refer to RandomCrop
- function above.
-
- The only difference is that the aspect ratio of the crops are fixed.
-
- Args:
- image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
- with pixel values varying between [0, 1].
- boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
- labels: rank 1 int32 tensor containing the object classes.
- label_scores: (optional) float32 tensor of shape [num_instances]
- representing the score for each box.
- masks: (optional) rank 3 float32 tensor with shape
- [num_instances, height, width] containing instance masks. The masks
- are of the same height, width as the input `image`.
- keypoints: (optional) rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]. The keypoints are in y-x
- normalized coordinates.
- min_object_covered: the cropped image must cover at least this fraction of
- at least one of the input bounding boxes.
- aspect_ratio: aspect ratio of the cropped image.
- area_range: allowed range for area ratio between cropped image and the
- original image.
- overlap_thresh: minimum overlap thresh with new cropped
- image to keep the box.
- random_coef: a random coefficient that defines the chance of getting the
- original image. If random_coef is 0, we will always get the
- cropped image, and if it is 1.0, we will always get the
- original image.
- seed: random seed.
-
- Returns:
- image: image which is the same rank as input image.
- boxes: boxes which is the same rank as input boxes.
- Boxes are in normalized form.
- labels: new labels.
-
- If masks or keypoints is not None, the function also returns:
-
- masks: rank 3 float32 tensor with shape [num_instances, height, width]
- containing instance masks.
- keypoints: rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]
- """
- aspect_ratio_range = ((aspect_ratio, aspect_ratio),) * len(area_range)
-
- crop_result = ssd_random_crop(
- image, boxes, labels, label_scores, masks, keypoints, min_object_covered,
- aspect_ratio_range, area_range, overlap_thresh, random_coef, seed)
- i = 3
- new_image, new_boxes, new_labels = crop_result[:i]
- new_label_scores = None
- new_masks = None
- new_keypoints = None
- if label_scores is not None:
- new_label_scores = crop_result[i]
- i += 1
- if masks is not None:
- new_masks = crop_result[i]
- i += 1
- if keypoints is not None:
- new_keypoints = crop_result[i]
- result = random_crop_to_aspect_ratio(
- new_image,
- new_boxes,
- new_labels,
- new_label_scores,
- new_masks,
- new_keypoints,
- aspect_ratio=aspect_ratio,
- seed=seed)
-
- return result
-
-
-def ssd_random_crop_pad_fixed_aspect_ratio(
- image,
- boxes,
- labels,
- label_scores=None,
- masks=None,
- keypoints=None,
- min_object_covered=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
- aspect_ratio=1.0,
- aspect_ratio_range=((0.5, 2.0),) * 7,
- area_range=((0.1, 1.0),) * 7,
- overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
- random_coef=(0.15,) * 7,
- min_padded_size_ratio=(1.0, 1.0),
- max_padded_size_ratio=(2.0, 2.0),
- seed=None):
- """Random crop and pad preprocessing with default parameters as in SSD paper.
-
- Liu et al., SSD: Single shot multibox detector.
- For further information on random crop preprocessing refer to RandomCrop
- function above.
-
- The only difference is that after the initial crop, images are zero-padded
- to a fixed aspect ratio instead of being resized to that aspect ratio.
-
- Args:
- image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
- with pixel values varying between [0, 1].
- boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
- Boxes are in normalized form meaning their coordinates vary
- between [0, 1].
- Each row is in the form of [ymin, xmin, ymax, xmax].
- labels: rank 1 int32 tensor containing the object classes.
- label_scores: (optional) float32 tensor of shape [num_instances]
- representing the score for each box.
- masks: (optional) rank 3 float32 tensor with shape
- [num_instances, height, width] containing instance masks. The masks
- are of the same height, width as the input `image`.
- keypoints: (optional) rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]. The keypoints are in y-x
- normalized coordinates.
- min_object_covered: the cropped image must cover at least this fraction of
- at least one of the input bounding boxes.
- aspect_ratio: the final aspect ratio to pad to.
- aspect_ratio_range: allowed range for aspect ratio of cropped image.
- area_range: allowed range for area ratio between cropped image and the
- original image.
- overlap_thresh: minimum overlap thresh with new cropped
- image to keep the box.
- random_coef: a random coefficient that defines the chance of getting the
- original image. If random_coef is 0, we will always get the
- cropped image, and if it is 1.0, we will always get the
- original image.
- min_padded_size_ratio: min ratio of padded image height and width to the
- input image's height and width.
- max_padded_size_ratio: max ratio of padded image height and width to the
- input image's height and width.
- seed: random seed.
-
- Returns:
- image: image which is the same rank as input image.
- boxes: boxes which is the same rank as input boxes.
- Boxes are in normalized form.
- labels: new labels.
-
- If masks or keypoints is not None, the function also returns:
-
- masks: rank 3 float32 tensor with shape [num_instances, height, width]
- containing instance masks.
- keypoints: rank 3 float32 tensor with shape
- [num_instances, num_keypoints, 2]
- """
- crop_result = ssd_random_crop(
- image, boxes, labels, label_scores, masks, keypoints, min_object_covered,
- aspect_ratio_range, area_range, overlap_thresh, random_coef, seed)
- i = 3
- new_image, new_boxes, new_labels = crop_result[:i]
- new_label_scores = None
- new_masks = None
- new_keypoints = None
- if label_scores is not None:
- new_label_scores = crop_result[i]
- i += 1
- if masks is not None:
- new_masks = crop_result[i]
- i += 1
- if keypoints is not None:
- new_keypoints = crop_result[i]
- result = random_pad_to_aspect_ratio(
- new_image,
- new_boxes,
- new_masks,
- new_keypoints,
- aspect_ratio=aspect_ratio,
- min_padded_size_ratio=min_padded_size_ratio,
- max_padded_size_ratio=max_padded_size_ratio,
- seed=seed)
-
- result = list(result)
- if new_label_scores is not None:
- result.insert(2, new_label_scores)
- result.insert(2, new_labels)
- result = tuple(result)
-
- return result
-
-
-def get_default_func_arg_map(include_label_scores=False,
- include_instance_masks=False,
- include_keypoints=False):
- """Returns the default mapping from a preprocessor function to its args.
-
- Args:
- include_label_scores: If True, preprocessing functions will modify the
- label scores, too.
- include_instance_masks: If True, preprocessing functions will modify the
- instance masks, too.
- include_keypoints: If True, preprocessing functions will modify the
- keypoints, too.
-
- Returns:
- A map from preprocessing functions to the arguments they receive.
- """
- groundtruth_label_scores = None
- if include_label_scores:
- groundtruth_label_scores = (fields.InputDataFields.groundtruth_label_scores)
-
- groundtruth_instance_masks = None
- if include_instance_masks:
- groundtruth_instance_masks = (
- fields.InputDataFields.groundtruth_instance_masks)
-
- groundtruth_keypoints = None
- if include_keypoints:
- groundtruth_keypoints = fields.InputDataFields.groundtruth_keypoints
-
- prep_func_arg_map = {
- normalize_image: (fields.InputDataFields.image,),
- random_horizontal_flip: (
- fields.InputDataFields.image,
- fields.InputDataFields.groundtruth_boxes,
- groundtruth_instance_masks,
- groundtruth_keypoints,),
- random_vertical_flip: (
- fields.InputDataFields.image,
- fields.InputDataFields.groundtruth_boxes,
- groundtruth_instance_masks,
- groundtruth_keypoints,),
- random_rotation90: (
- fields.InputDataFields.image,
- fields.InputDataFields.groundtruth_boxes,
- groundtruth_instance_masks,
- groundtruth_keypoints,),
- random_pixel_value_scale: (fields.InputDataFields.image,),
- random_image_scale: (
- fields.InputDataFields.image,
- groundtruth_instance_masks,),
- random_rgb_to_gray: (fields.InputDataFields.image,),
- random_adjust_brightness: (fields.InputDataFields.image,),
- random_adjust_contrast: (fields.InputDataFields.image,),
- random_adjust_hue: (fields.InputDataFields.image,),
- random_adjust_saturation: (fields.InputDataFields.image,),
- random_distort_color: (fields.InputDataFields.image,),
- random_jitter_boxes: (fields.InputDataFields.groundtruth_boxes,),
- random_crop_image: (
- fields.InputDataFields.image,
- fields.InputDataFields.groundtruth_boxes,
- fields.InputDataFields.groundtruth_classes,
- groundtruth_label_scores,
- groundtruth_instance_masks,
- groundtruth_keypoints,),
- random_pad_image: (fields.InputDataFields.image,
- fields.InputDataFields.groundtruth_boxes),
- random_crop_pad_image: (fields.InputDataFields.image,
- fields.InputDataFields.groundtruth_boxes,
- fields.InputDataFields.groundtruth_classes,
- groundtruth_label_scores),
- random_crop_to_aspect_ratio: (
- fields.InputDataFields.image,
- fields.InputDataFields.groundtruth_boxes,
- fields.InputDataFields.groundtruth_classes,
- groundtruth_label_scores,
- groundtruth_instance_masks,
- groundtruth_keypoints,),
- random_pad_to_aspect_ratio: (
- fields.InputDataFields.image,
- fields.InputDataFields.groundtruth_boxes,
- groundtruth_instance_masks,
- groundtruth_keypoints,),
- random_black_patches: (fields.InputDataFields.image,),
- retain_boxes_above_threshold: (
- fields.InputDataFields.groundtruth_boxes,
- fields.InputDataFields.groundtruth_classes,
- groundtruth_label_scores,
- groundtruth_instance_masks,
- groundtruth_keypoints,),
- image_to_float: (fields.InputDataFields.image,),
- random_resize_method: (fields.InputDataFields.image,),
- resize_to_range: (
- fields.InputDataFields.image,
- groundtruth_instance_masks,),
- resize_to_min_dimension: (
- fields.InputDataFields.image,
- groundtruth_instance_masks,),
- scale_boxes_to_pixel_coordinates: (
- fields.InputDataFields.image,
- fields.InputDataFields.groundtruth_boxes,
- groundtruth_keypoints,),
- resize_image: (
- fields.InputDataFields.image,
- groundtruth_instance_masks,),
- subtract_channel_mean: (fields.InputDataFields.image,),
- one_hot_encoding: (fields.InputDataFields.groundtruth_image_classes,),
- rgb_to_gray: (fields.InputDataFields.image,),
- ssd_random_crop: (
- fields.InputDataFields.image,
- fields.InputDataFields.groundtruth_boxes,
- fields.InputDataFields.groundtruth_classes,
- groundtruth_label_scores,
- groundtruth_instance_masks,
- groundtruth_keypoints,),
- ssd_random_crop_pad: (fields.InputDataFields.image,
- fields.InputDataFields.groundtruth_boxes,
- fields.InputDataFields.groundtruth_classes,
- groundtruth_label_scores),
- ssd_random_crop_fixed_aspect_ratio: (
- fields.InputDataFields.image,
- fields.InputDataFields.groundtruth_boxes,
- fields.InputDataFields.groundtruth_classes,
- groundtruth_label_scores,
- groundtruth_instance_masks,
- groundtruth_keypoints,),
- ssd_random_crop_pad_fixed_aspect_ratio: (
- fields.InputDataFields.image,
- fields.InputDataFields.groundtruth_boxes,
- fields.InputDataFields.groundtruth_classes,
- groundtruth_label_scores,
- groundtruth_instance_masks,
- groundtruth_keypoints,),
- }
-
- return prep_func_arg_map
-
-
-def preprocess(tensor_dict, preprocess_options, func_arg_map=None):
- """Preprocess images and bounding boxes.
-
- Various types of preprocessing (to be implemented) based on the
- preprocess_options dictionary e.g. "crop image" (affects image and possibly
- boxes), "white balance image" (affects only image), etc. If self._options
- is None, no preprocessing is done.
-
- Args:
- tensor_dict: dictionary that contains images, boxes, and can contain other
- things as well.
- images-> rank 4 float32 tensor contains
- 1 image -> [1, height, width, 3].
- with pixel values varying between [0, 1]
- boxes-> rank 2 float32 tensor containing
- the bounding boxes -> [N, 4].
- Boxes are in normalized form meaning
- their coordinates vary between [0, 1].
- Each row is in the form
- of [ymin, xmin, ymax, xmax].
- preprocess_options: It is a list of tuples, where each tuple contains a
- function and a dictionary that contains arguments and
- their values.
- func_arg_map: mapping from preprocessing functions to arguments that they
- expect to receive and return.
-
- Returns:
- tensor_dict: which contains the preprocessed images, bounding boxes, etc.
-
- Raises:
- ValueError: (a) If the functions passed to Preprocess
- are not in func_arg_map.
- (b) If the arguments that a function needs
- do not exist in tensor_dict.
- (c) If image in tensor_dict is not rank 4
- """
- if func_arg_map is None:
- func_arg_map = get_default_func_arg_map()
-
- # changes the images to image (rank 4 to rank 3) since the functions
- # receive rank 3 tensor for image
- if fields.InputDataFields.image in tensor_dict:
- images = tensor_dict[fields.InputDataFields.image]
- if len(images.get_shape()) != 4:
- raise ValueError('images in tensor_dict should be rank 4')
- image = tf.squeeze(images, squeeze_dims=[0])
- tensor_dict[fields.InputDataFields.image] = image
-
- # Preprocess inputs based on preprocess_options
- for option in preprocess_options:
- func, params = option
- if func not in func_arg_map:
- raise ValueError('The function %s does not exist in func_arg_map' %
- (func.__name__))
- arg_names = func_arg_map[func]
- for a in arg_names:
- if a is not None and a not in tensor_dict:
- raise ValueError('The function %s requires argument %s' %
- (func.__name__, a))
-
- def get_arg(key):
- return tensor_dict[key] if key is not None else None
-
- args = [get_arg(a) for a in arg_names]
- results = func(*args, **params)
- if not isinstance(results, (list, tuple)):
- results = (results,)
- # Removes None args since the return values will not contain those.
- arg_names = [arg_name for arg_name in arg_names if arg_name is not None]
- for res, arg_name in zip(results, arg_names):
- tensor_dict[arg_name] = res
-
- # changes the image to images (rank 3 to rank 4) to be compatible to what
- # we received in the first place
- if fields.InputDataFields.image in tensor_dict:
- image = tensor_dict[fields.InputDataFields.image]
- images = tf.expand_dims(image, 0)
- tensor_dict[fields.InputDataFields.image] = images
-
- return tensor_dict
diff --git a/object_detection/core/preprocessor_test.py b/object_detection/core/preprocessor_test.py
deleted file mode 100644
index a163bea0..00000000
--- a/object_detection/core/preprocessor_test.py
+++ /dev/null
@@ -1,2288 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.core.preprocessor."""
-
-import numpy as np
-import six
-
-import tensorflow as tf
-
-from object_detection.core import preprocessor
-from object_detection.core import standard_fields as fields
-
-if six.PY2:
- import mock # pylint: disable=g-import-not-at-top
-else:
- from unittest import mock # pylint: disable=g-import-not-at-top
-
-
-class PreprocessorTest(tf.test.TestCase):
-
- def createColorfulTestImage(self):
- ch255 = tf.fill([1, 100, 200, 1], tf.constant(255, dtype=tf.uint8))
- ch128 = tf.fill([1, 100, 200, 1], tf.constant(128, dtype=tf.uint8))
- ch0 = tf.fill([1, 100, 200, 1], tf.constant(0, dtype=tf.uint8))
- imr = tf.concat([ch255, ch0, ch0], 3)
- img = tf.concat([ch255, ch255, ch0], 3)
- imb = tf.concat([ch255, ch0, ch255], 3)
- imw = tf.concat([ch128, ch128, ch128], 3)
- imu = tf.concat([imr, img], 2)
- imd = tf.concat([imb, imw], 2)
- im = tf.concat([imu, imd], 1)
- return im
-
- def createTestImages(self):
- images_r = tf.constant([[[128, 128, 128, 128], [0, 0, 128, 128],
- [0, 128, 128, 128], [192, 192, 128, 128]]],
- dtype=tf.uint8)
- images_r = tf.expand_dims(images_r, 3)
- images_g = tf.constant([[[0, 0, 128, 128], [0, 0, 128, 128],
- [0, 128, 192, 192], [192, 192, 128, 192]]],
- dtype=tf.uint8)
- images_g = tf.expand_dims(images_g, 3)
- images_b = tf.constant([[[128, 128, 192, 0], [0, 0, 128, 192],
- [0, 128, 128, 0], [192, 192, 192, 128]]],
- dtype=tf.uint8)
- images_b = tf.expand_dims(images_b, 3)
- images = tf.concat([images_r, images_g, images_b], 3)
- return images
-
- def createEmptyTestBoxes(self):
- boxes = tf.constant([[]], dtype=tf.float32)
- return boxes
-
- def createTestBoxes(self):
- boxes = tf.constant(
- [[0.0, 0.25, 0.75, 1.0], [0.25, 0.5, 0.75, 1.0]], dtype=tf.float32)
- return boxes
-
- def createTestLabelScores(self):
- return tf.constant([1.0, 0.5], dtype=tf.float32)
-
- def createTestLabelScoresWithMissingScore(self):
- return tf.constant([0.5, np.nan], dtype=tf.float32)
-
- def createTestMasks(self):
- mask = np.array([
- [[255.0, 0.0, 0.0],
- [255.0, 0.0, 0.0],
- [255.0, 0.0, 0.0]],
- [[255.0, 255.0, 0.0],
- [255.0, 255.0, 0.0],
- [255.0, 255.0, 0.0]]])
- return tf.constant(mask, dtype=tf.float32)
-
- def createTestKeypoints(self):
- keypoints = np.array([
- [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
- [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]],
- ])
- return tf.constant(keypoints, dtype=tf.float32)
-
- def createTestKeypointsInsideCrop(self):
- keypoints = np.array([
- [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]],
- [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]],
- ])
- return tf.constant(keypoints, dtype=tf.float32)
-
- def createTestKeypointsOutsideCrop(self):
- keypoints = np.array([
- [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
- [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
- ])
- return tf.constant(keypoints, dtype=tf.float32)
-
- def createKeypointFlipPermutation(self):
- return np.array([0, 2, 1], dtype=np.int32)
-
- def createTestLabels(self):
- labels = tf.constant([1, 2], dtype=tf.int32)
- return labels
-
- def createTestBoxesOutOfImage(self):
- boxes = tf.constant(
- [[-0.1, 0.25, 0.75, 1], [0.25, 0.5, 0.75, 1.1]], dtype=tf.float32)
- return boxes
-
- def expectedImagesAfterNormalization(self):
- images_r = tf.constant([[[0, 0, 0, 0], [-1, -1, 0, 0],
- [-1, 0, 0, 0], [0.5, 0.5, 0, 0]]],
- dtype=tf.float32)
- images_r = tf.expand_dims(images_r, 3)
- images_g = tf.constant([[[-1, -1, 0, 0], [-1, -1, 0, 0],
- [-1, 0, 0.5, 0.5], [0.5, 0.5, 0, 0.5]]],
- dtype=tf.float32)
- images_g = tf.expand_dims(images_g, 3)
- images_b = tf.constant([[[0, 0, 0.5, -1], [-1, -1, 0, 0.5],
- [-1, 0, 0, -1], [0.5, 0.5, 0.5, 0]]],
- dtype=tf.float32)
- images_b = tf.expand_dims(images_b, 3)
- images = tf.concat([images_r, images_g, images_b], 3)
- return images
-
- def expectedMaxImageAfterColorScale(self):
- images_r = tf.constant([[[0.1, 0.1, 0.1, 0.1], [-0.9, -0.9, 0.1, 0.1],
- [-0.9, 0.1, 0.1, 0.1], [0.6, 0.6, 0.1, 0.1]]],
- dtype=tf.float32)
- images_r = tf.expand_dims(images_r, 3)
- images_g = tf.constant([[[-0.9, -0.9, 0.1, 0.1], [-0.9, -0.9, 0.1, 0.1],
- [-0.9, 0.1, 0.6, 0.6], [0.6, 0.6, 0.1, 0.6]]],
- dtype=tf.float32)
- images_g = tf.expand_dims(images_g, 3)
- images_b = tf.constant([[[0.1, 0.1, 0.6, -0.9], [-0.9, -0.9, 0.1, 0.6],
- [-0.9, 0.1, 0.1, -0.9], [0.6, 0.6, 0.6, 0.1]]],
- dtype=tf.float32)
- images_b = tf.expand_dims(images_b, 3)
- images = tf.concat([images_r, images_g, images_b], 3)
- return images
-
- def expectedMinImageAfterColorScale(self):
- images_r = tf.constant([[[-0.1, -0.1, -0.1, -0.1], [-1, -1, -0.1, -0.1],
- [-1, -0.1, -0.1, -0.1], [0.4, 0.4, -0.1, -0.1]]],
- dtype=tf.float32)
- images_r = tf.expand_dims(images_r, 3)
- images_g = tf.constant([[[-1, -1, -0.1, -0.1], [-1, -1, -0.1, -0.1],
- [-1, -0.1, 0.4, 0.4], [0.4, 0.4, -0.1, 0.4]]],
- dtype=tf.float32)
- images_g = tf.expand_dims(images_g, 3)
- images_b = tf.constant([[[-0.1, -0.1, 0.4, -1], [-1, -1, -0.1, 0.4],
- [-1, -0.1, -0.1, -1], [0.4, 0.4, 0.4, -0.1]]],
- dtype=tf.float32)
- images_b = tf.expand_dims(images_b, 3)
- images = tf.concat([images_r, images_g, images_b], 3)
- return images
-
- def expectedImagesAfterLeftRightFlip(self):
- images_r = tf.constant([[[0, 0, 0, 0], [0, 0, -1, -1],
- [0, 0, 0, -1], [0, 0, 0.5, 0.5]]],
- dtype=tf.float32)
- images_r = tf.expand_dims(images_r, 3)
- images_g = tf.constant([[[0, 0, -1, -1], [0, 0, -1, -1],
- [0.5, 0.5, 0, -1], [0.5, 0, 0.5, 0.5]]],
- dtype=tf.float32)
- images_g = tf.expand_dims(images_g, 3)
- images_b = tf.constant([[[-1, 0.5, 0, 0], [0.5, 0, -1, -1],
- [-1, 0, 0, -1], [0, 0.5, 0.5, 0.5]]],
- dtype=tf.float32)
- images_b = tf.expand_dims(images_b, 3)
- images = tf.concat([images_r, images_g, images_b], 3)
- return images
-
- def expectedImagesAfterUpDownFlip(self):
- images_r = tf.constant([[[0.5, 0.5, 0, 0], [-1, 0, 0, 0],
- [-1, -1, 0, 0], [0, 0, 0, 0]]],
- dtype=tf.float32)
- images_r = tf.expand_dims(images_r, 3)
- images_g = tf.constant([[[0.5, 0.5, 0, 0.5], [-1, 0, 0.5, 0.5],
- [-1, -1, 0, 0], [-1, -1, 0, 0]]],
- dtype=tf.float32)
- images_g = tf.expand_dims(images_g, 3)
- images_b = tf.constant([[[0.5, 0.5, 0.5, 0], [-1, 0, 0, -1],
- [-1, -1, 0, 0.5], [0, 0, 0.5, -1]]],
- dtype=tf.float32)
- images_b = tf.expand_dims(images_b, 3)
- images = tf.concat([images_r, images_g, images_b], 3)
- return images
-
- def expectedImagesAfterRot90(self):
- images_r = tf.constant([[[0, 0, 0, 0], [0, 0, 0, 0],
- [0, -1, 0, 0.5], [0, -1, -1, 0.5]]],
- dtype=tf.float32)
- images_r = tf.expand_dims(images_r, 3)
- images_g = tf.constant([[[0, 0, 0.5, 0.5], [0, 0, 0.5, 0],
- [-1, -1, 0, 0.5], [-1, -1, -1, 0.5]]],
- dtype=tf.float32)
- images_g = tf.expand_dims(images_g, 3)
- images_b = tf.constant([[[-1, 0.5, -1, 0], [0.5, 0, 0, 0.5],
- [0, -1, 0, 0.5], [0, -1, -1, 0.5]]],
- dtype=tf.float32)
- images_b = tf.expand_dims(images_b, 3)
- images = tf.concat([images_r, images_g, images_b], 3)
- return images
-
- def expectedBoxesAfterLeftRightFlip(self):
- boxes = tf.constant([[0.0, 0.0, 0.75, 0.75], [0.25, 0.0, 0.75, 0.5]],
- dtype=tf.float32)
- return boxes
-
- def expectedBoxesAfterUpDownFlip(self):
- boxes = tf.constant([[0.25, 0.25, 1.0, 1.0], [0.25, 0.5, 0.75, 1.0]],
- dtype=tf.float32)
- return boxes
-
- def expectedBoxesAfterRot90(self):
- boxes = tf.constant(
- [[0.0, 0.0, 0.75, 0.75], [0.0, 0.25, 0.5, 0.75]], dtype=tf.float32)
- return boxes
-
- def expectedMasksAfterLeftRightFlip(self):
- mask = np.array([
- [[0.0, 0.0, 255.0],
- [0.0, 0.0, 255.0],
- [0.0, 0.0, 255.0]],
- [[0.0, 255.0, 255.0],
- [0.0, 255.0, 255.0],
- [0.0, 255.0, 255.0]]])
- return tf.constant(mask, dtype=tf.float32)
-
- def expectedMasksAfterUpDownFlip(self):
- mask = np.array([
- [[255.0, 0.0, 0.0],
- [255.0, 0.0, 0.0],
- [255.0, 0.0, 0.0]],
- [[255.0, 255.0, 0.0],
- [255.0, 255.0, 0.0],
- [255.0, 255.0, 0.0]]])
- return tf.constant(mask, dtype=tf.float32)
-
- def expectedMasksAfterRot90(self):
- mask = np.array([
- [[0.0, 0.0, 0.0],
- [0.0, 0.0, 0.0],
- [255.0, 255.0, 255.0]],
- [[0.0, 0.0, 0.0],
- [255.0, 255.0, 255.0],
- [255.0, 255.0, 255.0]]])
- return tf.constant(mask, dtype=tf.float32)
-
- def expectedLabelScoresAfterThresholding(self):
- return tf.constant([1.0], dtype=tf.float32)
-
- def expectedBoxesAfterThresholding(self):
- return tf.constant([[0.0, 0.25, 0.75, 1.0]], dtype=tf.float32)
-
- def expectedLabelsAfterThresholding(self):
- return tf.constant([1], dtype=tf.float32)
-
- def expectedMasksAfterThresholding(self):
- mask = np.array([
- [[255.0, 0.0, 0.0],
- [255.0, 0.0, 0.0],
- [255.0, 0.0, 0.0]]])
- return tf.constant(mask, dtype=tf.float32)
-
- def expectedKeypointsAfterThresholding(self):
- keypoints = np.array([
- [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]]
- ])
- return tf.constant(keypoints, dtype=tf.float32)
-
- def expectedLabelScoresAfterThresholdingWithMissingScore(self):
- return tf.constant([np.nan], dtype=tf.float32)
-
- def expectedBoxesAfterThresholdingWithMissingScore(self):
- return tf.constant([[0.25, 0.5, 0.75, 1]], dtype=tf.float32)
-
- def expectedLabelsAfterThresholdingWithMissingScore(self):
- return tf.constant([2], dtype=tf.float32)
-
- def testNormalizeImage(self):
- preprocess_options = [(preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 256,
- 'target_minval': -1,
- 'target_maxval': 1
- })]
- images = self.createTestImages()
- tensor_dict = {fields.InputDataFields.image: images}
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
- images = tensor_dict[fields.InputDataFields.image]
- images_expected = self.expectedImagesAfterNormalization()
-
- with self.test_session() as sess:
- (images_, images_expected_) = sess.run(
- [images, images_expected])
- images_shape_ = images_.shape
- images_expected_shape_ = images_expected_.shape
- expected_shape = [1, 4, 4, 3]
- self.assertAllEqual(images_expected_shape_, images_shape_)
- self.assertAllEqual(images_shape_, expected_shape)
- self.assertAllClose(images_, images_expected_)
-
- def testRetainBoxesAboveThreshold(self):
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- label_scores = self.createTestLabelScores()
- (retained_boxes, retained_labels,
- retained_label_scores) = preprocessor.retain_boxes_above_threshold(
- boxes, labels, label_scores, threshold=0.6)
- with self.test_session() as sess:
- (retained_boxes_, retained_labels_, retained_label_scores_,
- expected_retained_boxes_, expected_retained_labels_,
- expected_retained_label_scores_) = sess.run([
- retained_boxes, retained_labels, retained_label_scores,
- self.expectedBoxesAfterThresholding(),
- self.expectedLabelsAfterThresholding(),
- self.expectedLabelScoresAfterThresholding()])
- self.assertAllClose(
- retained_boxes_, expected_retained_boxes_)
- self.assertAllClose(
- retained_labels_, expected_retained_labels_)
- self.assertAllClose(
- retained_label_scores_, expected_retained_label_scores_)
-
- def testRetainBoxesAboveThresholdWithMasks(self):
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- label_scores = self.createTestLabelScores()
- masks = self.createTestMasks()
- _, _, _, retained_masks = preprocessor.retain_boxes_above_threshold(
- boxes, labels, label_scores, masks, threshold=0.6)
- with self.test_session() as sess:
- retained_masks_, expected_retained_masks_ = sess.run([
- retained_masks, self.expectedMasksAfterThresholding()])
-
- self.assertAllClose(
- retained_masks_, expected_retained_masks_)
-
- def testRetainBoxesAboveThresholdWithKeypoints(self):
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- label_scores = self.createTestLabelScores()
- keypoints = self.createTestKeypoints()
- (_, _, _, retained_keypoints) = preprocessor.retain_boxes_above_threshold(
- boxes, labels, label_scores, keypoints=keypoints, threshold=0.6)
- with self.test_session() as sess:
- (retained_keypoints_,
- expected_retained_keypoints_) = sess.run([
- retained_keypoints,
- self.expectedKeypointsAfterThresholding()])
-
- self.assertAllClose(
- retained_keypoints_, expected_retained_keypoints_)
-
- def testRetainBoxesAboveThresholdWithMissingScore(self):
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- label_scores = self.createTestLabelScoresWithMissingScore()
- (retained_boxes, retained_labels,
- retained_label_scores) = preprocessor.retain_boxes_above_threshold(
- boxes, labels, label_scores, threshold=0.6)
- with self.test_session() as sess:
- (retained_boxes_, retained_labels_, retained_label_scores_,
- expected_retained_boxes_, expected_retained_labels_,
- expected_retained_label_scores_) = sess.run([
- retained_boxes, retained_labels, retained_label_scores,
- self.expectedBoxesAfterThresholdingWithMissingScore(),
- self.expectedLabelsAfterThresholdingWithMissingScore(),
- self.expectedLabelScoresAfterThresholdingWithMissingScore()])
- self.assertAllClose(
- retained_boxes_, expected_retained_boxes_)
- self.assertAllClose(
- retained_labels_, expected_retained_labels_)
- self.assertAllClose(
- retained_label_scores_, expected_retained_label_scores_)
-
- def testFlipBoxesLeftRight(self):
- boxes = self.createTestBoxes()
- flipped_boxes = preprocessor._flip_boxes_left_right(boxes)
- expected_boxes = self.expectedBoxesAfterLeftRightFlip()
- with self.test_session() as sess:
- flipped_boxes, expected_boxes = sess.run([flipped_boxes, expected_boxes])
- self.assertAllEqual(flipped_boxes.flatten(), expected_boxes.flatten())
-
- def testFlipBoxesUpDown(self):
- boxes = self.createTestBoxes()
- flipped_boxes = preprocessor._flip_boxes_up_down(boxes)
- expected_boxes = self.expectedBoxesAfterUpDownFlip()
- with self.test_session() as sess:
- flipped_boxes, expected_boxes = sess.run([flipped_boxes, expected_boxes])
- self.assertAllEqual(flipped_boxes.flatten(), expected_boxes.flatten())
-
- def testRot90Boxes(self):
- boxes = self.createTestBoxes()
- rotated_boxes = preprocessor._rot90_boxes(boxes)
- expected_boxes = self.expectedBoxesAfterRot90()
- with self.test_session() as sess:
- rotated_boxes, expected_boxes = sess.run([rotated_boxes, expected_boxes])
- self.assertAllEqual(rotated_boxes.flatten(), expected_boxes.flatten())
-
- def testFlipMasksLeftRight(self):
- test_mask = self.createTestMasks()
- flipped_mask = preprocessor._flip_masks_left_right(test_mask)
- expected_mask = self.expectedMasksAfterLeftRightFlip()
- with self.test_session() as sess:
- flipped_mask, expected_mask = sess.run([flipped_mask, expected_mask])
- self.assertAllEqual(flipped_mask.flatten(), expected_mask.flatten())
-
- def testFlipMasksUpDown(self):
- test_mask = self.createTestMasks()
- flipped_mask = preprocessor._flip_masks_up_down(test_mask)
- expected_mask = self.expectedMasksAfterUpDownFlip()
- with self.test_session() as sess:
- flipped_mask, expected_mask = sess.run([flipped_mask, expected_mask])
- self.assertAllEqual(flipped_mask.flatten(), expected_mask.flatten())
-
- def testRot90Masks(self):
- test_mask = self.createTestMasks()
- rotated_mask = preprocessor._rot90_masks(test_mask)
- expected_mask = self.expectedMasksAfterRot90()
- with self.test_session() as sess:
- rotated_mask, expected_mask = sess.run([rotated_mask, expected_mask])
- self.assertAllEqual(rotated_mask.flatten(), expected_mask.flatten())
-
- def testRandomHorizontalFlip(self):
- preprocess_options = [(preprocessor.random_horizontal_flip, {})]
- images = self.expectedImagesAfterNormalization()
- boxes = self.createTestBoxes()
- tensor_dict = {fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes}
- images_expected1 = self.expectedImagesAfterLeftRightFlip()
- boxes_expected1 = self.expectedBoxesAfterLeftRightFlip()
- images_expected2 = images
- boxes_expected2 = boxes
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
- images = tensor_dict[fields.InputDataFields.image]
- boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
-
- boxes_diff1 = tf.squared_difference(boxes, boxes_expected1)
- boxes_diff2 = tf.squared_difference(boxes, boxes_expected2)
- boxes_diff = tf.multiply(boxes_diff1, boxes_diff2)
- boxes_diff_expected = tf.zeros_like(boxes_diff)
-
- images_diff1 = tf.squared_difference(images, images_expected1)
- images_diff2 = tf.squared_difference(images, images_expected2)
- images_diff = tf.multiply(images_diff1, images_diff2)
- images_diff_expected = tf.zeros_like(images_diff)
-
- with self.test_session() as sess:
- (images_diff_, images_diff_expected_, boxes_diff_,
- boxes_diff_expected_) = sess.run([images_diff, images_diff_expected,
- boxes_diff, boxes_diff_expected])
- self.assertAllClose(boxes_diff_, boxes_diff_expected_)
- self.assertAllClose(images_diff_, images_diff_expected_)
-
- def testRandomHorizontalFlipWithEmptyBoxes(self):
- preprocess_options = [(preprocessor.random_horizontal_flip, {})]
- images = self.expectedImagesAfterNormalization()
- boxes = self.createEmptyTestBoxes()
- tensor_dict = {fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes}
- images_expected1 = self.expectedImagesAfterLeftRightFlip()
- boxes_expected = self.createEmptyTestBoxes()
- images_expected2 = images
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
- images = tensor_dict[fields.InputDataFields.image]
- boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
-
- images_diff1 = tf.squared_difference(images, images_expected1)
- images_diff2 = tf.squared_difference(images, images_expected2)
- images_diff = tf.multiply(images_diff1, images_diff2)
- images_diff_expected = tf.zeros_like(images_diff)
-
- with self.test_session() as sess:
- (images_diff_, images_diff_expected_, boxes_,
- boxes_expected_) = sess.run([images_diff, images_diff_expected, boxes,
- boxes_expected])
- self.assertAllClose(boxes_, boxes_expected_)
- self.assertAllClose(images_diff_, images_diff_expected_)
-
- def testRunRandomHorizontalFlipWithMaskAndKeypoints(self):
- preprocess_options = [(preprocessor.random_horizontal_flip, {})]
- image_height = 3
- image_width = 3
- images = tf.random_uniform([1, image_height, image_width, 3])
- boxes = self.createTestBoxes()
- masks = self.createTestMasks()
- keypoints = self.createTestKeypoints()
- keypoint_flip_permutation = self.createKeypointFlipPermutation()
- tensor_dict = {
- fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_instance_masks: masks,
- fields.InputDataFields.groundtruth_keypoints: keypoints
- }
- preprocess_options = [
- (preprocessor.random_horizontal_flip,
- {'keypoint_flip_permutation': keypoint_flip_permutation})]
- preprocessor_arg_map = preprocessor.get_default_func_arg_map(
- include_instance_masks=True, include_keypoints=True)
- tensor_dict = preprocessor.preprocess(
- tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map)
- boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
- masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
- keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints]
- with self.test_session() as sess:
- boxes, masks, keypoints = sess.run([boxes, masks, keypoints])
- self.assertTrue(boxes is not None)
- self.assertTrue(masks is not None)
- self.assertTrue(keypoints is not None)
-
- def testRandomVerticalFlip(self):
- preprocess_options = [(preprocessor.random_vertical_flip, {})]
- images = self.expectedImagesAfterNormalization()
- boxes = self.createTestBoxes()
- tensor_dict = {fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes}
- images_expected1 = self.expectedImagesAfterUpDownFlip()
- boxes_expected1 = self.expectedBoxesAfterUpDownFlip()
- images_expected2 = images
- boxes_expected2 = boxes
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
- images = tensor_dict[fields.InputDataFields.image]
- boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
-
- boxes_diff1 = tf.squared_difference(boxes, boxes_expected1)
- boxes_diff2 = tf.squared_difference(boxes, boxes_expected2)
- boxes_diff = tf.multiply(boxes_diff1, boxes_diff2)
- boxes_diff_expected = tf.zeros_like(boxes_diff)
-
- images_diff1 = tf.squared_difference(images, images_expected1)
- images_diff2 = tf.squared_difference(images, images_expected2)
- images_diff = tf.multiply(images_diff1, images_diff2)
- images_diff_expected = tf.zeros_like(images_diff)
-
- with self.test_session() as sess:
- (images_diff_, images_diff_expected_, boxes_diff_,
- boxes_diff_expected_) = sess.run([images_diff, images_diff_expected,
- boxes_diff, boxes_diff_expected])
- self.assertAllClose(boxes_diff_, boxes_diff_expected_)
- self.assertAllClose(images_diff_, images_diff_expected_)
-
- def testRandomVerticalFlipWithEmptyBoxes(self):
- preprocess_options = [(preprocessor.random_vertical_flip, {})]
- images = self.expectedImagesAfterNormalization()
- boxes = self.createEmptyTestBoxes()
- tensor_dict = {fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes}
- images_expected1 = self.expectedImagesAfterUpDownFlip()
- boxes_expected = self.createEmptyTestBoxes()
- images_expected2 = images
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
- images = tensor_dict[fields.InputDataFields.image]
- boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
-
- images_diff1 = tf.squared_difference(images, images_expected1)
- images_diff2 = tf.squared_difference(images, images_expected2)
- images_diff = tf.multiply(images_diff1, images_diff2)
- images_diff_expected = tf.zeros_like(images_diff)
-
- with self.test_session() as sess:
- (images_diff_, images_diff_expected_, boxes_,
- boxes_expected_) = sess.run([images_diff, images_diff_expected, boxes,
- boxes_expected])
- self.assertAllClose(boxes_, boxes_expected_)
- self.assertAllClose(images_diff_, images_diff_expected_)
-
- def testRunRandomVerticalFlipWithMaskAndKeypoints(self):
- preprocess_options = [(preprocessor.random_vertical_flip, {})]
- image_height = 3
- image_width = 3
- images = tf.random_uniform([1, image_height, image_width, 3])
- boxes = self.createTestBoxes()
- masks = self.createTestMasks()
- keypoints = self.createTestKeypoints()
- keypoint_flip_permutation = self.createKeypointFlipPermutation()
- tensor_dict = {
- fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_instance_masks: masks,
- fields.InputDataFields.groundtruth_keypoints: keypoints
- }
- preprocess_options = [
- (preprocessor.random_vertical_flip,
- {'keypoint_flip_permutation': keypoint_flip_permutation})]
- preprocessor_arg_map = preprocessor.get_default_func_arg_map(
- include_instance_masks=True, include_keypoints=True)
- tensor_dict = preprocessor.preprocess(
- tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map)
- boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
- masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
- keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints]
- with self.test_session() as sess:
- boxes, masks, keypoints = sess.run([boxes, masks, keypoints])
- self.assertTrue(boxes is not None)
- self.assertTrue(masks is not None)
- self.assertTrue(keypoints is not None)
-
- def testRandomRotation90(self):
- preprocess_options = [(preprocessor.random_rotation90, {})]
- images = self.expectedImagesAfterNormalization()
- boxes = self.createTestBoxes()
- tensor_dict = {fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes}
- images_expected1 = self.expectedImagesAfterRot90()
- boxes_expected1 = self.expectedBoxesAfterRot90()
- images_expected2 = images
- boxes_expected2 = boxes
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
- images = tensor_dict[fields.InputDataFields.image]
- boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
-
- boxes_diff1 = tf.squared_difference(boxes, boxes_expected1)
- boxes_diff2 = tf.squared_difference(boxes, boxes_expected2)
- boxes_diff = tf.multiply(boxes_diff1, boxes_diff2)
- boxes_diff_expected = tf.zeros_like(boxes_diff)
-
- images_diff1 = tf.squared_difference(images, images_expected1)
- images_diff2 = tf.squared_difference(images, images_expected2)
- images_diff = tf.multiply(images_diff1, images_diff2)
- images_diff_expected = tf.zeros_like(images_diff)
-
- with self.test_session() as sess:
- (images_diff_, images_diff_expected_, boxes_diff_,
- boxes_diff_expected_) = sess.run([images_diff, images_diff_expected,
- boxes_diff, boxes_diff_expected])
- self.assertAllClose(boxes_diff_, boxes_diff_expected_)
- self.assertAllClose(images_diff_, images_diff_expected_)
-
- def testRandomRotation90WithEmptyBoxes(self):
- preprocess_options = [(preprocessor.random_rotation90, {})]
- images = self.expectedImagesAfterNormalization()
- boxes = self.createEmptyTestBoxes()
- tensor_dict = {fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes}
- images_expected1 = self.expectedImagesAfterRot90()
- boxes_expected = self.createEmptyTestBoxes()
- images_expected2 = images
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
- images = tensor_dict[fields.InputDataFields.image]
- boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
-
- images_diff1 = tf.squared_difference(images, images_expected1)
- images_diff2 = tf.squared_difference(images, images_expected2)
- images_diff = tf.multiply(images_diff1, images_diff2)
- images_diff_expected = tf.zeros_like(images_diff)
-
- with self.test_session() as sess:
- (images_diff_, images_diff_expected_, boxes_,
- boxes_expected_) = sess.run([images_diff, images_diff_expected, boxes,
- boxes_expected])
- self.assertAllClose(boxes_, boxes_expected_)
- self.assertAllClose(images_diff_, images_diff_expected_)
-
- def testRunRandomRotation90WithMaskAndKeypoints(self):
- preprocess_options = [(preprocessor.random_rotation90, {})]
- image_height = 3
- image_width = 3
- images = tf.random_uniform([1, image_height, image_width, 3])
- boxes = self.createTestBoxes()
- masks = self.createTestMasks()
- keypoints = self.createTestKeypoints()
- tensor_dict = {
- fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_instance_masks: masks,
- fields.InputDataFields.groundtruth_keypoints: keypoints
- }
- preprocessor_arg_map = preprocessor.get_default_func_arg_map(
- include_instance_masks=True, include_keypoints=True)
- tensor_dict = preprocessor.preprocess(
- tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map)
- boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
- masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
- keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints]
- with self.test_session() as sess:
- boxes, masks, keypoints = sess.run([boxes, masks, keypoints])
- self.assertTrue(boxes is not None)
- self.assertTrue(masks is not None)
- self.assertTrue(keypoints is not None)
-
- def testRandomPixelValueScale(self):
- preprocessing_options = []
- preprocessing_options.append((preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }))
- preprocessing_options.append((preprocessor.random_pixel_value_scale, {}))
- images = self.createTestImages()
- tensor_dict = {fields.InputDataFields.image: images}
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
- images_min = tf.to_float(images) * 0.9 / 255.0
- images_max = tf.to_float(images) * 1.1 / 255.0
- images = tensor_dict[fields.InputDataFields.image]
- values_greater = tf.greater_equal(images, images_min)
- values_less = tf.less_equal(images, images_max)
- values_true = tf.fill([1, 4, 4, 3], True)
- with self.test_session() as sess:
- (values_greater_, values_less_, values_true_) = sess.run(
- [values_greater, values_less, values_true])
- self.assertAllClose(values_greater_, values_true_)
- self.assertAllClose(values_less_, values_true_)
-
- def testRandomImageScale(self):
- preprocess_options = [(preprocessor.random_image_scale, {})]
- images_original = self.createTestImages()
- tensor_dict = {fields.InputDataFields.image: images_original}
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
- images_scaled = tensor_dict[fields.InputDataFields.image]
- images_original_shape = tf.shape(images_original)
- images_scaled_shape = tf.shape(images_scaled)
- with self.test_session() as sess:
- (images_original_shape_, images_scaled_shape_) = sess.run(
- [images_original_shape, images_scaled_shape])
- self.assertTrue(
- images_original_shape_[1] * 0.5 <= images_scaled_shape_[1])
- self.assertTrue(
- images_original_shape_[1] * 2.0 >= images_scaled_shape_[1])
- self.assertTrue(
- images_original_shape_[2] * 0.5 <= images_scaled_shape_[2])
- self.assertTrue(
- images_original_shape_[2] * 2.0 >= images_scaled_shape_[2])
-
- def testRandomRGBtoGray(self):
- preprocess_options = [(preprocessor.random_rgb_to_gray, {})]
- images_original = self.createTestImages()
- tensor_dict = {fields.InputDataFields.image: images_original}
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
- images_gray = tensor_dict[fields.InputDataFields.image]
- images_gray_r, images_gray_g, images_gray_b = tf.split(
- value=images_gray, num_or_size_splits=3, axis=3)
- images_r, images_g, images_b = tf.split(
- value=images_original, num_or_size_splits=3, axis=3)
- images_r_diff1 = tf.squared_difference(tf.to_float(images_r),
- tf.to_float(images_gray_r))
- images_r_diff2 = tf.squared_difference(tf.to_float(images_gray_r),
- tf.to_float(images_gray_g))
- images_r_diff = tf.multiply(images_r_diff1, images_r_diff2)
- images_g_diff1 = tf.squared_difference(tf.to_float(images_g),
- tf.to_float(images_gray_g))
- images_g_diff2 = tf.squared_difference(tf.to_float(images_gray_g),
- tf.to_float(images_gray_b))
- images_g_diff = tf.multiply(images_g_diff1, images_g_diff2)
- images_b_diff1 = tf.squared_difference(tf.to_float(images_b),
- tf.to_float(images_gray_b))
- images_b_diff2 = tf.squared_difference(tf.to_float(images_gray_b),
- tf.to_float(images_gray_r))
- images_b_diff = tf.multiply(images_b_diff1, images_b_diff2)
- image_zero1 = tf.constant(0, dtype=tf.float32, shape=[1, 4, 4, 1])
- with self.test_session() as sess:
- (images_r_diff_, images_g_diff_, images_b_diff_, image_zero1_) = sess.run(
- [images_r_diff, images_g_diff, images_b_diff, image_zero1])
- self.assertAllClose(images_r_diff_, image_zero1_)
- self.assertAllClose(images_g_diff_, image_zero1_)
- self.assertAllClose(images_b_diff_, image_zero1_)
-
- def testRandomAdjustBrightness(self):
- preprocessing_options = []
- preprocessing_options.append((preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }))
- preprocessing_options.append((preprocessor.random_adjust_brightness, {}))
- images_original = self.createTestImages()
- tensor_dict = {fields.InputDataFields.image: images_original}
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
- images_bright = tensor_dict[fields.InputDataFields.image]
- image_original_shape = tf.shape(images_original)
- image_bright_shape = tf.shape(images_bright)
- with self.test_session() as sess:
- (image_original_shape_, image_bright_shape_) = sess.run(
- [image_original_shape, image_bright_shape])
- self.assertAllEqual(image_original_shape_, image_bright_shape_)
-
- def testRandomAdjustContrast(self):
- preprocessing_options = []
- preprocessing_options.append((preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }))
- preprocessing_options.append((preprocessor.random_adjust_contrast, {}))
- images_original = self.createTestImages()
- tensor_dict = {fields.InputDataFields.image: images_original}
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
- images_contrast = tensor_dict[fields.InputDataFields.image]
- image_original_shape = tf.shape(images_original)
- image_contrast_shape = tf.shape(images_contrast)
- with self.test_session() as sess:
- (image_original_shape_, image_contrast_shape_) = sess.run(
- [image_original_shape, image_contrast_shape])
- self.assertAllEqual(image_original_shape_, image_contrast_shape_)
-
- def testRandomAdjustHue(self):
- preprocessing_options = []
- preprocessing_options.append((preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }))
- preprocessing_options.append((preprocessor.random_adjust_hue, {}))
- images_original = self.createTestImages()
- tensor_dict = {fields.InputDataFields.image: images_original}
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
- images_hue = tensor_dict[fields.InputDataFields.image]
- image_original_shape = tf.shape(images_original)
- image_hue_shape = tf.shape(images_hue)
- with self.test_session() as sess:
- (image_original_shape_, image_hue_shape_) = sess.run(
- [image_original_shape, image_hue_shape])
- self.assertAllEqual(image_original_shape_, image_hue_shape_)
-
- def testRandomDistortColor(self):
- preprocessing_options = []
- preprocessing_options.append((preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }))
- preprocessing_options.append((preprocessor.random_distort_color, {}))
- images_original = self.createTestImages()
- images_original_shape = tf.shape(images_original)
- tensor_dict = {fields.InputDataFields.image: images_original}
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
- images_distorted_color = tensor_dict[fields.InputDataFields.image]
- images_distorted_color_shape = tf.shape(images_distorted_color)
- with self.test_session() as sess:
- (images_original_shape_, images_distorted_color_shape_) = sess.run(
- [images_original_shape, images_distorted_color_shape])
- self.assertAllEqual(images_original_shape_, images_distorted_color_shape_)
-
- def testRandomJitterBoxes(self):
- preprocessing_options = []
- preprocessing_options.append((preprocessor.random_jitter_boxes, {}))
- boxes = self.createTestBoxes()
- boxes_shape = tf.shape(boxes)
- tensor_dict = {fields.InputDataFields.groundtruth_boxes: boxes}
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
- distorted_boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
- distorted_boxes_shape = tf.shape(distorted_boxes)
-
- with self.test_session() as sess:
- (boxes_shape_, distorted_boxes_shape_) = sess.run(
- [boxes_shape, distorted_boxes_shape])
- self.assertAllEqual(boxes_shape_, distorted_boxes_shape_)
-
- def testRandomCropImage(self):
- preprocessing_options = []
- preprocessing_options.append((preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }))
- preprocessing_options.append((preprocessor.random_crop_image, {}))
- images = self.createTestImages()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- tensor_dict = {
- fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- }
- distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
- preprocessing_options)
- distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- boxes_rank = tf.rank(boxes)
- distorted_boxes_rank = tf.rank(distorted_boxes)
- images_rank = tf.rank(images)
- distorted_images_rank = tf.rank(distorted_images)
- self.assertEqual(3, distorted_images.get_shape()[3])
-
- with self.test_session() as sess:
- (boxes_rank_, distorted_boxes_rank_, images_rank_,
- distorted_images_rank_) = sess.run([
- boxes_rank, distorted_boxes_rank, images_rank, distorted_images_rank
- ])
- self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
- self.assertAllEqual(images_rank_, distorted_images_rank_)
-
- def testRandomCropImageGrayscale(self):
- preprocessing_options = [(preprocessor.rgb_to_gray, {}),
- (preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1,
- }),
- (preprocessor.random_crop_image, {})]
- images = self.createTestImages()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- tensor_dict = {
- fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- }
- distorted_tensor_dict = preprocessor.preprocess(
- tensor_dict, preprocessing_options)
- distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- boxes_rank = tf.rank(boxes)
- distorted_boxes_rank = tf.rank(distorted_boxes)
- images_rank = tf.rank(images)
- distorted_images_rank = tf.rank(distorted_images)
- self.assertEqual(1, distorted_images.get_shape()[3])
-
- with self.test_session() as sess:
- session_results = sess.run([
- boxes_rank, distorted_boxes_rank, images_rank, distorted_images_rank
- ])
- (boxes_rank_, distorted_boxes_rank_, images_rank_,
- distorted_images_rank_) = session_results
- self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
- self.assertAllEqual(images_rank_, distorted_images_rank_)
-
- def testRandomCropImageWithBoxOutOfImage(self):
- preprocessing_options = []
- preprocessing_options.append((preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }))
- preprocessing_options.append((preprocessor.random_crop_image, {}))
- images = self.createTestImages()
- boxes = self.createTestBoxesOutOfImage()
- labels = self.createTestLabels()
- tensor_dict = {
- fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- }
- distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
- preprocessing_options)
- distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- boxes_rank = tf.rank(boxes)
- distorted_boxes_rank = tf.rank(distorted_boxes)
- images_rank = tf.rank(images)
- distorted_images_rank = tf.rank(distorted_images)
-
- with self.test_session() as sess:
- (boxes_rank_, distorted_boxes_rank_, images_rank_,
- distorted_images_rank_) = sess.run(
- [boxes_rank, distorted_boxes_rank, images_rank,
- distorted_images_rank])
- self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
- self.assertAllEqual(images_rank_, distorted_images_rank_)
-
- def testRandomCropImageWithRandomCoefOne(self):
- preprocessing_options = [(preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- })]
-
- images = self.createTestImages()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- label_scores = self.createTestLabelScores()
- tensor_dict = {
- fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- fields.InputDataFields.groundtruth_label_scores: label_scores
- }
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
- images = tensor_dict[fields.InputDataFields.image]
-
- preprocessing_options = [(preprocessor.random_crop_image, {
- 'random_coef': 1.0
- })]
- distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
- preprocessing_options)
-
- distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- distorted_labels = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_classes]
- distorted_label_scores = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_label_scores]
- boxes_shape = tf.shape(boxes)
- distorted_boxes_shape = tf.shape(distorted_boxes)
- images_shape = tf.shape(images)
- distorted_images_shape = tf.shape(distorted_images)
-
- with self.test_session() as sess:
- (boxes_shape_, distorted_boxes_shape_, images_shape_,
- distorted_images_shape_, images_, distorted_images_,
- boxes_, distorted_boxes_, labels_, distorted_labels_,
- label_scores_, distorted_label_scores_) = sess.run(
- [boxes_shape, distorted_boxes_shape, images_shape,
- distorted_images_shape, images, distorted_images,
- boxes, distorted_boxes, labels, distorted_labels,
- label_scores, distorted_label_scores])
- self.assertAllEqual(boxes_shape_, distorted_boxes_shape_)
- self.assertAllEqual(images_shape_, distorted_images_shape_)
- self.assertAllClose(images_, distorted_images_)
- self.assertAllClose(boxes_, distorted_boxes_)
- self.assertAllEqual(labels_, distorted_labels_)
- self.assertAllEqual(label_scores_, distorted_label_scores_)
-
- def testRandomCropWithMockSampleDistortedBoundingBox(self):
- preprocessing_options = [(preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- })]
-
- images = self.createColorfulTestImage()
- boxes = tf.constant([[0.1, 0.1, 0.8, 0.3],
- [0.2, 0.4, 0.75, 0.75],
- [0.3, 0.1, 0.4, 0.7]], dtype=tf.float32)
- labels = tf.constant([1, 7, 11], dtype=tf.int32)
-
- tensor_dict = {
- fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- }
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
- images = tensor_dict[fields.InputDataFields.image]
-
- preprocessing_options = [(preprocessor.random_crop_image, {})]
- with mock.patch.object(
- tf.image,
- 'sample_distorted_bounding_box') as mock_sample_distorted_bounding_box:
- mock_sample_distorted_bounding_box.return_value = (tf.constant(
- [6, 143, 0], dtype=tf.int32), tf.constant(
- [190, 237, -1], dtype=tf.int32), tf.constant(
- [[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
-
- distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
- preprocessing_options)
-
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- distorted_labels = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_classes]
- expected_boxes = tf.constant([[0.178947, 0.07173, 0.75789469, 0.66244733],
- [0.28421, 0.0, 0.38947365, 0.57805908]],
- dtype=tf.float32)
- expected_labels = tf.constant([7, 11], dtype=tf.int32)
-
- with self.test_session() as sess:
- (distorted_boxes_, distorted_labels_,
- expected_boxes_, expected_labels_) = sess.run(
- [distorted_boxes, distorted_labels,
- expected_boxes, expected_labels])
- self.assertAllClose(distorted_boxes_, expected_boxes_)
- self.assertAllEqual(distorted_labels_, expected_labels_)
-
- def testStrictRandomCropImageWithLabelScores(self):
- image = self.createColorfulTestImage()[0]
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- label_scores = self.createTestLabelScores()
- with mock.patch.object(
- tf.image,
- 'sample_distorted_bounding_box'
- ) as mock_sample_distorted_bounding_box:
- mock_sample_distorted_bounding_box.return_value = (
- tf.constant([6, 143, 0], dtype=tf.int32),
- tf.constant([190, 237, -1], dtype=tf.int32),
- tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
- new_image, new_boxes, new_labels, new_label_scores = (
- preprocessor._strict_random_crop_image(
- image, boxes, labels, label_scores))
- with self.test_session() as sess:
- new_image, new_boxes, new_labels, new_label_scores = (
- sess.run(
- [new_image, new_boxes, new_labels, new_label_scores])
- )
-
- expected_boxes = np.array(
- [[0.0, 0.0, 0.75789469, 1.0],
- [0.23157893, 0.24050637, 0.75789469, 1.0]], dtype=np.float32)
- self.assertAllEqual(new_image.shape, [190, 237, 3])
- self.assertAllEqual(new_label_scores, [1.0, 0.5])
- self.assertAllClose(
- new_boxes.flatten(), expected_boxes.flatten())
-
- def testStrictRandomCropImageWithMasks(self):
- image = self.createColorfulTestImage()[0]
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
- with mock.patch.object(
- tf.image,
- 'sample_distorted_bounding_box'
- ) as mock_sample_distorted_bounding_box:
- mock_sample_distorted_bounding_box.return_value = (
- tf.constant([6, 143, 0], dtype=tf.int32),
- tf.constant([190, 237, -1], dtype=tf.int32),
- tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
- new_image, new_boxes, new_labels, new_masks = (
- preprocessor._strict_random_crop_image(
- image, boxes, labels, masks=masks))
- with self.test_session() as sess:
- new_image, new_boxes, new_labels, new_masks = sess.run(
- [new_image, new_boxes, new_labels, new_masks])
- expected_boxes = np.array(
- [[0.0, 0.0, 0.75789469, 1.0],
- [0.23157893, 0.24050637, 0.75789469, 1.0]], dtype=np.float32)
- self.assertAllEqual(new_image.shape, [190, 237, 3])
- self.assertAllEqual(new_masks.shape, [2, 190, 237])
- self.assertAllClose(
- new_boxes.flatten(), expected_boxes.flatten())
-
- def testStrictRandomCropImageWithKeypoints(self):
- image = self.createColorfulTestImage()[0]
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- keypoints = self.createTestKeypoints()
- with mock.patch.object(
- tf.image,
- 'sample_distorted_bounding_box'
- ) as mock_sample_distorted_bounding_box:
- mock_sample_distorted_bounding_box.return_value = (
- tf.constant([6, 143, 0], dtype=tf.int32),
- tf.constant([190, 237, -1], dtype=tf.int32),
- tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
- new_image, new_boxes, new_labels, new_keypoints = (
- preprocessor._strict_random_crop_image(
- image, boxes, labels, keypoints=keypoints))
- with self.test_session() as sess:
- new_image, new_boxes, new_labels, new_keypoints = sess.run(
- [new_image, new_boxes, new_labels, new_keypoints])
-
- expected_boxes = np.array([
- [0.0, 0.0, 0.75789469, 1.0],
- [0.23157893, 0.24050637, 0.75789469, 1.0],], dtype=np.float32)
- expected_keypoints = np.array([
- [[np.nan, np.nan],
- [np.nan, np.nan],
- [np.nan, np.nan]],
- [[0.38947368, 0.07173],
- [0.49473682, 0.24050637],
- [0.60000002, 0.40928277]]
- ], dtype=np.float32)
- self.assertAllEqual(new_image.shape, [190, 237, 3])
- self.assertAllClose(
- new_boxes.flatten(), expected_boxes.flatten())
- self.assertAllClose(
- new_keypoints.flatten(), expected_keypoints.flatten())
-
- def testRunRandomCropImageWithMasks(self):
- image = self.createColorfulTestImage()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
-
- tensor_dict = {
- fields.InputDataFields.image: image,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- fields.InputDataFields.groundtruth_instance_masks: masks,
- }
-
- preprocessor_arg_map = preprocessor.get_default_func_arg_map(
- include_instance_masks=True)
-
- preprocessing_options = [(preprocessor.random_crop_image, {})]
-
- with mock.patch.object(
- tf.image,
- 'sample_distorted_bounding_box'
- ) as mock_sample_distorted_bounding_box:
- mock_sample_distorted_bounding_box.return_value = (
- tf.constant([6, 143, 0], dtype=tf.int32),
- tf.constant([190, 237, -1], dtype=tf.int32),
- tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
- distorted_tensor_dict = preprocessor.preprocess(
- tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
- distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- distorted_labels = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_classes]
- distorted_masks = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_instance_masks]
- with self.test_session() as sess:
- (distorted_image_, distorted_boxes_, distorted_labels_,
- distorted_masks_) = sess.run(
- [distorted_image, distorted_boxes, distorted_labels,
- distorted_masks])
-
- expected_boxes = np.array([
- [0.0, 0.0, 0.75789469, 1.0],
- [0.23157893, 0.24050637, 0.75789469, 1.0],
- ], dtype=np.float32)
- self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3])
- self.assertAllEqual(distorted_masks_.shape, [2, 190, 237])
- self.assertAllEqual(distorted_labels_, [1, 2])
- self.assertAllClose(
- distorted_boxes_.flatten(), expected_boxes.flatten())
-
- def testRunRandomCropImageWithKeypointsInsideCrop(self):
- image = self.createColorfulTestImage()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- keypoints = self.createTestKeypointsInsideCrop()
-
- tensor_dict = {
- fields.InputDataFields.image: image,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- fields.InputDataFields.groundtruth_keypoints: keypoints
- }
-
- preprocessor_arg_map = preprocessor.get_default_func_arg_map(
- include_keypoints=True)
-
- preprocessing_options = [(preprocessor.random_crop_image, {})]
-
- with mock.patch.object(
- tf.image,
- 'sample_distorted_bounding_box'
- ) as mock_sample_distorted_bounding_box:
- mock_sample_distorted_bounding_box.return_value = (
- tf.constant([6, 143, 0], dtype=tf.int32),
- tf.constant([190, 237, -1], dtype=tf.int32),
- tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
- distorted_tensor_dict = preprocessor.preprocess(
- tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
- distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- distorted_labels = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_classes]
- distorted_keypoints = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_keypoints]
- with self.test_session() as sess:
- (distorted_image_, distorted_boxes_, distorted_labels_,
- distorted_keypoints_) = sess.run(
- [distorted_image, distorted_boxes, distorted_labels,
- distorted_keypoints])
-
- expected_boxes = np.array([
- [0.0, 0.0, 0.75789469, 1.0],
- [0.23157893, 0.24050637, 0.75789469, 1.0],
- ], dtype=np.float32)
- expected_keypoints = np.array([
- [[0.38947368, 0.07173],
- [0.49473682, 0.24050637],
- [0.60000002, 0.40928277]],
- [[0.38947368, 0.07173],
- [0.49473682, 0.24050637],
- [0.60000002, 0.40928277]]
- ])
- self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3])
- self.assertAllEqual(distorted_labels_, [1, 2])
- self.assertAllClose(
- distorted_boxes_.flatten(), expected_boxes.flatten())
- self.assertAllClose(
- distorted_keypoints_.flatten(), expected_keypoints.flatten())
-
- def testRunRandomCropImageWithKeypointsOutsideCrop(self):
- image = self.createColorfulTestImage()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- keypoints = self.createTestKeypointsOutsideCrop()
-
- tensor_dict = {
- fields.InputDataFields.image: image,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- fields.InputDataFields.groundtruth_keypoints: keypoints
- }
-
- preprocessor_arg_map = preprocessor.get_default_func_arg_map(
- include_keypoints=True)
-
- preprocessing_options = [(preprocessor.random_crop_image, {})]
-
- with mock.patch.object(
- tf.image,
- 'sample_distorted_bounding_box'
- ) as mock_sample_distorted_bounding_box:
- mock_sample_distorted_bounding_box.return_value = (
- tf.constant([6, 143, 0], dtype=tf.int32),
- tf.constant([190, 237, -1], dtype=tf.int32),
- tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
- distorted_tensor_dict = preprocessor.preprocess(
- tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
- distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- distorted_labels = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_classes]
- distorted_keypoints = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_keypoints]
- with self.test_session() as sess:
- (distorted_image_, distorted_boxes_, distorted_labels_,
- distorted_keypoints_) = sess.run(
- [distorted_image, distorted_boxes, distorted_labels,
- distorted_keypoints])
-
- expected_boxes = np.array([
- [0.0, 0.0, 0.75789469, 1.0],
- [0.23157893, 0.24050637, 0.75789469, 1.0],
- ], dtype=np.float32)
- expected_keypoints = np.array([
- [[np.nan, np.nan],
- [np.nan, np.nan],
- [np.nan, np.nan]],
- [[np.nan, np.nan],
- [np.nan, np.nan],
- [np.nan, np.nan]],
- ])
- self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3])
- self.assertAllEqual(distorted_labels_, [1, 2])
- self.assertAllClose(
- distorted_boxes_.flatten(), expected_boxes.flatten())
- self.assertAllClose(
- distorted_keypoints_.flatten(), expected_keypoints.flatten())
-
- def testRunRetainBoxesAboveThreshold(self):
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- label_scores = self.createTestLabelScores()
-
- tensor_dict = {
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- fields.InputDataFields.groundtruth_label_scores: label_scores
- }
-
- preprocessing_options = [
- (preprocessor.retain_boxes_above_threshold, {'threshold': 0.6})
- ]
- preprocessor_arg_map = preprocessor.get_default_func_arg_map(
- include_label_scores=True)
- retained_tensor_dict = preprocessor.preprocess(
- tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
- retained_boxes = retained_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- retained_labels = retained_tensor_dict[
- fields.InputDataFields.groundtruth_classes]
- retained_label_scores = retained_tensor_dict[
- fields.InputDataFields.groundtruth_label_scores]
-
- with self.test_session() as sess:
- (retained_boxes_, retained_labels_,
- retained_label_scores_, expected_retained_boxes_,
- expected_retained_labels_, expected_retained_label_scores_) = sess.run(
- [retained_boxes, retained_labels, retained_label_scores,
- self.expectedBoxesAfterThresholding(),
- self.expectedLabelsAfterThresholding(),
- self.expectedLabelScoresAfterThresholding()])
-
- self.assertAllClose(retained_boxes_, expected_retained_boxes_)
- self.assertAllClose(retained_labels_, expected_retained_labels_)
- self.assertAllClose(
- retained_label_scores_, expected_retained_label_scores_)
-
- def testRunRetainBoxesAboveThresholdWithMasks(self):
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- label_scores = self.createTestLabelScores()
- masks = self.createTestMasks()
-
- tensor_dict = {
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- fields.InputDataFields.groundtruth_label_scores: label_scores,
- fields.InputDataFields.groundtruth_instance_masks: masks
- }
-
- preprocessor_arg_map = preprocessor.get_default_func_arg_map(
- include_label_scores=True,
- include_instance_masks=True)
-
- preprocessing_options = [
- (preprocessor.retain_boxes_above_threshold, {'threshold': 0.6})
- ]
-
- retained_tensor_dict = preprocessor.preprocess(
- tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
- retained_masks = retained_tensor_dict[
- fields.InputDataFields.groundtruth_instance_masks]
-
- with self.test_session() as sess:
- (retained_masks_, expected_masks_) = sess.run(
- [retained_masks,
- self.expectedMasksAfterThresholding()])
- self.assertAllClose(retained_masks_, expected_masks_)
-
- def testRunRetainBoxesAboveThresholdWithKeypoints(self):
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- label_scores = self.createTestLabelScores()
- keypoints = self.createTestKeypoints()
-
- tensor_dict = {
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- fields.InputDataFields.groundtruth_label_scores: label_scores,
- fields.InputDataFields.groundtruth_keypoints: keypoints
- }
-
- preprocessor_arg_map = preprocessor.get_default_func_arg_map(
- include_label_scores=True,
- include_keypoints=True)
-
- preprocessing_options = [
- (preprocessor.retain_boxes_above_threshold, {'threshold': 0.6})
- ]
-
- retained_tensor_dict = preprocessor.preprocess(
- tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
- retained_keypoints = retained_tensor_dict[
- fields.InputDataFields.groundtruth_keypoints]
-
- with self.test_session() as sess:
- (retained_keypoints_, expected_keypoints_) = sess.run(
- [retained_keypoints,
- self.expectedKeypointsAfterThresholding()])
- self.assertAllClose(retained_keypoints_, expected_keypoints_)
-
- def testRunRandomCropToAspectRatioWithMasks(self):
- image = self.createColorfulTestImage()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
-
- tensor_dict = {
- fields.InputDataFields.image: image,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- fields.InputDataFields.groundtruth_instance_masks: masks
- }
-
- preprocessor_arg_map = preprocessor.get_default_func_arg_map(
- include_instance_masks=True)
-
- preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, {})]
-
- with mock.patch.object(preprocessor,
- '_random_integer') as mock_random_integer:
- mock_random_integer.return_value = tf.constant(0, dtype=tf.int32)
- distorted_tensor_dict = preprocessor.preprocess(
- tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
- distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- distorted_labels = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_classes]
- distorted_masks = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_instance_masks]
- with self.test_session() as sess:
- (distorted_image_, distorted_boxes_, distorted_labels_,
- distorted_masks_) = sess.run([
- distorted_image, distorted_boxes, distorted_labels, distorted_masks
- ])
-
- expected_boxes = np.array([0.0, 0.5, 0.75, 1.0], dtype=np.float32)
- self.assertAllEqual(distorted_image_.shape, [1, 200, 200, 3])
- self.assertAllEqual(distorted_labels_, [1])
- self.assertAllClose(distorted_boxes_.flatten(),
- expected_boxes.flatten())
- self.assertAllEqual(distorted_masks_.shape, [1, 200, 200])
-
- def testRunRandomCropToAspectRatioWithKeypoints(self):
- image = self.createColorfulTestImage()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- keypoints = self.createTestKeypoints()
-
- tensor_dict = {
- fields.InputDataFields.image: image,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- fields.InputDataFields.groundtruth_keypoints: keypoints
- }
-
- preprocessor_arg_map = preprocessor.get_default_func_arg_map(
- include_keypoints=True)
-
- preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, {})]
-
- with mock.patch.object(preprocessor,
- '_random_integer') as mock_random_integer:
- mock_random_integer.return_value = tf.constant(0, dtype=tf.int32)
- distorted_tensor_dict = preprocessor.preprocess(
- tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
- distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- distorted_labels = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_classes]
- distorted_keypoints = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_keypoints]
- with self.test_session() as sess:
- (distorted_image_, distorted_boxes_, distorted_labels_,
- distorted_keypoints_) = sess.run([
- distorted_image, distorted_boxes, distorted_labels,
- distorted_keypoints
- ])
-
- expected_boxes = np.array([0.0, 0.5, 0.75, 1.0], dtype=np.float32)
- expected_keypoints = np.array(
- [[0.1, 0.2], [0.2, 0.4], [0.3, 0.6]], dtype=np.float32)
- self.assertAllEqual(distorted_image_.shape, [1, 200, 200, 3])
- self.assertAllEqual(distorted_labels_, [1])
- self.assertAllClose(distorted_boxes_.flatten(),
- expected_boxes.flatten())
- self.assertAllClose(distorted_keypoints_.flatten(),
- expected_keypoints.flatten())
-
- def testRunRandomPadToAspectRatioWithMasks(self):
- image = self.createColorfulTestImage()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
-
- tensor_dict = {
- fields.InputDataFields.image: image,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- fields.InputDataFields.groundtruth_instance_masks: masks
- }
-
- preprocessor_arg_map = preprocessor.get_default_func_arg_map(
- include_instance_masks=True)
-
- preprocessing_options = [(preprocessor.random_pad_to_aspect_ratio, {})]
-
- distorted_tensor_dict = preprocessor.preprocess(
- tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
- distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- distorted_labels = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_classes]
- distorted_masks = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_instance_masks]
- with self.test_session() as sess:
- (distorted_image_, distorted_boxes_, distorted_labels_,
- distorted_masks_) = sess.run([
- distorted_image, distorted_boxes, distorted_labels, distorted_masks
- ])
-
- expected_boxes = np.array(
- [[0.0, 0.25, 0.375, 1.0], [0.125, 0.5, 0.375, 1.0]], dtype=np.float32)
- self.assertAllEqual(distorted_image_.shape, [1, 400, 400, 3])
- self.assertAllEqual(distorted_labels_, [1, 2])
- self.assertAllClose(distorted_boxes_.flatten(),
- expected_boxes.flatten())
- self.assertAllEqual(distorted_masks_.shape, [2, 400, 400])
-
- def testRunRandomPadToAspectRatioWithKeypoints(self):
- image = self.createColorfulTestImage()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- keypoints = self.createTestKeypoints()
-
- tensor_dict = {
- fields.InputDataFields.image: image,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- fields.InputDataFields.groundtruth_keypoints: keypoints
- }
-
- preprocessor_arg_map = preprocessor.get_default_func_arg_map(
- include_keypoints=True)
-
- preprocessing_options = [(preprocessor.random_pad_to_aspect_ratio, {})]
-
- distorted_tensor_dict = preprocessor.preprocess(
- tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
- distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- distorted_labels = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_classes]
- distorted_keypoints = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_keypoints]
- with self.test_session() as sess:
- (distorted_image_, distorted_boxes_, distorted_labels_,
- distorted_keypoints_) = sess.run([
- distorted_image, distorted_boxes, distorted_labels,
- distorted_keypoints
- ])
-
- expected_boxes = np.array(
- [[0.0, 0.25, 0.375, 1.0], [0.125, 0.5, 0.375, 1.0]], dtype=np.float32)
- expected_keypoints = np.array([
- [[0.05, 0.1], [0.1, 0.2], [0.15, 0.3]],
- [[0.2, 0.4], [0.25, 0.5], [0.3, 0.6]],
- ], dtype=np.float32)
- self.assertAllEqual(distorted_image_.shape, [1, 400, 400, 3])
- self.assertAllEqual(distorted_labels_, [1, 2])
- self.assertAllClose(distorted_boxes_.flatten(),
- expected_boxes.flatten())
- self.assertAllClose(distorted_keypoints_.flatten(),
- expected_keypoints.flatten())
-
- def testRandomPadImage(self):
- preprocessing_options = [(preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- })]
-
- images = self.createTestImages()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- tensor_dict = {
- fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- }
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
- images = tensor_dict[fields.InputDataFields.image]
-
- preprocessing_options = [(preprocessor.random_pad_image, {})]
- padded_tensor_dict = preprocessor.preprocess(tensor_dict,
- preprocessing_options)
-
- padded_images = padded_tensor_dict[fields.InputDataFields.image]
- padded_boxes = padded_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- boxes_shape = tf.shape(boxes)
- padded_boxes_shape = tf.shape(padded_boxes)
- images_shape = tf.shape(images)
- padded_images_shape = tf.shape(padded_images)
-
- with self.test_session() as sess:
- (boxes_shape_, padded_boxes_shape_, images_shape_,
- padded_images_shape_, boxes_, padded_boxes_) = sess.run(
- [boxes_shape, padded_boxes_shape, images_shape,
- padded_images_shape, boxes, padded_boxes])
- self.assertAllEqual(boxes_shape_, padded_boxes_shape_)
- self.assertTrue((images_shape_[1] >= padded_images_shape_[1] * 0.5).all)
- self.assertTrue((images_shape_[2] >= padded_images_shape_[2] * 0.5).all)
- self.assertTrue((images_shape_[1] <= padded_images_shape_[1]).all)
- self.assertTrue((images_shape_[2] <= padded_images_shape_[2]).all)
- self.assertTrue(np.all((boxes_[:, 2] - boxes_[:, 0]) >= (
- padded_boxes_[:, 2] - padded_boxes_[:, 0])))
- self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= (
- padded_boxes_[:, 3] - padded_boxes_[:, 1])))
-
- def testRandomCropPadImageWithRandomCoefOne(self):
- preprocessing_options = [(preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- })]
-
- images = self.createTestImages()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- tensor_dict = {
- fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- }
- tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
- images = tensor_dict[fields.InputDataFields.image]
-
- preprocessing_options = [(preprocessor.random_crop_pad_image, {
- 'random_coef': 1.0
- })]
- padded_tensor_dict = preprocessor.preprocess(tensor_dict,
- preprocessing_options)
-
- padded_images = padded_tensor_dict[fields.InputDataFields.image]
- padded_boxes = padded_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- boxes_shape = tf.shape(boxes)
- padded_boxes_shape = tf.shape(padded_boxes)
- images_shape = tf.shape(images)
- padded_images_shape = tf.shape(padded_images)
-
- with self.test_session() as sess:
- (boxes_shape_, padded_boxes_shape_, images_shape_,
- padded_images_shape_, boxes_, padded_boxes_) = sess.run(
- [boxes_shape, padded_boxes_shape, images_shape,
- padded_images_shape, boxes, padded_boxes])
- self.assertAllEqual(boxes_shape_, padded_boxes_shape_)
- self.assertTrue((images_shape_[1] >= padded_images_shape_[1] * 0.5).all)
- self.assertTrue((images_shape_[2] >= padded_images_shape_[2] * 0.5).all)
- self.assertTrue((images_shape_[1] <= padded_images_shape_[1]).all)
- self.assertTrue((images_shape_[2] <= padded_images_shape_[2]).all)
- self.assertTrue(np.all((boxes_[:, 2] - boxes_[:, 0]) >= (
- padded_boxes_[:, 2] - padded_boxes_[:, 0])))
- self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= (
- padded_boxes_[:, 3] - padded_boxes_[:, 1])))
-
- def testRandomCropToAspectRatio(self):
- images = self.createTestImages()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- tensor_dict = {
- fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- }
- tensor_dict = preprocessor.preprocess(tensor_dict, [])
- images = tensor_dict[fields.InputDataFields.image]
-
- preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, {
- 'aspect_ratio': 2.0
- })]
- cropped_tensor_dict = preprocessor.preprocess(tensor_dict,
- preprocessing_options)
-
- cropped_images = cropped_tensor_dict[fields.InputDataFields.image]
- cropped_boxes = cropped_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- boxes_shape = tf.shape(boxes)
- cropped_boxes_shape = tf.shape(cropped_boxes)
- images_shape = tf.shape(images)
- cropped_images_shape = tf.shape(cropped_images)
-
- with self.test_session() as sess:
- (boxes_shape_, cropped_boxes_shape_, images_shape_,
- cropped_images_shape_) = sess.run([
- boxes_shape, cropped_boxes_shape, images_shape, cropped_images_shape
- ])
- self.assertAllEqual(boxes_shape_, cropped_boxes_shape_)
- self.assertEqual(images_shape_[1], cropped_images_shape_[1] * 2)
- self.assertEqual(images_shape_[2], cropped_images_shape_[2])
-
- def testRandomPadToAspectRatio(self):
- images = self.createTestImages()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- tensor_dict = {
- fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- }
- tensor_dict = preprocessor.preprocess(tensor_dict, [])
- images = tensor_dict[fields.InputDataFields.image]
-
- preprocessing_options = [(preprocessor.random_pad_to_aspect_ratio, {
- 'aspect_ratio': 2.0
- })]
- padded_tensor_dict = preprocessor.preprocess(tensor_dict,
- preprocessing_options)
-
- padded_images = padded_tensor_dict[fields.InputDataFields.image]
- padded_boxes = padded_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- boxes_shape = tf.shape(boxes)
- padded_boxes_shape = tf.shape(padded_boxes)
- images_shape = tf.shape(images)
- padded_images_shape = tf.shape(padded_images)
-
- with self.test_session() as sess:
- (boxes_shape_, padded_boxes_shape_, images_shape_,
- padded_images_shape_) = sess.run([
- boxes_shape, padded_boxes_shape, images_shape, padded_images_shape
- ])
- self.assertAllEqual(boxes_shape_, padded_boxes_shape_)
- self.assertEqual(images_shape_[1], padded_images_shape_[1])
- self.assertEqual(2 * images_shape_[2], padded_images_shape_[2])
-
- def testRandomBlackPatches(self):
- preprocessing_options = []
- preprocessing_options.append((preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }))
- preprocessing_options.append((preprocessor.random_black_patches, {
- 'size_to_image_ratio': 0.5
- }))
- images = self.createTestImages()
- tensor_dict = {fields.InputDataFields.image: images}
- blacked_tensor_dict = preprocessor.preprocess(tensor_dict,
- preprocessing_options)
- blacked_images = blacked_tensor_dict[fields.InputDataFields.image]
- images_shape = tf.shape(images)
- blacked_images_shape = tf.shape(blacked_images)
-
- with self.test_session() as sess:
- (images_shape_, blacked_images_shape_) = sess.run(
- [images_shape, blacked_images_shape])
- self.assertAllEqual(images_shape_, blacked_images_shape_)
-
- def testRandomResizeMethod(self):
- preprocessing_options = []
- preprocessing_options.append((preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }))
- preprocessing_options.append((preprocessor.random_resize_method, {
- 'target_size': (75, 150)
- }))
- images = self.createTestImages()
- tensor_dict = {fields.InputDataFields.image: images}
- resized_tensor_dict = preprocessor.preprocess(tensor_dict,
- preprocessing_options)
- resized_images = resized_tensor_dict[fields.InputDataFields.image]
- resized_images_shape = tf.shape(resized_images)
- expected_images_shape = tf.constant([1, 75, 150, 3], dtype=tf.int32)
-
- with self.test_session() as sess:
- (expected_images_shape_, resized_images_shape_) = sess.run(
- [expected_images_shape, resized_images_shape])
- self.assertAllEqual(expected_images_shape_,
- resized_images_shape_)
-
- def testResizeImageWithMasks(self):
- """Tests image resizing, checking output sizes."""
- in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
- in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
- height = 50
- width = 100
- expected_image_shape_list = [[50, 100, 3], [50, 100, 3]]
- expected_masks_shape_list = [[15, 50, 100], [10, 50, 100]]
-
- for (in_image_shape, expected_image_shape, in_masks_shape,
- expected_mask_shape) in zip(in_image_shape_list,
- expected_image_shape_list,
- in_masks_shape_list,
- expected_masks_shape_list):
- in_image = tf.random_uniform(in_image_shape)
- in_masks = tf.random_uniform(in_masks_shape)
- out_image, out_masks = preprocessor.resize_image(
- in_image, in_masks, new_height=height, new_width=width)
- out_image_shape = tf.shape(out_image)
- out_masks_shape = tf.shape(out_masks)
-
- with self.test_session() as sess:
- out_image_shape, out_masks_shape = sess.run(
- [out_image_shape, out_masks_shape])
- self.assertAllEqual(out_image_shape, expected_image_shape)
- self.assertAllEqual(out_masks_shape, expected_mask_shape)
-
- def testResizeImageWithNoInstanceMask(self):
- """Tests image resizing, checking output sizes."""
- in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
- in_masks_shape_list = [[0, 60, 40], [0, 15, 30]]
- height = 50
- width = 100
- expected_image_shape_list = [[50, 100, 3], [50, 100, 3]]
- expected_masks_shape_list = [[0, 50, 100], [0, 50, 100]]
-
- for (in_image_shape, expected_image_shape, in_masks_shape,
- expected_mask_shape) in zip(in_image_shape_list,
- expected_image_shape_list,
- in_masks_shape_list,
- expected_masks_shape_list):
- in_image = tf.random_uniform(in_image_shape)
- in_masks = tf.random_uniform(in_masks_shape)
- out_image, out_masks = preprocessor.resize_image(
- in_image, in_masks, new_height=height, new_width=width)
- out_image_shape = tf.shape(out_image)
- out_masks_shape = tf.shape(out_masks)
-
- with self.test_session() as sess:
- out_image_shape, out_masks_shape = sess.run(
- [out_image_shape, out_masks_shape])
- self.assertAllEqual(out_image_shape, expected_image_shape)
- self.assertAllEqual(out_masks_shape, expected_mask_shape)
-
- def testResizeToRangePreservesStaticSpatialShape(self):
- """Tests image resizing, checking output sizes."""
- in_shape_list = [[60, 40, 3], [15, 30, 3], [15, 50, 3]]
- min_dim = 50
- max_dim = 100
- expected_shape_list = [[75, 50, 3], [50, 100, 3], [30, 100, 3]]
-
- for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
- in_image = tf.random_uniform(in_shape)
- out_image = preprocessor.resize_to_range(
- in_image, min_dimension=min_dim, max_dimension=max_dim)
- self.assertAllEqual(out_image.get_shape().as_list(), expected_shape)
-
- def testResizeToRangeWithDynamicSpatialShape(self):
- """Tests image resizing, checking output sizes."""
- in_shape_list = [[60, 40, 3], [15, 30, 3], [15, 50, 3]]
- min_dim = 50
- max_dim = 100
- expected_shape_list = [[75, 50, 3], [50, 100, 3], [30, 100, 3]]
-
- for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
- in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
- out_image = preprocessor.resize_to_range(
- in_image, min_dimension=min_dim, max_dimension=max_dim)
- out_image_shape = tf.shape(out_image)
- with self.test_session() as sess:
- out_image_shape = sess.run(out_image_shape,
- feed_dict={in_image:
- np.random.randn(*in_shape)})
- self.assertAllEqual(out_image_shape, expected_shape)
-
- def testResizeToRangeWithMasksPreservesStaticSpatialShape(self):
- """Tests image resizing, checking output sizes."""
- in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
- in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
- min_dim = 50
- max_dim = 100
- expected_image_shape_list = [[75, 50, 3], [50, 100, 3]]
- expected_masks_shape_list = [[15, 75, 50], [10, 50, 100]]
-
- for (in_image_shape, expected_image_shape, in_masks_shape,
- expected_mask_shape) in zip(in_image_shape_list,
- expected_image_shape_list,
- in_masks_shape_list,
- expected_masks_shape_list):
- in_image = tf.random_uniform(in_image_shape)
- in_masks = tf.random_uniform(in_masks_shape)
- out_image, out_masks = preprocessor.resize_to_range(
- in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
- self.assertAllEqual(out_masks.get_shape().as_list(), expected_mask_shape)
- self.assertAllEqual(out_image.get_shape().as_list(), expected_image_shape)
-
- def testResizeToRangeWithMasksAndDynamicSpatialShape(self):
- """Tests image resizing, checking output sizes."""
- in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
- in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
- min_dim = 50
- max_dim = 100
- expected_image_shape_list = [[75, 50, 3], [50, 100, 3]]
- expected_masks_shape_list = [[15, 75, 50], [10, 50, 100]]
-
- for (in_image_shape, expected_image_shape, in_masks_shape,
- expected_mask_shape) in zip(in_image_shape_list,
- expected_image_shape_list,
- in_masks_shape_list,
- expected_masks_shape_list):
- in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
- in_masks = tf.placeholder(tf.float32, shape=(None, None, None))
- in_masks = tf.random_uniform(in_masks_shape)
- out_image, out_masks = preprocessor.resize_to_range(
- in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
- out_image_shape = tf.shape(out_image)
- out_masks_shape = tf.shape(out_masks)
-
- with self.test_session() as sess:
- out_image_shape, out_masks_shape = sess.run(
- [out_image_shape, out_masks_shape],
- feed_dict={
- in_image: np.random.randn(*in_image_shape),
- in_masks: np.random.randn(*in_masks_shape)
- })
- self.assertAllEqual(out_image_shape, expected_image_shape)
- self.assertAllEqual(out_masks_shape, expected_mask_shape)
-
- def testResizeToRangeWithInstanceMasksTensorOfSizeZero(self):
- """Tests image resizing, checking output sizes."""
- in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
- in_masks_shape_list = [[0, 60, 40], [0, 15, 30]]
- min_dim = 50
- max_dim = 100
- expected_image_shape_list = [[75, 50, 3], [50, 100, 3]]
- expected_masks_shape_list = [[0, 75, 50], [0, 50, 100]]
-
- for (in_image_shape, expected_image_shape, in_masks_shape,
- expected_mask_shape) in zip(in_image_shape_list,
- expected_image_shape_list,
- in_masks_shape_list,
- expected_masks_shape_list):
- in_image = tf.random_uniform(in_image_shape)
- in_masks = tf.random_uniform(in_masks_shape)
- out_image, out_masks = preprocessor.resize_to_range(
- in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
- out_image_shape = tf.shape(out_image)
- out_masks_shape = tf.shape(out_masks)
-
- with self.test_session() as sess:
- out_image_shape, out_masks_shape = sess.run(
- [out_image_shape, out_masks_shape])
- self.assertAllEqual(out_image_shape, expected_image_shape)
- self.assertAllEqual(out_masks_shape, expected_mask_shape)
-
- def testResizeToRange4DImageTensor(self):
- image = tf.random_uniform([1, 200, 300, 3])
- with self.assertRaises(ValueError):
- preprocessor.resize_to_range(image, 500, 600)
-
- def testResizeToRangeSameMinMax(self):
- """Tests image resizing, checking output sizes."""
- in_shape_list = [[312, 312, 3], [299, 299, 3]]
- min_dim = 320
- max_dim = 320
- expected_shape_list = [[320, 320, 3], [320, 320, 3]]
-
- for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
- in_image = tf.random_uniform(in_shape)
- out_image = preprocessor.resize_to_range(
- in_image, min_dimension=min_dim, max_dimension=max_dim)
- out_image_shape = tf.shape(out_image)
-
- with self.test_session() as sess:
- out_image_shape = sess.run(out_image_shape)
- self.assertAllEqual(out_image_shape, expected_shape)
-
- def testResizeToMinDimensionTensorShapes(self):
- in_image_shape_list = [[60, 55, 3], [15, 30, 3]]
- in_masks_shape_list = [[15, 60, 55], [10, 15, 30]]
- min_dim = 50
- expected_image_shape_list = [[60, 55, 3], [50, 100, 3]]
- expected_masks_shape_list = [[15, 60, 55], [10, 50, 100]]
-
- for (in_image_shape, expected_image_shape, in_masks_shape,
- expected_mask_shape) in zip(in_image_shape_list,
- expected_image_shape_list,
- in_masks_shape_list,
- expected_masks_shape_list):
- in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
- in_masks = tf.placeholder(tf.float32, shape=(None, None, None))
- in_masks = tf.random_uniform(in_masks_shape)
- out_image, out_masks = preprocessor.resize_to_min_dimension(
- in_image, in_masks, min_dimension=min_dim)
- out_image_shape = tf.shape(out_image)
- out_masks_shape = tf.shape(out_masks)
-
- with self.test_session() as sess:
- out_image_shape, out_masks_shape = sess.run(
- [out_image_shape, out_masks_shape],
- feed_dict={
- in_image: np.random.randn(*in_image_shape),
- in_masks: np.random.randn(*in_masks_shape)
- })
- self.assertAllEqual(out_image_shape, expected_image_shape)
- self.assertAllEqual(out_masks_shape, expected_mask_shape)
-
- def testResizeToMinDimensionWithInstanceMasksTensorOfSizeZero(self):
- """Tests image resizing, checking output sizes."""
- in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
- in_masks_shape_list = [[0, 60, 40], [0, 15, 30]]
- min_dim = 50
- expected_image_shape_list = [[75, 50, 3], [50, 100, 3]]
- expected_masks_shape_list = [[0, 75, 50], [0, 50, 100]]
-
- for (in_image_shape, expected_image_shape, in_masks_shape,
- expected_mask_shape) in zip(in_image_shape_list,
- expected_image_shape_list,
- in_masks_shape_list,
- expected_masks_shape_list):
- in_image = tf.random_uniform(in_image_shape)
- in_masks = tf.random_uniform(in_masks_shape)
- out_image, out_masks = preprocessor.resize_to_min_dimension(
- in_image, in_masks, min_dimension=min_dim)
- out_image_shape = tf.shape(out_image)
- out_masks_shape = tf.shape(out_masks)
-
- with self.test_session() as sess:
- out_image_shape, out_masks_shape = sess.run(
- [out_image_shape, out_masks_shape])
- self.assertAllEqual(out_image_shape, expected_image_shape)
- self.assertAllEqual(out_masks_shape, expected_mask_shape)
-
- def testResizeToMinDimensionRaisesErrorOn4DImage(self):
- image = tf.random_uniform([1, 200, 300, 3])
- with self.assertRaises(ValueError):
- preprocessor.resize_to_min_dimension(image, 500)
-
- def testScaleBoxesToPixelCoordinates(self):
- """Tests box scaling, checking scaled values."""
- in_shape = [60, 40, 3]
- in_boxes = [[0.1, 0.2, 0.4, 0.6],
- [0.5, 0.3, 0.9, 0.7]]
-
- expected_boxes = [[6., 8., 24., 24.],
- [30., 12., 54., 28.]]
-
- in_image = tf.random_uniform(in_shape)
- in_boxes = tf.constant(in_boxes)
- _, out_boxes = preprocessor.scale_boxes_to_pixel_coordinates(
- in_image, boxes=in_boxes)
- with self.test_session() as sess:
- out_boxes = sess.run(out_boxes)
- self.assertAllClose(out_boxes, expected_boxes)
-
- def testScaleBoxesToPixelCoordinatesWithKeypoints(self):
- """Tests box and keypoint scaling, checking scaled values."""
- in_shape = [60, 40, 3]
- in_boxes = self.createTestBoxes()
- in_keypoints = self.createTestKeypoints()
-
- expected_boxes = [[0., 10., 45., 40.],
- [15., 20., 45., 40.]]
- expected_keypoints = [
- [[6., 4.], [12., 8.], [18., 12.]],
- [[24., 16.], [30., 20.], [36., 24.]],
- ]
-
- in_image = tf.random_uniform(in_shape)
- _, out_boxes, out_keypoints = preprocessor.scale_boxes_to_pixel_coordinates(
- in_image, boxes=in_boxes, keypoints=in_keypoints)
- with self.test_session() as sess:
- out_boxes_, out_keypoints_ = sess.run([out_boxes, out_keypoints])
- self.assertAllClose(out_boxes_, expected_boxes)
- self.assertAllClose(out_keypoints_, expected_keypoints)
-
- def testSubtractChannelMean(self):
- """Tests whether channel means have been subtracted."""
- with self.test_session():
- image = tf.zeros((240, 320, 3))
- means = [1, 2, 3]
- actual = preprocessor.subtract_channel_mean(image, means=means)
- actual = actual.eval()
-
- self.assertTrue((actual[:, :, 0] == -1).all())
- self.assertTrue((actual[:, :, 1] == -2).all())
- self.assertTrue((actual[:, :, 2] == -3).all())
-
- def testOneHotEncoding(self):
- """Tests one hot encoding of multiclass labels."""
- with self.test_session():
- labels = tf.constant([1, 4, 2], dtype=tf.int32)
- one_hot = preprocessor.one_hot_encoding(labels, num_classes=5)
- one_hot = one_hot.eval()
-
- self.assertAllEqual([0, 1, 1, 0, 1], one_hot)
-
- def testSSDRandomCrop(self):
- preprocessing_options = [
- (preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }),
- (preprocessor.ssd_random_crop, {})]
- images = self.createTestImages()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- tensor_dict = {
- fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- }
- distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
- preprocessing_options)
- distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
-
- images_rank = tf.rank(images)
- distorted_images_rank = tf.rank(distorted_images)
- boxes_rank = tf.rank(boxes)
- distorted_boxes_rank = tf.rank(distorted_boxes)
-
- with self.test_session() as sess:
- (boxes_rank_, distorted_boxes_rank_, images_rank_,
- distorted_images_rank_) = sess.run(
- [boxes_rank, distorted_boxes_rank, images_rank,
- distorted_images_rank])
- self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
- self.assertAllEqual(images_rank_, distorted_images_rank_)
-
- def testSSDRandomCropPad(self):
- images = self.createTestImages()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- preprocessing_options = [
- (preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }),
- (preprocessor.ssd_random_crop_pad, {})]
- tensor_dict = {
- fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels,
- }
- distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
- preprocessing_options)
- distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
-
- images_rank = tf.rank(images)
- distorted_images_rank = tf.rank(distorted_images)
- boxes_rank = tf.rank(boxes)
- distorted_boxes_rank = tf.rank(distorted_boxes)
-
- with self.test_session() as sess:
- (boxes_rank_, distorted_boxes_rank_, images_rank_,
- distorted_images_rank_) = sess.run([
- boxes_rank, distorted_boxes_rank, images_rank, distorted_images_rank
- ])
- self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
- self.assertAllEqual(images_rank_, distorted_images_rank_)
-
- def _testSSDRandomCropFixedAspectRatio(self,
- include_label_scores,
- include_instance_masks,
- include_keypoints):
- images = self.createTestImages()
- boxes = self.createTestBoxes()
- labels = self.createTestLabels()
- preprocessing_options = [
- (preprocessor.normalize_image, {
- 'original_minval': 0,
- 'original_maxval': 255,
- 'target_minval': 0,
- 'target_maxval': 1
- }),
- (preprocessor.ssd_random_crop_fixed_aspect_ratio, {})]
- tensor_dict = {
- fields.InputDataFields.image: images,
- fields.InputDataFields.groundtruth_boxes: boxes,
- fields.InputDataFields.groundtruth_classes: labels
- }
- if include_label_scores:
- label_scores = self.createTestLabelScores()
- tensor_dict[fields.InputDataFields.groundtruth_label_scores] = (
- label_scores)
- if include_instance_masks:
- masks = self.createTestMasks()
- tensor_dict[fields.InputDataFields.groundtruth_instance_masks] = masks
- if include_keypoints:
- keypoints = self.createTestKeypoints()
- tensor_dict[fields.InputDataFields.groundtruth_keypoints] = keypoints
-
- preprocessor_arg_map = preprocessor.get_default_func_arg_map(
- include_label_scores=include_label_scores,
- include_instance_masks=include_instance_masks,
- include_keypoints=include_keypoints)
- distorted_tensor_dict = preprocessor.preprocess(
- tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
- distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
- distorted_boxes = distorted_tensor_dict[
- fields.InputDataFields.groundtruth_boxes]
- images_rank = tf.rank(images)
- distorted_images_rank = tf.rank(distorted_images)
- boxes_rank = tf.rank(boxes)
- distorted_boxes_rank = tf.rank(distorted_boxes)
-
- with self.test_session() as sess:
- (boxes_rank_, distorted_boxes_rank_, images_rank_,
- distorted_images_rank_) = sess.run(
- [boxes_rank, distorted_boxes_rank, images_rank,
- distorted_images_rank])
- self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
- self.assertAllEqual(images_rank_, distorted_images_rank_)
-
- def testSSDRandomCropFixedAspectRatio(self):
- self._testSSDRandomCropFixedAspectRatio(include_label_scores=False,
- include_instance_masks=False,
- include_keypoints=False)
-
- def testSSDRandomCropFixedAspectRatioWithMasksAndKeypoints(self):
- self._testSSDRandomCropFixedAspectRatio(include_label_scores=False,
- include_instance_masks=True,
- include_keypoints=True)
-
- def testSSDRandomCropFixedAspectRatioWithLabelScoresMasksAndKeypoints(self):
- self._testSSDRandomCropFixedAspectRatio(include_label_scores=True,
- include_instance_masks=True,
- include_keypoints=True)
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/core/region_similarity_calculator.py b/object_detection/core/region_similarity_calculator.py
deleted file mode 100644
index f344006a..00000000
--- a/object_detection/core/region_similarity_calculator.py
+++ /dev/null
@@ -1,114 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Region Similarity Calculators for BoxLists.
-
-Region Similarity Calculators compare a pairwise measure of similarity
-between the boxes in two BoxLists.
-"""
-from abc import ABCMeta
-from abc import abstractmethod
-
-import tensorflow as tf
-
-from object_detection.core import box_list_ops
-
-
-class RegionSimilarityCalculator(object):
- """Abstract base class for region similarity calculator."""
- __metaclass__ = ABCMeta
-
- def compare(self, boxlist1, boxlist2, scope=None):
- """Computes matrix of pairwise similarity between BoxLists.
-
- This op (to be overriden) computes a measure of pairwise similarity between
- the boxes in the given BoxLists. Higher values indicate more similarity.
-
- Note that this method simply measures similarity and does not explicitly
- perform a matching.
-
- Args:
- boxlist1: BoxList holding N boxes.
- boxlist2: BoxList holding M boxes.
- scope: Op scope name. Defaults to 'Compare' if None.
-
- Returns:
- a (float32) tensor of shape [N, M] with pairwise similarity score.
- """
- with tf.name_scope(scope, 'Compare', [boxlist1, boxlist2]) as scope:
- return self._compare(boxlist1, boxlist2)
-
- @abstractmethod
- def _compare(self, boxlist1, boxlist2):
- pass
-
-
-class IouSimilarity(RegionSimilarityCalculator):
- """Class to compute similarity based on Intersection over Union (IOU) metric.
-
- This class computes pairwise similarity between two BoxLists based on IOU.
- """
-
- def _compare(self, boxlist1, boxlist2):
- """Compute pairwise IOU similarity between the two BoxLists.
-
- Args:
- boxlist1: BoxList holding N boxes.
- boxlist2: BoxList holding M boxes.
-
- Returns:
- A tensor with shape [N, M] representing pairwise iou scores.
- """
- return box_list_ops.iou(boxlist1, boxlist2)
-
-
-class NegSqDistSimilarity(RegionSimilarityCalculator):
- """Class to compute similarity based on the squared distance metric.
-
- This class computes pairwise similarity between two BoxLists based on the
- negative squared distance metric.
- """
-
- def _compare(self, boxlist1, boxlist2):
- """Compute matrix of (negated) sq distances.
-
- Args:
- boxlist1: BoxList holding N boxes.
- boxlist2: BoxList holding M boxes.
-
- Returns:
- A tensor with shape [N, M] representing negated pairwise squared distance.
- """
- return -1 * box_list_ops.sq_dist(boxlist1, boxlist2)
-
-
-class IoaSimilarity(RegionSimilarityCalculator):
- """Class to compute similarity based on Intersection over Area (IOA) metric.
-
- This class computes pairwise similarity between two BoxLists based on their
- pairwise intersections divided by the areas of second BoxLists.
- """
-
- def _compare(self, boxlist1, boxlist2):
- """Compute pairwise IOA similarity between the two BoxLists.
-
- Args:
- boxlist1: BoxList holding N boxes.
- boxlist2: BoxList holding M boxes.
-
- Returns:
- A tensor with shape [N, M] representing pairwise IOA scores.
- """
- return box_list_ops.ioa(boxlist1, boxlist2)
diff --git a/object_detection/core/region_similarity_calculator_test.py b/object_detection/core/region_similarity_calculator_test.py
deleted file mode 100644
index 162151a3..00000000
--- a/object_detection/core/region_similarity_calculator_test.py
+++ /dev/null
@@ -1,75 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for region_similarity_calculator."""
-import tensorflow as tf
-
-from object_detection.core import box_list
-from object_detection.core import region_similarity_calculator
-
-
-class RegionSimilarityCalculatorTest(tf.test.TestCase):
-
- def test_get_correct_pairwise_similarity_based_on_iou(self):
- corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
- corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
- [0.0, 0.0, 20.0, 20.0]])
- exp_output = [[2.0 / 16.0, 0, 6.0 / 400.0], [1.0 / 16.0, 0.0, 5.0 / 400.0]]
- boxes1 = box_list.BoxList(corners1)
- boxes2 = box_list.BoxList(corners2)
- iou_similarity_calculator = region_similarity_calculator.IouSimilarity()
- iou_similarity = iou_similarity_calculator.compare(boxes1, boxes2)
- with self.test_session() as sess:
- iou_output = sess.run(iou_similarity)
- self.assertAllClose(iou_output, exp_output)
-
- def test_get_correct_pairwise_similarity_based_on_squared_distances(self):
- corners1 = tf.constant([[0.0, 0.0, 0.0, 0.0],
- [1.0, 1.0, 0.0, 2.0]])
- corners2 = tf.constant([[3.0, 4.0, 1.0, 0.0],
- [-4.0, 0.0, 0.0, 3.0],
- [0.0, 0.0, 0.0, 0.0]])
- exp_output = [[-26, -25, 0], [-18, -27, -6]]
- boxes1 = box_list.BoxList(corners1)
- boxes2 = box_list.BoxList(corners2)
- dist_similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
- dist_similarity = dist_similarity_calc.compare(boxes1, boxes2)
- with self.test_session() as sess:
- dist_output = sess.run(dist_similarity)
- self.assertAllClose(dist_output, exp_output)
-
- def test_get_correct_pairwise_similarity_based_on_ioa(self):
- corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
- corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
- [0.0, 0.0, 20.0, 20.0]])
- exp_output_1 = [[2.0 / 12.0, 0, 6.0 / 400.0],
- [1.0 / 12.0, 0.0, 5.0 / 400.0]]
- exp_output_2 = [[2.0 / 6.0, 1.0 / 5.0],
- [0, 0],
- [6.0 / 6.0, 5.0 / 5.0]]
- boxes1 = box_list.BoxList(corners1)
- boxes2 = box_list.BoxList(corners2)
- ioa_similarity_calculator = region_similarity_calculator.IoaSimilarity()
- ioa_similarity_1 = ioa_similarity_calculator.compare(boxes1, boxes2)
- ioa_similarity_2 = ioa_similarity_calculator.compare(boxes2, boxes1)
- with self.test_session() as sess:
- iou_output_1, iou_output_2 = sess.run(
- [ioa_similarity_1, ioa_similarity_2])
- self.assertAllClose(iou_output_1, exp_output_1)
- self.assertAllClose(iou_output_2, exp_output_2)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/core/standard_fields.py b/object_detection/core/standard_fields.py
deleted file mode 100644
index 7cbf5ee8..00000000
--- a/object_detection/core/standard_fields.py
+++ /dev/null
@@ -1,205 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Contains classes specifying naming conventions used for object detection.
-
-
-Specifies:
- InputDataFields: standard fields used by reader/preprocessor/batcher.
- DetectionResultFields: standard fields returned by object detector.
- BoxListFields: standard field used by BoxList
- TfExampleFields: standard fields for tf-example data format (go/tf-example).
-"""
-
-
-class InputDataFields(object):
- """Names for the input tensors.
-
- Holds the standard data field names to use for identifying input tensors. This
- should be used by the decoder to identify keys for the returned tensor_dict
- containing input tensors. And it should be used by the model to identify the
- tensors it needs.
-
- Attributes:
- image: image.
- original_image: image in the original input size.
- key: unique key corresponding to image.
- source_id: source of the original image.
- filename: original filename of the dataset (without common path).
- groundtruth_image_classes: image-level class labels.
- groundtruth_boxes: coordinates of the ground truth boxes in the image.
- groundtruth_classes: box-level class labels.
- groundtruth_label_types: box-level label types (e.g. explicit negative).
- groundtruth_is_crowd: [DEPRECATED, use groundtruth_group_of instead]
- is the groundtruth a single object or a crowd.
- groundtruth_area: area of a groundtruth segment.
- groundtruth_difficult: is a `difficult` object
- groundtruth_group_of: is a `group_of` objects, e.g. multiple objects of the
- same class, forming a connected group, where instances are heavily
- occluding each other.
- proposal_boxes: coordinates of object proposal boxes.
- proposal_objectness: objectness score of each proposal.
- groundtruth_instance_masks: ground truth instance masks.
- groundtruth_instance_boundaries: ground truth instance boundaries.
- groundtruth_instance_classes: instance mask-level class labels.
- groundtruth_keypoints: ground truth keypoints.
- groundtruth_keypoint_visibilities: ground truth keypoint visibilities.
- groundtruth_label_scores: groundtruth label scores.
- """
- image = 'image'
- original_image = 'original_image'
- key = 'key'
- source_id = 'source_id'
- filename = 'filename'
- groundtruth_image_classes = 'groundtruth_image_classes'
- groundtruth_boxes = 'groundtruth_boxes'
- groundtruth_classes = 'groundtruth_classes'
- groundtruth_label_types = 'groundtruth_label_types'
- groundtruth_is_crowd = 'groundtruth_is_crowd'
- groundtruth_area = 'groundtruth_area'
- groundtruth_difficult = 'groundtruth_difficult'
- groundtruth_group_of = 'groundtruth_group_of'
- proposal_boxes = 'proposal_boxes'
- proposal_objectness = 'proposal_objectness'
- groundtruth_instance_masks = 'groundtruth_instance_masks'
- groundtruth_instance_boundaries = 'groundtruth_instance_boundaries'
- groundtruth_instance_classes = 'groundtruth_instance_classes'
- groundtruth_keypoints = 'groundtruth_keypoints'
- groundtruth_keypoint_visibilities = 'groundtruth_keypoint_visibilities'
- groundtruth_label_scores = 'groundtruth_label_scores'
-
-
-class DetectionResultFields(object):
- """Naming converntions for storing the output of the detector.
-
- Attributes:
- source_id: source of the original image.
- key: unique key corresponding to image.
- detection_boxes: coordinates of the detection boxes in the image.
- detection_scores: detection scores for the detection boxes in the image.
- detection_classes: detection-level class labels.
- detection_masks: contains a segmentation mask for each detection box.
- detection_boundaries: contains an object boundary for each detection box.
- detection_keypoints: contains detection keypoints for each detection box.
- num_detections: number of detections in the batch.
- """
-
- source_id = 'source_id'
- key = 'key'
- detection_boxes = 'detection_boxes'
- detection_scores = 'detection_scores'
- detection_classes = 'detection_classes'
- detection_masks = 'detection_masks'
- detection_boundaries = 'detection_boundaries'
- detection_keypoints = 'detection_keypoints'
- num_detections = 'num_detections'
-
-
-class BoxListFields(object):
- """Naming conventions for BoxLists.
-
- Attributes:
- boxes: bounding box coordinates.
- classes: classes per bounding box.
- scores: scores per bounding box.
- weights: sample weights per bounding box.
- objectness: objectness score per bounding box.
- masks: masks per bounding box.
- boundaries: boundaries per bounding box.
- keypoints: keypoints per bounding box.
- keypoint_heatmaps: keypoint heatmaps per bounding box.
- """
- boxes = 'boxes'
- classes = 'classes'
- scores = 'scores'
- weights = 'weights'
- objectness = 'objectness'
- masks = 'masks'
- boundaries = 'boundaries'
- keypoints = 'keypoints'
- keypoint_heatmaps = 'keypoint_heatmaps'
-
-
-class TfExampleFields(object):
- """TF-example proto feature names for object detection.
-
- Holds the standard feature names to load from an Example proto for object
- detection.
-
- Attributes:
- image_encoded: JPEG encoded string
- image_format: image format, e.g. "JPEG"
- filename: filename
- channels: number of channels of image
- colorspace: colorspace, e.g. "RGB"
- height: height of image in pixels, e.g. 462
- width: width of image in pixels, e.g. 581
- source_id: original source of the image
- object_class_text: labels in text format, e.g. ["person", "cat"]
- object_class_label: labels in numbers, e.g. [16, 8]
- object_bbox_xmin: xmin coordinates of groundtruth box, e.g. 10, 30
- object_bbox_xmax: xmax coordinates of groundtruth box, e.g. 50, 40
- object_bbox_ymin: ymin coordinates of groundtruth box, e.g. 40, 50
- object_bbox_ymax: ymax coordinates of groundtruth box, e.g. 80, 70
- object_view: viewpoint of object, e.g. ["frontal", "left"]
- object_truncated: is object truncated, e.g. [true, false]
- object_occluded: is object occluded, e.g. [true, false]
- object_difficult: is object difficult, e.g. [true, false]
- object_group_of: is object a single object or a group of objects
- object_depiction: is object a depiction
- object_is_crowd: [DEPRECATED, use object_group_of instead]
- is the object a single object or a crowd
- object_segment_area: the area of the segment.
- instance_masks: instance segmentation masks.
- instance_boundaries: instance boundaries.
- instance_classes: Classes for each instance segmentation mask.
- detection_class_label: class label in numbers.
- detection_bbox_ymin: ymin coordinates of a detection box.
- detection_bbox_xmin: xmin coordinates of a detection box.
- detection_bbox_ymax: ymax coordinates of a detection box.
- detection_bbox_xmax: xmax coordinates of a detection box.
- detection_score: detection score for the class label and box.
- """
- image_encoded = 'image/encoded'
- image_format = 'image/format' # format is reserved keyword
- filename = 'image/filename'
- channels = 'image/channels'
- colorspace = 'image/colorspace'
- height = 'image/height'
- width = 'image/width'
- source_id = 'image/source_id'
- object_class_text = 'image/object/class/text'
- object_class_label = 'image/object/class/label'
- object_bbox_ymin = 'image/object/bbox/ymin'
- object_bbox_xmin = 'image/object/bbox/xmin'
- object_bbox_ymax = 'image/object/bbox/ymax'
- object_bbox_xmax = 'image/object/bbox/xmax'
- object_view = 'image/object/view'
- object_truncated = 'image/object/truncated'
- object_occluded = 'image/object/occluded'
- object_difficult = 'image/object/difficult'
- object_group_of = 'image/object/group_of'
- object_depiction = 'image/object/depiction'
- object_is_crowd = 'image/object/is_crowd'
- object_segment_area = 'image/object/segment/area'
- instance_masks = 'image/segmentation/object'
- instance_boundaries = 'image/boundaries/object'
- instance_classes = 'image/segmentation/object/class'
- detection_class_label = 'image/detection/label'
- detection_bbox_ymin = 'image/detection/bbox/ymin'
- detection_bbox_xmin = 'image/detection/bbox/xmin'
- detection_bbox_ymax = 'image/detection/bbox/ymax'
- detection_bbox_xmax = 'image/detection/bbox/xmax'
- detection_score = 'image/detection/score'
diff --git a/object_detection/core/target_assigner.py b/object_detection/core/target_assigner.py
deleted file mode 100644
index d028dd59..00000000
--- a/object_detection/core/target_assigner.py
+++ /dev/null
@@ -1,455 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Base target assigner module.
-
-The job of a TargetAssigner is, for a given set of anchors (bounding boxes) and
-groundtruth detections (bounding boxes), to assign classification and regression
-targets to each anchor as well as weights to each anchor (specifying, e.g.,
-which anchors should not contribute to training loss).
-
-It assigns classification/regression targets by performing the following steps:
-1) Computing pairwise similarity between anchors and groundtruth boxes using a
- provided RegionSimilarity Calculator
-2) Computing a matching based on the similarity matrix using a provided Matcher
-3) Assigning regression targets based on the matching and a provided BoxCoder
-4) Assigning classification targets based on the matching and groundtruth labels
-
-Note that TargetAssigners only operate on detections from a single
-image at a time, so any logic for applying a TargetAssigner to multiple
-images must be handled externally.
-"""
-import tensorflow as tf
-
-from object_detection.box_coders import faster_rcnn_box_coder
-from object_detection.box_coders import mean_stddev_box_coder
-from object_detection.core import box_coder as bcoder
-from object_detection.core import box_list
-from object_detection.core import box_list_ops
-from object_detection.core import matcher as mat
-from object_detection.core import region_similarity_calculator as sim_calc
-from object_detection.matchers import argmax_matcher
-from object_detection.matchers import bipartite_matcher
-
-
-class TargetAssigner(object):
- """Target assigner to compute classification and regression targets."""
-
- def __init__(self, similarity_calc, matcher, box_coder,
- positive_class_weight=1.0, negative_class_weight=1.0,
- unmatched_cls_target=None):
- """Construct Object Detection Target Assigner.
-
- Args:
- similarity_calc: a RegionSimilarityCalculator
- matcher: an object_detection.core.Matcher used to match groundtruth to
- anchors.
- box_coder: an object_detection.core.BoxCoder used to encode matching
- groundtruth boxes with respect to anchors.
- positive_class_weight: classification weight to be associated to positive
- anchors (default: 1.0)
- negative_class_weight: classification weight to be associated to negative
- anchors (default: 1.0)
- unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
- which is consistent with the classification target for each
- anchor (and can be empty for scalar targets). This shape must thus be
- compatible with the groundtruth labels that are passed to the "assign"
- function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]).
- If set to None, unmatched_cls_target is set to be [0] for each anchor.
-
- Raises:
- ValueError: if similarity_calc is not a RegionSimilarityCalculator or
- if matcher is not a Matcher or if box_coder is not a BoxCoder
- """
- if not isinstance(similarity_calc, sim_calc.RegionSimilarityCalculator):
- raise ValueError('similarity_calc must be a RegionSimilarityCalculator')
- if not isinstance(matcher, mat.Matcher):
- raise ValueError('matcher must be a Matcher')
- if not isinstance(box_coder, bcoder.BoxCoder):
- raise ValueError('box_coder must be a BoxCoder')
- self._similarity_calc = similarity_calc
- self._matcher = matcher
- self._box_coder = box_coder
- self._positive_class_weight = positive_class_weight
- self._negative_class_weight = negative_class_weight
- if unmatched_cls_target is None:
- self._unmatched_cls_target = tf.constant([0], tf.float32)
- else:
- self._unmatched_cls_target = unmatched_cls_target
-
- @property
- def box_coder(self):
- return self._box_coder
-
- def assign(self, anchors, groundtruth_boxes, groundtruth_labels=None,
- **params):
- """Assign classification and regression targets to each anchor.
-
- For a given set of anchors and groundtruth detections, match anchors
- to groundtruth_boxes and assign classification and regression targets to
- each anchor as well as weights based on the resulting match (specifying,
- e.g., which anchors should not contribute to training loss).
-
- Anchors that are not matched to anything are given a classification target
- of self._unmatched_cls_target which can be specified via the constructor.
-
- Args:
- anchors: a BoxList representing N anchors
- groundtruth_boxes: a BoxList representing M groundtruth boxes
- groundtruth_labels: a tensor of shape [M, d_1, ... d_k]
- with labels for each of the ground_truth boxes. The subshape
- [d_1, ... d_k] can be empty (corresponding to scalar inputs). When set
- to None, groundtruth_labels assumes a binary problem where all
- ground_truth boxes get a positive label (of 1).
- **params: Additional keyword arguments for specific implementations of
- the Matcher.
-
- Returns:
- cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
- where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels
- which has shape [num_gt_boxes, d_1, d_2, ... d_k].
- cls_weights: a float32 tensor with shape [num_anchors]
- reg_targets: a float32 tensor with shape [num_anchors, box_code_dimension]
- reg_weights: a float32 tensor with shape [num_anchors]
- match: a matcher.Match object encoding the match between anchors and
- groundtruth boxes, with rows corresponding to groundtruth boxes
- and columns corresponding to anchors.
-
- Raises:
- ValueError: if anchors or groundtruth_boxes are not of type
- box_list.BoxList
- """
- if not isinstance(anchors, box_list.BoxList):
- raise ValueError('anchors must be an BoxList')
- if not isinstance(groundtruth_boxes, box_list.BoxList):
- raise ValueError('groundtruth_boxes must be an BoxList')
-
- if groundtruth_labels is None:
- groundtruth_labels = tf.ones(tf.expand_dims(groundtruth_boxes.num_boxes(),
- 0))
- groundtruth_labels = tf.expand_dims(groundtruth_labels, -1)
- unmatched_shape_assert = tf.assert_equal(
- tf.shape(groundtruth_labels)[1:], tf.shape(self._unmatched_cls_target),
- message='Unmatched class target shape incompatible '
- 'with groundtruth labels shape!')
- labels_and_box_shapes_assert = tf.assert_equal(
- tf.shape(groundtruth_labels)[0], groundtruth_boxes.num_boxes(),
- message='Groundtruth boxes and labels have incompatible shapes!')
-
- with tf.control_dependencies(
- [unmatched_shape_assert, labels_and_box_shapes_assert]):
- match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes,
- anchors)
- match = self._matcher.match(match_quality_matrix, **params)
- reg_targets = self._create_regression_targets(anchors,
- groundtruth_boxes,
- match)
- cls_targets = self._create_classification_targets(groundtruth_labels,
- match)
- reg_weights = self._create_regression_weights(match)
- cls_weights = self._create_classification_weights(
- match, self._positive_class_weight, self._negative_class_weight)
-
- num_anchors = anchors.num_boxes_static()
- if num_anchors is not None:
- reg_targets = self._reset_target_shape(reg_targets, num_anchors)
- cls_targets = self._reset_target_shape(cls_targets, num_anchors)
- reg_weights = self._reset_target_shape(reg_weights, num_anchors)
- cls_weights = self._reset_target_shape(cls_weights, num_anchors)
-
- return cls_targets, cls_weights, reg_targets, reg_weights, match
-
- def _reset_target_shape(self, target, num_anchors):
- """Sets the static shape of the target.
-
- Args:
- target: the target tensor. Its first dimension will be overwritten.
- num_anchors: the number of anchors, which is used to override the target's
- first dimension.
-
- Returns:
- A tensor with the shape info filled in.
- """
- target_shape = target.get_shape().as_list()
- target_shape[0] = num_anchors
- target.set_shape(target_shape)
- return target
-
- def _create_regression_targets(self, anchors, groundtruth_boxes, match):
- """Returns a regression target for each anchor.
-
- Args:
- anchors: a BoxList representing N anchors
- groundtruth_boxes: a BoxList representing M groundtruth_boxes
- match: a matcher.Match object
-
- Returns:
- reg_targets: a float32 tensor with shape [N, box_code_dimension]
- """
- matched_anchor_indices = match.matched_column_indices()
- unmatched_ignored_anchor_indices = (match.
- unmatched_or_ignored_column_indices())
- matched_gt_indices = match.matched_row_indices()
- matched_anchors = box_list_ops.gather(anchors,
- matched_anchor_indices)
- matched_gt_boxes = box_list_ops.gather(groundtruth_boxes,
- matched_gt_indices)
- matched_reg_targets = self._box_coder.encode(matched_gt_boxes,
- matched_anchors)
- unmatched_ignored_reg_targets = tf.tile(
- self._default_regression_target(),
- tf.stack([tf.size(unmatched_ignored_anchor_indices), 1]))
- reg_targets = tf.dynamic_stitch(
- [matched_anchor_indices, unmatched_ignored_anchor_indices],
- [matched_reg_targets, unmatched_ignored_reg_targets])
- # TODO: summarize the number of matches on average.
- return reg_targets
-
- def _default_regression_target(self):
- """Returns the default target for anchors to regress to.
-
- Default regression targets are set to zero (though in
- this implementation what these targets are set to should
- not matter as the regression weight of any box set to
- regress to the default target is zero).
-
- Returns:
- default_target: a float32 tensor with shape [1, box_code_dimension]
- """
- return tf.constant([self._box_coder.code_size*[0]], tf.float32)
-
- def _create_classification_targets(self, groundtruth_labels, match):
- """Create classification targets for each anchor.
-
- Assign a classification target of for each anchor to the matching
- groundtruth label that is provided by match. Anchors that are not matched
- to anything are given the target self._unmatched_cls_target
-
- Args:
- groundtruth_labels: a tensor of shape [num_gt_boxes, d_1, ... d_k]
- with labels for each of the ground_truth boxes. The subshape
- [d_1, ... d_k] can be empty (corresponding to scalar labels).
- match: a matcher.Match object that provides a matching between anchors
- and groundtruth boxes.
-
- Returns:
- cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
- where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels
- which has shape [num_gt_boxes, d_1, d_2, ... d_k].
- """
- matched_anchor_indices = match.matched_column_indices()
- unmatched_ignored_anchor_indices = (match.
- unmatched_or_ignored_column_indices())
- matched_gt_indices = match.matched_row_indices()
- matched_cls_targets = tf.gather(groundtruth_labels, matched_gt_indices)
-
- ones = self._unmatched_cls_target.shape.ndims * [1]
- unmatched_ignored_cls_targets = tf.tile(
- tf.expand_dims(self._unmatched_cls_target, 0),
- tf.stack([tf.size(unmatched_ignored_anchor_indices)] + ones))
-
- cls_targets = tf.dynamic_stitch(
- [matched_anchor_indices, unmatched_ignored_anchor_indices],
- [matched_cls_targets, unmatched_ignored_cls_targets])
- return cls_targets
-
- def _create_regression_weights(self, match):
- """Set regression weight for each anchor.
-
- Only positive anchors are set to contribute to the regression loss, so this
- method returns a weight of 1 for every positive anchor and 0 for every
- negative anchor.
-
- Args:
- match: a matcher.Match object that provides a matching between anchors
- and groundtruth boxes.
-
- Returns:
- reg_weights: a float32 tensor with shape [num_anchors] representing
- regression weights
- """
- reg_weights = tf.cast(match.matched_column_indicator(), tf.float32)
- return reg_weights
-
- def _create_classification_weights(self,
- match,
- positive_class_weight=1.0,
- negative_class_weight=1.0):
- """Create classification weights for each anchor.
-
- Positive (matched) anchors are associated with a weight of
- positive_class_weight and negative (unmatched) anchors are associated with
- a weight of negative_class_weight. When anchors are ignored, weights are set
- to zero. By default, both positive/negative weights are set to 1.0,
- but they can be adjusted to handle class imbalance (which is almost always
- the case in object detection).
-
- Args:
- match: a matcher.Match object that provides a matching between anchors
- and groundtruth boxes.
- positive_class_weight: weight to be associated to positive anchors
- negative_class_weight: weight to be associated to negative anchors
-
- Returns:
- cls_weights: a float32 tensor with shape [num_anchors] representing
- classification weights.
- """
- matched_indicator = tf.cast(match.matched_column_indicator(), tf.float32)
- ignore_indicator = tf.cast(match.ignored_column_indicator(), tf.float32)
- unmatched_indicator = 1.0 - matched_indicator - ignore_indicator
- cls_weights = (positive_class_weight * matched_indicator
- + negative_class_weight * unmatched_indicator)
- return cls_weights
-
- def get_box_coder(self):
- """Get BoxCoder of this TargetAssigner.
-
- Returns:
- BoxCoder: BoxCoder object.
- """
- return self._box_coder
-
-
-# TODO: This method pulls in all the implementation dependencies into
-# core. Therefore its best to have this factory method outside of core.
-def create_target_assigner(reference, stage=None,
- positive_class_weight=1.0,
- negative_class_weight=1.0,
- unmatched_cls_target=None):
- """Factory function for creating standard target assigners.
-
- Args:
- reference: string referencing the type of TargetAssigner.
- stage: string denoting stage: {proposal, detection}.
- positive_class_weight: classification weight to be associated to positive
- anchors (default: 1.0)
- negative_class_weight: classification weight to be associated to negative
- anchors (default: 1.0)
- unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
- which is consistent with the classification target for each
- anchor (and can be empty for scalar targets). This shape must thus be
- compatible with the groundtruth labels that are passed to the Assign
- function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]).
- If set to None, unmatched_cls_target is set to be 0 for each anchor.
-
- Returns:
- TargetAssigner: desired target assigner.
-
- Raises:
- ValueError: if combination reference+stage is invalid.
- """
- if reference == 'Multibox' and stage == 'proposal':
- similarity_calc = sim_calc.NegSqDistSimilarity()
- matcher = bipartite_matcher.GreedyBipartiteMatcher()
- box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
-
- elif reference == 'FasterRCNN' and stage == 'proposal':
- similarity_calc = sim_calc.IouSimilarity()
- matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.7,
- unmatched_threshold=0.3,
- force_match_for_each_row=True)
- box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
- scale_factors=[10.0, 10.0, 5.0, 5.0])
-
- elif reference == 'FasterRCNN' and stage == 'detection':
- similarity_calc = sim_calc.IouSimilarity()
- # Uses all proposals with IOU < 0.5 as candidate negatives.
- matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
- negatives_lower_than_unmatched=True)
- box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
- scale_factors=[10.0, 10.0, 5.0, 5.0])
-
- elif reference == 'FastRCNN':
- similarity_calc = sim_calc.IouSimilarity()
- matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
- unmatched_threshold=0.1,
- force_match_for_each_row=False,
- negatives_lower_than_unmatched=False)
- box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
-
- else:
- raise ValueError('No valid combination of reference and stage.')
-
- return TargetAssigner(similarity_calc, matcher, box_coder,
- positive_class_weight=positive_class_weight,
- negative_class_weight=negative_class_weight,
- unmatched_cls_target=unmatched_cls_target)
-
-
-def batch_assign_targets(target_assigner,
- anchors_batch,
- gt_box_batch,
- gt_class_targets_batch):
- """Batched assignment of classification and regression targets.
-
- Args:
- target_assigner: a target assigner.
- anchors_batch: BoxList representing N box anchors or list of BoxList objects
- with length batch_size representing anchor sets.
- gt_box_batch: a list of BoxList objects with length batch_size
- representing groundtruth boxes for each image in the batch
- gt_class_targets_batch: a list of tensors with length batch_size, where
- each tensor has shape [num_gt_boxes_i, classification_target_size] and
- num_gt_boxes_i is the number of boxes in the ith boxlist of
- gt_box_batch.
-
- Returns:
- batch_cls_targets: a tensor with shape [batch_size, num_anchors,
- num_classes],
- batch_cls_weights: a tensor with shape [batch_size, num_anchors],
- batch_reg_targets: a tensor with shape [batch_size, num_anchors,
- box_code_dimension]
- batch_reg_weights: a tensor with shape [batch_size, num_anchors],
- match_list: a list of matcher.Match objects encoding the match between
- anchors and groundtruth boxes for each image of the batch,
- with rows of the Match objects corresponding to groundtruth boxes
- and columns corresponding to anchors.
- Raises:
- ValueError: if input list lengths are inconsistent, i.e.,
- batch_size == len(gt_box_batch) == len(gt_class_targets_batch)
- and batch_size == len(anchors_batch) unless anchors_batch is a single
- BoxList.
- """
- if not isinstance(anchors_batch, list):
- anchors_batch = len(gt_box_batch) * [anchors_batch]
- if not all(
- isinstance(anchors, box_list.BoxList) for anchors in anchors_batch):
- raise ValueError('anchors_batch must be a BoxList or list of BoxLists.')
- if not (len(anchors_batch)
- == len(gt_box_batch)
- == len(gt_class_targets_batch)):
- raise ValueError('batch size incompatible with lengths of anchors_batch, '
- 'gt_box_batch and gt_class_targets_batch.')
- cls_targets_list = []
- cls_weights_list = []
- reg_targets_list = []
- reg_weights_list = []
- match_list = []
- for anchors, gt_boxes, gt_class_targets in zip(
- anchors_batch, gt_box_batch, gt_class_targets_batch):
- (cls_targets, cls_weights, reg_targets,
- reg_weights, match) = target_assigner.assign(
- anchors, gt_boxes, gt_class_targets)
- cls_targets_list.append(cls_targets)
- cls_weights_list.append(cls_weights)
- reg_targets_list.append(reg_targets)
- reg_weights_list.append(reg_weights)
- match_list.append(match)
- batch_cls_targets = tf.stack(cls_targets_list)
- batch_cls_weights = tf.stack(cls_weights_list)
- batch_reg_targets = tf.stack(reg_targets_list)
- batch_reg_weights = tf.stack(reg_weights_list)
- return (batch_cls_targets, batch_cls_weights, batch_reg_targets,
- batch_reg_weights, match_list)
diff --git a/object_detection/core/target_assigner_test.py b/object_detection/core/target_assigner_test.py
deleted file mode 100644
index 5055e170..00000000
--- a/object_detection/core/target_assigner_test.py
+++ /dev/null
@@ -1,717 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.core.target_assigner."""
-import numpy as np
-import tensorflow as tf
-
-from object_detection.box_coders import mean_stddev_box_coder
-from object_detection.core import box_list
-from object_detection.core import region_similarity_calculator
-from object_detection.core import target_assigner as targetassigner
-from object_detection.matchers import argmax_matcher
-from object_detection.matchers import bipartite_matcher
-
-
-class TargetAssignerTest(tf.test.TestCase):
-
- def test_assign_agnostic(self):
- similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
- matcher = bipartite_matcher.GreedyBipartiteMatcher()
- box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
- target_assigner = targetassigner.TargetAssigner(
- similarity_calc, matcher, box_coder, unmatched_cls_target=None)
-
- prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 1.0, 0.8],
- [0, 0.5, .5, 1.0]])
- prior_stddevs = tf.constant(3 * [4 * [.1]])
- priors = box_list.BoxList(prior_means)
- priors.add_field('stddev', prior_stddevs)
-
- box_corners = [[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 0.9, 0.9]]
- boxes = box_list.BoxList(tf.constant(box_corners))
- exp_cls_targets = [[1], [1], [0]]
- exp_cls_weights = [1, 1, 1]
- exp_reg_targets = [[0, 0, 0, 0],
- [0, 0, -1, 1],
- [0, 0, 0, 0]]
- exp_reg_weights = [1, 1, 0]
- exp_matching_anchors = [0, 1]
-
- result = target_assigner.assign(priors, boxes, num_valid_rows=2)
- (cls_targets, cls_weights, reg_targets, reg_weights, match) = result
-
- with self.test_session() as sess:
- (cls_targets_out, cls_weights_out,
- reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run(
- [cls_targets, cls_weights, reg_targets, reg_weights,
- match.matched_column_indices()])
-
- self.assertAllClose(cls_targets_out, exp_cls_targets)
- self.assertAllClose(cls_weights_out, exp_cls_weights)
- self.assertAllClose(reg_targets_out, exp_reg_targets)
- self.assertAllClose(reg_weights_out, exp_reg_weights)
- self.assertAllClose(matching_anchors_out, exp_matching_anchors)
- self.assertEquals(cls_targets_out.dtype, np.float32)
- self.assertEquals(cls_weights_out.dtype, np.float32)
- self.assertEquals(reg_targets_out.dtype, np.float32)
- self.assertEquals(reg_weights_out.dtype, np.float32)
- self.assertEquals(matching_anchors_out.dtype, np.int32)
-
- def test_assign_with_ignored_matches(self):
- # Note: test is very similar to above. The third box matched with an IOU
- # of 0.35, which is between the matched and unmatched threshold. This means
- # That like above the expected classification targets are [1, 1, 0].
- # Unlike above, the third target is ignored and therefore expected
- # classification weights are [1, 1, 0].
- similarity_calc = region_similarity_calculator.IouSimilarity()
- matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
- unmatched_threshold=0.3)
- box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
- target_assigner = targetassigner.TargetAssigner(
- similarity_calc, matcher, box_coder)
-
- prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 1.0, 0.8],
- [0.0, 0.5, .9, 1.0]])
- prior_stddevs = tf.constant(3 * [4 * [.1]])
- priors = box_list.BoxList(prior_means)
- priors.add_field('stddev', prior_stddevs)
-
- box_corners = [[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 0.9, 0.9]]
- boxes = box_list.BoxList(tf.constant(box_corners))
- exp_cls_targets = [[1], [1], [0]]
- exp_cls_weights = [1, 1, 0]
- exp_reg_targets = [[0, 0, 0, 0],
- [0, 0, -1, 1],
- [0, 0, 0, 0]]
- exp_reg_weights = [1, 1, 0]
- exp_matching_anchors = [0, 1]
-
- result = target_assigner.assign(priors, boxes)
- (cls_targets, cls_weights, reg_targets, reg_weights, match) = result
- with self.test_session() as sess:
- (cls_targets_out, cls_weights_out,
- reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run(
- [cls_targets, cls_weights, reg_targets, reg_weights,
- match.matched_column_indices()])
-
- self.assertAllClose(cls_targets_out, exp_cls_targets)
- self.assertAllClose(cls_weights_out, exp_cls_weights)
- self.assertAllClose(reg_targets_out, exp_reg_targets)
- self.assertAllClose(reg_weights_out, exp_reg_weights)
- self.assertAllClose(matching_anchors_out, exp_matching_anchors)
- self.assertEquals(cls_targets_out.dtype, np.float32)
- self.assertEquals(cls_weights_out.dtype, np.float32)
- self.assertEquals(reg_targets_out.dtype, np.float32)
- self.assertEquals(reg_weights_out.dtype, np.float32)
- self.assertEquals(matching_anchors_out.dtype, np.int32)
-
- def test_assign_multiclass(self):
- similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
- matcher = bipartite_matcher.GreedyBipartiteMatcher()
- box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
- unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
- target_assigner = targetassigner.TargetAssigner(
- similarity_calc, matcher, box_coder,
- unmatched_cls_target=unmatched_cls_target)
-
- prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 1.0, 0.8],
- [0, 0.5, .5, 1.0],
- [.75, 0, 1.0, .25]])
- prior_stddevs = tf.constant(4 * [4 * [.1]])
- priors = box_list.BoxList(prior_means)
- priors.add_field('stddev', prior_stddevs)
-
- box_corners = [[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 0.9, 0.9],
- [.75, 0, .95, .27]]
- boxes = box_list.BoxList(tf.constant(box_corners))
-
- groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0],
- [0, 0, 0, 0, 0, 1, 0],
- [0, 0, 0, 1, 0, 0, 0]], tf.float32)
-
- exp_cls_targets = [[0, 1, 0, 0, 0, 0, 0],
- [0, 0, 0, 0, 0, 1, 0],
- [1, 0, 0, 0, 0, 0, 0],
- [0, 0, 0, 1, 0, 0, 0]]
- exp_cls_weights = [1, 1, 1, 1]
- exp_reg_targets = [[0, 0, 0, 0],
- [0, 0, -1, 1],
- [0, 0, 0, 0],
- [0, 0, -.5, .2]]
- exp_reg_weights = [1, 1, 0, 1]
- exp_matching_anchors = [0, 1, 3]
-
- result = target_assigner.assign(priors, boxes, groundtruth_labels,
- num_valid_rows=3)
- (cls_targets, cls_weights, reg_targets, reg_weights, match) = result
- with self.test_session() as sess:
- (cls_targets_out, cls_weights_out,
- reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run(
- [cls_targets, cls_weights, reg_targets, reg_weights,
- match.matched_column_indices()])
-
- self.assertAllClose(cls_targets_out, exp_cls_targets)
- self.assertAllClose(cls_weights_out, exp_cls_weights)
- self.assertAllClose(reg_targets_out, exp_reg_targets)
- self.assertAllClose(reg_weights_out, exp_reg_weights)
- self.assertAllClose(matching_anchors_out, exp_matching_anchors)
- self.assertEquals(cls_targets_out.dtype, np.float32)
- self.assertEquals(cls_weights_out.dtype, np.float32)
- self.assertEquals(reg_targets_out.dtype, np.float32)
- self.assertEquals(reg_weights_out.dtype, np.float32)
- self.assertEquals(matching_anchors_out.dtype, np.int32)
-
- def test_assign_multiclass_unequal_class_weights(self):
- similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
- matcher = bipartite_matcher.GreedyBipartiteMatcher()
- box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
- unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
- target_assigner = targetassigner.TargetAssigner(
- similarity_calc, matcher, box_coder,
- positive_class_weight=1.0, negative_class_weight=0.5,
- unmatched_cls_target=unmatched_cls_target)
-
- prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 1.0, 0.8],
- [0, 0.5, .5, 1.0],
- [.75, 0, 1.0, .25]])
- prior_stddevs = tf.constant(4 * [4 * [.1]])
- priors = box_list.BoxList(prior_means)
- priors.add_field('stddev', prior_stddevs)
-
- box_corners = [[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 0.9, 0.9],
- [.75, 0, .95, .27]]
- boxes = box_list.BoxList(tf.constant(box_corners))
-
- groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0],
- [0, 0, 0, 0, 0, 1, 0],
- [0, 0, 0, 1, 0, 0, 0]], tf.float32)
-
- exp_cls_weights = [1, 1, .5, 1]
- result = target_assigner.assign(priors, boxes, groundtruth_labels,
- num_valid_rows=3)
- (_, cls_weights, _, _, _) = result
- with self.test_session() as sess:
- cls_weights_out = sess.run(cls_weights)
- self.assertAllClose(cls_weights_out, exp_cls_weights)
-
- def test_assign_multidimensional_class_targets(self):
- similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
- matcher = bipartite_matcher.GreedyBipartiteMatcher()
- box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
- unmatched_cls_target = tf.constant([[0, 0], [0, 0]], tf.float32)
- target_assigner = targetassigner.TargetAssigner(
- similarity_calc, matcher, box_coder,
- unmatched_cls_target=unmatched_cls_target)
-
- prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 1.0, 0.8],
- [0, 0.5, .5, 1.0],
- [.75, 0, 1.0, .25]])
- prior_stddevs = tf.constant(4 * [4 * [.1]])
- priors = box_list.BoxList(prior_means)
- priors.add_field('stddev', prior_stddevs)
-
- box_corners = [[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 0.9, 0.9],
- [.75, 0, .95, .27]]
- boxes = box_list.BoxList(tf.constant(box_corners))
-
- groundtruth_labels = tf.constant([[[0, 1], [1, 0]],
- [[1, 0], [0, 1]],
- [[0, 1], [1, .5]]], tf.float32)
-
- exp_cls_targets = [[[0, 1], [1, 0]],
- [[1, 0], [0, 1]],
- [[0, 0], [0, 0]],
- [[0, 1], [1, .5]]]
- exp_cls_weights = [1, 1, 1, 1]
- exp_reg_targets = [[0, 0, 0, 0],
- [0, 0, -1, 1],
- [0, 0, 0, 0],
- [0, 0, -.5, .2]]
- exp_reg_weights = [1, 1, 0, 1]
- exp_matching_anchors = [0, 1, 3]
-
- result = target_assigner.assign(priors, boxes, groundtruth_labels,
- num_valid_rows=3)
- (cls_targets, cls_weights, reg_targets, reg_weights, match) = result
- with self.test_session() as sess:
- (cls_targets_out, cls_weights_out,
- reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run(
- [cls_targets, cls_weights, reg_targets, reg_weights,
- match.matched_column_indices()])
-
- self.assertAllClose(cls_targets_out, exp_cls_targets)
- self.assertAllClose(cls_weights_out, exp_cls_weights)
- self.assertAllClose(reg_targets_out, exp_reg_targets)
- self.assertAllClose(reg_weights_out, exp_reg_weights)
- self.assertAllClose(matching_anchors_out, exp_matching_anchors)
- self.assertEquals(cls_targets_out.dtype, np.float32)
- self.assertEquals(cls_weights_out.dtype, np.float32)
- self.assertEquals(reg_targets_out.dtype, np.float32)
- self.assertEquals(reg_weights_out.dtype, np.float32)
- self.assertEquals(matching_anchors_out.dtype, np.int32)
-
- def test_assign_empty_groundtruth(self):
- similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
- matcher = bipartite_matcher.GreedyBipartiteMatcher()
- box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
- unmatched_cls_target = tf.constant([0, 0, 0], tf.float32)
- target_assigner = targetassigner.TargetAssigner(
- similarity_calc, matcher, box_coder,
- unmatched_cls_target=unmatched_cls_target)
-
- prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 1.0, 0.8],
- [0, 0.5, .5, 1.0],
- [.75, 0, 1.0, .25]])
- prior_stddevs = tf.constant(4 * [4 * [.1]])
- priors = box_list.BoxList(prior_means)
- priors.add_field('stddev', prior_stddevs)
-
- box_corners_expanded = tf.constant([[0.0, 0.0, 0.0, 0.0]])
- box_corners = tf.slice(box_corners_expanded, [0, 0], [0, 4])
- boxes = box_list.BoxList(box_corners)
-
- groundtruth_labels_expanded = tf.constant([[0, 0, 0]], tf.float32)
- groundtruth_labels = tf.slice(groundtruth_labels_expanded, [0, 0], [0, 3])
-
- exp_cls_targets = [[0, 0, 0],
- [0, 0, 0],
- [0, 0, 0],
- [0, 0, 0]]
- exp_cls_weights = [1, 1, 1, 1]
- exp_reg_targets = [[0, 0, 0, 0],
- [0, 0, 0, 0],
- [0, 0, 0, 0],
- [0, 0, 0, 0]]
- exp_reg_weights = [0, 0, 0, 0]
- exp_matching_anchors = []
-
- result = target_assigner.assign(priors, boxes, groundtruth_labels)
- (cls_targets, cls_weights, reg_targets, reg_weights, match) = result
- with self.test_session() as sess:
- (cls_targets_out, cls_weights_out,
- reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run(
- [cls_targets, cls_weights, reg_targets, reg_weights,
- match.matched_column_indices()])
-
- self.assertAllClose(cls_targets_out, exp_cls_targets)
- self.assertAllClose(cls_weights_out, exp_cls_weights)
- self.assertAllClose(reg_targets_out, exp_reg_targets)
- self.assertAllClose(reg_weights_out, exp_reg_weights)
- self.assertAllClose(matching_anchors_out, exp_matching_anchors)
- self.assertEquals(cls_targets_out.dtype, np.float32)
- self.assertEquals(cls_weights_out.dtype, np.float32)
- self.assertEquals(reg_targets_out.dtype, np.float32)
- self.assertEquals(reg_weights_out.dtype, np.float32)
- self.assertEquals(matching_anchors_out.dtype, np.int32)
-
- def test_raises_error_on_incompatible_groundtruth_boxes_and_labels(self):
- similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
- matcher = bipartite_matcher.GreedyBipartiteMatcher()
- box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
- unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
- target_assigner = targetassigner.TargetAssigner(
- similarity_calc, matcher, box_coder,
- unmatched_cls_target=unmatched_cls_target)
-
- prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 1.0, 0.8],
- [0, 0.5, .5, 1.0],
- [.75, 0, 1.0, .25]])
- prior_stddevs = tf.constant(4 * [4 * [.1]])
- priors = box_list.BoxList(prior_means)
- priors.add_field('stddev', prior_stddevs)
-
- box_corners = [[0.0, 0.0, 0.5, 0.5],
- [0.0, 0.0, 0.5, 0.8],
- [0.5, 0.5, 0.9, 0.9],
- [.75, 0, .95, .27]]
- boxes = box_list.BoxList(tf.constant(box_corners))
-
- groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0],
- [0, 0, 0, 0, 0, 1, 0],
- [0, 0, 0, 1, 0, 0, 0]], tf.float32)
- result = target_assigner.assign(priors, boxes, groundtruth_labels,
- num_valid_rows=3)
- (cls_targets, cls_weights, reg_targets, reg_weights, _) = result
- with self.test_session() as sess:
- with self.assertRaisesWithPredicateMatch(
- tf.errors.InvalidArgumentError,
- 'Groundtruth boxes and labels have incompatible shapes!'):
- sess.run([cls_targets, cls_weights, reg_targets, reg_weights])
-
- def test_raises_error_on_invalid_groundtruth_labels(self):
- similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
- matcher = bipartite_matcher.GreedyBipartiteMatcher()
- box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
- unmatched_cls_target = tf.constant([[0, 0], [0, 0], [0, 0]], tf.float32)
- target_assigner = targetassigner.TargetAssigner(
- similarity_calc, matcher, box_coder,
- unmatched_cls_target=unmatched_cls_target)
-
- prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5]])
- prior_stddevs = tf.constant([[1.0, 1.0, 1.0, 1.0]])
- priors = box_list.BoxList(prior_means)
- priors.add_field('stddev', prior_stddevs)
-
- box_corners = [[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 0.9, 0.9],
- [.75, 0, .95, .27]]
- boxes = box_list.BoxList(tf.constant(box_corners))
-
- groundtruth_labels = tf.constant([[[0, 1], [1, 0]]], tf.float32)
-
- with self.assertRaises(ValueError):
- target_assigner.assign(priors, boxes, groundtruth_labels,
- num_valid_rows=3)
-
-
-class BatchTargetAssignerTest(tf.test.TestCase):
-
- def _get_agnostic_target_assigner(self):
- similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
- matcher = bipartite_matcher.GreedyBipartiteMatcher()
- box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
- return targetassigner.TargetAssigner(
- similarity_calc, matcher, box_coder,
- positive_class_weight=1.0,
- negative_class_weight=1.0,
- unmatched_cls_target=None)
-
- def _get_multi_class_target_assigner(self, num_classes):
- similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
- matcher = bipartite_matcher.GreedyBipartiteMatcher()
- box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
- unmatched_cls_target = tf.constant([1] + num_classes * [0], tf.float32)
- return targetassigner.TargetAssigner(
- similarity_calc, matcher, box_coder,
- positive_class_weight=1.0,
- negative_class_weight=1.0,
- unmatched_cls_target=unmatched_cls_target)
-
- def _get_multi_dimensional_target_assigner(self, target_dimensions):
- similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
- matcher = bipartite_matcher.GreedyBipartiteMatcher()
- box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
- unmatched_cls_target = tf.constant(np.zeros(target_dimensions),
- tf.float32)
- return targetassigner.TargetAssigner(
- similarity_calc, matcher, box_coder,
- positive_class_weight=1.0,
- negative_class_weight=1.0,
- unmatched_cls_target=unmatched_cls_target)
-
- def test_batch_assign_targets(self):
- box_list1 = box_list.BoxList(tf.constant([[0., 0., 0.2, 0.2]]))
- box_list2 = box_list.BoxList(tf.constant(
- [[0, 0.25123152, 1, 1],
- [0.015789, 0.0985, 0.55789, 0.3842]]
- ))
-
- gt_box_batch = [box_list1, box_list2]
- gt_class_targets = [None, None]
-
- prior_means = tf.constant([[0, 0, .25, .25],
- [0, .25, 1, 1],
- [0, .1, .5, .5],
- [.75, .75, 1, 1]])
- prior_stddevs = tf.constant([[.1, .1, .1, .1],
- [.1, .1, .1, .1],
- [.1, .1, .1, .1],
- [.1, .1, .1, .1]])
- priors = box_list.BoxList(prior_means)
- priors.add_field('stddev', prior_stddevs)
-
- exp_reg_targets = [[[0, 0, -0.5, -0.5],
- [0, 0, 0, 0],
- [0, 0, 0, 0,],
- [0, 0, 0, 0,],],
- [[0, 0, 0, 0,],
- [0, 0.01231521, 0, 0],
- [0.15789001, -0.01500003, 0.57889998, -1.15799987],
- [0, 0, 0, 0]]]
- exp_cls_weights = [[1, 1, 1, 1],
- [1, 1, 1, 1]]
- exp_cls_targets = [[[1], [0], [0], [0]],
- [[0], [1], [1], [0]]]
- exp_reg_weights = [[1, 0, 0, 0],
- [0, 1, 1, 0]]
- exp_match_0 = [0]
- exp_match_1 = [1, 2]
-
- agnostic_target_assigner = self._get_agnostic_target_assigner()
- (cls_targets, cls_weights, reg_targets, reg_weights,
- match_list) = targetassigner.batch_assign_targets(
- agnostic_target_assigner, priors, gt_box_batch, gt_class_targets)
- self.assertTrue(isinstance(match_list, list) and len(match_list) == 2)
- with self.test_session() as sess:
- (cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
- match_out_0, match_out_1) = sess.run([
- cls_targets, cls_weights, reg_targets, reg_weights] + [
- match.matched_column_indices() for match in match_list])
- self.assertAllClose(cls_targets_out, exp_cls_targets)
- self.assertAllClose(cls_weights_out, exp_cls_weights)
- self.assertAllClose(reg_targets_out, exp_reg_targets)
- self.assertAllClose(reg_weights_out, exp_reg_weights)
- self.assertAllClose(match_out_0, exp_match_0)
- self.assertAllClose(match_out_1, exp_match_1)
-
- def test_batch_assign_multiclass_targets(self):
- box_list1 = box_list.BoxList(tf.constant([[0., 0., 0.2, 0.2]]))
-
- box_list2 = box_list.BoxList(tf.constant(
- [[0, 0.25123152, 1, 1],
- [0.015789, 0.0985, 0.55789, 0.3842]]
- ))
-
- gt_box_batch = [box_list1, box_list2]
-
- class_targets1 = tf.constant([[0, 1, 0, 0]], tf.float32)
- class_targets2 = tf.constant([[0, 0, 0, 1],
- [0, 0, 1, 0]], tf.float32)
-
- gt_class_targets = [class_targets1, class_targets2]
-
- prior_means = tf.constant([[0, 0, .25, .25],
- [0, .25, 1, 1],
- [0, .1, .5, .5],
- [.75, .75, 1, 1]])
- prior_stddevs = tf.constant([[.1, .1, .1, .1],
- [.1, .1, .1, .1],
- [.1, .1, .1, .1],
- [.1, .1, .1, .1]])
- priors = box_list.BoxList(prior_means)
- priors.add_field('stddev', prior_stddevs)
-
- exp_reg_targets = [[[0, 0, -0.5, -0.5],
- [0, 0, 0, 0],
- [0, 0, 0, 0],
- [0, 0, 0, 0]],
- [[0, 0, 0, 0],
- [0, 0.01231521, 0, 0],
- [0.15789001, -0.01500003, 0.57889998, -1.15799987],
- [0, 0, 0, 0]]]
- exp_cls_weights = [[1, 1, 1, 1],
- [1, 1, 1, 1]]
- exp_cls_targets = [[[0, 1, 0, 0],
- [1, 0, 0, 0],
- [1, 0, 0, 0],
- [1, 0, 0, 0]],
- [[1, 0, 0, 0],
- [0, 0, 0, 1],
- [0, 0, 1, 0],
- [1, 0, 0, 0]]]
- exp_reg_weights = [[1, 0, 0, 0],
- [0, 1, 1, 0]]
- exp_match_0 = [0]
- exp_match_1 = [1, 2]
-
- multiclass_target_assigner = self._get_multi_class_target_assigner(
- num_classes=3)
-
- (cls_targets, cls_weights, reg_targets, reg_weights,
- match_list) = targetassigner.batch_assign_targets(
- multiclass_target_assigner, priors, gt_box_batch, gt_class_targets)
- self.assertTrue(isinstance(match_list, list) and len(match_list) == 2)
- with self.test_session() as sess:
- (cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
- match_out_0, match_out_1) = sess.run([
- cls_targets, cls_weights, reg_targets, reg_weights] + [
- match.matched_column_indices() for match in match_list])
- self.assertAllClose(cls_targets_out, exp_cls_targets)
- self.assertAllClose(cls_weights_out, exp_cls_weights)
- self.assertAllClose(reg_targets_out, exp_reg_targets)
- self.assertAllClose(reg_weights_out, exp_reg_weights)
- self.assertAllClose(match_out_0, exp_match_0)
- self.assertAllClose(match_out_1, exp_match_1)
-
- def test_batch_assign_multidimensional_targets(self):
- box_list1 = box_list.BoxList(tf.constant([[0., 0., 0.2, 0.2]]))
-
- box_list2 = box_list.BoxList(tf.constant(
- [[0, 0.25123152, 1, 1],
- [0.015789, 0.0985, 0.55789, 0.3842]]
- ))
-
- gt_box_batch = [box_list1, box_list2]
- class_targets1 = tf.constant([[[0, 1, 1],
- [1, 1, 0]]], tf.float32)
- class_targets2 = tf.constant([[[0, 1, 1],
- [1, 1, 0]],
- [[0, 0, 1],
- [0, 0, 1]]], tf.float32)
-
- gt_class_targets = [class_targets1, class_targets2]
-
- prior_means = tf.constant([[0, 0, .25, .25],
- [0, .25, 1, 1],
- [0, .1, .5, .5],
- [.75, .75, 1, 1]])
- prior_stddevs = tf.constant([[.1, .1, .1, .1],
- [.1, .1, .1, .1],
- [.1, .1, .1, .1],
- [.1, .1, .1, .1]])
- priors = box_list.BoxList(prior_means)
- priors.add_field('stddev', prior_stddevs)
-
- exp_reg_targets = [[[0, 0, -0.5, -0.5],
- [0, 0, 0, 0],
- [0, 0, 0, 0],
- [0, 0, 0, 0]],
- [[0, 0, 0, 0],
- [0, 0.01231521, 0, 0],
- [0.15789001, -0.01500003, 0.57889998, -1.15799987],
- [0, 0, 0, 0]]]
- exp_cls_weights = [[1, 1, 1, 1],
- [1, 1, 1, 1]]
-
- exp_cls_targets = [[[[0., 1., 1.],
- [1., 1., 0.]],
- [[0., 0., 0.],
- [0., 0., 0.]],
- [[0., 0., 0.],
- [0., 0., 0.]],
- [[0., 0., 0.],
- [0., 0., 0.]]],
- [[[0., 0., 0.],
- [0., 0., 0.]],
- [[0., 1., 1.],
- [1., 1., 0.]],
- [[0., 0., 1.],
- [0., 0., 1.]],
- [[0., 0., 0.],
- [0., 0., 0.]]]]
- exp_reg_weights = [[1, 0, 0, 0],
- [0, 1, 1, 0]]
- exp_match_0 = [0]
- exp_match_1 = [1, 2]
-
- multiclass_target_assigner = self._get_multi_dimensional_target_assigner(
- target_dimensions=(2, 3))
-
- (cls_targets, cls_weights, reg_targets, reg_weights,
- match_list) = targetassigner.batch_assign_targets(
- multiclass_target_assigner, priors, gt_box_batch, gt_class_targets)
- self.assertTrue(isinstance(match_list, list) and len(match_list) == 2)
- with self.test_session() as sess:
- (cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
- match_out_0, match_out_1) = sess.run([
- cls_targets, cls_weights, reg_targets, reg_weights] + [
- match.matched_column_indices() for match in match_list])
- self.assertAllClose(cls_targets_out, exp_cls_targets)
- self.assertAllClose(cls_weights_out, exp_cls_weights)
- self.assertAllClose(reg_targets_out, exp_reg_targets)
- self.assertAllClose(reg_weights_out, exp_reg_weights)
- self.assertAllClose(match_out_0, exp_match_0)
- self.assertAllClose(match_out_1, exp_match_1)
-
- def test_batch_assign_empty_groundtruth(self):
- box_coords_expanded = tf.zeros((1, 4), tf.float32)
- box_coords = tf.slice(box_coords_expanded, [0, 0], [0, 4])
- box_list1 = box_list.BoxList(box_coords)
- gt_box_batch = [box_list1]
-
- prior_means = tf.constant([[0, 0, .25, .25],
- [0, .25, 1, 1]])
- prior_stddevs = tf.constant([[.1, .1, .1, .1],
- [.1, .1, .1, .1]])
- priors = box_list.BoxList(prior_means)
- priors.add_field('stddev', prior_stddevs)
-
- exp_reg_targets = [[[0, 0, 0, 0],
- [0, 0, 0, 0]]]
- exp_cls_weights = [[1, 1]]
- exp_cls_targets = [[[1, 0, 0, 0],
- [1, 0, 0, 0]]]
- exp_reg_weights = [[0, 0]]
- exp_match_0 = []
-
- num_classes = 3
- pad = 1
- gt_class_targets = tf.zeros((0, num_classes + pad))
- gt_class_targets_batch = [gt_class_targets]
-
- multiclass_target_assigner = self._get_multi_class_target_assigner(
- num_classes=3)
-
- (cls_targets, cls_weights, reg_targets, reg_weights,
- match_list) = targetassigner.batch_assign_targets(
- multiclass_target_assigner, priors,
- gt_box_batch, gt_class_targets_batch)
- self.assertTrue(isinstance(match_list, list) and len(match_list) == 1)
- with self.test_session() as sess:
- (cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
- match_out_0) = sess.run([
- cls_targets, cls_weights, reg_targets, reg_weights] + [
- match.matched_column_indices() for match in match_list])
- self.assertAllClose(cls_targets_out, exp_cls_targets)
- self.assertAllClose(cls_weights_out, exp_cls_weights)
- self.assertAllClose(reg_targets_out, exp_reg_targets)
- self.assertAllClose(reg_weights_out, exp_reg_weights)
- self.assertAllClose(match_out_0, exp_match_0)
-
-
-class CreateTargetAssignerTest(tf.test.TestCase):
-
- def test_create_target_assigner(self):
- """Tests that named constructor gives working target assigners.
-
- TODO: Make this test more general.
- """
- corners = [[0.0, 0.0, 1.0, 1.0]]
- groundtruth = box_list.BoxList(tf.constant(corners))
-
- priors = box_list.BoxList(tf.constant(corners))
- prior_stddevs = tf.constant([[1.0, 1.0, 1.0, 1.0]])
- priors.add_field('stddev', prior_stddevs)
- multibox_ta = (targetassigner
- .create_target_assigner('Multibox', stage='proposal'))
- multibox_ta.assign(priors, groundtruth)
- # No tests on output, as that may vary arbitrarily as new target assigners
- # are added. As long as it is constructed correctly and runs without errors,
- # tests on the individual assigners cover correctness of the assignments.
-
- anchors = box_list.BoxList(tf.constant(corners))
- faster_rcnn_proposals_ta = (targetassigner
- .create_target_assigner('FasterRCNN',
- stage='proposal'))
- faster_rcnn_proposals_ta.assign(anchors, groundtruth)
-
- fast_rcnn_ta = (targetassigner
- .create_target_assigner('FastRCNN'))
- fast_rcnn_ta.assign(anchors, groundtruth)
-
- faster_rcnn_detection_ta = (targetassigner
- .create_target_assigner('FasterRCNN',
- stage='detection'))
- faster_rcnn_detection_ta.assign(anchors, groundtruth)
-
- with self.assertRaises(ValueError):
- targetassigner.create_target_assigner('InvalidDetector',
- stage='invalid_stage')
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/data/kitti_label_map.pbtxt b/object_detection/data/kitti_label_map.pbtxt
deleted file mode 100644
index 0afcc693..00000000
--- a/object_detection/data/kitti_label_map.pbtxt
+++ /dev/null
@@ -1,9 +0,0 @@
-item {
- id: 1
- name: 'car'
-}
-
-item {
- id: 2
- name: 'pedestrian'
-}
diff --git a/object_detection/data/mscoco_label_map.pbtxt b/object_detection/data/mscoco_label_map.pbtxt
deleted file mode 100644
index 1f4872bd..00000000
--- a/object_detection/data/mscoco_label_map.pbtxt
+++ /dev/null
@@ -1,400 +0,0 @@
-item {
- name: "/m/01g317"
- id: 1
- display_name: "person"
-}
-item {
- name: "/m/0199g"
- id: 2
- display_name: "bicycle"
-}
-item {
- name: "/m/0k4j"
- id: 3
- display_name: "car"
-}
-item {
- name: "/m/04_sv"
- id: 4
- display_name: "motorcycle"
-}
-item {
- name: "/m/05czz6l"
- id: 5
- display_name: "airplane"
-}
-item {
- name: "/m/01bjv"
- id: 6
- display_name: "bus"
-}
-item {
- name: "/m/07jdr"
- id: 7
- display_name: "train"
-}
-item {
- name: "/m/07r04"
- id: 8
- display_name: "truck"
-}
-item {
- name: "/m/019jd"
- id: 9
- display_name: "boat"
-}
-item {
- name: "/m/015qff"
- id: 10
- display_name: "traffic light"
-}
-item {
- name: "/m/01pns0"
- id: 11
- display_name: "fire hydrant"
-}
-item {
- name: "/m/02pv19"
- id: 13
- display_name: "stop sign"
-}
-item {
- name: "/m/015qbp"
- id: 14
- display_name: "parking meter"
-}
-item {
- name: "/m/0cvnqh"
- id: 15
- display_name: "bench"
-}
-item {
- name: "/m/015p6"
- id: 16
- display_name: "bird"
-}
-item {
- name: "/m/01yrx"
- id: 17
- display_name: "cat"
-}
-item {
- name: "/m/0bt9lr"
- id: 18
- display_name: "dog"
-}
-item {
- name: "/m/03k3r"
- id: 19
- display_name: "horse"
-}
-item {
- name: "/m/07bgp"
- id: 20
- display_name: "sheep"
-}
-item {
- name: "/m/01xq0k1"
- id: 21
- display_name: "cow"
-}
-item {
- name: "/m/0bwd_0j"
- id: 22
- display_name: "elephant"
-}
-item {
- name: "/m/01dws"
- id: 23
- display_name: "bear"
-}
-item {
- name: "/m/0898b"
- id: 24
- display_name: "zebra"
-}
-item {
- name: "/m/03bk1"
- id: 25
- display_name: "giraffe"
-}
-item {
- name: "/m/01940j"
- id: 27
- display_name: "backpack"
-}
-item {
- name: "/m/0hnnb"
- id: 28
- display_name: "umbrella"
-}
-item {
- name: "/m/080hkjn"
- id: 31
- display_name: "handbag"
-}
-item {
- name: "/m/01rkbr"
- id: 32
- display_name: "tie"
-}
-item {
- name: "/m/01s55n"
- id: 33
- display_name: "suitcase"
-}
-item {
- name: "/m/02wmf"
- id: 34
- display_name: "frisbee"
-}
-item {
- name: "/m/071p9"
- id: 35
- display_name: "skis"
-}
-item {
- name: "/m/06__v"
- id: 36
- display_name: "snowboard"
-}
-item {
- name: "/m/018xm"
- id: 37
- display_name: "sports ball"
-}
-item {
- name: "/m/02zt3"
- id: 38
- display_name: "kite"
-}
-item {
- name: "/m/03g8mr"
- id: 39
- display_name: "baseball bat"
-}
-item {
- name: "/m/03grzl"
- id: 40
- display_name: "baseball glove"
-}
-item {
- name: "/m/06_fw"
- id: 41
- display_name: "skateboard"
-}
-item {
- name: "/m/019w40"
- id: 42
- display_name: "surfboard"
-}
-item {
- name: "/m/0dv9c"
- id: 43
- display_name: "tennis racket"
-}
-item {
- name: "/m/04dr76w"
- id: 44
- display_name: "bottle"
-}
-item {
- name: "/m/09tvcd"
- id: 46
- display_name: "wine glass"
-}
-item {
- name: "/m/08gqpm"
- id: 47
- display_name: "cup"
-}
-item {
- name: "/m/0dt3t"
- id: 48
- display_name: "fork"
-}
-item {
- name: "/m/04ctx"
- id: 49
- display_name: "knife"
-}
-item {
- name: "/m/0cmx8"
- id: 50
- display_name: "spoon"
-}
-item {
- name: "/m/04kkgm"
- id: 51
- display_name: "bowl"
-}
-item {
- name: "/m/09qck"
- id: 52
- display_name: "banana"
-}
-item {
- name: "/m/014j1m"
- id: 53
- display_name: "apple"
-}
-item {
- name: "/m/0l515"
- id: 54
- display_name: "sandwich"
-}
-item {
- name: "/m/0cyhj_"
- id: 55
- display_name: "orange"
-}
-item {
- name: "/m/0hkxq"
- id: 56
- display_name: "broccoli"
-}
-item {
- name: "/m/0fj52s"
- id: 57
- display_name: "carrot"
-}
-item {
- name: "/m/01b9xk"
- id: 58
- display_name: "hot dog"
-}
-item {
- name: "/m/0663v"
- id: 59
- display_name: "pizza"
-}
-item {
- name: "/m/0jy4k"
- id: 60
- display_name: "donut"
-}
-item {
- name: "/m/0fszt"
- id: 61
- display_name: "cake"
-}
-item {
- name: "/m/01mzpv"
- id: 62
- display_name: "chair"
-}
-item {
- name: "/m/02crq1"
- id: 63
- display_name: "couch"
-}
-item {
- name: "/m/03fp41"
- id: 64
- display_name: "potted plant"
-}
-item {
- name: "/m/03ssj5"
- id: 65
- display_name: "bed"
-}
-item {
- name: "/m/04bcr3"
- id: 67
- display_name: "dining table"
-}
-item {
- name: "/m/09g1w"
- id: 70
- display_name: "toilet"
-}
-item {
- name: "/m/07c52"
- id: 72
- display_name: "tv"
-}
-item {
- name: "/m/01c648"
- id: 73
- display_name: "laptop"
-}
-item {
- name: "/m/020lf"
- id: 74
- display_name: "mouse"
-}
-item {
- name: "/m/0qjjc"
- id: 75
- display_name: "remote"
-}
-item {
- name: "/m/01m2v"
- id: 76
- display_name: "keyboard"
-}
-item {
- name: "/m/050k8"
- id: 77
- display_name: "cell phone"
-}
-item {
- name: "/m/0fx9l"
- id: 78
- display_name: "microwave"
-}
-item {
- name: "/m/029bxz"
- id: 79
- display_name: "oven"
-}
-item {
- name: "/m/01k6s3"
- id: 80
- display_name: "toaster"
-}
-item {
- name: "/m/0130jx"
- id: 81
- display_name: "sink"
-}
-item {
- name: "/m/040b_t"
- id: 82
- display_name: "refrigerator"
-}
-item {
- name: "/m/0bt_c3"
- id: 84
- display_name: "book"
-}
-item {
- name: "/m/01x3z"
- id: 85
- display_name: "clock"
-}
-item {
- name: "/m/02s195"
- id: 86
- display_name: "vase"
-}
-item {
- name: "/m/01lsmm"
- id: 87
- display_name: "scissors"
-}
-item {
- name: "/m/0kmg4"
- id: 88
- display_name: "teddy bear"
-}
-item {
- name: "/m/03wvsk"
- id: 89
- display_name: "hair drier"
-}
-item {
- name: "/m/012xff"
- id: 90
- display_name: "toothbrush"
-}
diff --git a/object_detection/data/oid_bbox_trainable_label_map.pbtxt b/object_detection/data/oid_bbox_trainable_label_map.pbtxt
deleted file mode 100644
index 863e4f31..00000000
--- a/object_detection/data/oid_bbox_trainable_label_map.pbtxt
+++ /dev/null
@@ -1,2725 +0,0 @@
-item {
- name: "/m/01g317"
- id: 1
- display_name: "Person"
-}
-item {
- name: "/m/09j2d"
- id: 2
- display_name: "Clothing"
-}
-item {
- name: "/m/04yx4"
- id: 3
- display_name: "Man"
-}
-item {
- name: "/m/0dzct"
- id: 4
- display_name: "Face"
-}
-item {
- name: "/m/07j7r"
- id: 5
- display_name: "Tree"
-}
-item {
- name: "/m/05s2s"
- id: 6
- display_name: "Plant"
-}
-item {
- name: "/m/03bt1vf"
- id: 7
- display_name: "Woman"
-}
-item {
- name: "/m/07yv9"
- id: 8
- display_name: "Vehicle"
-}
-item {
- name: "/m/0cgh4"
- id: 9
- display_name: "Building"
-}
-item {
- name: "/m/01prls"
- id: 10
- display_name: "Land vehicle"
-}
-item {
- name: "/m/09j5n"
- id: 11
- display_name: "Footwear"
-}
-item {
- name: "/m/05r655"
- id: 12
- display_name: "Girl"
-}
-item {
- name: "/m/0jbk"
- id: 13
- display_name: "Animal"
-}
-item {
- name: "/m/0k4j"
- id: 14
- display_name: "Car"
-}
-item {
- name: "/m/02wbm"
- id: 15
- display_name: "Food"
-}
-item {
- name: "/m/083wq"
- id: 16
- display_name: "Wheel"
-}
-item {
- name: "/m/0c9ph5"
- id: 17
- display_name: "Flower"
-}
-item {
- name: "/m/0c_jw"
- id: 18
- display_name: "Furniture"
-}
-item {
- name: "/m/0d4v4"
- id: 19
- display_name: "Window"
-}
-item {
- name: "/m/03jm5"
- id: 20
- display_name: "House"
-}
-item {
- name: "/m/01bl7v"
- id: 21
- display_name: "Boy"
-}
-item {
- name: "/m/0463sg"
- id: 22
- display_name: "Fashion accessory"
-}
-item {
- name: "/m/04bcr3"
- id: 23
- display_name: "Table"
-}
-item {
- name: "/m/0jyfg"
- id: 24
- display_name: "Glasses"
-}
-item {
- name: "/m/01xyhv"
- id: 25
- display_name: "Suit"
-}
-item {
- name: "/m/08dz3q"
- id: 26
- display_name: "Auto part"
-}
-item {
- name: "/m/015p6"
- id: 27
- display_name: "Bird"
-}
-item {
- name: "/m/05y5lj"
- id: 28
- display_name: "Sports equipment"
-}
-item {
- name: "/m/01d40f"
- id: 29
- display_name: "Dress"
-}
-item {
- name: "/m/0bt9lr"
- id: 30
- display_name: "Dog"
-}
-item {
- name: "/m/01lrl"
- id: 31
- display_name: "Carnivore"
-}
-item {
- name: "/m/02p0tk3"
- id: 32
- display_name: "Human body"
-}
-item {
- name: "/m/0fly7"
- id: 33
- display_name: "Jeans"
-}
-item {
- name: "/m/04szw"
- id: 34
- display_name: "Musical instrument"
-}
-item {
- name: "/m/0271t"
- id: 35
- display_name: "Drink"
-}
-item {
- name: "/m/019jd"
- id: 36
- display_name: "Boat"
-}
-item {
- name: "/m/03q69"
- id: 37
- display_name: "Hair"
-}
-item {
- name: "/m/0h9mv"
- id: 38
- display_name: "Tire"
-}
-item {
- name: "/m/04hgtk"
- id: 39
- display_name: "Head"
-}
-item {
- name: "/m/01yrx"
- id: 40
- display_name: "Cat"
-}
-item {
- name: "/m/01rzcn"
- id: 41
- display_name: "Watercraft"
-}
-item {
- name: "/m/01mzpv"
- id: 42
- display_name: "Chair"
-}
-item {
- name: "/m/0199g"
- id: 43
- display_name: "Bike"
-}
-item {
- name: "/m/01fdzj"
- id: 44
- display_name: "Tower"
-}
-item {
- name: "/m/04rky"
- id: 45
- display_name: "Mammal"
-}
-item {
- name: "/m/079cl"
- id: 46
- display_name: "Skyscraper"
-}
-item {
- name: "/m/0dzf4"
- id: 47
- display_name: "Arm"
-}
-item {
- name: "/m/0138tl"
- id: 48
- display_name: "Toy"
-}
-item {
- name: "/m/06msq"
- id: 49
- display_name: "Sculpture"
-}
-item {
- name: "/m/03xxp"
- id: 50
- display_name: "Invertebrate"
-}
-item {
- name: "/m/0hg7b"
- id: 51
- display_name: "Microphone"
-}
-item {
- name: "/m/01n5jq"
- id: 52
- display_name: "Poster"
-}
-item {
- name: "/m/03vt0"
- id: 53
- display_name: "Insect"
-}
-item {
- name: "/m/0342h"
- id: 54
- display_name: "Guitar"
-}
-item {
- name: "/m/0k0pj"
- id: 55
- display_name: "Nose"
-}
-item {
- name: "/m/02dl1y"
- id: 56
- display_name: "Hat"
-}
-item {
- name: "/m/04brg2"
- id: 57
- display_name: "Tableware"
-}
-item {
- name: "/m/02dgv"
- id: 58
- display_name: "Door"
-}
-item {
- name: "/m/01bqk0"
- id: 59
- display_name: "Bicycle wheel"
-}
-item {
- name: "/m/017ftj"
- id: 60
- display_name: "Sunglasses"
-}
-item {
- name: "/m/052lwg6"
- id: 61
- display_name: "Baked goods"
-}
-item {
- name: "/m/014sv8"
- id: 62
- display_name: "Eye"
-}
-item {
- name: "/m/0270h"
- id: 63
- display_name: "Dessert"
-}
-item {
- name: "/m/0283dt1"
- id: 64
- display_name: "Mouth"
-}
-item {
- name: "/m/0k5j"
- id: 65
- display_name: "Aircraft"
-}
-item {
- name: "/m/0cmf2"
- id: 66
- display_name: "Airplane"
-}
-item {
- name: "/m/07jdr"
- id: 67
- display_name: "Train"
-}
-item {
- name: "/m/032b3c"
- id: 68
- display_name: "Jacket"
-}
-item {
- name: "/m/033rq4"
- id: 69
- display_name: "Street light"
-}
-item {
- name: "/m/0k65p"
- id: 70
- display_name: "Hand"
-}
-item {
- name: "/m/01ww8y"
- id: 71
- display_name: "Snack"
-}
-item {
- name: "/m/0zvk5"
- id: 72
- display_name: "Helmet"
-}
-item {
- name: "/m/07mhn"
- id: 73
- display_name: "Trousers"
-}
-item {
- name: "/m/04dr76w"
- id: 74
- display_name: "Bottle"
-}
-item {
- name: "/m/03fp41"
- id: 75
- display_name: "Houseplant"
-}
-item {
- name: "/m/03k3r"
- id: 76
- display_name: "Horse"
-}
-item {
- name: "/m/01y9k5"
- id: 77
- display_name: "Desk"
-}
-item {
- name: "/m/0cdl1"
- id: 78
- display_name: "Palm tree"
-}
-item {
- name: "/m/0f4s2w"
- id: 79
- display_name: "Vegetable"
-}
-item {
- name: "/m/02xwb"
- id: 80
- display_name: "Fruit"
-}
-item {
- name: "/m/035r7c"
- id: 81
- display_name: "Leg"
-}
-item {
- name: "/m/0bt_c3"
- id: 82
- display_name: "Book"
-}
-item {
- name: "/m/01_bhs"
- id: 83
- display_name: "Fast food"
-}
-item {
- name: "/m/01599"
- id: 84
- display_name: "Beer"
-}
-item {
- name: "/m/03120"
- id: 85
- display_name: "Flag"
-}
-item {
- name: "/m/026t6"
- id: 86
- display_name: "Drum"
-}
-item {
- name: "/m/01bjv"
- id: 87
- display_name: "Bus"
-}
-item {
- name: "/m/07r04"
- id: 88
- display_name: "Truck"
-}
-item {
- name: "/m/018xm"
- id: 89
- display_name: "Ball"
-}
-item {
- name: "/m/01rkbr"
- id: 90
- display_name: "Tie"
-}
-item {
- name: "/m/0fm3zh"
- id: 91
- display_name: "Flowerpot"
-}
-item {
- name: "/m/02_n6y"
- id: 92
- display_name: "Goggles"
-}
-item {
- name: "/m/04_sv"
- id: 93
- display_name: "Motorcycle"
-}
-item {
- name: "/m/06z37_"
- id: 94
- display_name: "Picture frame"
-}
-item {
- name: "/m/01bfm9"
- id: 95
- display_name: "Shorts"
-}
-item {
- name: "/m/0h8mhzd"
- id: 96
- display_name: "Sports uniform"
-}
-item {
- name: "/m/0d_2m"
- id: 97
- display_name: "Moths and butterflies"
-}
-item {
- name: "/m/0gjbg72"
- id: 98
- display_name: "Shelf"
-}
-item {
- name: "/m/01n4qj"
- id: 99
- display_name: "Shirt"
-}
-item {
- name: "/m/0ch_cf"
- id: 100
- display_name: "Fish"
-}
-item {
- name: "/m/06m11"
- id: 101
- display_name: "Rose"
-}
-item {
- name: "/m/01jfm_"
- id: 102
- display_name: "Licence plate"
-}
-item {
- name: "/m/02crq1"
- id: 103
- display_name: "Couch"
-}
-item {
- name: "/m/083kb"
- id: 104
- display_name: "Weapon"
-}
-item {
- name: "/m/01c648"
- id: 105
- display_name: "Laptop"
-}
-item {
- name: "/m/09tvcd"
- id: 106
- display_name: "Wine glass"
-}
-item {
- name: "/m/0h2r6"
- id: 107
- display_name: "Van"
-}
-item {
- name: "/m/081qc"
- id: 108
- display_name: "Wine"
-}
-item {
- name: "/m/09ddx"
- id: 109
- display_name: "Duck"
-}
-item {
- name: "/m/03p3bw"
- id: 110
- display_name: "Bicycle helmet"
-}
-item {
- name: "/m/0cyf8"
- id: 111
- display_name: "Butterfly"
-}
-item {
- name: "/m/0b_rs"
- id: 112
- display_name: "Swimming pool"
-}
-item {
- name: "/m/039xj_"
- id: 113
- display_name: "Ear"
-}
-item {
- name: "/m/021sj1"
- id: 114
- display_name: "Office"
-}
-item {
- name: "/m/0dv5r"
- id: 115
- display_name: "Camera"
-}
-item {
- name: "/m/01lynh"
- id: 116
- display_name: "Stairs"
-}
-item {
- name: "/m/06bt6"
- id: 117
- display_name: "Reptile"
-}
-item {
- name: "/m/01226z"
- id: 118
- display_name: "Football"
-}
-item {
- name: "/m/0fszt"
- id: 119
- display_name: "Cake"
-}
-item {
- name: "/m/050k8"
- id: 120
- display_name: "Mobile phone"
-}
-item {
- name: "/m/02wbtzl"
- id: 121
- display_name: "Sun hat"
-}
-item {
- name: "/m/02p5f1q"
- id: 122
- display_name: "Coffee cup"
-}
-item {
- name: "/m/025nd"
- id: 123
- display_name: "Christmas tree"
-}
-item {
- name: "/m/02522"
- id: 124
- display_name: "Computer monitor"
-}
-item {
- name: "/m/09ct_"
- id: 125
- display_name: "Helicopter"
-}
-item {
- name: "/m/0cvnqh"
- id: 126
- display_name: "Bench"
-}
-item {
- name: "/m/0d5gx"
- id: 127
- display_name: "Castle"
-}
-item {
- name: "/m/01xygc"
- id: 128
- display_name: "Coat"
-}
-item {
- name: "/m/04m6gz"
- id: 129
- display_name: "Porch"
-}
-item {
- name: "/m/01gkx_"
- id: 130
- display_name: "Swimwear"
-}
-item {
- name: "/m/01s105"
- id: 131
- display_name: "Cabinetry"
-}
-item {
- name: "/m/01j61q"
- id: 132
- display_name: "Tent"
-}
-item {
- name: "/m/0hnnb"
- id: 133
- display_name: "Umbrella"
-}
-item {
- name: "/m/01j51"
- id: 134
- display_name: "Balloon"
-}
-item {
- name: "/m/01knjb"
- id: 135
- display_name: "Billboard"
-}
-item {
- name: "/m/03__z0"
- id: 136
- display_name: "Bookcase"
-}
-item {
- name: "/m/01m2v"
- id: 137
- display_name: "Computer keyboard"
-}
-item {
- name: "/m/0167gd"
- id: 138
- display_name: "Doll"
-}
-item {
- name: "/m/0284d"
- id: 139
- display_name: "Dairy"
-}
-item {
- name: "/m/03ssj5"
- id: 140
- display_name: "Bed"
-}
-item {
- name: "/m/02fq_6"
- id: 141
- display_name: "Fedora"
-}
-item {
- name: "/m/06nwz"
- id: 142
- display_name: "Seafood"
-}
-item {
- name: "/m/0220r2"
- id: 143
- display_name: "Fountain"
-}
-item {
- name: "/m/01mqdt"
- id: 144
- display_name: "Traffic sign"
-}
-item {
- name: "/m/0268lbt"
- id: 145
- display_name: "Hiking equipment"
-}
-item {
- name: "/m/07c52"
- id: 146
- display_name: "Television"
-}
-item {
- name: "/m/0grw1"
- id: 147
- display_name: "Salad"
-}
-item {
- name: "/m/01h3n"
- id: 148
- display_name: "Bee"
-}
-item {
- name: "/m/078n6m"
- id: 149
- display_name: "Coffee table"
-}
-item {
- name: "/m/01xq0k1"
- id: 150
- display_name: "Cattle"
-}
-item {
- name: "/m/0gd2v"
- id: 151
- display_name: "Marine mammal"
-}
-item {
- name: "/m/0dbvp"
- id: 152
- display_name: "Goose"
-}
-item {
- name: "/m/03rszm"
- id: 153
- display_name: "Curtain"
-}
-item {
- name: "/m/0h8n5zk"
- id: 154
- display_name: "Kitchen & dining room table"
-}
-item {
- name: "/m/019dx1"
- id: 155
- display_name: "Home appliance"
-}
-item {
- name: "/m/03hl4l9"
- id: 156
- display_name: "Marine invertebrates"
-}
-item {
- name: "/m/0b3fp9"
- id: 157
- display_name: "Countertop"
-}
-item {
- name: "/m/02rdsp"
- id: 158
- display_name: "Office supplies"
-}
-item {
- name: "/m/0hf58v5"
- id: 159
- display_name: "Luggage and bags"
-}
-item {
- name: "/m/04h7h"
- id: 160
- display_name: "Lighthouse"
-}
-item {
- name: "/m/024g6"
- id: 161
- display_name: "Cocktail"
-}
-item {
- name: "/m/0cffdh"
- id: 162
- display_name: "Maple"
-}
-item {
- name: "/m/03q5c7"
- id: 163
- display_name: "Saucer"
-}
-item {
- name: "/m/014y4n"
- id: 164
- display_name: "Paddle"
-}
-item {
- name: "/m/01yx86"
- id: 165
- display_name: "Bronze sculpture"
-}
-item {
- name: "/m/020jm"
- id: 166
- display_name: "Beetle"
-}
-item {
- name: "/m/025dyy"
- id: 167
- display_name: "Box"
-}
-item {
- name: "/m/01llwg"
- id: 168
- display_name: "Necklace"
-}
-item {
- name: "/m/08pbxl"
- id: 169
- display_name: "Monkey"
-}
-item {
- name: "/m/02d9qx"
- id: 170
- display_name: "Whiteboard"
-}
-item {
- name: "/m/02pkr5"
- id: 171
- display_name: "Plumbing fixture"
-}
-item {
- name: "/m/0h99cwc"
- id: 172
- display_name: "Kitchen appliance"
-}
-item {
- name: "/m/050gv4"
- id: 173
- display_name: "Plate"
-}
-item {
- name: "/m/02vqfm"
- id: 174
- display_name: "Coffee"
-}
-item {
- name: "/m/09kx5"
- id: 175
- display_name: "Deer"
-}
-item {
- name: "/m/019w40"
- id: 176
- display_name: "Surfboard"
-}
-item {
- name: "/m/09dzg"
- id: 177
- display_name: "Turtle"
-}
-item {
- name: "/m/07k1x"
- id: 178
- display_name: "Tool"
-}
-item {
- name: "/m/080hkjn"
- id: 179
- display_name: "Handbag"
-}
-item {
- name: "/m/07qxg_"
- id: 180
- display_name: "Football helmet"
-}
-item {
- name: "/m/0ph39"
- id: 181
- display_name: "Canoe"
-}
-item {
- name: "/m/018p4k"
- id: 182
- display_name: "Cart"
-}
-item {
- name: "/m/02h19r"
- id: 183
- display_name: "Scarf"
-}
-item {
- name: "/m/015h_t"
- id: 184
- display_name: "Beard"
-}
-item {
- name: "/m/0fqfqc"
- id: 185
- display_name: "Drawer"
-}
-item {
- name: "/m/025rp__"
- id: 186
- display_name: "Cowboy hat"
-}
-item {
- name: "/m/01x3z"
- id: 187
- display_name: "Clock"
-}
-item {
- name: "/m/0crjs"
- id: 188
- display_name: "Convenience store"
-}
-item {
- name: "/m/0l515"
- id: 189
- display_name: "Sandwich"
-}
-item {
- name: "/m/015qff"
- id: 190
- display_name: "Traffic light"
-}
-item {
- name: "/m/09kmb"
- id: 191
- display_name: "Spider"
-}
-item {
- name: "/m/09728"
- id: 192
- display_name: "Bread"
-}
-item {
- name: "/m/071qp"
- id: 193
- display_name: "Squirrel"
-}
-item {
- name: "/m/02s195"
- id: 194
- display_name: "Vase"
-}
-item {
- name: "/m/06c54"
- id: 195
- display_name: "Rifle"
-}
-item {
- name: "/m/01xqw"
- id: 196
- display_name: "Cello"
-}
-item {
- name: "/m/05zsy"
- id: 197
- display_name: "Pumpkin"
-}
-item {
- name: "/m/0bwd_0j"
- id: 198
- display_name: "Elephant"
-}
-item {
- name: "/m/04m9y"
- id: 199
- display_name: "Lizard"
-}
-item {
- name: "/m/052sf"
- id: 200
- display_name: "Mushroom"
-}
-item {
- name: "/m/03grzl"
- id: 201
- display_name: "Baseball glove"
-}
-item {
- name: "/m/01z1kdw"
- id: 202
- display_name: "Juice"
-}
-item {
- name: "/m/02wv6h6"
- id: 203
- display_name: "Skirt"
-}
-item {
- name: "/m/016m2d"
- id: 204
- display_name: "Skull"
-}
-item {
- name: "/m/0dtln"
- id: 205
- display_name: "Lamp"
-}
-item {
- name: "/m/057cc"
- id: 206
- display_name: "Musical keyboard"
-}
-item {
- name: "/m/06k2mb"
- id: 207
- display_name: "High heels"
-}
-item {
- name: "/m/0f6wt"
- id: 208
- display_name: "Falcon"
-}
-item {
- name: "/m/0cxn2"
- id: 209
- display_name: "Ice cream"
-}
-item {
- name: "/m/02jvh9"
- id: 210
- display_name: "Mug"
-}
-item {
- name: "/m/0gjkl"
- id: 211
- display_name: "Watch"
-}
-item {
- name: "/m/01b638"
- id: 212
- display_name: "Boot"
-}
-item {
- name: "/m/071p9"
- id: 213
- display_name: "Ski"
-}
-item {
- name: "/m/0pg52"
- id: 214
- display_name: "Taxi"
-}
-item {
- name: "/m/0ftb8"
- id: 215
- display_name: "Sunflower"
-}
-item {
- name: "/m/0hnyx"
- id: 216
- display_name: "Pastry"
-}
-item {
- name: "/m/02jz0l"
- id: 217
- display_name: "Tap"
-}
-item {
- name: "/m/04kkgm"
- id: 218
- display_name: "Bowl"
-}
-item {
- name: "/m/0174n1"
- id: 219
- display_name: "Glove"
-}
-item {
- name: "/m/0gv1x"
- id: 220
- display_name: "Parrot"
-}
-item {
- name: "/m/09csl"
- id: 221
- display_name: "Eagle"
-}
-item {
- name: "/m/02jnhm"
- id: 222
- display_name: "Tin can"
-}
-item {
- name: "/m/099ssp"
- id: 223
- display_name: "Platter"
-}
-item {
- name: "/m/03nfch"
- id: 224
- display_name: "Sandal"
-}
-item {
- name: "/m/07y_7"
- id: 225
- display_name: "Violin"
-}
-item {
- name: "/m/05z6w"
- id: 226
- display_name: "Penguin"
-}
-item {
- name: "/m/03m3pdh"
- id: 227
- display_name: "Sofa bed"
-}
-item {
- name: "/m/09ld4"
- id: 228
- display_name: "Frog"
-}
-item {
- name: "/m/09b5t"
- id: 229
- display_name: "Chicken"
-}
-item {
- name: "/m/054xkw"
- id: 230
- display_name: "Lifejacket"
-}
-item {
- name: "/m/0130jx"
- id: 231
- display_name: "Sink"
-}
-item {
- name: "/m/07fbm7"
- id: 232
- display_name: "Strawberry"
-}
-item {
- name: "/m/01dws"
- id: 233
- display_name: "Bear"
-}
-item {
- name: "/m/01tcjp"
- id: 234
- display_name: "Muffin"
-}
-item {
- name: "/m/0dftk"
- id: 235
- display_name: "Swan"
-}
-item {
- name: "/m/0c06p"
- id: 236
- display_name: "Candle"
-}
-item {
- name: "/m/034c16"
- id: 237
- display_name: "Pillow"
-}
-item {
- name: "/m/09d5_"
- id: 238
- display_name: "Owl"
-}
-item {
- name: "/m/03hlz0c"
- id: 239
- display_name: "Kitchen utensil"
-}
-item {
- name: "/m/0ft9s"
- id: 240
- display_name: "Dragonfly"
-}
-item {
- name: "/m/011k07"
- id: 241
- display_name: "Tortoise"
-}
-item {
- name: "/m/054_l"
- id: 242
- display_name: "Mirror"
-}
-item {
- name: "/m/0jqgx"
- id: 243
- display_name: "Lily"
-}
-item {
- name: "/m/0663v"
- id: 244
- display_name: "Pizza"
-}
-item {
- name: "/m/0242l"
- id: 245
- display_name: "Coin"
-}
-item {
- name: "/m/014trl"
- id: 246
- display_name: "Cosmetics"
-}
-item {
- name: "/m/05r5c"
- id: 247
- display_name: "Piano"
-}
-item {
- name: "/m/07j87"
- id: 248
- display_name: "Tomato"
-}
-item {
- name: "/m/05kyg_"
- id: 249
- display_name: "Chest of drawers"
-}
-item {
- name: "/m/0kmg4"
- id: 250
- display_name: "Teddy bear"
-}
-item {
- name: "/m/07cmd"
- id: 251
- display_name: "Tank"
-}
-item {
- name: "/m/0dv77"
- id: 252
- display_name: "Squash"
-}
-item {
- name: "/m/096mb"
- id: 253
- display_name: "Lion"
-}
-item {
- name: "/m/01gmv2"
- id: 254
- display_name: "Brassiere"
-}
-item {
- name: "/m/07bgp"
- id: 255
- display_name: "Sheep"
-}
-item {
- name: "/m/0cmx8"
- id: 256
- display_name: "Spoon"
-}
-item {
- name: "/m/029tx"
- id: 257
- display_name: "Dinosaur"
-}
-item {
- name: "/m/073bxn"
- id: 258
- display_name: "Tripod"
-}
-item {
- name: "/m/0bh9flk"
- id: 259
- display_name: "Tablet computer"
-}
-item {
- name: "/m/06mf6"
- id: 260
- display_name: "Rabbit"
-}
-item {
- name: "/m/06_fw"
- id: 261
- display_name: "Skateboard"
-}
-item {
- name: "/m/078jl"
- id: 262
- display_name: "Snake"
-}
-item {
- name: "/m/0fbdv"
- id: 263
- display_name: "Shellfish"
-}
-item {
- name: "/m/0h23m"
- id: 264
- display_name: "Sparrow"
-}
-item {
- name: "/m/014j1m"
- id: 265
- display_name: "Apple"
-}
-item {
- name: "/m/03fwl"
- id: 266
- display_name: "Goat"
-}
-item {
- name: "/m/02y6n"
- id: 267
- display_name: "French fries"
-}
-item {
- name: "/m/06c7f7"
- id: 268
- display_name: "Lipstick"
-}
-item {
- name: "/m/026qbn5"
- id: 269
- display_name: "studio couch"
-}
-item {
- name: "/m/0cdn1"
- id: 270
- display_name: "Hamburger"
-}
-item {
- name: "/m/07clx"
- id: 271
- display_name: "Tea"
-}
-item {
- name: "/m/07cx4"
- id: 272
- display_name: "Telephone"
-}
-item {
- name: "/m/03g8mr"
- id: 273
- display_name: "Baseball bat"
-}
-item {
- name: "/m/0cnyhnx"
- id: 274
- display_name: "Bull"
-}
-item {
- name: "/m/01b7fy"
- id: 275
- display_name: "Headphones"
-}
-item {
- name: "/m/04gth"
- id: 276
- display_name: "Lavender"
-}
-item {
- name: "/m/0cyfs"
- id: 277
- display_name: "Parachute"
-}
-item {
- name: "/m/021mn"
- id: 278
- display_name: "Cookie"
-}
-item {
- name: "/m/07dm6"
- id: 279
- display_name: "Tiger"
-}
-item {
- name: "/m/0k1tl"
- id: 280
- display_name: "Pen"
-}
-item {
- name: "/m/0dv9c"
- id: 281
- display_name: "Racket"
-}
-item {
- name: "/m/0dt3t"
- id: 282
- display_name: "Fork"
-}
-item {
- name: "/m/04yqq2"
- id: 283
- display_name: "Bust"
-}
-item {
- name: "/m/01cmb2"
- id: 284
- display_name: "Miniskirt"
-}
-item {
- name: "/m/0gd36"
- id: 285
- display_name: "Sea lion"
-}
-item {
- name: "/m/033cnk"
- id: 286
- display_name: "Egg"
-}
-item {
- name: "/m/06ncr"
- id: 287
- display_name: "Saxophone"
-}
-item {
- name: "/m/03bk1"
- id: 288
- display_name: "Giraffe"
-}
-item {
- name: "/m/0bjyj5"
- id: 289
- display_name: "Waste container"
-}
-item {
- name: "/m/06__v"
- id: 290
- display_name: "Snowboard"
-}
-item {
- name: "/m/0qmmr"
- id: 291
- display_name: "Wheelchair"
-}
-item {
- name: "/m/01xgg_"
- id: 292
- display_name: "Medical equipment"
-}
-item {
- name: "/m/0czz2"
- id: 293
- display_name: "Antelope"
-}
-item {
- name: "/m/02l8p9"
- id: 294
- display_name: "Harbor seal"
-}
-item {
- name: "/m/09g1w"
- id: 295
- display_name: "Toilet"
-}
-item {
- name: "/m/0ll1f78"
- id: 296
- display_name: "Shrimp"
-}
-item {
- name: "/m/0cyhj_"
- id: 297
- display_name: "Orange"
-}
-item {
- name: "/m/0642b4"
- id: 298
- display_name: "Cupboard"
-}
-item {
- name: "/m/0h8mzrc"
- id: 299
- display_name: "Wall clock"
-}
-item {
- name: "/m/068zj"
- id: 300
- display_name: "Pig"
-}
-item {
- name: "/m/02z51p"
- id: 301
- display_name: "Nightstand"
-}
-item {
- name: "/m/0h8nr_l"
- id: 302
- display_name: "Bathroom accessory"
-}
-item {
- name: "/m/0388q"
- id: 303
- display_name: "Grape"
-}
-item {
- name: "/m/02hj4"
- id: 304
- display_name: "Dolphin"
-}
-item {
- name: "/m/01jfsr"
- id: 305
- display_name: "Lantern"
-}
-item {
- name: "/m/07gql"
- id: 306
- display_name: "Trumpet"
-}
-item {
- name: "/m/0h8my_4"
- id: 307
- display_name: "Tennis racket"
-}
-item {
- name: "/m/0n28_"
- id: 308
- display_name: "Crab"
-}
-item {
- name: "/m/0120dh"
- id: 309
- display_name: "Sea turtle"
-}
-item {
- name: "/m/020kz"
- id: 310
- display_name: "Cannon"
-}
-item {
- name: "/m/0mkg"
- id: 311
- display_name: "Accordion"
-}
-item {
- name: "/m/03c7gz"
- id: 312
- display_name: "Door handle"
-}
-item {
- name: "/m/09k_b"
- id: 313
- display_name: "Lemon"
-}
-item {
- name: "/m/031n1"
- id: 314
- display_name: "Foot"
-}
-item {
- name: "/m/04rmv"
- id: 315
- display_name: "Mouse"
-}
-item {
- name: "/m/084rd"
- id: 316
- display_name: "Wok"
-}
-item {
- name: "/m/02rgn06"
- id: 317
- display_name: "Volleyball"
-}
-item {
- name: "/m/05z55"
- id: 318
- display_name: "Pasta"
-}
-item {
- name: "/m/01r546"
- id: 319
- display_name: "Earrings"
-}
-item {
- name: "/m/09qck"
- id: 320
- display_name: "Banana"
-}
-item {
- name: "/m/012w5l"
- id: 321
- display_name: "Ladder"
-}
-item {
- name: "/m/01940j"
- id: 322
- display_name: "Backpack"
-}
-item {
- name: "/m/09f_2"
- id: 323
- display_name: "Crocodile"
-}
-item {
- name: "/m/02p3w7d"
- id: 324
- display_name: "Roller skates"
-}
-item {
- name: "/m/057p5t"
- id: 325
- display_name: "Scoreboard"
-}
-item {
- name: "/m/0d8zb"
- id: 326
- display_name: "Jellyfish"
-}
-item {
- name: "/m/01nq26"
- id: 327
- display_name: "Sock"
-}
-item {
- name: "/m/01x_v"
- id: 328
- display_name: "Camel"
-}
-item {
- name: "/m/05gqfk"
- id: 329
- display_name: "Plastic bag"
-}
-item {
- name: "/m/0cydv"
- id: 330
- display_name: "Caterpillar"
-}
-item {
- name: "/m/07030"
- id: 331
- display_name: "Sushi"
-}
-item {
- name: "/m/084zz"
- id: 332
- display_name: "Whale"
-}
-item {
- name: "/m/0c29q"
- id: 333
- display_name: "Leopard"
-}
-item {
- name: "/m/02zn6n"
- id: 334
- display_name: "Barrel"
-}
-item {
- name: "/m/03tw93"
- id: 335
- display_name: "Fireplace"
-}
-item {
- name: "/m/0fqt361"
- id: 336
- display_name: "Stool"
-}
-item {
- name: "/m/0f9_l"
- id: 337
- display_name: "Snail"
-}
-item {
- name: "/m/0gm28"
- id: 338
- display_name: "Candy"
-}
-item {
- name: "/m/09rvcxw"
- id: 339
- display_name: "Rocket"
-}
-item {
- name: "/m/01nkt"
- id: 340
- display_name: "Cheese"
-}
-item {
- name: "/m/04p0qw"
- id: 341
- display_name: "Billiard table"
-}
-item {
- name: "/m/03hj559"
- id: 342
- display_name: "Mixing bowl"
-}
-item {
- name: "/m/07pj7bq"
- id: 343
- display_name: "Bowling equipment"
-}
-item {
- name: "/m/04ctx"
- id: 344
- display_name: "Knife"
-}
-item {
- name: "/m/0703r8"
- id: 345
- display_name: "Loveseat"
-}
-item {
- name: "/m/03qrc"
- id: 346
- display_name: "Hamster"
-}
-item {
- name: "/m/020lf"
- id: 347
- display_name: "Mouse"
-}
-item {
- name: "/m/0by6g"
- id: 348
- display_name: "Shark"
-}
-item {
- name: "/m/01fh4r"
- id: 349
- display_name: "Teapot"
-}
-item {
- name: "/m/07c6l"
- id: 350
- display_name: "Trombone"
-}
-item {
- name: "/m/03bj1"
- id: 351
- display_name: "Panda"
-}
-item {
- name: "/m/0898b"
- id: 352
- display_name: "Zebra"
-}
-item {
- name: "/m/02x984l"
- id: 353
- display_name: "Mechanical fan"
-}
-item {
- name: "/m/0fj52s"
- id: 354
- display_name: "Carrot"
-}
-item {
- name: "/m/0cd4d"
- id: 355
- display_name: "Cheetah"
-}
-item {
- name: "/m/02068x"
- id: 356
- display_name: "Gondola"
-}
-item {
- name: "/m/01vbnl"
- id: 357
- display_name: "Bidet"
-}
-item {
- name: "/m/0449p"
- id: 358
- display_name: "Jaguar"
-}
-item {
- name: "/m/0gj37"
- id: 359
- display_name: "Ladybug"
-}
-item {
- name: "/m/0nl46"
- id: 360
- display_name: "Crown"
-}
-item {
- name: "/m/0152hh"
- id: 361
- display_name: "Snowman"
-}
-item {
- name: "/m/03dnzn"
- id: 362
- display_name: "Bathtub"
-}
-item {
- name: "/m/05_5p_0"
- id: 363
- display_name: "Table tennis racket"
-}
-item {
- name: "/m/02jfl0"
- id: 364
- display_name: "Sombrero"
-}
-item {
- name: "/m/01dxs"
- id: 365
- display_name: "Brown bear"
-}
-item {
- name: "/m/0cjq5"
- id: 366
- display_name: "Lobster"
-}
-item {
- name: "/m/040b_t"
- id: 367
- display_name: "Refrigerator"
-}
-item {
- name: "/m/0_cp5"
- id: 368
- display_name: "Oyster"
-}
-item {
- name: "/m/0gxl3"
- id: 369
- display_name: "Handgun"
-}
-item {
- name: "/m/029bxz"
- id: 370
- display_name: "Oven"
-}
-item {
- name: "/m/02zt3"
- id: 371
- display_name: "Kite"
-}
-item {
- name: "/m/03d443"
- id: 372
- display_name: "Rhinoceros"
-}
-item {
- name: "/m/0306r"
- id: 373
- display_name: "Fox"
-}
-item {
- name: "/m/0h8l4fh"
- id: 374
- display_name: "Light bulb"
-}
-item {
- name: "/m/0633h"
- id: 375
- display_name: "Polar bear"
-}
-item {
- name: "/m/01s55n"
- id: 376
- display_name: "Suitcase"
-}
-item {
- name: "/m/0hkxq"
- id: 377
- display_name: "Broccoli"
-}
-item {
- name: "/m/0cn6p"
- id: 378
- display_name: "Otter"
-}
-item {
- name: "/m/0dbzx"
- id: 379
- display_name: "Mule"
-}
-item {
- name: "/m/01dy8n"
- id: 380
- display_name: "Woodpecker"
-}
-item {
- name: "/m/01h8tj"
- id: 381
- display_name: "Starfish"
-}
-item {
- name: "/m/03s_tn"
- id: 382
- display_name: "Kettle"
-}
-item {
- name: "/m/01xs3r"
- id: 383
- display_name: "Jet ski"
-}
-item {
- name: "/m/031b6r"
- id: 384
- display_name: "Window blind"
-}
-item {
- name: "/m/06j2d"
- id: 385
- display_name: "Raven"
-}
-item {
- name: "/m/0hqkz"
- id: 386
- display_name: "Grapefruit"
-}
-item {
- name: "/m/01_5g"
- id: 387
- display_name: "Chopsticks"
-}
-item {
- name: "/m/02zvsm"
- id: 388
- display_name: "Tart"
-}
-item {
- name: "/m/0kpqd"
- id: 389
- display_name: "Watermelon"
-}
-item {
- name: "/m/015x4r"
- id: 390
- display_name: "Cucumber"
-}
-item {
- name: "/m/061hd_"
- id: 391
- display_name: "Infant bed"
-}
-item {
- name: "/m/04ylt"
- id: 392
- display_name: "Missile"
-}
-item {
- name: "/m/02wv84t"
- id: 393
- display_name: "Gas stove"
-}
-item {
- name: "/m/04y4h8h"
- id: 394
- display_name: "Bathroom cabinet"
-}
-item {
- name: "/m/01gllr"
- id: 395
- display_name: "Beehive"
-}
-item {
- name: "/m/0pcr"
- id: 396
- display_name: "Alpaca"
-}
-item {
- name: "/m/0jy4k"
- id: 397
- display_name: "Doughnut"
-}
-item {
- name: "/m/09f20"
- id: 398
- display_name: "Hippopotamus"
-}
-item {
- name: "/m/0mcx2"
- id: 399
- display_name: "Ipod"
-}
-item {
- name: "/m/04c0y"
- id: 400
- display_name: "Kangaroo"
-}
-item {
- name: "/m/0_k2"
- id: 401
- display_name: "Ant"
-}
-item {
- name: "/m/0jg57"
- id: 402
- display_name: "Bell pepper"
-}
-item {
- name: "/m/03fj2"
- id: 403
- display_name: "Goldfish"
-}
-item {
- name: "/m/03ldnb"
- id: 404
- display_name: "Ceiling fan"
-}
-item {
- name: "/m/06nrc"
- id: 405
- display_name: "Shotgun"
-}
-item {
- name: "/m/01btn"
- id: 406
- display_name: "Barge"
-}
-item {
- name: "/m/05vtc"
- id: 407
- display_name: "Potato"
-}
-item {
- name: "/m/08hvt4"
- id: 408
- display_name: "Jug"
-}
-item {
- name: "/m/0fx9l"
- id: 409
- display_name: "Microwave oven"
-}
-item {
- name: "/m/01h44"
- id: 410
- display_name: "Bat"
-}
-item {
- name: "/m/05n4y"
- id: 411
- display_name: "Ostrich"
-}
-item {
- name: "/m/0jly1"
- id: 412
- display_name: "Turkey"
-}
-item {
- name: "/m/06y5r"
- id: 413
- display_name: "Sword"
-}
-item {
- name: "/m/05ctyq"
- id: 414
- display_name: "Tennis ball"
-}
-item {
- name: "/m/0fp6w"
- id: 415
- display_name: "Pineapple"
-}
-item {
- name: "/m/0d4w1"
- id: 416
- display_name: "Closet"
-}
-item {
- name: "/m/02pv19"
- id: 417
- display_name: "Stop sign"
-}
-item {
- name: "/m/07crc"
- id: 418
- display_name: "Taco"
-}
-item {
- name: "/m/01dwwc"
- id: 419
- display_name: "Pancake"
-}
-item {
- name: "/m/01b9xk"
- id: 420
- display_name: "Hot dog"
-}
-item {
- name: "/m/013y1f"
- id: 421
- display_name: "Organ"
-}
-item {
- name: "/m/0m53l"
- id: 422
- display_name: "Rays and skates"
-}
-item {
- name: "/m/0174k2"
- id: 423
- display_name: "Washing machine"
-}
-item {
- name: "/m/01dwsz"
- id: 424
- display_name: "Waffle"
-}
-item {
- name: "/m/04vv5k"
- id: 425
- display_name: "Snowplow"
-}
-item {
- name: "/m/04cp_"
- id: 426
- display_name: "Koala"
-}
-item {
- name: "/m/0fz0h"
- id: 427
- display_name: "Honeycomb"
-}
-item {
- name: "/m/0llzx"
- id: 428
- display_name: "Sewing machine"
-}
-item {
- name: "/m/0319l"
- id: 429
- display_name: "Horn"
-}
-item {
- name: "/m/04v6l4"
- id: 430
- display_name: "Frying pan"
-}
-item {
- name: "/m/0dkzw"
- id: 431
- display_name: "Seat belt"
-}
-item {
- name: "/m/027pcv"
- id: 432
- display_name: "Zucchini"
-}
-item {
- name: "/m/0323sq"
- id: 433
- display_name: "Golf cart"
-}
-item {
- name: "/m/054fyh"
- id: 434
- display_name: "Pitcher"
-}
-item {
- name: "/m/01pns0"
- id: 435
- display_name: "Fire hydrant"
-}
-item {
- name: "/m/012n7d"
- id: 436
- display_name: "Ambulance"
-}
-item {
- name: "/m/044r5d"
- id: 437
- display_name: "Golf ball"
-}
-item {
- name: "/m/01krhy"
- id: 438
- display_name: "Tiara"
-}
-item {
- name: "/m/0dq75"
- id: 439
- display_name: "Raccoon"
-}
-item {
- name: "/m/0176mf"
- id: 440
- display_name: "Belt"
-}
-item {
- name: "/m/0h8lkj8"
- id: 441
- display_name: "Corded phone"
-}
-item {
- name: "/m/04tn4x"
- id: 442
- display_name: "Swim cap"
-}
-item {
- name: "/m/06l9r"
- id: 443
- display_name: "Red panda"
-}
-item {
- name: "/m/0cjs7"
- id: 444
- display_name: "Asparagus"
-}
-item {
- name: "/m/01lsmm"
- id: 445
- display_name: "Scissors"
-}
-item {
- name: "/m/01lcw4"
- id: 446
- display_name: "Limousine"
-}
-item {
- name: "/m/047j0r"
- id: 447
- display_name: "Filing cabinet"
-}
-item {
- name: "/m/01fb_0"
- id: 448
- display_name: "Bagel"
-}
-item {
- name: "/m/04169hn"
- id: 449
- display_name: "Wood-burning stove"
-}
-item {
- name: "/m/076bq"
- id: 450
- display_name: "Segway"
-}
-item {
- name: "/m/0hdln"
- id: 451
- display_name: "Ruler"
-}
-item {
- name: "/m/01g3x7"
- id: 452
- display_name: "Bow and arrow"
-}
-item {
- name: "/m/0l3ms"
- id: 453
- display_name: "Balance beam"
-}
-item {
- name: "/m/058qzx"
- id: 454
- display_name: "Kitchen knife"
-}
-item {
- name: "/m/0h8n6ft"
- id: 455
- display_name: "Cake stand"
-}
-item {
- name: "/m/018j2"
- id: 456
- display_name: "Banjo"
-}
-item {
- name: "/m/0l14j_"
- id: 457
- display_name: "Flute"
-}
-item {
- name: "/m/0wdt60w"
- id: 458
- display_name: "Rugby ball"
-}
-item {
- name: "/m/02gzp"
- id: 459
- display_name: "Dagger"
-}
-item {
- name: "/m/0h8n6f9"
- id: 460
- display_name: "Dog bed"
-}
-item {
- name: "/m/0fbw6"
- id: 461
- display_name: "Cabbage"
-}
-item {
- name: "/m/07kng9"
- id: 462
- display_name: "Picnic basket"
-}
-item {
- name: "/m/0dj6p"
- id: 463
- display_name: "Peach"
-}
-item {
- name: "/m/06pcq"
- id: 464
- display_name: "Submarine sandwich"
-}
-item {
- name: "/m/061_f"
- id: 465
- display_name: "Pear"
-}
-item {
- name: "/m/04g2r"
- id: 466
- display_name: "Lynx"
-}
-item {
- name: "/m/0jwn_"
- id: 467
- display_name: "Pomegranate"
-}
-item {
- name: "/m/02f9f_"
- id: 468
- display_name: "Shower"
-}
-item {
- name: "/m/01f8m5"
- id: 469
- display_name: "Blue jay"
-}
-item {
- name: "/m/01m4t"
- id: 470
- display_name: "Printer"
-}
-item {
- name: "/m/0cl4p"
- id: 471
- display_name: "Hedgehog"
-}
-item {
- name: "/m/07xyvk"
- id: 472
- display_name: "Coffeemaker"
-}
-item {
- name: "/m/084hf"
- id: 473
- display_name: "Worm"
-}
-item {
- name: "/m/03v5tg"
- id: 474
- display_name: "Drinking straw"
-}
-item {
- name: "/m/0qjjc"
- id: 475
- display_name: "Remote control"
-}
-item {
- name: "/m/015x5n"
- id: 476
- display_name: "Radish"
-}
-item {
- name: "/m/0ccs93"
- id: 477
- display_name: "Canary"
-}
-item {
- name: "/m/0nybt"
- id: 478
- display_name: "Seahorse"
-}
-item {
- name: "/m/02vkqh8"
- id: 479
- display_name: "Wardrobe"
-}
-item {
- name: "/m/09gtd"
- id: 480
- display_name: "Toilet paper"
-}
-item {
- name: "/m/019h78"
- id: 481
- display_name: "Centipede"
-}
-item {
- name: "/m/015wgc"
- id: 482
- display_name: "Croissant"
-}
-item {
- name: "/m/01x3jk"
- id: 483
- display_name: "Snowmobile"
-}
-item {
- name: "/m/01j3zr"
- id: 484
- display_name: "Burrito"
-}
-item {
- name: "/m/0c568"
- id: 485
- display_name: "Porcupine"
-}
-item {
- name: "/m/02pdsw"
- id: 486
- display_name: "Cutting board"
-}
-item {
- name: "/m/029b3"
- id: 487
- display_name: "Dice"
-}
-item {
- name: "/m/03q5t"
- id: 488
- display_name: "Harpsichord"
-}
-item {
- name: "/m/0p833"
- id: 489
- display_name: "Perfume"
-}
-item {
- name: "/m/01d380"
- id: 490
- display_name: "Drill"
-}
-item {
- name: "/m/024d2"
- id: 491
- display_name: "Calculator"
-}
-item {
- name: "/m/0mw_6"
- id: 492
- display_name: "Willow"
-}
-item {
- name: "/m/01f91_"
- id: 493
- display_name: "Pretzel"
-}
-item {
- name: "/m/02g30s"
- id: 494
- display_name: "Guacamole"
-}
-item {
- name: "/m/01hrv5"
- id: 495
- display_name: "Popcorn"
-}
-item {
- name: "/m/03m5k"
- id: 496
- display_name: "Harp"
-}
-item {
- name: "/m/0162_1"
- id: 497
- display_name: "Towel"
-}
-item {
- name: "/m/063rgb"
- id: 498
- display_name: "Mixer"
-}
-item {
- name: "/m/06_72j"
- id: 499
- display_name: "Digital clock"
-}
-item {
- name: "/m/046dlr"
- id: 500
- display_name: "Alarm clock"
-}
-item {
- name: "/m/047v4b"
- id: 501
- display_name: "Artichoke"
-}
-item {
- name: "/m/04zpv"
- id: 502
- display_name: "Milk"
-}
-item {
- name: "/m/043nyj"
- id: 503
- display_name: "Common fig"
-}
-item {
- name: "/m/03bbps"
- id: 504
- display_name: "Power plugs and sockets"
-}
-item {
- name: "/m/02w3r3"
- id: 505
- display_name: "Paper towel"
-}
-item {
- name: "/m/02pjr4"
- id: 506
- display_name: "Blender"
-}
-item {
- name: "/m/0755b"
- id: 507
- display_name: "Scorpion"
-}
-item {
- name: "/m/02lbcq"
- id: 508
- display_name: "Stretcher"
-}
-item {
- name: "/m/0fldg"
- id: 509
- display_name: "Mango"
-}
-item {
- name: "/m/012074"
- id: 510
- display_name: "Magpie"
-}
-item {
- name: "/m/035vxb"
- id: 511
- display_name: "Isopod"
-}
-item {
- name: "/m/02w3_ws"
- id: 512
- display_name: "Personal care"
-}
-item {
- name: "/m/0f6nr"
- id: 513
- display_name: "Unicycle"
-}
-item {
- name: "/m/0420v5"
- id: 514
- display_name: "Punching bag"
-}
-item {
- name: "/m/0frqm"
- id: 515
- display_name: "Envelope"
-}
-item {
- name: "/m/03txqz"
- id: 516
- display_name: "Scale"
-}
-item {
- name: "/m/0271qf7"
- id: 517
- display_name: "Wine rack"
-}
-item {
- name: "/m/074d1"
- id: 518
- display_name: "Submarine"
-}
-item {
- name: "/m/08p92x"
- id: 519
- display_name: "Cream"
-}
-item {
- name: "/m/01j4z9"
- id: 520
- display_name: "Chainsaw"
-}
-item {
- name: "/m/0kpt_"
- id: 521
- display_name: "Cantaloupe"
-}
-item {
- name: "/m/0h8n27j"
- id: 522
- display_name: "Serving tray"
-}
-item {
- name: "/m/03y6mg"
- id: 523
- display_name: "Food processor"
-}
-item {
- name: "/m/04h8sr"
- id: 524
- display_name: "Dumbbell"
-}
-item {
- name: "/m/065h6l"
- id: 525
- display_name: "Jacuzzi"
-}
-item {
- name: "/m/02tsc9"
- id: 526
- display_name: "Slow cooker"
-}
-item {
- name: "/m/012ysf"
- id: 527
- display_name: "Syringe"
-}
-item {
- name: "/m/0ky7b"
- id: 528
- display_name: "Dishwasher"
-}
-item {
- name: "/m/02wg_p"
- id: 529
- display_name: "Tree house"
-}
-item {
- name: "/m/0584n8"
- id: 530
- display_name: "Briefcase"
-}
-item {
- name: "/m/03kt2w"
- id: 531
- display_name: "Stationary bicycle"
-}
-item {
- name: "/m/05kms"
- id: 532
- display_name: "Oboe"
-}
-item {
- name: "/m/030610"
- id: 533
- display_name: "Treadmill"
-}
-item {
- name: "/m/0lt4_"
- id: 534
- display_name: "Binoculars"
-}
-item {
- name: "/m/076lb9"
- id: 535
- display_name: "Bench"
-}
-item {
- name: "/m/02ctlc"
- id: 536
- display_name: "Cricket ball"
-}
-item {
- name: "/m/02x8cch"
- id: 537
- display_name: "Salt and pepper shakers"
-}
-item {
- name: "/m/09gys"
- id: 538
- display_name: "Squid"
-}
-item {
- name: "/m/03jbxj"
- id: 539
- display_name: "Light switch"
-}
-item {
- name: "/m/012xff"
- id: 540
- display_name: "Toothbrush"
-}
-item {
- name: "/m/0h8kx63"
- id: 541
- display_name: "Spice rack"
-}
-item {
- name: "/m/073g6"
- id: 542
- display_name: "Stethoscope"
-}
-item {
- name: "/m/02cvgx"
- id: 543
- display_name: "Winter melon"
-}
-item {
- name: "/m/027rl48"
- id: 544
- display_name: "Ladle"
-}
-item {
- name: "/m/01kb5b"
- id: 545
- display_name: "Flashlight"
-}
diff --git a/object_detection/data/pascal_label_map.pbtxt b/object_detection/data/pascal_label_map.pbtxt
deleted file mode 100644
index c9e9e2af..00000000
--- a/object_detection/data/pascal_label_map.pbtxt
+++ /dev/null
@@ -1,99 +0,0 @@
-item {
- id: 1
- name: 'aeroplane'
-}
-
-item {
- id: 2
- name: 'bicycle'
-}
-
-item {
- id: 3
- name: 'bird'
-}
-
-item {
- id: 4
- name: 'boat'
-}
-
-item {
- id: 5
- name: 'bottle'
-}
-
-item {
- id: 6
- name: 'bus'
-}
-
-item {
- id: 7
- name: 'car'
-}
-
-item {
- id: 8
- name: 'cat'
-}
-
-item {
- id: 9
- name: 'chair'
-}
-
-item {
- id: 10
- name: 'cow'
-}
-
-item {
- id: 11
- name: 'diningtable'
-}
-
-item {
- id: 12
- name: 'dog'
-}
-
-item {
- id: 13
- name: 'horse'
-}
-
-item {
- id: 14
- name: 'motorbike'
-}
-
-item {
- id: 15
- name: 'person'
-}
-
-item {
- id: 16
- name: 'pottedplant'
-}
-
-item {
- id: 17
- name: 'sheep'
-}
-
-item {
- id: 18
- name: 'sofa'
-}
-
-item {
- id: 19
- name: 'train'
-}
-
-item {
- id: 20
- name: 'tvmonitor'
-}
diff --git a/object_detection/data/pet_label_map.pbtxt b/object_detection/data/pet_label_map.pbtxt
deleted file mode 100644
index 54d7d351..00000000
--- a/object_detection/data/pet_label_map.pbtxt
+++ /dev/null
@@ -1,184 +0,0 @@
-item {
- id: 1
- name: 'Abyssinian'
-}
-
-item {
- id: 2
- name: 'american_bulldog'
-}
-
-item {
- id: 3
- name: 'american_pit_bull_terrier'
-}
-
-item {
- id: 4
- name: 'basset_hound'
-}
-
-item {
- id: 5
- name: 'beagle'
-}
-
-item {
- id: 6
- name: 'Bengal'
-}
-
-item {
- id: 7
- name: 'Birman'
-}
-
-item {
- id: 8
- name: 'Bombay'
-}
-
-item {
- id: 9
- name: 'boxer'
-}
-
-item {
- id: 10
- name: 'British_Shorthair'
-}
-
-item {
- id: 11
- name: 'chihuahua'
-}
-
-item {
- id: 12
- name: 'Egyptian_Mau'
-}
-
-item {
- id: 13
- name: 'english_cocker_spaniel'
-}
-
-item {
- id: 14
- name: 'english_setter'
-}
-
-item {
- id: 15
- name: 'german_shorthaired'
-}
-
-item {
- id: 16
- name: 'great_pyrenees'
-}
-
-item {
- id: 17
- name: 'havanese'
-}
-
-item {
- id: 18
- name: 'japanese_chin'
-}
-
-item {
- id: 19
- name: 'keeshond'
-}
-
-item {
- id: 20
- name: 'leonberger'
-}
-
-item {
- id: 21
- name: 'Maine_Coon'
-}
-
-item {
- id: 22
- name: 'miniature_pinscher'
-}
-
-item {
- id: 23
- name: 'newfoundland'
-}
-
-item {
- id: 24
- name: 'Persian'
-}
-
-item {
- id: 25
- name: 'pomeranian'
-}
-
-item {
- id: 26
- name: 'pug'
-}
-
-item {
- id: 27
- name: 'Ragdoll'
-}
-
-item {
- id: 28
- name: 'Russian_Blue'
-}
-
-item {
- id: 29
- name: 'saint_bernard'
-}
-
-item {
- id: 30
- name: 'samoyed'
-}
-
-item {
- id: 31
- name: 'scottish_terrier'
-}
-
-item {
- id: 32
- name: 'shiba_inu'
-}
-
-item {
- id: 33
- name: 'Siamese'
-}
-
-item {
- id: 34
- name: 'Sphynx'
-}
-
-item {
- id: 35
- name: 'staffordshire_bull_terrier'
-}
-
-item {
- id: 36
- name: 'wheaten_terrier'
-}
-
-item {
- id: 37
- name: 'yorkshire_terrier'
-}
diff --git a/object_detection/data_decoders/BUILD b/object_detection/data_decoders/BUILD
deleted file mode 100644
index d6b48ac0..00000000
--- a/object_detection/data_decoders/BUILD
+++ /dev/null
@@ -1,29 +0,0 @@
-# Tensorflow Object Detection API: data decoders.
-
-package(
- default_visibility = ["//visibility:public"],
-)
-
-licenses(["notice"])
-# Apache 2.0
-
-py_library(
- name = "tf_example_decoder",
- srcs = ["tf_example_decoder.py"],
- deps = [
- "//tensorflow",
- "//tensorflow_models/object_detection/core:data_decoder",
- "//tensorflow_models/object_detection/core:standard_fields",
- "//tensorflow_models/object_detection/utils:label_map_util",
- ],
-)
-
-py_test(
- name = "tf_example_decoder_test",
- srcs = ["tf_example_decoder_test.py"],
- deps = [
- ":tf_example_decoder",
- "//tensorflow",
- "//tensorflow_models/object_detection/core:standard_fields",
- ],
-)
diff --git a/object_detection/data_decoders/__init__.py b/object_detection/data_decoders/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/object_detection/data_decoders/__pycache__/__init__.cpython-35.pyc b/object_detection/data_decoders/__pycache__/__init__.cpython-35.pyc
deleted file mode 100644
index dd75c900..00000000
Binary files a/object_detection/data_decoders/__pycache__/__init__.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/data_decoders/__pycache__/tf_example_decoder.cpython-35.pyc b/object_detection/data_decoders/__pycache__/tf_example_decoder.cpython-35.pyc
deleted file mode 100644
index 9e5bf93a..00000000
Binary files a/object_detection/data_decoders/__pycache__/tf_example_decoder.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/data_decoders/tf_example_decoder.py b/object_detection/data_decoders/tf_example_decoder.py
deleted file mode 100644
index 4dc3dc5c..00000000
--- a/object_detection/data_decoders/tf_example_decoder.py
+++ /dev/null
@@ -1,186 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tensorflow Example proto decoder for object detection.
-
-A decoder to decode string tensors containing serialized tensorflow.Example
-protos for object detection.
-"""
-import tensorflow as tf
-
-from object_detection.core import data_decoder
-from object_detection.core import standard_fields as fields
-from object_detection.utils import label_map_util
-
-slim_example_decoder = tf.contrib.slim.tfexample_decoder
-
-
-class TfExampleDecoder(data_decoder.DataDecoder):
- """Tensorflow Example proto decoder."""
-
- def __init__(self,
- load_instance_masks=False,
- label_map_proto_file=None,
- use_display_name=False):
- """Constructor sets keys_to_features and items_to_handlers.
-
- Args:
- load_instance_masks: whether or not to load and handle instance masks.
- label_map_proto_file: a file path to a
- object_detection.protos.StringIntLabelMap proto. If provided, then the
- mapped IDs of 'image/object/class/text' will take precedence over the
- existing 'image/object/class/label' ID. Also, if provided, it is
- assumed that 'image/object/class/text' will be in the data.
- use_display_name: whether or not to use the `display_name` for label
- mapping (instead of `name`). Only used if label_map_proto_file is
- provided.
- """
- self.keys_to_features = {
- 'image/encoded':
- tf.FixedLenFeature((), tf.string, default_value=''),
- 'image/format':
- tf.FixedLenFeature((), tf.string, default_value='jpeg'),
- 'image/filename':
- tf.FixedLenFeature((), tf.string, default_value=''),
- 'image/key/sha256':
- tf.FixedLenFeature((), tf.string, default_value=''),
- 'image/source_id':
- tf.FixedLenFeature((), tf.string, default_value=''),
- 'image/height':
- tf.FixedLenFeature((), tf.int64, 1),
- 'image/width':
- tf.FixedLenFeature((), tf.int64, 1),
- # Object boxes and classes.
- 'image/object/bbox/xmin':
- tf.VarLenFeature(tf.float32),
- 'image/object/bbox/xmax':
- tf.VarLenFeature(tf.float32),
- 'image/object/bbox/ymin':
- tf.VarLenFeature(tf.float32),
- 'image/object/bbox/ymax':
- tf.VarLenFeature(tf.float32),
- 'image/object/class/label':
- tf.VarLenFeature(tf.int64),
- 'image/object/class/text':
- tf.VarLenFeature(tf.string),
- 'image/object/area':
- tf.VarLenFeature(tf.float32),
- 'image/object/is_crowd':
- tf.VarLenFeature(tf.int64),
- 'image/object/difficult':
- tf.VarLenFeature(tf.int64),
- 'image/object/group_of':
- tf.VarLenFeature(tf.int64),
- }
- self.items_to_handlers = {
- fields.InputDataFields.image: slim_example_decoder.Image(
- image_key='image/encoded', format_key='image/format', channels=3),
- fields.InputDataFields.source_id: (
- slim_example_decoder.Tensor('image/source_id')),
- fields.InputDataFields.key: (
- slim_example_decoder.Tensor('image/key/sha256')),
- fields.InputDataFields.filename: (
- slim_example_decoder.Tensor('image/filename')),
- # Object boxes and classes.
- fields.InputDataFields.groundtruth_boxes: (
- slim_example_decoder.BoundingBox(
- ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/')),
- fields.InputDataFields.groundtruth_area: slim_example_decoder.Tensor(
- 'image/object/area'),
- fields.InputDataFields.groundtruth_is_crowd: (
- slim_example_decoder.Tensor('image/object/is_crowd')),
- fields.InputDataFields.groundtruth_difficult: (
- slim_example_decoder.Tensor('image/object/difficult')),
- fields.InputDataFields.groundtruth_group_of: (
- slim_example_decoder.Tensor('image/object/group_of'))
- }
- if load_instance_masks:
- self.keys_to_features['image/object/mask'] = tf.VarLenFeature(tf.float32)
- self.items_to_handlers[
- fields.InputDataFields.groundtruth_instance_masks] = (
- slim_example_decoder.ItemHandlerCallback(
- ['image/object/mask', 'image/height', 'image/width'],
- self._reshape_instance_masks))
- # TODO: Add label_handler that decodes from 'image/object/class/text'
- # primarily after the recent tf.contrib.slim changes make into a release
- # supported by cloudml.
- label_handler = slim_example_decoder.Tensor('image/object/class/label')
- self.items_to_handlers[
- fields.InputDataFields.groundtruth_classes] = label_handler
-
- def decode(self, tf_example_string_tensor):
- """Decodes serialized tensorflow example and returns a tensor dictionary.
-
- Args:
- tf_example_string_tensor: a string tensor holding a serialized tensorflow
- example proto.
-
- Returns:
- A dictionary of the following tensors.
- fields.InputDataFields.image - 3D uint8 tensor of shape [None, None, 3]
- containing image.
- fields.InputDataFields.source_id - string tensor containing original
- image id.
- fields.InputDataFields.key - string tensor with unique sha256 hash key.
- fields.InputDataFields.filename - string tensor with original dataset
- filename.
- fields.InputDataFields.groundtruth_boxes - 2D float32 tensor of shape
- [None, 4] containing box corners.
- fields.InputDataFields.groundtruth_classes - 1D int64 tensor of shape
- [None] containing classes for the boxes.
- fields.InputDataFields.groundtruth_area - 1D float32 tensor of shape
- [None] containing containing object mask area in pixel squared.
- fields.InputDataFields.groundtruth_is_crowd - 1D bool tensor of shape
- [None] indicating if the boxes enclose a crowd.
- Optional:
- fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape
- [None] indicating if the boxes represent `difficult` instances.
- fields.InputDataFields.groundtruth_group_of - 1D bool tensor of shape
- [None] indicating if the boxes represent `group_of` instances.
- fields.InputDataFields.groundtruth_instance_masks - 3D int64 tensor of
- shape [None, None, None] containing instance masks.
- """
- serialized_example = tf.reshape(tf_example_string_tensor, shape=[])
- decoder = slim_example_decoder.TFExampleDecoder(self.keys_to_features,
- self.items_to_handlers)
- keys = decoder.list_items()
- tensors = decoder.decode(serialized_example, items=keys)
- tensor_dict = dict(zip(keys, tensors))
- is_crowd = fields.InputDataFields.groundtruth_is_crowd
- tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool)
- tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3])
- return tensor_dict
-
- def _reshape_instance_masks(self, keys_to_tensors):
- """Reshape instance segmentation masks.
-
- The instance segmentation masks are reshaped to [num_instances, height,
- width] and cast to boolean type to save memory.
-
- Args:
- keys_to_tensors: a dictionary from keys to tensors.
-
- Returns:
- A 3-D float tensor of shape [num_instances, height, width] with values
- in {0, 1}.
- """
- height = keys_to_tensors['image/height']
- width = keys_to_tensors['image/width']
- to_shape = tf.cast(tf.stack([-1, height, width]), tf.int32)
- masks = keys_to_tensors['image/object/mask']
- if isinstance(masks, tf.SparseTensor):
- masks = tf.sparse_tensor_to_dense(masks)
- masks = tf.reshape(tf.to_float(tf.greater(masks, 0.0)), to_shape)
- return tf.cast(masks, tf.float32)
diff --git a/object_detection/data_decoders/tf_example_decoder_test.py b/object_detection/data_decoders/tf_example_decoder_test.py
deleted file mode 100644
index 04d00531..00000000
--- a/object_detection/data_decoders/tf_example_decoder_test.py
+++ /dev/null
@@ -1,350 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.data_decoders.tf_example_decoder."""
-
-import os
-import numpy as np
-import tensorflow as tf
-
-from object_detection.core import standard_fields as fields
-from object_detection.data_decoders import tf_example_decoder
-
-
-class TfExampleDecoderTest(tf.test.TestCase):
-
- def _EncodeImage(self, image_tensor, encoding_type='jpeg'):
- with self.test_session():
- if encoding_type == 'jpeg':
- image_encoded = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
- elif encoding_type == 'png':
- image_encoded = tf.image.encode_png(tf.constant(image_tensor)).eval()
- else:
- raise ValueError('Invalid encoding type.')
- return image_encoded
-
- def _DecodeImage(self, image_encoded, encoding_type='jpeg'):
- with self.test_session():
- if encoding_type == 'jpeg':
- image_decoded = tf.image.decode_jpeg(tf.constant(image_encoded)).eval()
- elif encoding_type == 'png':
- image_decoded = tf.image.decode_png(tf.constant(image_encoded)).eval()
- else:
- raise ValueError('Invalid encoding type.')
- return image_decoded
-
- def _Int64Feature(self, value):
- return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
-
- def _FloatFeature(self, value):
- return tf.train.Feature(float_list=tf.train.FloatList(value=value))
-
- def _BytesFeature(self, value):
- if isinstance(value, list):
- return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
- return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
-
- def testDecodeJpegImage(self):
- image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
- encoded_jpeg = self._EncodeImage(image_tensor)
- decoded_jpeg = self._DecodeImage(encoded_jpeg)
- example = tf.train.Example(features=tf.train.Features(feature={
- 'image/encoded': self._BytesFeature(encoded_jpeg),
- 'image/format': self._BytesFeature('jpeg'),
- 'image/source_id': self._BytesFeature('image_id'),
- })).SerializeToString()
-
- example_decoder = tf_example_decoder.TfExampleDecoder()
- tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
-
- self.assertAllEqual((tensor_dict[fields.InputDataFields.image].
- get_shape().as_list()), [None, None, 3])
- with self.test_session() as sess:
- tensor_dict = sess.run(tensor_dict)
-
- self.assertAllEqual(decoded_jpeg, tensor_dict[fields.InputDataFields.image])
- self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])
-
- def testDecodeImageKeyAndFilename(self):
- image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
- encoded_jpeg = self._EncodeImage(image_tensor)
- example = tf.train.Example(features=tf.train.Features(feature={
- 'image/encoded': self._BytesFeature(encoded_jpeg),
- 'image/key/sha256': self._BytesFeature('abc'),
- 'image/filename': self._BytesFeature('filename')
- })).SerializeToString()
-
- example_decoder = tf_example_decoder.TfExampleDecoder()
- tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
-
- with self.test_session() as sess:
- tensor_dict = sess.run(tensor_dict)
-
- self.assertEqual('abc', tensor_dict[fields.InputDataFields.key])
- self.assertEqual('filename', tensor_dict[fields.InputDataFields.filename])
-
- def testDecodePngImage(self):
- image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
- encoded_png = self._EncodeImage(image_tensor, encoding_type='png')
- decoded_png = self._DecodeImage(encoded_png, encoding_type='png')
- example = tf.train.Example(features=tf.train.Features(feature={
- 'image/encoded': self._BytesFeature(encoded_png),
- 'image/format': self._BytesFeature('png'),
- 'image/source_id': self._BytesFeature('image_id')
- })).SerializeToString()
-
- example_decoder = tf_example_decoder.TfExampleDecoder()
- tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
-
- self.assertAllEqual((tensor_dict[fields.InputDataFields.image].
- get_shape().as_list()), [None, None, 3])
- with self.test_session() as sess:
- tensor_dict = sess.run(tensor_dict)
-
- self.assertAllEqual(decoded_png, tensor_dict[fields.InputDataFields.image])
- self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])
-
- def testDecodeBoundingBox(self):
- image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
- encoded_jpeg = self._EncodeImage(image_tensor)
- bbox_ymins = [0.0, 4.0]
- bbox_xmins = [1.0, 5.0]
- bbox_ymaxs = [2.0, 6.0]
- bbox_xmaxs = [3.0, 7.0]
- example = tf.train.Example(features=tf.train.Features(feature={
- 'image/encoded': self._BytesFeature(encoded_jpeg),
- 'image/format': self._BytesFeature('jpeg'),
- 'image/object/bbox/ymin': self._FloatFeature(bbox_ymins),
- 'image/object/bbox/xmin': self._FloatFeature(bbox_xmins),
- 'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs),
- 'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs),
- })).SerializeToString()
-
- example_decoder = tf_example_decoder.TfExampleDecoder()
- tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
-
- self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes].
- get_shape().as_list()), [None, 4])
- with self.test_session() as sess:
- tensor_dict = sess.run(tensor_dict)
-
- expected_boxes = np.vstack([bbox_ymins, bbox_xmins,
- bbox_ymaxs, bbox_xmaxs]).transpose()
- self.assertAllEqual(expected_boxes,
- tensor_dict[fields.InputDataFields.groundtruth_boxes])
-
- def testDecodeObjectLabel(self):
- image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
- encoded_jpeg = self._EncodeImage(image_tensor)
- bbox_classes = [0, 1]
- example = tf.train.Example(features=tf.train.Features(feature={
- 'image/encoded': self._BytesFeature(encoded_jpeg),
- 'image/format': self._BytesFeature('jpeg'),
- 'image/object/class/label': self._Int64Feature(bbox_classes),
- })).SerializeToString()
-
- example_decoder = tf_example_decoder.TfExampleDecoder()
- tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
-
- self.assertAllEqual((tensor_dict[
- fields.InputDataFields.groundtruth_classes].get_shape().as_list()),
- [None])
-
- with self.test_session() as sess:
- tensor_dict = sess.run(tensor_dict)
-
- self.assertAllEqual(bbox_classes,
- tensor_dict[fields.InputDataFields.groundtruth_classes])
-
- def testDecodeObjectArea(self):
- image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
- encoded_jpeg = self._EncodeImage(image_tensor)
- object_area = [100., 174.]
- example = tf.train.Example(features=tf.train.Features(feature={
- 'image/encoded': self._BytesFeature(encoded_jpeg),
- 'image/format': self._BytesFeature('jpeg'),
- 'image/object/area': self._FloatFeature(object_area),
- })).SerializeToString()
-
- example_decoder = tf_example_decoder.TfExampleDecoder()
- tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
-
- self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_area].
- get_shape().as_list()), [None])
- with self.test_session() as sess:
- tensor_dict = sess.run(tensor_dict)
-
- self.assertAllEqual(object_area,
- tensor_dict[fields.InputDataFields.groundtruth_area])
-
- def testDecodeObjectIsCrowd(self):
- image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
- encoded_jpeg = self._EncodeImage(image_tensor)
- object_is_crowd = [0, 1]
- example = tf.train.Example(features=tf.train.Features(feature={
- 'image/encoded': self._BytesFeature(encoded_jpeg),
- 'image/format': self._BytesFeature('jpeg'),
- 'image/object/is_crowd': self._Int64Feature(object_is_crowd),
- })).SerializeToString()
-
- example_decoder = tf_example_decoder.TfExampleDecoder()
- tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
-
- self.assertAllEqual((tensor_dict[
- fields.InputDataFields.groundtruth_is_crowd].get_shape().as_list()),
- [None])
- with self.test_session() as sess:
- tensor_dict = sess.run(tensor_dict)
-
- self.assertAllEqual([bool(item) for item in object_is_crowd],
- tensor_dict[
- fields.InputDataFields.groundtruth_is_crowd])
-
- def testDecodeObjectDifficult(self):
- image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
- encoded_jpeg = self._EncodeImage(image_tensor)
- object_difficult = [0, 1]
- example = tf.train.Example(features=tf.train.Features(feature={
- 'image/encoded': self._BytesFeature(encoded_jpeg),
- 'image/format': self._BytesFeature('jpeg'),
- 'image/object/difficult': self._Int64Feature(object_difficult),
- })).SerializeToString()
-
- example_decoder = tf_example_decoder.TfExampleDecoder()
- tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
-
- self.assertAllEqual((tensor_dict[
- fields.InputDataFields.groundtruth_difficult].get_shape().as_list()),
- [None])
- with self.test_session() as sess:
- tensor_dict = sess.run(tensor_dict)
-
- self.assertAllEqual([bool(item) for item in object_difficult],
- tensor_dict[
- fields.InputDataFields.groundtruth_difficult])
-
- def testDecodeObjectGroupOf(self):
- image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
- encoded_jpeg = self._EncodeImage(image_tensor)
- object_group_of = [0, 1]
- example = tf.train.Example(features=tf.train.Features(
- feature={
- 'image/encoded': self._BytesFeature(encoded_jpeg),
- 'image/format': self._BytesFeature('jpeg'),
- 'image/object/group_of': self._Int64Feature(object_group_of),
- })).SerializeToString()
-
- example_decoder = tf_example_decoder.TfExampleDecoder()
- tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
-
- self.assertAllEqual((tensor_dict[
- fields.InputDataFields.groundtruth_group_of].get_shape().as_list()),
- [None])
- with self.test_session() as sess:
- tensor_dict = sess.run(tensor_dict)
-
- self.assertAllEqual(
- [bool(item) for item in object_group_of],
- tensor_dict[fields.InputDataFields.groundtruth_group_of])
-
- def testDecodeInstanceSegmentation(self):
- num_instances = 4
- image_height = 5
- image_width = 3
-
- # Randomly generate image.
- image_tensor = np.random.randint(255, size=(image_height,
- image_width,
- 3)).astype(np.uint8)
- encoded_jpeg = self._EncodeImage(image_tensor)
-
- # Randomly generate instance segmentation masks.
- instance_masks = (
- np.random.randint(2, size=(num_instances,
- image_height,
- image_width)).astype(np.float32))
- instance_masks_flattened = np.reshape(instance_masks, [-1])
-
- # Randomly generate class labels for each instance.
- object_classes = np.random.randint(
- 100, size=(num_instances)).astype(np.int64)
-
- example = tf.train.Example(features=tf.train.Features(feature={
- 'image/encoded': self._BytesFeature(encoded_jpeg),
- 'image/format': self._BytesFeature('jpeg'),
- 'image/height': self._Int64Feature([image_height]),
- 'image/width': self._Int64Feature([image_width]),
- 'image/object/mask': self._FloatFeature(instance_masks_flattened),
- 'image/object/class/label': self._Int64Feature(
- object_classes)})).SerializeToString()
- example_decoder = tf_example_decoder.TfExampleDecoder(
- load_instance_masks=True)
- tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
-
- self.assertAllEqual((
- tensor_dict[fields.InputDataFields.groundtruth_instance_masks].
- get_shape().as_list()), [None, None, None])
-
- self.assertAllEqual((
- tensor_dict[fields.InputDataFields.groundtruth_classes].
- get_shape().as_list()), [None])
-
- with self.test_session() as sess:
- tensor_dict = sess.run(tensor_dict)
-
- self.assertAllEqual(
- instance_masks.astype(np.float32),
- tensor_dict[fields.InputDataFields.groundtruth_instance_masks])
- self.assertAllEqual(
- object_classes,
- tensor_dict[fields.InputDataFields.groundtruth_classes])
-
- def testInstancesNotAvailableByDefault(self):
- num_instances = 4
- image_height = 5
- image_width = 3
- # Randomly generate image.
- image_tensor = np.random.randint(255, size=(image_height,
- image_width,
- 3)).astype(np.uint8)
- encoded_jpeg = self._EncodeImage(image_tensor)
-
- # Randomly generate instance segmentation masks.
- instance_masks = (
- np.random.randint(2, size=(num_instances,
- image_height,
- image_width)).astype(np.float32))
- instance_masks_flattened = np.reshape(instance_masks, [-1])
-
- # Randomly generate class labels for each instance.
- object_classes = np.random.randint(
- 100, size=(num_instances)).astype(np.int64)
-
- example = tf.train.Example(features=tf.train.Features(feature={
- 'image/encoded': self._BytesFeature(encoded_jpeg),
- 'image/format': self._BytesFeature('jpeg'),
- 'image/height': self._Int64Feature([image_height]),
- 'image/width': self._Int64Feature([image_width]),
- 'image/object/mask': self._FloatFeature(instance_masks_flattened),
- 'image/object/class/label': self._Int64Feature(
- object_classes)})).SerializeToString()
- example_decoder = tf_example_decoder.TfExampleDecoder()
- tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
- self.assertTrue(fields.InputDataFields.groundtruth_instance_masks
- not in tensor_dict)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/dataset_tools/BUILD b/object_detection/dataset_tools/BUILD
deleted file mode 100644
index bb5ce2e5..00000000
--- a/object_detection/dataset_tools/BUILD
+++ /dev/null
@@ -1,107 +0,0 @@
-# Tensorflow Object Detection API: main runnables.
-
-package(
- default_visibility = ["//visibility:public"],
-)
-
-licenses(["notice"])
-
-# Apache 2.0
-
-py_binary(
- name = "create_kitti_tf_record",
- srcs = [
- "create_kitti_tf_record.py",
- ],
- deps = [
- "//third_party/py/PIL:pil",
- "//third_party/py/lxml",
- "//tensorflow",
- "//tensorflow_models/object_detection/utils:dataset_util",
- "//tensorflow_models/object_detection/utils:label_map_util",
- "//tensorflow_models/object_detection/utils:np_box_ops",
- ],
-)
-
-py_test(
- name = "create_kitti_tf_record_test",
- srcs = [
- "create_kitti_tf_record_test.py",
- ],
- deps = [
- ":create_kitti_tf_record",
- "//tensorflow",
- ],
-)
-
-py_binary(
- name = "create_pascal_tf_record",
- srcs = [
- "create_pascal_tf_record.py",
- ],
- deps = [
- "//third_party/py/PIL:pil",
- "//third_party/py/lxml",
- "//tensorflow",
- "//tensorflow_models/object_detection/utils:dataset_util",
- "//tensorflow_models/object_detection/utils:label_map_util",
- ],
-)
-
-py_test(
- name = "create_pascal_tf_record_test",
- srcs = [
- "create_pascal_tf_record_test.py",
- ],
- deps = [
- ":create_pascal_tf_record",
- "//tensorflow",
- ],
-)
-
-py_binary(
- name = "create_pet_tf_record",
- srcs = [
- "create_pet_tf_record.py",
- ],
- deps = [
- "//third_party/py/PIL:pil",
- "//third_party/py/lxml",
- "//tensorflow",
- "//tensorflow_models/object_detection/utils:dataset_util",
- "//tensorflow_models/object_detection/utils:label_map_util",
- ],
-)
-
-py_library(
- name = "oid_tfrecord_creation",
- srcs = ["oid_tfrecord_creation.py"],
- deps = [
- "//tensorflow",
- "//tensorflow_models/object_detection/core:standard_fields",
- "//tensorflow_models/object_detection/utils:dataset_util",
- ],
-)
-
-py_test(
- name = "oid_tfrecord_creation_test",
- srcs = ["oid_tfrecord_creation_test.py"],
- deps = [
- ":oid_tfrecord_creation",
- "//third_party/py/contextlib2",
- "//third_party/py/pandas",
- "//third_party/py/tensorflow",
- ],
-)
-
-py_binary(
- name = "create_oid_tf_record",
- srcs = ["create_oid_tf_record.py"],
- deps = [
- ":oid_tfrecord_creation",
- "//third_party/py/contextlib2",
- "//third_party/py/pandas",
- "//tensorflow",
- "//tensorflow_models/object_detection/utils:label_map_util",
- ],
-)
diff --git a/object_detection/dataset_tools/create_kitti_tf_record.py b/object_detection/dataset_tools/create_kitti_tf_record.py
deleted file mode 100644
index 2bf2ff34..00000000
--- a/object_detection/dataset_tools/create_kitti_tf_record.py
+++ /dev/null
@@ -1,310 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-r"""Convert raw KITTI detection dataset to TFRecord for object_detection.
-
-Converts KITTI detection dataset to TFRecords with a standard format allowing
- to use this dataset to train object detectors. The raw dataset can be
- downloaded from:
- http://kitti.is.tue.mpg.de/kitti/data_object_image_2.zip.
- http://kitti.is.tue.mpg.de/kitti/data_object_label_2.zip
- Permission can be requested at the main website.
-
- KITTI detection dataset contains 7481 training images. Using this code with
- the default settings will set aside the first 500 images as a validation set.
- This can be altered using the flags, see details below.
-
-Example usage:
- python object_detection/dataset_tools/create_kitti_tf_record.py \
- --data_dir=/home/user/kitti \
- --output_path=/home/user/kitti.record
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-
-import hashlib
-import io
-import os
-
-import numpy as np
-import PIL.Image as pil
-import tensorflow as tf
-
-from object_detection.utils import dataset_util
-from object_detection.utils import label_map_util
-from object_detection.utils.np_box_ops import iou
-
-tf.app.flags.DEFINE_string('data_dir', '', 'Location of root directory for the '
- 'data. Folder structure is assumed to be:'
- '/training/label_2 (annotations) and'
- '/data_object_image_2/training/image_2'
- '(images).')
-tf.app.flags.DEFINE_string('output_path', '', 'Path to which TFRecord files'
- 'will be written. The TFRecord with the training set'
- 'will be located at: _train.tfrecord.'
- 'And the TFRecord with the validation set will be'
- 'located at: _val.tfrecord')
-tf.app.flags.DEFINE_list('classes_to_use', ['car', 'pedestrian', 'dontcare'],
- 'Which classes of bounding boxes to use. Adding the'
- 'dontcare class will remove all bboxs in the dontcare'
- 'regions.')
-tf.app.flags.DEFINE_string('label_map_path', 'data/kitti_label_map.pbtxt',
- 'Path to label map proto.')
-tf.app.flags.DEFINE_integer('validation_set_size', '500', 'Number of images to'
- 'be used as a validation set.')
-FLAGS = tf.app.flags.FLAGS
-
-
-def convert_kitti_to_tfrecords(data_dir, output_path, classes_to_use,
- label_map_path, validation_set_size):
- """Convert the KITTI detection dataset to TFRecords.
-
- Args:
- data_dir: The full path to the unzipped folder containing the unzipped data
- from data_object_image_2 and data_object_label_2.zip.
- Folder structure is assumed to be: data_dir/training/label_2 (annotations)
- and data_dir/data_object_image_2/training/image_2 (images).
- output_path: The path to which TFRecord files will be written. The TFRecord
- with the training set will be located at: _train.tfrecord
- And the TFRecord with the validation set will be located at:
- _val.tfrecord
- classes_to_use: List of strings naming the classes for which data should be
- converted. Use the same names as presented in the KIITI README file.
- Adding dontcare class will remove all other bounding boxes that overlap
- with areas marked as dontcare regions.
- label_map_path: Path to label map proto
- validation_set_size: How many images should be left as the validation set.
- (Ffirst `validation_set_size` examples are selected to be in the
- validation set).
- """
- label_map_dict = label_map_util.get_label_map_dict(label_map_path)
- train_count = 0
- val_count = 0
-
- annotation_dir = os.path.join(data_dir,
- 'training',
- 'label_2')
-
- image_dir = os.path.join(data_dir,
- 'data_object_image_2',
- 'training',
- 'image_2')
-
- train_writer = tf.python_io.TFRecordWriter('%s_train.tfrecord'%
- output_path)
- val_writer = tf.python_io.TFRecordWriter('%s_val.tfrecord'%
- output_path)
-
- images = sorted(tf.gfile.ListDirectory(image_dir))
- for img_name in images:
- img_num = int(img_name.split('.')[0])
- is_validation_img = img_num < validation_set_size
- img_anno = read_annotation_file(os.path.join(annotation_dir,
- str(img_num).zfill(6)+'.txt'))
-
- image_path = os.path.join(image_dir, img_name)
-
- # Filter all bounding boxes of this frame that are of a legal class, and
- # don't overlap with a dontcare region.
- # TODO(talremez) filter out targets that are truncated or heavily occluded.
- annotation_for_image = filter_annotations(img_anno, classes_to_use)
-
- example = prepare_example(image_path, annotation_for_image, label_map_dict)
- if is_validation_img:
- val_writer.write(example.SerializeToString())
- val_count += 1
- else:
- train_writer.write(example.SerializeToString())
- train_count += 1
-
- train_writer.close()
- val_writer.close()
-
-
-def prepare_example(image_path, annotations, label_map_dict):
- """Converts a dictionary with annotations for an image to tf.Example proto.
-
- Args:
- image_path: The complete path to image.
- annotations: A dictionary representing the annotation of a single object
- that appears in the image.
- label_map_dict: A map from string label names to integer ids.
-
- Returns:
- example: The converted tf.Example.
- """
- with tf.gfile.GFile(image_path, 'rb') as fid:
- encoded_png = fid.read()
- encoded_png_io = io.BytesIO(encoded_png)
- image = pil.open(encoded_png_io)
- image = np.asarray(image)
-
- key = hashlib.sha256(encoded_png).hexdigest()
-
- width = int(image.shape[1])
- height = int(image.shape[0])
-
- xmin_norm = annotations['2d_bbox_left'] / float(width)
- ymin_norm = annotations['2d_bbox_top'] / float(height)
- xmax_norm = annotations['2d_bbox_right'] / float(width)
- ymax_norm = annotations['2d_bbox_bottom'] / float(height)
-
- difficult_obj = [0]*len(xmin_norm)
-
- example = tf.train.Example(features=tf.train.Features(feature={
- 'image/height': dataset_util.int64_feature(height),
- 'image/width': dataset_util.int64_feature(width),
- 'image/filename': dataset_util.bytes_feature(image_path.encode('utf8')),
- 'image/source_id': dataset_util.bytes_feature(image_path.encode('utf8')),
- 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
- 'image/encoded': dataset_util.bytes_feature(encoded_png),
- 'image/format': dataset_util.bytes_feature('png'.encode('utf8')),
- 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin_norm),
- 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax_norm),
- 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin_norm),
- 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax_norm),
- 'image/object/class/text': dataset_util.bytes_list_feature(
- [x.encode('utf8') for x in annotations['type']]),
- 'image/object/class/label': dataset_util.int64_list_feature(
- [label_map_dict[x] for x in annotations['type']]),
- 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
- 'image/object/truncated': dataset_util.float_list_feature(
- annotations['truncated']),
- 'image/object/alpha': dataset_util.float_list_feature(
- annotations['alpha']),
- 'image/object/3d_bbox/height': dataset_util.float_list_feature(
- annotations['3d_bbox_height']),
- 'image/object/3d_bbox/width': dataset_util.float_list_feature(
- annotations['3d_bbox_width']),
- 'image/object/3d_bbox/length': dataset_util.float_list_feature(
- annotations['3d_bbox_length']),
- 'image/object/3d_bbox/x': dataset_util.float_list_feature(
- annotations['3d_bbox_x']),
- 'image/object/3d_bbox/y': dataset_util.float_list_feature(
- annotations['3d_bbox_y']),
- 'image/object/3d_bbox/z': dataset_util.float_list_feature(
- annotations['3d_bbox_z']),
- 'image/object/3d_bbox/rot_y': dataset_util.float_list_feature(
- annotations['3d_bbox_rot_y']),
- }))
-
- return example
-
-
-def filter_annotations(img_all_annotations, used_classes):
- """Filters out annotations from the unused classes and dontcare regions.
-
- Filters out the annotations that belong to classes we do now wish to use and
- (optionally) also removes all boxes that overlap with dontcare regions.
-
- Args:
- img_all_annotations: A list of annotation dictionaries. See documentation of
- read_annotation_file for more details about the format of the annotations.
- used_classes: A list of strings listing the classes we want to keep, if the
- list contains "dontcare", all bounding boxes with overlapping with dont
- care regions will also be filtered out.
-
- Returns:
- img_filtered_annotations: A list of annotation dictionaries that have passed
- the filtering.
- """
-
- img_filtered_annotations = {}
-
- # Filter the type of the objects.
- relevant_annotation_indices = [
- i for i, x in enumerate(img_all_annotations['type']) if x in used_classes
- ]
-
- for key in img_all_annotations.keys():
- img_filtered_annotations[key] = (
- img_all_annotations[key][relevant_annotation_indices])
-
- if 'dontcare' in used_classes:
- dont_care_indices = [i for i,
- x in enumerate(img_filtered_annotations['type'])
- if x == 'dontcare']
-
- # bounding box format [y_min, x_min, y_max, x_max]
- all_boxes = np.stack([img_filtered_annotations['2d_bbox_top'],
- img_filtered_annotations['2d_bbox_left'],
- img_filtered_annotations['2d_bbox_bottom'],
- img_filtered_annotations['2d_bbox_right']],
- axis=1)
-
- ious = iou(boxes1=all_boxes,
- boxes2=all_boxes[dont_care_indices])
-
- # Remove all bounding boxes that overlap with a dontcare region.
- if ious.size > 0:
- boxes_to_remove = np.amax(ious, axis=1) > 0.0
- for key in img_all_annotations.keys():
- img_filtered_annotations[key] = (
- img_filtered_annotations[key][np.logical_not(boxes_to_remove)])
-
- return img_filtered_annotations
-
-
-def read_annotation_file(filename):
- """Reads a KITTI annotation file.
-
- Converts a KITTI annotation file into a dictionary containing all the
- relevant information.
-
- Args:
- filename: the path to the annotataion text file.
-
- Returns:
- anno: A dictionary with the converted annotation information. See annotation
- README file for details on the different fields.
- """
- with open(filename) as f:
- content = f.readlines()
- content = [x.strip().split(' ') for x in content]
-
- anno = {}
- anno['type'] = np.array([x[0].lower() for x in content])
- anno['truncated'] = np.array([float(x[1]) for x in content])
- anno['occluded'] = np.array([int(x[2]) for x in content])
- anno['alpha'] = np.array([float(x[3]) for x in content])
-
- anno['2d_bbox_left'] = np.array([float(x[4]) for x in content])
- anno['2d_bbox_top'] = np.array([float(x[5]) for x in content])
- anno['2d_bbox_right'] = np.array([float(x[6]) for x in content])
- anno['2d_bbox_bottom'] = np.array([float(x[7]) for x in content])
-
- anno['3d_bbox_height'] = np.array([float(x[8]) for x in content])
- anno['3d_bbox_width'] = np.array([float(x[9]) for x in content])
- anno['3d_bbox_length'] = np.array([float(x[10]) for x in content])
- anno['3d_bbox_x'] = np.array([float(x[11]) for x in content])
- anno['3d_bbox_y'] = np.array([float(x[12]) for x in content])
- anno['3d_bbox_z'] = np.array([float(x[13]) for x in content])
- anno['3d_bbox_rot_y'] = np.array([float(x[14]) for x in content])
-
- return anno
-
-
-def main(_):
- convert_kitti_to_tfrecords(
- data_dir=FLAGS.data_dir,
- output_path=FLAGS.output_path,
- classes_to_use=FLAGS.classes_to_use,
- label_map_path=FLAGS.label_map_path,
- validation_set_size=FLAGS.validation_set_size)
-
-if __name__ == '__main__':
- tf.app.run()
diff --git a/object_detection/dataset_tools/create_kitti_tf_record_test.py b/object_detection/dataset_tools/create_kitti_tf_record_test.py
deleted file mode 100644
index 22f27f1a..00000000
--- a/object_detection/dataset_tools/create_kitti_tf_record_test.py
+++ /dev/null
@@ -1,130 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Test for create_kitti_tf_record.py."""
-
-import os
-
-import numpy as np
-import PIL.Image
-import tensorflow as tf
-
-from object_detection.dataset_tools import create_kitti_tf_record
-
-
-class DictToTFExampleTest(tf.test.TestCase):
-
- def _assertProtoEqual(self, proto_field, expectation):
- """Helper function to assert if a proto field equals some value.
-
- Args:
- proto_field: The protobuf field to compare.
- expectation: The expected value of the protobuf field.
- """
- proto_list = [p for p in proto_field]
- self.assertListEqual(proto_list, expectation)
-
- def test_dict_to_tf_example(self):
- image_file_name = 'tmp_image.jpg'
- image_data = np.random.rand(256, 256, 3)
- save_path = os.path.join(self.get_temp_dir(), image_file_name)
- image = PIL.Image.fromarray(image_data, 'RGB')
- image.save(save_path)
-
- annotations = {}
- annotations['2d_bbox_left'] = np.array([64])
- annotations['2d_bbox_top'] = np.array([64])
- annotations['2d_bbox_right'] = np.array([192])
- annotations['2d_bbox_bottom'] = np.array([192])
- annotations['type'] = ['car']
- annotations['truncated'] = np.array([1])
- annotations['alpha'] = np.array([2])
- annotations['3d_bbox_height'] = np.array([10])
- annotations['3d_bbox_width'] = np.array([11])
- annotations['3d_bbox_length'] = np.array([12])
- annotations['3d_bbox_x'] = np.array([13])
- annotations['3d_bbox_y'] = np.array([14])
- annotations['3d_bbox_z'] = np.array([15])
- annotations['3d_bbox_rot_y'] = np.array([4])
-
- label_map_dict = {
- 'background': 0,
- 'car': 1,
- }
-
- example = create_kitti_tf_record.prepare_example(
- save_path,
- annotations,
- label_map_dict)
-
- self._assertProtoEqual(
- example.features.feature['image/height'].int64_list.value, [256])
- self._assertProtoEqual(
- example.features.feature['image/width'].int64_list.value, [256])
- self._assertProtoEqual(
- example.features.feature['image/filename'].bytes_list.value,
- [save_path])
- self._assertProtoEqual(
- example.features.feature['image/source_id'].bytes_list.value,
- [save_path])
- self._assertProtoEqual(
- example.features.feature['image/format'].bytes_list.value, ['png'])
- self._assertProtoEqual(
- example.features.feature['image/object/bbox/xmin'].float_list.value,
- [0.25])
- self._assertProtoEqual(
- example.features.feature['image/object/bbox/ymin'].float_list.value,
- [0.25])
- self._assertProtoEqual(
- example.features.feature['image/object/bbox/xmax'].float_list.value,
- [0.75])
- self._assertProtoEqual(
- example.features.feature['image/object/bbox/ymax'].float_list.value,
- [0.75])
- self._assertProtoEqual(
- example.features.feature['image/object/class/text'].bytes_list.value,
- ['car'])
- self._assertProtoEqual(
- example.features.feature['image/object/class/label'].int64_list.value,
- [1])
- self._assertProtoEqual(
- example.features.feature['image/object/truncated'].float_list.value,
- [1])
- self._assertProtoEqual(
- example.features.feature['image/object/alpha'].float_list.value,
- [2])
- self._assertProtoEqual(example.features.feature[
- 'image/object/3d_bbox/height'].float_list.value, [10])
- self._assertProtoEqual(
- example.features.feature['image/object/3d_bbox/width'].float_list.value,
- [11])
- self._assertProtoEqual(example.features.feature[
- 'image/object/3d_bbox/length'].float_list.value, [12])
- self._assertProtoEqual(
- example.features.feature['image/object/3d_bbox/x'].float_list.value,
- [13])
- self._assertProtoEqual(
- example.features.feature['image/object/3d_bbox/y'].float_list.value,
- [14])
- self._assertProtoEqual(
- example.features.feature['image/object/3d_bbox/z'].float_list.value,
- [15])
- self._assertProtoEqual(
- example.features.feature['image/object/3d_bbox/rot_y'].float_list.value,
- [4])
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/dataset_tools/create_oid_tf_record.py b/object_detection/dataset_tools/create_oid_tf_record.py
deleted file mode 100644
index f58efee2..00000000
--- a/object_detection/dataset_tools/create_oid_tf_record.py
+++ /dev/null
@@ -1,104 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-r"""Creates TFRecords of Open Images dataset for object detection.
-
-Example usage:
- python object_detection/dataset_tools/create_oid_tf_record.py \
- --input_annotations_csv=/path/to/input/annotations-human-bbox.csv \
- --input_images_directory=/path/to/input/image_pixels_directory \
- --input_label_map=/path/to/input/labels_bbox_545.labelmap \
- --output_tf_record_path_prefix=/path/to/output/prefix.tfrecord
-
-CSVs with bounding box annotations and image metadata (including the image URLs)
-can be downloaded from the Open Images GitHub repository:
-https://github.com/openimages/dataset
-
-This script will include every image found in the input_images_directory in the
-output TFRecord, even if the image has no corresponding bounding box annotations
-in the input_annotations_csv.
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-
-import contextlib2
-import pandas as pd
-import tensorflow as tf
-
-from object_detection.dataset_tools import oid_tfrecord_creation
-from object_detection.utils import label_map_util
-
-tf.flags.DEFINE_string('input_annotations_csv', None,
- 'Path to CSV containing image bounding box annotations')
-tf.flags.DEFINE_string('input_images_directory', None,
- 'Directory containing the image pixels '
- 'downloaded from the OpenImages GitHub repository.')
-tf.flags.DEFINE_string('input_label_map', None, 'Path to the label map proto')
-tf.flags.DEFINE_string(
- 'output_tf_record_path_prefix', None,
- 'Path to the output TFRecord. The shard index and the number of shards '
- 'will be appended for each output shard.')
-tf.flags.DEFINE_integer('num_shards', 100, 'Number of TFRecord shards')
-
-FLAGS = tf.flags.FLAGS
-
-
-def main(_):
- tf.logging.set_verbosity(tf.logging.INFO)
-
- required_flags = [
- 'input_annotations_csv', 'input_images_directory', 'input_label_map',
- 'output_tf_record_path_prefix'
- ]
- for flag_name in required_flags:
- if not getattr(FLAGS, flag_name):
- raise ValueError('Flag --{} is required'.format(flag_name))
-
- label_map = label_map_util.get_label_map_dict(FLAGS.input_label_map)
- all_annotations = pd.read_csv(FLAGS.input_annotations_csv)
- all_images = tf.gfile.Glob(
- os.path.join(FLAGS.input_images_directory, '*.jpg'))
- all_image_ids = [os.path.splitext(os.path.basename(v))[0] for v in all_images]
- all_image_ids = pd.DataFrame({'ImageID': all_image_ids})
- all_annotations = pd.concat([all_annotations, all_image_ids])
-
- tf.logging.log(tf.logging.INFO, 'Found %d images...', len(all_image_ids))
-
- with contextlib2.ExitStack() as tf_record_close_stack:
- output_tfrecords = oid_tfrecord_creation.open_sharded_output_tfrecords(
- tf_record_close_stack, FLAGS.output_tf_record_path_prefix,
- FLAGS.num_shards)
-
- for counter, image_data in enumerate(all_annotations.groupby('ImageID')):
- tf.logging.log_every_n(tf.logging.INFO, 'Processed %d images...', 1000,
- counter)
-
- image_id, image_annotations = image_data
- # In OID image file names are formed by appending ".jpg" to the image ID.
- image_path = os.path.join(FLAGS.input_images_directory, image_id + '.jpg')
- with tf.gfile.Open(image_path) as image_file:
- encoded_image = image_file.read()
-
- tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame(
- image_annotations, label_map, encoded_image)
- if tf_example:
- shard_idx = long(image_id, 16) % FLAGS.num_shards
- output_tfrecords[shard_idx].write(tf_example.SerializeToString())
-
-
-if __name__ == '__main__':
- tf.app.run()
diff --git a/object_detection/dataset_tools/create_pascal_tf_record.py b/object_detection/dataset_tools/create_pascal_tf_record.py
deleted file mode 100644
index 83d2b128..00000000
--- a/object_detection/dataset_tools/create_pascal_tf_record.py
+++ /dev/null
@@ -1,184 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-r"""Convert raw PASCAL dataset to TFRecord for object_detection.
-
-Example usage:
- python object_detection/dataset_tools/create_pascal_tf_record.py \
- --data_dir=/home/user/VOCdevkit \
- --year=VOC2012 \
- --output_path=/home/user/pascal.record
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import hashlib
-import io
-import logging
-import os
-
-from lxml import etree
-import PIL.Image
-import tensorflow as tf
-
-from object_detection.utils import dataset_util
-from object_detection.utils import label_map_util
-
-
-flags = tf.app.flags
-flags.DEFINE_string('data_dir', '', 'Root directory to raw PASCAL VOC dataset.')
-flags.DEFINE_string('set', 'train', 'Convert training set, validation set or '
- 'merged set.')
-flags.DEFINE_string('annotations_dir', 'Annotations',
- '(Relative) path to annotations directory.')
-flags.DEFINE_string('year', 'VOC2007', 'Desired challenge year.')
-flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
-flags.DEFINE_string('label_map_path', 'data/pascal_label_map.pbtxt',
- 'Path to label map proto')
-flags.DEFINE_boolean('ignore_difficult_instances', False, 'Whether to ignore '
- 'difficult instances')
-FLAGS = flags.FLAGS
-
-SETS = ['train', 'val', 'trainval', 'test']
-YEARS = ['VOC2007', 'VOC2012', 'merged']
-
-
-def dict_to_tf_example(data,
- dataset_directory,
- label_map_dict,
- ignore_difficult_instances=False,
- image_subdirectory='JPEGImages'):
- """Convert XML derived dict to tf.Example proto.
-
- Notice that this function normalizes the bounding box coordinates provided
- by the raw data.
-
- Args:
- data: dict holding PASCAL XML fields for a single image (obtained by
- running dataset_util.recursive_parse_xml_to_dict)
- dataset_directory: Path to root directory holding PASCAL dataset
- label_map_dict: A map from string label names to integers ids.
- ignore_difficult_instances: Whether to skip difficult instances in the
- dataset (default: False).
- image_subdirectory: String specifying subdirectory within the
- PASCAL dataset directory holding the actual image data.
-
- Returns:
- example: The converted tf.Example.
-
- Raises:
- ValueError: if the image pointed to by data['filename'] is not a valid JPEG
- """
- img_path = os.path.join(data['folder'], image_subdirectory, data['filename'])
- full_path = os.path.join(dataset_directory, img_path)
- with tf.gfile.GFile(full_path, 'rb') as fid:
- encoded_jpg = fid.read()
- encoded_jpg_io = io.BytesIO(encoded_jpg)
- image = PIL.Image.open(encoded_jpg_io)
- if image.format != 'JPEG':
- raise ValueError('Image format not JPEG')
- key = hashlib.sha256(encoded_jpg).hexdigest()
-
- width = int(data['size']['width'])
- height = int(data['size']['height'])
-
- xmin = []
- ymin = []
- xmax = []
- ymax = []
- classes = []
- classes_text = []
- truncated = []
- poses = []
- difficult_obj = []
- for obj in data['object']:
- difficult = bool(int(obj['difficult']))
- if ignore_difficult_instances and difficult:
- continue
-
- difficult_obj.append(int(difficult))
-
- xmin.append(float(obj['bndbox']['xmin']) / width)
- ymin.append(float(obj['bndbox']['ymin']) / height)
- xmax.append(float(obj['bndbox']['xmax']) / width)
- ymax.append(float(obj['bndbox']['ymax']) / height)
- classes_text.append(obj['name'].encode('utf8'))
- classes.append(label_map_dict[obj['name']])
- truncated.append(int(obj['truncated']))
- poses.append(obj['pose'].encode('utf8'))
-
- example = tf.train.Example(features=tf.train.Features(feature={
- 'image/height': dataset_util.int64_feature(height),
- 'image/width': dataset_util.int64_feature(width),
- 'image/filename': dataset_util.bytes_feature(
- data['filename'].encode('utf8')),
- 'image/source_id': dataset_util.bytes_feature(
- data['filename'].encode('utf8')),
- 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
- 'image/encoded': dataset_util.bytes_feature(encoded_jpg),
- 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
- 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
- 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
- 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
- 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
- 'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
- 'image/object/class/label': dataset_util.int64_list_feature(classes),
- 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
- 'image/object/truncated': dataset_util.int64_list_feature(truncated),
- 'image/object/view': dataset_util.bytes_list_feature(poses),
- }))
- return example
-
-
-def main(_):
- if FLAGS.set not in SETS:
- raise ValueError('set must be in : {}'.format(SETS))
- if FLAGS.year not in YEARS:
- raise ValueError('year must be in : {}'.format(YEARS))
-
- data_dir = FLAGS.data_dir
- years = ['VOC2007', 'VOC2012']
- if FLAGS.year != 'merged':
- years = [FLAGS.year]
-
- writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
-
- label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
-
- for year in years:
- logging.info('Reading from PASCAL %s dataset.', year)
- examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
- 'aeroplane_' + FLAGS.set + '.txt')
- annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
- examples_list = dataset_util.read_examples_list(examples_path)
- for idx, example in enumerate(examples_list):
- if idx % 100 == 0:
- logging.info('On image %d of %d', idx, len(examples_list))
- path = os.path.join(annotations_dir, example + '.xml')
- with tf.gfile.GFile(path, 'r') as fid:
- xml_str = fid.read()
- xml = etree.fromstring(xml_str)
- data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
-
- tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
- FLAGS.ignore_difficult_instances)
- writer.write(tf_example.SerializeToString())
-
- writer.close()
-
-
-if __name__ == '__main__':
- tf.app.run()
diff --git a/object_detection/dataset_tools/create_pascal_tf_record_test.py b/object_detection/dataset_tools/create_pascal_tf_record_test.py
deleted file mode 100644
index a1c31fac..00000000
--- a/object_detection/dataset_tools/create_pascal_tf_record_test.py
+++ /dev/null
@@ -1,118 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Test for create_pascal_tf_record.py."""
-
-import os
-
-import numpy as np
-import PIL.Image
-import tensorflow as tf
-
-from object_detection.dataset_tools import create_pascal_tf_record
-
-
-class DictToTFExampleTest(tf.test.TestCase):
-
- def _assertProtoEqual(self, proto_field, expectation):
- """Helper function to assert if a proto field equals some value.
-
- Args:
- proto_field: The protobuf field to compare.
- expectation: The expected value of the protobuf field.
- """
- proto_list = [p for p in proto_field]
- self.assertListEqual(proto_list, expectation)
-
- def test_dict_to_tf_example(self):
- image_file_name = 'tmp_image.jpg'
- image_data = np.random.rand(256, 256, 3)
- save_path = os.path.join(self.get_temp_dir(), image_file_name)
- image = PIL.Image.fromarray(image_data, 'RGB')
- image.save(save_path)
-
- data = {
- 'folder': '',
- 'filename': image_file_name,
- 'size': {
- 'height': 256,
- 'width': 256,
- },
- 'object': [
- {
- 'difficult': 1,
- 'bndbox': {
- 'xmin': 64,
- 'ymin': 64,
- 'xmax': 192,
- 'ymax': 192,
- },
- 'name': 'person',
- 'truncated': 0,
- 'pose': '',
- },
- ],
- }
-
- label_map_dict = {
- 'background': 0,
- 'person': 1,
- 'notperson': 2,
- }
-
- example = create_pascal_tf_record.dict_to_tf_example(
- data, self.get_temp_dir(), label_map_dict, image_subdirectory='')
- self._assertProtoEqual(
- example.features.feature['image/height'].int64_list.value, [256])
- self._assertProtoEqual(
- example.features.feature['image/width'].int64_list.value, [256])
- self._assertProtoEqual(
- example.features.feature['image/filename'].bytes_list.value,
- [image_file_name])
- self._assertProtoEqual(
- example.features.feature['image/source_id'].bytes_list.value,
- [image_file_name])
- self._assertProtoEqual(
- example.features.feature['image/format'].bytes_list.value, ['jpeg'])
- self._assertProtoEqual(
- example.features.feature['image/object/bbox/xmin'].float_list.value,
- [0.25])
- self._assertProtoEqual(
- example.features.feature['image/object/bbox/ymin'].float_list.value,
- [0.25])
- self._assertProtoEqual(
- example.features.feature['image/object/bbox/xmax'].float_list.value,
- [0.75])
- self._assertProtoEqual(
- example.features.feature['image/object/bbox/ymax'].float_list.value,
- [0.75])
- self._assertProtoEqual(
- example.features.feature['image/object/class/text'].bytes_list.value,
- ['person'])
- self._assertProtoEqual(
- example.features.feature['image/object/class/label'].int64_list.value,
- [1])
- self._assertProtoEqual(
- example.features.feature['image/object/difficult'].int64_list.value,
- [1])
- self._assertProtoEqual(
- example.features.feature['image/object/truncated'].int64_list.value,
- [0])
- self._assertProtoEqual(
- example.features.feature['image/object/view'].bytes_list.value, [''])
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/dataset_tools/create_pet_tf_record.py b/object_detection/dataset_tools/create_pet_tf_record.py
deleted file mode 100644
index a8663297..00000000
--- a/object_detection/dataset_tools/create_pet_tf_record.py
+++ /dev/null
@@ -1,272 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-r"""Convert the Oxford pet dataset to TFRecord for object_detection.
-
-See: O. M. Parkhi, A. Vedaldi, A. Zisserman, C. V. Jawahar
- Cats and Dogs
- IEEE Conference on Computer Vision and Pattern Recognition, 2012
- http://www.robots.ox.ac.uk/~vgg/data/pets/
-
-Example usage:
- python object_detection/dataset_tools/create_pet_tf_record.py \
- --data_dir=/home/user/pet \
- --output_dir=/home/user/pet/output
-"""
-
-import hashlib
-import io
-import logging
-import os
-import random
-import re
-
-from lxml import etree
-import numpy as np
-import PIL.Image
-import tensorflow as tf
-
-from object_detection.utils import dataset_util
-from object_detection.utils import label_map_util
-
-flags = tf.app.flags
-flags.DEFINE_string('data_dir', '', 'Root directory to raw pet dataset.')
-flags.DEFINE_string('output_dir', '', 'Path to directory to output TFRecords.')
-flags.DEFINE_string('label_map_path', 'data/pet_label_map.pbtxt',
- 'Path to label map proto')
-flags.DEFINE_boolean('faces_only', True, 'If True, generates bounding boxes '
- 'for pet faces. Otherwise generates bounding boxes (as '
- 'well as segmentations for full pet bodies). Note that '
- 'in the latter case, the resulting files are much larger.')
-FLAGS = flags.FLAGS
-
-
-def get_class_name_from_filename(file_name):
- """Gets the class name from a file.
-
- Args:
- file_name: The file name to get the class name from.
- ie. "american_pit_bull_terrier_105.jpg"
-
- Returns:
- A string of the class name.
- """
- match = re.match(r'([A-Za-z_]+)(_[0-9]+\.jpg)', file_name, re.I)
- return match.groups()[0]
-
-
-def dict_to_tf_example(data,
- mask_path,
- label_map_dict,
- image_subdirectory,
- ignore_difficult_instances=False,
- faces_only=True):
- """Convert XML derived dict to tf.Example proto.
-
- Notice that this function normalizes the bounding box coordinates provided
- by the raw data.
-
- Args:
- data: dict holding PASCAL XML fields for a single image (obtained by
- running dataset_util.recursive_parse_xml_to_dict)
- mask_path: String path to PNG encoded mask.
- label_map_dict: A map from string label names to integers ids.
- image_subdirectory: String specifying subdirectory within the
- Pascal dataset directory holding the actual image data.
- ignore_difficult_instances: Whether to skip difficult instances in the
- dataset (default: False).
- faces_only: If True, generates bounding boxes for pet faces. Otherwise
- generates bounding boxes (as well as segmentations for full pet bodies).
-
- Returns:
- example: The converted tf.Example.
-
- Raises:
- ValueError: if the image pointed to by data['filename'] is not a valid JPEG
- """
- img_path = os.path.join(image_subdirectory, data['filename'])
- with tf.gfile.GFile(img_path, 'rb') as fid:
- encoded_jpg = fid.read()
- encoded_jpg_io = io.BytesIO(encoded_jpg)
- image = PIL.Image.open(encoded_jpg_io)
- if image.format != 'JPEG':
- raise ValueError('Image format not JPEG')
- key = hashlib.sha256(encoded_jpg).hexdigest()
-
- with tf.gfile.GFile(mask_path, 'rb') as fid:
- encoded_mask_png = fid.read()
- encoded_png_io = io.BytesIO(encoded_mask_png)
- mask = PIL.Image.open(encoded_png_io)
- if mask.format != 'PNG':
- raise ValueError('Mask format not PNG')
-
- mask_np = np.asarray(mask)
- nonbackground_indices_x = np.any(mask_np != 2, axis=0)
- nonbackground_indices_y = np.any(mask_np != 2, axis=1)
- nonzero_x_indices = np.where(nonbackground_indices_x)
- nonzero_y_indices = np.where(nonbackground_indices_y)
-
- width = int(data['size']['width'])
- height = int(data['size']['height'])
-
- xmins = []
- ymins = []
- xmaxs = []
- ymaxs = []
- classes = []
- classes_text = []
- truncated = []
- poses = []
- difficult_obj = []
- masks = []
- for obj in data['object']:
- difficult = bool(int(obj['difficult']))
- if ignore_difficult_instances and difficult:
- continue
- difficult_obj.append(int(difficult))
-
- if faces_only:
- xmin = float(obj['bndbox']['xmin'])
- xmax = float(obj['bndbox']['xmax'])
- ymin = float(obj['bndbox']['ymin'])
- ymax = float(obj['bndbox']['ymax'])
- else:
- xmin = float(np.min(nonzero_x_indices))
- xmax = float(np.max(nonzero_x_indices))
- ymin = float(np.min(nonzero_y_indices))
- ymax = float(np.max(nonzero_y_indices))
-
- xmins.append(xmin / width)
- ymins.append(ymin / height)
- xmaxs.append(xmax / width)
- ymaxs.append(ymax / height)
- class_name = get_class_name_from_filename(data['filename'])
- classes_text.append(class_name.encode('utf8'))
- classes.append(label_map_dict[class_name])
- truncated.append(int(obj['truncated']))
- poses.append(obj['pose'].encode('utf8'))
- if not faces_only:
- mask_remapped = mask_np != 2
- masks.append(mask_remapped)
-
- feature_dict = {
- 'image/height': dataset_util.int64_feature(height),
- 'image/width': dataset_util.int64_feature(width),
- 'image/filename': dataset_util.bytes_feature(
- data['filename'].encode('utf8')),
- 'image/source_id': dataset_util.bytes_feature(
- data['filename'].encode('utf8')),
- 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
- 'image/encoded': dataset_util.bytes_feature(encoded_jpg),
- 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
- 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
- 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
- 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
- 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
- 'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
- 'image/object/class/label': dataset_util.int64_list_feature(classes),
- 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
- 'image/object/truncated': dataset_util.int64_list_feature(truncated),
- 'image/object/view': dataset_util.bytes_list_feature(poses),
- }
- if not faces_only:
- mask_stack = np.stack(masks).astype(np.float32)
- masks_flattened = np.reshape(mask_stack, [-1])
- feature_dict['image/object/mask'] = (
- dataset_util.float_list_feature(masks_flattened.tolist()))
-
- example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
- return example
-
-
-def create_tf_record(output_filename,
- label_map_dict,
- annotations_dir,
- image_dir,
- examples,
- faces_only=True):
- """Creates a TFRecord file from examples.
-
- Args:
- output_filename: Path to where output file is saved.
- label_map_dict: The label map dictionary.
- annotations_dir: Directory where annotation files are stored.
- image_dir: Directory where image files are stored.
- examples: Examples to parse and save to tf record.
- faces_only: If True, generates bounding boxes for pet faces. Otherwise
- generates bounding boxes (as well as segmentations for full pet bodies).
- """
- writer = tf.python_io.TFRecordWriter(output_filename)
- for idx, example in enumerate(examples):
- if idx % 100 == 0:
- logging.info('On image %d of %d', idx, len(examples))
- xml_path = os.path.join(annotations_dir, 'xmls', example + '.xml')
- mask_path = os.path.join(annotations_dir, 'trimaps', example + '.png')
-
- if not os.path.exists(xml_path):
- logging.warning('Could not find %s, ignoring example.', xml_path)
- continue
- with tf.gfile.GFile(xml_path, 'r') as fid:
- xml_str = fid.read()
- xml = etree.fromstring(xml_str)
- data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
-
- try:
- tf_example = dict_to_tf_example(
- data, mask_path, label_map_dict, image_dir, faces_only=faces_only)
- writer.write(tf_example.SerializeToString())
- except ValueError:
- logging.warning('Invalid example: %s, ignoring.', xml_path)
-
- writer.close()
-
-
-# TODO(derekjchow): Add test for pet/PASCAL main files.
-def main(_):
- data_dir = FLAGS.data_dir
- label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
-
- logging.info('Reading from Pet dataset.')
- image_dir = os.path.join(data_dir, 'images')
- annotations_dir = os.path.join(data_dir, 'annotations')
- examples_path = os.path.join(annotations_dir, 'trainval.txt')
- examples_list = dataset_util.read_examples_list(examples_path)
-
- # Test images are not included in the downloaded data set, so we shall perform
- # our own split.
- random.seed(42)
- random.shuffle(examples_list)
- num_examples = len(examples_list)
- num_train = int(0.7 * num_examples)
- train_examples = examples_list[:num_train]
- val_examples = examples_list[num_train:]
- logging.info('%d training and %d validation examples.',
- len(train_examples), len(val_examples))
-
- train_output_path = os.path.join(FLAGS.output_dir, 'pet_train.record')
- val_output_path = os.path.join(FLAGS.output_dir, 'pet_val.record')
- if FLAGS.faces_only:
- train_output_path = os.path.join(FLAGS.output_dir,
- 'pet_train_with_masks.record')
- val_output_path = os.path.join(FLAGS.output_dir,
- 'pet_val_with_masks.record')
- create_tf_record(train_output_path, label_map_dict, annotations_dir,
- image_dir, train_examples, faces_only=FLAGS.faces_only)
- create_tf_record(val_output_path, label_map_dict, annotations_dir,
- image_dir, val_examples, faces_only=FLAGS.faces_only)
-
-
-if __name__ == '__main__':
- tf.app.run()
diff --git a/object_detection/dataset_tools/oid_tfrecord_creation.py b/object_detection/dataset_tools/oid_tfrecord_creation.py
deleted file mode 100644
index 1bc41c0b..00000000
--- a/object_detection/dataset_tools/oid_tfrecord_creation.py
+++ /dev/null
@@ -1,113 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-r"""Utilities for creating TFRecords of TF examples for the Open Images dataset.
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import tensorflow as tf
-
-from object_detection.core import standard_fields
-from object_detection.utils import dataset_util
-
-
-def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
- encoded_image):
- """Populates a TF Example message with image annotations from a data frame.
-
- Args:
- annotations_data_frame: Data frame containing the annotations for a single
- image.
- label_map: String to integer label map.
- encoded_image: The encoded image string
-
- Returns:
- The populated TF Example, if the label of at least one object is present in
- label_map. Otherwise, returns None.
- """
-
- filtered_data_frame = annotations_data_frame[
- annotations_data_frame.LabelName.isin(label_map)]
-
- image_id = annotations_data_frame.ImageID.iloc[0]
-
- feature_map = {
- standard_fields.TfExampleFields.object_bbox_ymin:
- dataset_util.float_list_feature(filtered_data_frame.YMin.as_matrix()),
- standard_fields.TfExampleFields.object_bbox_xmin:
- dataset_util.float_list_feature(filtered_data_frame.XMin.as_matrix()),
- standard_fields.TfExampleFields.object_bbox_ymax:
- dataset_util.float_list_feature(filtered_data_frame.YMax.as_matrix()),
- standard_fields.TfExampleFields.object_bbox_xmax:
- dataset_util.float_list_feature(filtered_data_frame.XMax.as_matrix()),
- standard_fields.TfExampleFields.object_class_text:
- dataset_util.bytes_list_feature(
- filtered_data_frame.LabelName.as_matrix()),
- standard_fields.TfExampleFields.object_class_label:
- dataset_util.int64_list_feature(
- filtered_data_frame.LabelName.map(lambda x: label_map[x])
- .as_matrix()),
- standard_fields.TfExampleFields.filename:
- dataset_util.bytes_feature('{}.jpg'.format(image_id)),
- standard_fields.TfExampleFields.source_id:
- dataset_util.bytes_feature(image_id),
- standard_fields.TfExampleFields.image_encoded:
- dataset_util.bytes_feature(encoded_image),
- }
-
- if 'IsGroupOf' in filtered_data_frame.columns:
- feature_map[standard_fields.TfExampleFields.
- object_group_of] = dataset_util.int64_list_feature(
- filtered_data_frame.IsGroupOf.as_matrix().astype(int))
- if 'IsOccluded' in filtered_data_frame.columns:
- feature_map[standard_fields.TfExampleFields.
- object_occluded] = dataset_util.int64_list_feature(
- filtered_data_frame.IsOccluded.as_matrix().astype(int))
- if 'IsTruncated' in filtered_data_frame.columns:
- feature_map[standard_fields.TfExampleFields.
- object_truncated] = dataset_util.int64_list_feature(
- filtered_data_frame.IsTruncated.as_matrix().astype(int))
- if 'IsDepiction' in filtered_data_frame.columns:
- feature_map[standard_fields.TfExampleFields.
- object_depiction] = dataset_util.int64_list_feature(
- filtered_data_frame.IsDepiction.as_matrix().astype(int))
-
- return tf.train.Example(features=tf.train.Features(feature=feature_map))
-
-
-def open_sharded_output_tfrecords(exit_stack, base_path, num_shards):
- """Opens all TFRecord shards for writing and adds them to an exit stack.
-
- Args:
- exit_stack: A context2.ExitStack used to automatically closed the TFRecords
- opened in this function.
- base_path: The base path for all shards
- num_shards: The number of shards
-
- Returns:
- The list of opened TFRecords. Position k in the list corresponds to shard k.
- """
- tf_record_output_filenames = [
- '{}-{:05d}-of-{:05d}'.format(base_path, idx, num_shards)
- for idx in xrange(num_shards)
- ]
-
- tfrecords = [
- exit_stack.enter_context(tf.python_io.TFRecordWriter(file_name))
- for file_name in tf_record_output_filenames
- ]
-
- return tfrecords
diff --git a/object_detection/dataset_tools/oid_tfrecord_creation_test.py b/object_detection/dataset_tools/oid_tfrecord_creation_test.py
deleted file mode 100644
index 383af8a8..00000000
--- a/object_detection/dataset_tools/oid_tfrecord_creation_test.py
+++ /dev/null
@@ -1,203 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for oid_tfrecord_creation.py."""
-
-import os
-import contextlib2
-import pandas as pd
-import tensorflow as tf
-
-from object_detection.dataset_tools import oid_tfrecord_creation
-
-
-def create_test_data():
- data = {
- 'ImageID': ['i1', 'i1', 'i1', 'i1', 'i2', 'i2'],
- 'LabelName': ['a', 'a', 'b', 'b', 'b', 'c'],
- 'YMin': [0.3, 0.6, 0.8, 0.1, 0.0, 0.0],
- 'XMin': [0.1, 0.3, 0.7, 0.0, 0.1, 0.1],
- 'XMax': [0.2, 0.3, 0.8, 0.5, 0.9, 0.9],
- 'YMax': [0.3, 0.6, 1, 0.8, 0.8, 0.8],
- 'IsOccluded': [0, 1, 1, 0, 0, 0],
- 'IsTruncated': [0, 0, 0, 1, 0, 0],
- 'IsGroupOf': [0, 0, 0, 0, 0, 1],
- 'IsDepiction': [1, 0, 0, 0, 0, 0],
- }
- df = pd.DataFrame(data=data)
- label_map = {'a': 0, 'b': 1, 'c': 2}
- return label_map, df
-
-
-class TfExampleFromAnnotationsDataFrameTests(tf.test.TestCase):
-
- def test_simple(self):
- label_map, df = create_test_data()
-
- tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame(
- df[df.ImageID == 'i1'], label_map, 'encoded_image_test')
- self.assertProtoEquals("""
- features {
- feature {
- key: "image/encoded"
- value { bytes_list { value: "encoded_image_test" } } }
- feature {
- key: "image/filename"
- value { bytes_list { value: "i1.jpg" } } }
- feature {
- key: "image/object/bbox/ymin"
- value { float_list { value: [0.3, 0.6, 0.8, 0.1] } } }
- feature {
- key: "image/object/bbox/xmin"
- value { float_list { value: [0.1, 0.3, 0.7, 0.0] } } }
- feature {
- key: "image/object/bbox/ymax"
- value { float_list { value: [0.3, 0.6, 1.0, 0.8] } } }
- feature {
- key: "image/object/bbox/xmax"
- value { float_list { value: [0.2, 0.3, 0.8, 0.5] } } }
- feature {
- key: "image/object/class/label"
- value { int64_list { value: [0, 0, 1, 1] } } }
- feature {
- key: "image/object/class/text"
- value { bytes_list { value: ["a", "a", "b", "b"] } } }
- feature {
- key: "image/source_id"
- value { bytes_list { value: "i1" } } }
- feature {
- key: "image/object/depiction"
- value { int64_list { value: [1, 0, 0, 0] } } }
- feature {
- key: "image/object/group_of"
- value { int64_list { value: [0, 0, 0, 0] } } }
- feature {
- key: "image/object/occluded"
- value { int64_list { value: [0, 1, 1, 0] } } }
- feature {
- key: "image/object/truncated"
- value { int64_list { value: [0, 0, 0, 1] } } } }
- """, tf_example)
-
- def test_no_attributes(self):
- label_map, df = create_test_data()
-
- del df['IsDepiction']
- del df['IsGroupOf']
- del df['IsOccluded']
- del df['IsTruncated']
-
- tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame(
- df[df.ImageID == 'i2'], label_map, 'encoded_image_test')
- self.assertProtoEquals("""
- features {
- feature {
- key: "image/encoded"
- value { bytes_list { value: "encoded_image_test" } } }
- feature {
- key: "image/filename"
- value { bytes_list { value: "i2.jpg" } } }
- feature {
- key: "image/object/bbox/ymin"
- value { float_list { value: [0.0, 0.0] } } }
- feature {
- key: "image/object/bbox/xmin"
- value { float_list { value: [0.1, 0.1] } } }
- feature {
- key: "image/object/bbox/ymax"
- value { float_list { value: [0.8, 0.8] } } }
- feature {
- key: "image/object/bbox/xmax"
- value { float_list { value: [0.9, 0.9] } } }
- feature {
- key: "image/object/class/label"
- value { int64_list { value: [1, 2] } } }
- feature {
- key: "image/object/class/text"
- value { bytes_list { value: ["b", "c"] } } }
- feature {
- key: "image/source_id"
- value { bytes_list { value: "i2" } } } }
- """, tf_example)
-
- def test_label_filtering(self):
- label_map, df = create_test_data()
-
- label_map = {'a': 0}
-
- tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame(
- df[df.ImageID == 'i1'], label_map, 'encoded_image_test')
- self.assertProtoEquals("""
- features {
- feature {
- key: "image/encoded"
- value { bytes_list { value: "encoded_image_test" } } }
- feature {
- key: "image/filename"
- value { bytes_list { value: "i1.jpg" } } }
- feature {
- key: "image/object/bbox/ymin"
- value { float_list { value: [0.3, 0.6] } } }
- feature {
- key: "image/object/bbox/xmin"
- value { float_list { value: [0.1, 0.3] } } }
- feature {
- key: "image/object/bbox/ymax"
- value { float_list { value: [0.3, 0.6] } } }
- feature {
- key: "image/object/bbox/xmax"
- value { float_list { value: [0.2, 0.3] } } }
- feature {
- key: "image/object/class/label"
- value { int64_list { value: [0, 0] } } }
- feature {
- key: "image/object/class/text"
- value { bytes_list { value: ["a", "a"] } } }
- feature {
- key: "image/source_id"
- value { bytes_list { value: "i1" } } }
- feature {
- key: "image/object/depiction"
- value { int64_list { value: [1, 0] } } }
- feature {
- key: "image/object/group_of"
- value { int64_list { value: [0, 0] } } }
- feature {
- key: "image/object/occluded"
- value { int64_list { value: [0, 1] } } }
- feature {
- key: "image/object/truncated"
- value { int64_list { value: [0, 0] } } } }
- """, tf_example)
-
-
-class OpenOutputTfrecordsTests(tf.test.TestCase):
-
- def test_sharded_tfrecord_writes(self):
- with contextlib2.ExitStack() as tf_record_close_stack:
- output_tfrecords = oid_tfrecord_creation.open_sharded_output_tfrecords(
- tf_record_close_stack,
- os.path.join(tf.test.get_temp_dir(), 'test.tfrec'), 10)
- for idx in range(10):
- output_tfrecords[idx].write('test_{}'.format(idx))
-
- for idx in range(10):
- tf_record_path = '{}-{:05d}-of-00010'.format(
- os.path.join(tf.test.get_temp_dir(), 'test.tfrec'), idx)
- records = list(tf.python_io.tf_record_iterator(tf_record_path))
- self.assertAllEqual(records, ['test_{}'.format(idx)])
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/eval.py b/object_detection/eval.py
deleted file mode 100644
index 175ac1ee..00000000
--- a/object_detection/eval.py
+++ /dev/null
@@ -1,133 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-r"""Evaluation executable for detection models.
-
-This executable is used to evaluate DetectionModels. There are two ways of
-configuring the eval job.
-
-1) A single pipeline_pb2.TrainEvalPipelineConfig file maybe specified instead.
-In this mode, the --eval_training_data flag may be given to force the pipeline
-to evaluate on training data instead.
-
-Example usage:
- ./eval \
- --logtostderr \
- --checkpoint_dir=path/to/checkpoint_dir \
- --eval_dir=path/to/eval_dir \
- --pipeline_config_path=pipeline_config.pbtxt
-
-2) Three configuration files may be provided: a model_pb2.DetectionModel
-configuration file to define what type of DetectionModel is being evaluated, an
-input_reader_pb2.InputReader file to specify what data the model is evaluating
-and an eval_pb2.EvalConfig file to configure evaluation parameters.
-
-Example usage:
- ./eval \
- --logtostderr \
- --checkpoint_dir=path/to/checkpoint_dir \
- --eval_dir=path/to/eval_dir \
- --eval_config_path=eval_config.pbtxt \
- --model_config_path=model_config.pbtxt \
- --input_config_path=eval_input_config.pbtxt
-"""
-import functools
-import os
-import tensorflow as tf
-
-from object_detection import evaluator
-from object_detection.builders import input_reader_builder
-from object_detection.builders import model_builder
-from object_detection.utils import config_util
-from object_detection.utils import label_map_util
-
-
-tf.logging.set_verbosity(tf.logging.INFO)
-
-flags = tf.app.flags
-flags.DEFINE_boolean('eval_training_data', False,
- 'If training data should be evaluated for this job.')
-flags.DEFINE_string('checkpoint_dir', '',
- 'Directory containing checkpoints to evaluate, typically '
- 'set to `train_dir` used in the training job.')
-flags.DEFINE_string('eval_dir', '',
- 'Directory to write eval summaries to.')
-flags.DEFINE_string('pipeline_config_path', '',
- 'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
- 'file. If provided, other configs are ignored')
-flags.DEFINE_string('eval_config_path', '',
- 'Path to an eval_pb2.EvalConfig config file.')
-flags.DEFINE_string('input_config_path', '',
- 'Path to an input_reader_pb2.InputReader config file.')
-flags.DEFINE_string('model_config_path', '',
- 'Path to a model_pb2.DetectionModel config file.')
-flags.DEFINE_boolean('run_once', False, 'Option to only run a single pass of '
- 'evaluation. Overrides the `max_evals` parameter in the '
- 'provided config.')
-FLAGS = flags.FLAGS
-
-
-def main(unused_argv):
- assert FLAGS.checkpoint_dir, '`checkpoint_dir` is missing.'
- assert FLAGS.eval_dir, '`eval_dir` is missing.'
- tf.gfile.MakeDirs(FLAGS.eval_dir)
- if FLAGS.pipeline_config_path:
- configs = config_util.get_configs_from_pipeline_file(
- FLAGS.pipeline_config_path)
- tf.gfile.Copy(FLAGS.pipeline_config_path,
- os.path.join(FLAGS.eval_dir, 'pipeline.config'),
- overwrite=True)
- else:
- configs = config_util.get_configs_from_multiple_files(
- model_config_path=FLAGS.model_config_path,
- eval_config_path=FLAGS.eval_config_path,
- eval_input_config_path=FLAGS.input_config_path)
- for name, config in [('model.config', FLAGS.model_config_path),
- ('eval.config', FLAGS.eval_config_path),
- ('input.config', FLAGS.input_config_path)]:
- tf.gfile.Copy(config,
- os.path.join(FLAGS.eval_dir, name),
- overwrite=True)
-
- model_config = configs['model']
- eval_config = configs['eval_config']
- if FLAGS.eval_training_data:
- input_config = configs['train_input_config']
- else:
- input_config = configs['eval_input_config']
-
- model_fn = functools.partial(
- model_builder.build,
- model_config=model_config,
- is_training=False)
-
- create_input_dict_fn = functools.partial(
- input_reader_builder.build,
- input_config)
-
- label_map = label_map_util.load_labelmap(input_config.label_map_path)
- max_num_classes = max([item.id for item in label_map.item])
- categories = label_map_util.convert_label_map_to_categories(
- label_map, max_num_classes)
-
- if FLAGS.run_once:
- eval_config.max_evals = 1
-
- evaluator.evaluate(create_input_dict_fn, model_fn, eval_config, categories,
- FLAGS.checkpoint_dir, FLAGS.eval_dir)
-
-
-if __name__ == '__main__':
- tf.app.run()
diff --git a/object_detection/eval_util.py b/object_detection/eval_util.py
deleted file mode 100644
index 6a37be76..00000000
--- a/object_detection/eval_util.py
+++ /dev/null
@@ -1,516 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Common functions for repeatedly evaluating a checkpoint."""
-import logging
-import os
-import time
-
-import numpy as np
-import tensorflow as tf
-
-from object_detection.core import box_list
-from object_detection.core import box_list_ops
-from object_detection.core import keypoint_ops
-from object_detection.core import standard_fields as fields
-from object_detection.utils import label_map_util
-from object_detection.utils import ops
-from object_detection.utils import visualization_utils as vis_utils
-
-slim = tf.contrib.slim
-
-
-def write_metrics(metrics, global_step, summary_dir):
- """Write metrics to a summary directory.
-
- Args:
- metrics: A dictionary containing metric names and values.
- global_step: Global step at which the metrics are computed.
- summary_dir: Directory to write tensorflow summaries to.
- """
- logging.info('Writing metrics to tf summary.')
- summary_writer = tf.summary.FileWriter(summary_dir)
- for key in sorted(metrics):
- summary = tf.Summary(value=[
- tf.Summary.Value(tag=key, simple_value=metrics[key]),
- ])
- summary_writer.add_summary(summary, global_step)
- logging.info('%s: %f', key, metrics[key])
- summary_writer.close()
- logging.info('Metrics written to tf summary.')
-
-
-# TODO: Add tests.
-def visualize_detection_results(result_dict,
- tag,
- global_step,
- categories,
- summary_dir='',
- export_dir='',
- agnostic_mode=False,
- show_groundtruth=False,
- min_score_thresh=.5,
- max_num_predictions=20):
- """Visualizes detection results and writes visualizations to image summaries.
-
- This function visualizes an image with its detected bounding boxes and writes
- to image summaries which can be viewed on tensorboard. It optionally also
- writes images to a directory. In the case of missing entry in the label map,
- unknown class name in the visualization is shown as "N/A".
-
- Args:
- result_dict: a dictionary holding groundtruth and detection
- data corresponding to each image being evaluated. The following keys
- are required:
- 'original_image': a numpy array representing the image with shape
- [1, height, width, 3]
- 'detection_boxes': a numpy array of shape [N, 4]
- 'detection_scores': a numpy array of shape [N]
- 'detection_classes': a numpy array of shape [N]
- The following keys are optional:
- 'groundtruth_boxes': a numpy array of shape [N, 4]
- 'groundtruth_keypoints': a numpy array of shape [N, num_keypoints, 2]
- Detections are assumed to be provided in decreasing order of score and for
- display, and we assume that scores are probabilities between 0 and 1.
- tag: tensorboard tag (string) to associate with image.
- global_step: global step at which the visualization are generated.
- categories: a list of dictionaries representing all possible categories.
- Each dict in this list has the following keys:
- 'id': (required) an integer id uniquely identifying this category
- 'name': (required) string representing category name
- e.g., 'cat', 'dog', 'pizza'
- 'supercategory': (optional) string representing the supercategory
- e.g., 'animal', 'vehicle', 'food', etc
- summary_dir: the output directory to which the image summaries are written.
- export_dir: the output directory to which images are written. If this is
- empty (default), then images are not exported.
- agnostic_mode: boolean (default: False) controlling whether to evaluate in
- class-agnostic mode or not.
- show_groundtruth: boolean (default: False) controlling whether to show
- groundtruth boxes in addition to detected boxes
- min_score_thresh: minimum score threshold for a box to be visualized
- max_num_predictions: maximum number of detections to visualize
- Raises:
- ValueError: if result_dict does not contain the expected keys (i.e.,
- 'original_image', 'detection_boxes', 'detection_scores',
- 'detection_classes')
- """
- if not set([
- 'original_image', 'detection_boxes', 'detection_scores',
- 'detection_classes'
- ]).issubset(set(result_dict.keys())):
- raise ValueError('result_dict does not contain all expected keys.')
- if show_groundtruth and 'groundtruth_boxes' not in result_dict:
- raise ValueError('If show_groundtruth is enabled, result_dict must contain '
- 'groundtruth_boxes.')
- logging.info('Creating detection visualizations.')
- category_index = label_map_util.create_category_index(categories)
-
- image = np.squeeze(result_dict['original_image'], axis=0)
- detection_boxes = result_dict['detection_boxes']
- detection_scores = result_dict['detection_scores']
- detection_classes = np.int32((result_dict['detection_classes']))
- detection_keypoints = result_dict.get('detection_keypoints', None)
- detection_masks = result_dict.get('detection_masks', None)
-
- # Plot groundtruth underneath detections
- if show_groundtruth:
- groundtruth_boxes = result_dict['groundtruth_boxes']
- groundtruth_keypoints = result_dict.get('groundtruth_keypoints', None)
- vis_utils.visualize_boxes_and_labels_on_image_array(
- image,
- groundtruth_boxes,
- None,
- None,
- category_index,
- keypoints=groundtruth_keypoints,
- use_normalized_coordinates=False,
- max_boxes_to_draw=None)
- vis_utils.visualize_boxes_and_labels_on_image_array(
- image,
- detection_boxes,
- detection_classes,
- detection_scores,
- category_index,
- instance_masks=detection_masks,
- keypoints=detection_keypoints,
- use_normalized_coordinates=False,
- max_boxes_to_draw=max_num_predictions,
- min_score_thresh=min_score_thresh,
- agnostic_mode=agnostic_mode)
-
- if export_dir:
- export_path = os.path.join(export_dir, 'export-{}.png'.format(tag))
- vis_utils.save_image_array_as_png(image, export_path)
-
- summary = tf.Summary(value=[
- tf.Summary.Value(
- tag=tag,
- image=tf.Summary.Image(
- encoded_image_string=vis_utils.encode_image_array_as_png_str(
- image)))
- ])
- summary_writer = tf.summary.FileWriter(summary_dir)
- summary_writer.add_summary(summary, global_step)
- summary_writer.close()
-
- logging.info('Detection visualizations written to summary with tag %s.', tag)
-
-
-def _run_checkpoint_once(tensor_dict,
- evaluators=None,
- batch_processor=None,
- checkpoint_dirs=None,
- variables_to_restore=None,
- restore_fn=None,
- num_batches=1,
- master='',
- save_graph=False,
- save_graph_dir=''):
- """Evaluates metrics defined in evaluators.
-
- This function loads the latest checkpoint in checkpoint_dirs and evaluates
- all metrics defined in evaluators. The metrics are processed in batch by the
- batch_processor.
-
- Args:
- tensor_dict: a dictionary holding tensors representing a batch of detections
- and corresponding groundtruth annotations.
- evaluators: a list of object of type DetectionEvaluator to be used for
- evaluation. Note that the metric names produced by different evaluators
- must be unique.
- batch_processor: a function taking four arguments:
- 1. tensor_dict: the same tensor_dict that is passed in as the first
- argument to this function.
- 2. sess: a tensorflow session
- 3. batch_index: an integer representing the index of the batch amongst
- all batches
- By default, batch_processor is None, which defaults to running:
- return sess.run(tensor_dict)
- To skip an image, it suffices to return an empty dictionary in place of
- result_dict.
- checkpoint_dirs: list of directories to load into an EnsembleModel. If it
- has only one directory, EnsembleModel will not be used --
- a DetectionModel
- will be instantiated directly. Not used if restore_fn is set.
- variables_to_restore: None, or a dictionary mapping variable names found in
- a checkpoint to model variables. The dictionary would normally be
- generated by creating a tf.train.ExponentialMovingAverage object and
- calling its variables_to_restore() method. Not used if restore_fn is set.
- restore_fn: None, or a function that takes a tf.Session object and correctly
- restores all necessary variables from the correct checkpoint file. If
- None, attempts to restore from the first directory in checkpoint_dirs.
- num_batches: the number of batches to use for evaluation.
- master: the location of the Tensorflow session.
- save_graph: whether or not the Tensorflow graph is stored as a pbtxt file.
- save_graph_dir: where to store the Tensorflow graph on disk. If save_graph
- is True this must be non-empty.
-
- Returns:
- global_step: the count of global steps.
- all_evaluator_metrics: A dictionary containing metric names and values.
-
- Raises:
- ValueError: if restore_fn is None and checkpoint_dirs doesn't have at least
- one element.
- ValueError: if save_graph is True and save_graph_dir is not defined.
- """
- if save_graph and not save_graph_dir:
- raise ValueError('`save_graph_dir` must be defined.')
- sess = tf.Session(master, graph=tf.get_default_graph())
- sess.run(tf.global_variables_initializer())
- sess.run(tf.local_variables_initializer())
- sess.run(tf.tables_initializer())
- if restore_fn:
- restore_fn(sess)
- else:
- if not checkpoint_dirs:
- raise ValueError('`checkpoint_dirs` must have at least one entry.')
- checkpoint_file = tf.train.latest_checkpoint(checkpoint_dirs[0])
- saver = tf.train.Saver(variables_to_restore)
- saver.restore(sess, checkpoint_file)
-
- if save_graph:
- tf.train.write_graph(sess.graph_def, save_graph_dir, 'eval.pbtxt')
-
- counters = {'skipped': 0, 'success': 0}
- with tf.contrib.slim.queues.QueueRunners(sess):
- try:
- for batch in range(int(num_batches)):
- if (batch + 1) % 100 == 0:
- logging.info('Running eval ops batch %d/%d', batch + 1, num_batches)
- if not batch_processor:
- try:
- result_dict = sess.run(tensor_dict)
- counters['success'] += 1
- except tf.errors.InvalidArgumentError:
- logging.info('Skipping image')
- counters['skipped'] += 1
- result_dict = {}
- else:
- result_dict = batch_processor(tensor_dict, sess, batch, counters)
- for evaluator in evaluators:
- # TODO: Use image_id tensor once we fix the input data
- # decoders to return correct image_id.
- # TODO: result_dict contains batches of images, while
- # add_single_ground_truth_image_info expects a single image. Fix
- evaluator.add_single_ground_truth_image_info(
- image_id=batch, groundtruth_dict=result_dict)
- evaluator.add_single_detected_image_info(
- image_id=batch, detections_dict=result_dict)
- logging.info('Running eval batches done.')
- except tf.errors.OutOfRangeError:
- logging.info('Done evaluating -- epoch limit reached')
- finally:
- # When done, ask the threads to stop.
- logging.info('# success: %d', counters['success'])
- logging.info('# skipped: %d', counters['skipped'])
- all_evaluator_metrics = {}
- for evaluator in evaluators:
- metrics = evaluator.evaluate()
- evaluator.clear()
- if any(key in all_evaluator_metrics for key in metrics):
- raise ValueError('Metric names between evaluators must not collide.')
- all_evaluator_metrics.update(metrics)
- global_step = tf.train.global_step(sess, tf.train.get_global_step())
- sess.close()
- return (global_step, all_evaluator_metrics)
-
-
-# TODO: Add tests.
-def repeated_checkpoint_run(tensor_dict,
- summary_dir,
- evaluators,
- batch_processor=None,
- checkpoint_dirs=None,
- variables_to_restore=None,
- restore_fn=None,
- num_batches=1,
- eval_interval_secs=120,
- max_number_of_evaluations=None,
- master='',
- save_graph=False,
- save_graph_dir=''):
- """Periodically evaluates desired tensors using checkpoint_dirs or restore_fn.
-
- This function repeatedly loads a checkpoint and evaluates a desired
- set of tensors (provided by tensor_dict) and hands the resulting numpy
- arrays to a function result_processor which can be used to further
- process/save/visualize the results.
-
- Args:
- tensor_dict: a dictionary holding tensors representing a batch of detections
- and corresponding groundtruth annotations.
- summary_dir: a directory to write metrics summaries.
- evaluators: a list of object of type DetectionEvaluator to be used for
- evaluation. Note that the metric names produced by different evaluators
- must be unique.
- batch_processor: a function taking three arguments:
- 1. tensor_dict: the same tensor_dict that is passed in as the first
- argument to this function.
- 2. sess: a tensorflow session
- 3. batch_index: an integer representing the index of the batch amongst
- all batches
- By default, batch_processor is None, which defaults to running:
- return sess.run(tensor_dict)
- checkpoint_dirs: list of directories to load into a DetectionModel or an
- EnsembleModel if restore_fn isn't set. Also used to determine when to run
- next evaluation. Must have at least one element.
- variables_to_restore: None, or a dictionary mapping variable names found in
- a checkpoint to model variables. The dictionary would normally be
- generated by creating a tf.train.ExponentialMovingAverage object and
- calling its variables_to_restore() method. Not used if restore_fn is set.
- restore_fn: a function that takes a tf.Session object and correctly restores
- all necessary variables from the correct checkpoint file.
- num_batches: the number of batches to use for evaluation.
- eval_interval_secs: the number of seconds between each evaluation run.
- max_number_of_evaluations: the max number of iterations of the evaluation.
- If the value is left as None the evaluation continues indefinitely.
- master: the location of the Tensorflow session.
- save_graph: whether or not the Tensorflow graph is saved as a pbtxt file.
- save_graph_dir: where to save on disk the Tensorflow graph. If store_graph
- is True this must be non-empty.
-
- Returns:
- metrics: A dictionary containing metric names and values in the latest
- evaluation.
-
- Raises:
- ValueError: if max_num_of_evaluations is not None or a positive number.
- ValueError: if checkpoint_dirs doesn't have at least one element.
- """
- if max_number_of_evaluations and max_number_of_evaluations <= 0:
- raise ValueError(
- '`number_of_steps` must be either None or a positive number.')
-
- if not checkpoint_dirs:
- raise ValueError('`checkpoint_dirs` must have at least one entry.')
-
- last_evaluated_model_path = None
- number_of_evaluations = 0
- while True:
- start = time.time()
- logging.info('Starting evaluation at ' + time.strftime(
- '%Y-%m-%d-%H:%M:%S', time.gmtime()))
- model_path = tf.train.latest_checkpoint(checkpoint_dirs[0])
- if not model_path:
- logging.info('No model found in %s. Will try again in %d seconds',
- checkpoint_dirs[0], eval_interval_secs)
- elif model_path == last_evaluated_model_path:
- logging.info('Found already evaluated checkpoint. Will try again in %d '
- 'seconds', eval_interval_secs)
- else:
- last_evaluated_model_path = model_path
- global_step, metrics = _run_checkpoint_once(tensor_dict, evaluators,
- batch_processor,
- checkpoint_dirs,
- variables_to_restore,
- restore_fn, num_batches,
- master, save_graph,
- save_graph_dir)
- write_metrics(metrics, global_step, summary_dir)
- number_of_evaluations += 1
-
- if (max_number_of_evaluations and
- number_of_evaluations >= max_number_of_evaluations):
- logging.info('Finished evaluation!')
- break
- time_to_next_eval = start + eval_interval_secs - time.time()
- if time_to_next_eval > 0:
- time.sleep(time_to_next_eval)
-
- return metrics
-
-
-def result_dict_for_single_example(image,
- key,
- detections,
- groundtruth=None,
- class_agnostic=False,
- scale_to_absolute=False):
- """Merges all detection and groundtruth information for a single example.
-
- Note that evaluation tools require classes that are 1-indexed, and so this
- function performs the offset. If `class_agnostic` is True, all output classes
- have label 1.
-
- Args:
- image: A single 4D image tensor of shape [1, H, W, C].
- key: A single string tensor identifying the image.
- detections: A dictionary of detections, returned from
- DetectionModel.postprocess().
- groundtruth: (Optional) Dictionary of groundtruth items, with fields:
- 'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
- normalized coordinates.
- 'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
- 'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional)
- 'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional)
- 'groundtruth_difficult': [num_boxes] int64 tensor. (Optional)
- 'groundtruth_group_of': [num_boxes] int64 tensor. (Optional)
- 'groundtruth_instance_masks': 3D int64 tensor of instance masks
- (Optional).
- class_agnostic: Boolean indicating whether the detections are class-agnostic
- (i.e. binary). Default False.
- scale_to_absolute: Boolean indicating whether boxes, masks, keypoints should
- be scaled to absolute coordinates. Note that for IoU based evaluations,
- it does not matter whether boxes are expressed in absolute or relative
- coordinates. Default False.
-
- Returns:
- A dictionary with:
- 'original_image': A [1, H, W, C] uint8 image tensor.
- 'key': A string tensor with image identifier.
- 'detection_boxes': [max_detections, 4] float32 tensor of boxes, in
- normalized or absolute coordinates, depending on the value of
- `scale_to_absolute`.
- 'detection_scores': [max_detections] float32 tensor of scores.
- 'detection_classes': [max_detections] int64 tensor of 1-indexed classes.
- 'detection_masks': [max_detections, None, None] float32 tensor of binarized
- masks. (Only present if available in `detections`)
- 'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
- normalized or absolute coordinates, depending on the value of
- `scale_to_absolute`. (Optional)
- 'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
- (Optional)
- 'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional)
- 'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional)
- 'groundtruth_difficult': [num_boxes] int64 tensor. (Optional)
- 'groundtruth_group_of': [num_boxes] int64 tensor. (Optional)
- 'groundtruth_instance_masks': 3D int64 tensor of instance masks
- (Optional).
-
- """
- label_id_offset = 1 # Applying label id offset (b/63711816)
-
- input_data_fields = fields.InputDataFields()
- output_dict = {
- input_data_fields.original_image: image,
- input_data_fields.key: key,
- }
-
- detection_fields = fields.DetectionResultFields
- detection_boxes = detections[detection_fields.detection_boxes][0]
- output_dict[detection_fields.detection_boxes] = detection_boxes
- image_shape = tf.shape(image)
- if scale_to_absolute:
- absolute_detection_boxlist = box_list_ops.to_absolute_coordinates(
- box_list.BoxList(detection_boxes), image_shape[1], image_shape[2])
- output_dict[detection_fields.detection_boxes] = (
- absolute_detection_boxlist.get())
- detection_scores = detections[detection_fields.detection_scores][0]
- output_dict[detection_fields.detection_scores] = detection_scores
-
- if class_agnostic:
- detection_classes = tf.ones_like(detection_scores, dtype=tf.int64)
- else:
- detection_classes = (
- tf.to_int64(detections[detection_fields.detection_classes][0]) +
- label_id_offset)
- output_dict[detection_fields.detection_classes] = detection_classes
-
- if detection_fields.detection_masks in detections:
- detection_masks = detections[detection_fields.detection_masks][0]
- output_dict[detection_fields.detection_masks] = detection_masks
- if scale_to_absolute:
- # TODO: This should be done in model's postprocess
- # function ideally.
- detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
- detection_masks, detection_boxes, image_shape[1], image_shape[2])
- detection_masks_reframed = tf.to_float(
- tf.greater(detection_masks_reframed, 0.5))
- output_dict[detection_fields.detection_masks] = detection_masks_reframed
- if detection_fields.detection_keypoints in detections:
- detection_keypoints = detections[detection_fields.detection_keypoints][0]
- output_dict[detection_fields.detection_keypoints] = detection_keypoints
- if scale_to_absolute:
- absolute_detection_keypoints = keypoint_ops.scale(
- detection_keypoints, image_shape[1], image_shape[2])
- output_dict[detection_fields.detection_keypoints] = (
- absolute_detection_keypoints)
-
- if groundtruth:
- output_dict.update(groundtruth)
- if scale_to_absolute:
- groundtruth_boxes = groundtruth[input_data_fields.groundtruth_boxes]
- absolute_gt_boxlist = box_list_ops.to_absolute_coordinates(
- box_list.BoxList(groundtruth_boxes), image_shape[1], image_shape[2])
- output_dict[input_data_fields.groundtruth_boxes] = (
- absolute_gt_boxlist.get())
- # For class-agnostic models, groundtruth classes all become 1.
- if class_agnostic:
- groundtruth_classes = groundtruth[input_data_fields.groundtruth_classes]
- groundtruth_classes = tf.ones_like(groundtruth_classes, dtype=tf.int64)
- output_dict[input_data_fields.groundtruth_classes] = groundtruth_classes
-
- return output_dict
diff --git a/object_detection/evaluator.py b/object_detection/evaluator.py
deleted file mode 100644
index 74722d00..00000000
--- a/object_detection/evaluator.py
+++ /dev/null
@@ -1,212 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Detection model evaluator.
-
-This file provides a generic evaluation method that can be used to evaluate a
-DetectionModel.
-"""
-
-import logging
-import tensorflow as tf
-
-from object_detection import eval_util
-from object_detection.core import prefetcher
-from object_detection.core import standard_fields as fields
-from object_detection.utils import object_detection_evaluation
-
-# A dictionary of metric names to classes that implement the metric. The classes
-# in the dictionary must implement
-# utils.object_detection_evaluation.DetectionEvaluator interface.
-EVAL_METRICS_CLASS_DICT = {
- 'pascal_voc_metrics':
- object_detection_evaluation.PascalDetectionEvaluator,
- 'weighted_pascal_voc_metrics':
- object_detection_evaluation.WeightedPascalDetectionEvaluator,
- 'open_images_metrics':
- object_detection_evaluation.OpenImagesDetectionEvaluator
-}
-
-
-def _extract_prediction_tensors(model,
- create_input_dict_fn,
- ignore_groundtruth=False):
- """Restores the model in a tensorflow session.
-
- Args:
- model: model to perform predictions with.
- create_input_dict_fn: function to create input tensor dictionaries.
- ignore_groundtruth: whether groundtruth should be ignored.
-
- Returns:
- tensor_dict: A tensor dictionary with evaluations.
- """
- input_dict = create_input_dict_fn()
- prefetch_queue = prefetcher.prefetch(input_dict, capacity=500)
- input_dict = prefetch_queue.dequeue()
- original_image = tf.expand_dims(input_dict[fields.InputDataFields.image], 0)
- preprocessed_image = model.preprocess(tf.to_float(original_image))
- prediction_dict = model.predict(preprocessed_image)
- detections = model.postprocess(prediction_dict)
-
- groundtruth = None
- if not ignore_groundtruth:
- groundtruth = {
- fields.InputDataFields.groundtruth_boxes:
- input_dict[fields.InputDataFields.groundtruth_boxes],
- fields.InputDataFields.groundtruth_classes:
- input_dict[fields.InputDataFields.groundtruth_classes],
- fields.InputDataFields.groundtruth_area:
- input_dict[fields.InputDataFields.groundtruth_area],
- fields.InputDataFields.groundtruth_is_crowd:
- input_dict[fields.InputDataFields.groundtruth_is_crowd],
- fields.InputDataFields.groundtruth_difficult:
- input_dict[fields.InputDataFields.groundtruth_difficult]
- }
- if fields.InputDataFields.groundtruth_group_of in input_dict:
- groundtruth[fields.InputDataFields.groundtruth_group_of] = (
- input_dict[fields.InputDataFields.groundtruth_group_of])
- if fields.DetectionResultFields.detection_masks in detections:
- groundtruth[fields.InputDataFields.groundtruth_instance_masks] = (
- input_dict[fields.InputDataFields.groundtruth_instance_masks])
-
- return eval_util.result_dict_for_single_example(
- original_image,
- input_dict[fields.InputDataFields.source_id],
- detections,
- groundtruth,
- class_agnostic=(
- fields.DetectionResultFields.detection_classes not in detections),
- scale_to_absolute=True)
-
-
-def get_evaluators(eval_config, categories):
- """Returns the evaluator class according to eval_config, valid for categories.
-
- Args:
- eval_config: evaluation configurations.
- categories: a list of categories to evaluate.
- Returns:
- An list of instances of DetectionEvaluator.
-
- Raises:
- ValueError: if metric is not in the metric class dictionary.
- """
- eval_metric_fn_key = eval_config.metrics_set
- if eval_metric_fn_key not in EVAL_METRICS_CLASS_DICT:
- raise ValueError('Metric not found: {}'.format(eval_metric_fn_key))
- return [
- EVAL_METRICS_CLASS_DICT[eval_metric_fn_key](
- categories=categories)
- ]
-
-
-def evaluate(create_input_dict_fn, create_model_fn, eval_config, categories,
- checkpoint_dir, eval_dir):
- """Evaluation function for detection models.
-
- Args:
- create_input_dict_fn: a function to create a tensor input dictionary.
- create_model_fn: a function that creates a DetectionModel.
- eval_config: a eval_pb2.EvalConfig protobuf.
- categories: a list of category dictionaries. Each dict in the list should
- have an integer 'id' field and string 'name' field.
- checkpoint_dir: directory to load the checkpoints to evaluate from.
- eval_dir: directory to write evaluation metrics summary to.
-
- Returns:
- metrics: A dictionary containing metric names and values from the latest
- run.
- """
-
- model = create_model_fn()
-
- if eval_config.ignore_groundtruth and not eval_config.export_path:
- logging.fatal('If ignore_groundtruth=True then an export_path is '
- 'required. Aborting!!!')
-
- tensor_dict = _extract_prediction_tensors(
- model=model,
- create_input_dict_fn=create_input_dict_fn,
- ignore_groundtruth=eval_config.ignore_groundtruth)
-
- def _process_batch(tensor_dict, sess, batch_index, counters):
- """Evaluates tensors in tensor_dict, visualizing the first K examples.
-
- This function calls sess.run on tensor_dict, evaluating the original_image
- tensor only on the first K examples and visualizing detections overlaid
- on this original_image.
-
- Args:
- tensor_dict: a dictionary of tensors
- sess: tensorflow session
- batch_index: the index of the batch amongst all batches in the run.
- counters: a dictionary holding 'success' and 'skipped' fields which can
- be updated to keep track of number of successful and failed runs,
- respectively. If these fields are not updated, then the success/skipped
- counter values shown at the end of evaluation will be incorrect.
-
- Returns:
- result_dict: a dictionary of numpy arrays
- """
- try:
- result_dict = sess.run(tensor_dict)
- counters['success'] += 1
- except tf.errors.InvalidArgumentError:
- logging.info('Skipping image')
- counters['skipped'] += 1
- return {}
- global_step = tf.train.global_step(sess, tf.train.get_global_step())
- if batch_index < eval_config.num_visualizations:
- tag = 'image-{}'.format(batch_index)
- eval_util.visualize_detection_results(
- result_dict,
- tag,
- global_step,
- categories=categories,
- summary_dir=eval_dir,
- export_dir=eval_config.visualization_export_dir,
- show_groundtruth=eval_config.visualization_export_dir)
- return result_dict
-
- variables_to_restore = tf.global_variables()
- global_step = tf.train.get_or_create_global_step()
- variables_to_restore.append(global_step)
- if eval_config.use_moving_averages:
- variable_averages = tf.train.ExponentialMovingAverage(0.0)
- variables_to_restore = variable_averages.variables_to_restore()
- saver = tf.train.Saver(variables_to_restore)
-
- def _restore_latest_checkpoint(sess):
- latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
- saver.restore(sess, latest_checkpoint)
-
- metrics = eval_util.repeated_checkpoint_run(
- tensor_dict=tensor_dict,
- summary_dir=eval_dir,
- evaluators=get_evaluators(eval_config, categories),
- batch_processor=_process_batch,
- checkpoint_dirs=[checkpoint_dir],
- variables_to_restore=None,
- restore_fn=_restore_latest_checkpoint,
- num_batches=eval_config.num_examples,
- eval_interval_secs=eval_config.eval_interval_secs,
- max_number_of_evaluations=(1 if eval_config.ignore_groundtruth else
- eval_config.max_evals
- if eval_config.max_evals else None),
- master=eval_config.eval_master,
- save_graph=eval_config.save_graph,
- save_graph_dir=(eval_dir if eval_config.save_graph else ''))
-
- return metrics
diff --git a/object_detection/export_inference_graph.py b/object_detection/export_inference_graph.py
deleted file mode 100644
index 279d1d16..00000000
--- a/object_detection/export_inference_graph.py
+++ /dev/null
@@ -1,119 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-r"""Tool to export an object detection model for inference.
-
-Prepares an object detection tensorflow graph for inference using model
-configuration and an optional trained checkpoint. Outputs inference
-graph, associated checkpoint files, a frozen inference graph and a
-SavedModel (https://tensorflow.github.io/serving/serving_basic.html).
-
-The inference graph contains one of three input nodes depending on the user
-specified option.
- * `image_tensor`: Accepts a uint8 4-D tensor of shape [None, None, None, 3]
- * `encoded_image_string_tensor`: Accepts a 1-D string tensor of shape [None]
- containing encoded PNG or JPEG images. Image resolutions are expected to be
- the same if more than 1 image is provided.
- * `tf_example`: Accepts a 1-D string tensor of shape [None] containing
- serialized TFExample protos. Image resolutions are expected to be the same
- if more than 1 image is provided.
-
-and the following output nodes returned by the model.postprocess(..):
- * `num_detections`: Outputs float32 tensors of the form [batch]
- that specifies the number of valid boxes per image in the batch.
- * `detection_boxes`: Outputs float32 tensors of the form
- [batch, num_boxes, 4] containing detected boxes.
- * `detection_scores`: Outputs float32 tensors of the form
- [batch, num_boxes] containing class scores for the detections.
- * `detection_classes`: Outputs float32 tensors of the form
- [batch, num_boxes] containing classes for the detections.
- * `detection_masks`: Outputs float32 tensors of the form
- [batch, num_boxes, mask_height, mask_width] containing predicted instance
- masks for each box if its present in the dictionary of postprocessed
- tensors returned by the model.
-
-Notes:
- * This tool uses `use_moving_averages` from eval_config to decide which
- weights to freeze.
-
-Example Usage:
---------------
-python export_inference_graph \
- --input_type image_tensor \
- --pipeline_config_path path/to/ssd_inception_v2.config \
- --trained_checkpoint_prefix path/to/model.ckpt \
- --output_directory path/to/exported_model_directory
-
-The expected output would be in the directory
-path/to/exported_model_directory (which is created if it does not exist)
-with contents:
- - graph.pbtxt
- - model.ckpt.data-00000-of-00001
- - model.ckpt.info
- - model.ckpt.meta
- - frozen_inference_graph.pb
- + saved_model (a directory)
-"""
-import tensorflow as tf
-from google.protobuf import text_format
-from object_detection import exporter
-from object_detection.protos import pipeline_pb2
-
-slim = tf.contrib.slim
-flags = tf.app.flags
-
-flags.DEFINE_string('input_type', 'image_tensor', 'Type of input node. Can be '
- 'one of [`image_tensor`, `encoded_image_string_tensor`, '
- '`tf_example`]')
-flags.DEFINE_string('input_shape', None,
- 'If input_type is `image_tensor`, this can explicitly set '
- 'the shape of this input tensor to a fixed size. The '
- 'dimensions are to be provided as a comma-separated list '
- 'of integers. A value of -1 can be used for unknown '
- 'dimensions. If not specified, for an `image_tensor, the '
- 'default shape will be partially specified as '
- '`[None, None, None, 3]`.')
-flags.DEFINE_string('pipeline_config_path', None,
- 'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
- 'file.')
-flags.DEFINE_string('trained_checkpoint_prefix', None,
- 'Path to trained checkpoint, typically of the form '
- 'path/to/model.ckpt')
-flags.DEFINE_string('output_directory', None, 'Path to write outputs.')
-
-tf.app.flags.mark_flag_as_required('pipeline_config_path')
-tf.app.flags.mark_flag_as_required('trained_checkpoint_prefix')
-tf.app.flags.mark_flag_as_required('output_directory')
-FLAGS = flags.FLAGS
-
-
-def main(_):
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- with tf.gfile.GFile(FLAGS.pipeline_config_path, 'r') as f:
- text_format.Merge(f.read(), pipeline_config)
- if FLAGS.input_shape:
- input_shape = [
- int(dim) if dim != '-1' else None
- for dim in FLAGS.input_shape.split(',')
- ]
- else:
- input_shape = None
- exporter.export_inference_graph(FLAGS.input_type, pipeline_config,
- FLAGS.trained_checkpoint_prefix,
- FLAGS.output_directory, input_shape)
-
-
-if __name__ == '__main__':
- tf.app.run()
diff --git a/object_detection/exporter.py b/object_detection/exporter.py
deleted file mode 100644
index 95469e68..00000000
--- a/object_detection/exporter.py
+++ /dev/null
@@ -1,426 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Functions to export object detection inference graph."""
-import logging
-import os
-import tempfile
-import tensorflow as tf
-from tensorflow.core.protobuf import rewriter_config_pb2
-from tensorflow.python import pywrap_tensorflow
-from tensorflow.python.client import session
-from tensorflow.python.framework import graph_util
-from tensorflow.python.platform import gfile
-from tensorflow.python.saved_model import signature_constants
-from tensorflow.python.training import saver as saver_lib
-from object_detection.builders import model_builder
-from object_detection.core import standard_fields as fields
-from object_detection.data_decoders import tf_example_decoder
-
-slim = tf.contrib.slim
-
-
-# TODO: Replace with freeze_graph.freeze_graph_with_def_protos when
-# newer version of Tensorflow becomes more common.
-def freeze_graph_with_def_protos(
- input_graph_def,
- input_saver_def,
- input_checkpoint,
- output_node_names,
- restore_op_name,
- filename_tensor_name,
- clear_devices,
- initializer_nodes,
- optimize_graph=True,
- variable_names_blacklist=''):
- """Converts all variables in a graph and checkpoint into constants."""
- del restore_op_name, filename_tensor_name # Unused by updated loading code.
-
- # 'input_checkpoint' may be a prefix if we're using Saver V2 format
- if not saver_lib.checkpoint_exists(input_checkpoint):
- raise ValueError(
- 'Input checkpoint "' + input_checkpoint + '" does not exist!')
-
- if not output_node_names:
- raise ValueError(
- 'You must supply the name of a node to --output_node_names.')
-
- # Remove all the explicit device specifications for this node. This helps to
- # make the graph more portable.
- if clear_devices:
- for node in input_graph_def.node:
- node.device = ''
-
- with tf.Graph().as_default():
- tf.import_graph_def(input_graph_def, name='')
-
- if optimize_graph:
- logging.info('Graph Rewriter optimizations enabled')
- rewrite_options = rewriter_config_pb2.RewriterConfig()
- rewrite_options.optimizers.append('pruning')
- rewrite_options.optimizers.append('constfold')
- rewrite_options.optimizers.append('layout')
- graph_options = tf.GraphOptions(
- rewrite_options=rewrite_options, infer_shapes=True)
- else:
- logging.info('Graph Rewriter optimizations disabled')
- graph_options = tf.GraphOptions()
- config = tf.ConfigProto(graph_options=graph_options)
- with session.Session(config=config) as sess:
- if input_saver_def:
- saver = saver_lib.Saver(saver_def=input_saver_def)
- saver.restore(sess, input_checkpoint)
- else:
- var_list = {}
- reader = pywrap_tensorflow.NewCheckpointReader(input_checkpoint)
- var_to_shape_map = reader.get_variable_to_shape_map()
- for key in var_to_shape_map:
- try:
- tensor = sess.graph.get_tensor_by_name(key + ':0')
- except KeyError:
- # This tensor doesn't exist in the graph (for example it's
- # 'global_step' or a similar housekeeping element) so skip it.
- continue
- var_list[key] = tensor
- saver = saver_lib.Saver(var_list=var_list)
- saver.restore(sess, input_checkpoint)
- if initializer_nodes:
- sess.run(initializer_nodes)
-
- variable_names_blacklist = (variable_names_blacklist.split(',') if
- variable_names_blacklist else None)
- output_graph_def = graph_util.convert_variables_to_constants(
- sess,
- input_graph_def,
- output_node_names.split(','),
- variable_names_blacklist=variable_names_blacklist)
-
- return output_graph_def
-
-
-def replace_variable_values_with_moving_averages(graph,
- current_checkpoint_file,
- new_checkpoint_file):
- """Replaces variable values in the checkpoint with their moving averages.
-
- If the current checkpoint has shadow variables maintaining moving averages of
- the variables defined in the graph, this function generates a new checkpoint
- where the variables contain the values of their moving averages.
-
- Args:
- graph: a tf.Graph object.
- current_checkpoint_file: a checkpoint containing both original variables and
- their moving averages.
- new_checkpoint_file: file path to write a new checkpoint.
- """
- with graph.as_default():
- variable_averages = tf.train.ExponentialMovingAverage(0.0)
- ema_variables_to_restore = variable_averages.variables_to_restore()
- with tf.Session() as sess:
- read_saver = tf.train.Saver(ema_variables_to_restore)
- read_saver.restore(sess, current_checkpoint_file)
- write_saver = tf.train.Saver()
- write_saver.save(sess, new_checkpoint_file)
-
-
-def _image_tensor_input_placeholder(input_shape=None):
- """Returns input placeholder and a 4-D uint8 image tensor."""
- if input_shape is None:
- input_shape = (None, None, None, 3)
- input_tensor = tf.placeholder(
- dtype=tf.uint8, shape=input_shape, name='image_tensor')
- return input_tensor, input_tensor
-
-
-def _tf_example_input_placeholder():
- """Returns input that accepts a batch of strings with tf examples.
-
- Returns:
- a tuple of input placeholder and the output decoded images.
- """
- batch_tf_example_placeholder = tf.placeholder(
- tf.string, shape=[None], name='tf_example')
- def decode(tf_example_string_tensor):
- tensor_dict = tf_example_decoder.TfExampleDecoder().decode(
- tf_example_string_tensor)
- image_tensor = tensor_dict[fields.InputDataFields.image]
- return image_tensor
- return (batch_tf_example_placeholder,
- tf.map_fn(decode,
- elems=batch_tf_example_placeholder,
- dtype=tf.uint8,
- parallel_iterations=32,
- back_prop=False))
-
-
-def _encoded_image_string_tensor_input_placeholder():
- """Returns input that accepts a batch of PNG or JPEG strings.
-
- Returns:
- a tuple of input placeholder and the output decoded images.
- """
- batch_image_str_placeholder = tf.placeholder(
- dtype=tf.string,
- shape=[None],
- name='encoded_image_string_tensor')
- def decode(encoded_image_string_tensor):
- image_tensor = tf.image.decode_image(encoded_image_string_tensor,
- channels=3)
- image_tensor.set_shape((None, None, 3))
- return image_tensor
- return (batch_image_str_placeholder,
- tf.map_fn(
- decode,
- elems=batch_image_str_placeholder,
- dtype=tf.uint8,
- parallel_iterations=32,
- back_prop=False))
-
-
-input_placeholder_fn_map = {
- 'image_tensor': _image_tensor_input_placeholder,
- 'encoded_image_string_tensor':
- _encoded_image_string_tensor_input_placeholder,
- 'tf_example': _tf_example_input_placeholder,
-}
-
-
-def _add_output_tensor_nodes(postprocessed_tensors,
- output_collection_name='inference_op'):
- """Adds output nodes for detection boxes and scores.
-
- Adds the following nodes for output tensors -
- * num_detections: float32 tensor of shape [batch_size].
- * detection_boxes: float32 tensor of shape [batch_size, num_boxes, 4]
- containing detected boxes.
- * detection_scores: float32 tensor of shape [batch_size, num_boxes]
- containing scores for the detected boxes.
- * detection_classes: float32 tensor of shape [batch_size, num_boxes]
- containing class predictions for the detected boxes.
- * detection_masks: (Optional) float32 tensor of shape
- [batch_size, num_boxes, mask_height, mask_width] containing masks for each
- detection box.
-
- Args:
- postprocessed_tensors: a dictionary containing the following fields
- 'detection_boxes': [batch, max_detections, 4]
- 'detection_scores': [batch, max_detections]
- 'detection_classes': [batch, max_detections]
- 'detection_masks': [batch, max_detections, mask_height, mask_width]
- (optional).
- 'num_detections': [batch]
- output_collection_name: Name of collection to add output tensors to.
-
- Returns:
- A tensor dict containing the added output tensor nodes.
- """
- label_id_offset = 1
- boxes = postprocessed_tensors.get('detection_boxes')
- scores = postprocessed_tensors.get('detection_scores')
- classes = postprocessed_tensors.get('detection_classes') + label_id_offset
- masks = postprocessed_tensors.get('detection_masks')
- num_detections = postprocessed_tensors.get('num_detections')
- outputs = {}
- outputs['detection_boxes'] = tf.identity(boxes, name='detection_boxes')
- outputs['detection_scores'] = tf.identity(scores, name='detection_scores')
- outputs['detection_classes'] = tf.identity(classes, name='detection_classes')
- outputs['num_detections'] = tf.identity(num_detections, name='num_detections')
- if masks is not None:
- outputs['detection_masks'] = tf.identity(masks, name='detection_masks')
- for output_key in outputs:
- tf.add_to_collection(output_collection_name, outputs[output_key])
- if masks is not None:
- tf.add_to_collection(output_collection_name, outputs['detection_masks'])
- return outputs
-
-
-def _write_frozen_graph(frozen_graph_path, frozen_graph_def):
- """Writes frozen graph to disk.
-
- Args:
- frozen_graph_path: Path to write inference graph.
- frozen_graph_def: tf.GraphDef holding frozen graph.
- """
- with gfile.GFile(frozen_graph_path, 'wb') as f:
- f.write(frozen_graph_def.SerializeToString())
- logging.info('%d ops in the final graph.', len(frozen_graph_def.node))
-
-
-def _write_saved_model(saved_model_path,
- frozen_graph_def,
- inputs,
- outputs):
- """Writes SavedModel to disk.
-
- If checkpoint_path is not None bakes the weights into the graph thereby
- eliminating the need of checkpoint files during inference. If the model
- was trained with moving averages, setting use_moving_averages to true
- restores the moving averages, otherwise the original set of variables
- is restored.
-
- Args:
- saved_model_path: Path to write SavedModel.
- frozen_graph_def: tf.GraphDef holding frozen graph.
- inputs: The input image tensor to use for detection.
- outputs: A tensor dictionary containing the outputs of a DetectionModel.
- """
- with tf.Graph().as_default():
- with session.Session() as sess:
-
- tf.import_graph_def(frozen_graph_def, name='')
-
- builder = tf.saved_model.builder.SavedModelBuilder(saved_model_path)
-
- tensor_info_inputs = {
- 'inputs': tf.saved_model.utils.build_tensor_info(inputs)}
- tensor_info_outputs = {}
- for k, v in outputs.items():
- tensor_info_outputs[k] = tf.saved_model.utils.build_tensor_info(v)
-
- detection_signature = (
- tf.saved_model.signature_def_utils.build_signature_def(
- inputs=tensor_info_inputs,
- outputs=tensor_info_outputs,
- method_name=signature_constants.PREDICT_METHOD_NAME))
-
- builder.add_meta_graph_and_variables(
- sess, [tf.saved_model.tag_constants.SERVING],
- signature_def_map={
- signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
- detection_signature,
- },
- )
- builder.save()
-
-
-def _write_graph_and_checkpoint(inference_graph_def,
- model_path,
- input_saver_def,
- trained_checkpoint_prefix):
- for node in inference_graph_def.node:
- node.device = ''
- with tf.Graph().as_default():
- tf.import_graph_def(inference_graph_def, name='')
- with session.Session() as sess:
- saver = saver_lib.Saver(saver_def=input_saver_def,
- save_relative_paths=True)
- saver.restore(sess, trained_checkpoint_prefix)
- saver.save(sess, model_path)
-
-
-def _export_inference_graph(input_type,
- detection_model,
- use_moving_averages,
- trained_checkpoint_prefix,
- output_directory,
- additional_output_tensor_names=None,
- input_shape=None,
- optimize_graph=True,
- output_collection_name='inference_op'):
- """Export helper."""
- tf.gfile.MakeDirs(output_directory)
- frozen_graph_path = os.path.join(output_directory,
- 'frozen_inference_graph.pb')
- saved_model_path = os.path.join(output_directory, 'saved_model')
- model_path = os.path.join(output_directory, 'model.ckpt')
-
- if input_type not in input_placeholder_fn_map:
- raise ValueError('Unknown input type: {}'.format(input_type))
- placeholder_args = {}
- if input_shape is not None:
- if input_type != 'image_tensor':
- raise ValueError('Can only specify input shape for `image_tensor` '
- 'inputs.')
- placeholder_args['input_shape'] = input_shape
- placeholder_tensor, input_tensors = input_placeholder_fn_map[input_type](
- **placeholder_args)
- inputs = tf.to_float(input_tensors)
- preprocessed_inputs = detection_model.preprocess(inputs)
- output_tensors = detection_model.predict(preprocessed_inputs)
- postprocessed_tensors = detection_model.postprocess(output_tensors)
- outputs = _add_output_tensor_nodes(postprocessed_tensors,
- output_collection_name)
- # Add global step to the graph.
- slim.get_or_create_global_step()
-
- if use_moving_averages:
- temp_checkpoint_file = tempfile.NamedTemporaryFile()
- replace_variable_values_with_moving_averages(
- tf.get_default_graph(), trained_checkpoint_prefix,
- temp_checkpoint_file.name)
- checkpoint_to_use = temp_checkpoint_file.name
- else:
- checkpoint_to_use = trained_checkpoint_prefix
-
- saver = tf.train.Saver()
- input_saver_def = saver.as_saver_def()
-
- _write_graph_and_checkpoint(
- inference_graph_def=tf.get_default_graph().as_graph_def(),
- model_path=model_path,
- input_saver_def=input_saver_def,
- trained_checkpoint_prefix=checkpoint_to_use)
-
- if additional_output_tensor_names is not None:
- output_node_names = ','.join(outputs.keys()+additional_output_tensor_names)
- else:
- output_node_names = ','.join(outputs.keys())
-
- frozen_graph_def = freeze_graph_with_def_protos(
- input_graph_def=tf.get_default_graph().as_graph_def(),
- input_saver_def=input_saver_def,
- input_checkpoint=checkpoint_to_use,
- output_node_names=output_node_names,
- restore_op_name='save/restore_all',
- filename_tensor_name='save/Const:0',
- clear_devices=True,
- optimize_graph=optimize_graph,
- initializer_nodes='')
- _write_frozen_graph(frozen_graph_path, frozen_graph_def)
- _write_saved_model(saved_model_path, frozen_graph_def,
- placeholder_tensor, outputs)
-
-
-def export_inference_graph(input_type,
- pipeline_config,
- trained_checkpoint_prefix,
- output_directory,
- input_shape=None,
- optimize_graph=True,
- output_collection_name='inference_op',
- additional_output_tensor_names=None):
- """Exports inference graph for the model specified in the pipeline config.
-
- Args:
- input_type: Type of input for the graph. Can be one of [`image_tensor`,
- `tf_example`].
- pipeline_config: pipeline_pb2.TrainAndEvalPipelineConfig proto.
- trained_checkpoint_prefix: Path to the trained checkpoint file.
- output_directory: Path to write outputs.
- input_shape: Sets a fixed shape for an `image_tensor` input. If not
- specified, will default to [None, None, None, 3].
- optimize_graph: Whether to optimize graph using Grappler.
- output_collection_name: Name of collection to add output tensors to.
- If None, does not add output tensors to a collection.
- additional_output_tensor_names: list of additional output
- tensors to include in the frozen graph.
- """
- detection_model = model_builder.build(pipeline_config.model,
- is_training=False)
- _export_inference_graph(input_type, detection_model,
- pipeline_config.eval_config.use_moving_averages,
- trained_checkpoint_prefix,
- output_directory, additional_output_tensor_names,
- input_shape, optimize_graph, output_collection_name)
diff --git a/object_detection/exporter_test.py b/object_detection/exporter_test.py
deleted file mode 100644
index 0a999005..00000000
--- a/object_detection/exporter_test.py
+++ /dev/null
@@ -1,604 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.export_inference_graph."""
-import os
-import numpy as np
-import six
-import tensorflow as tf
-from object_detection import exporter
-from object_detection.builders import model_builder
-from object_detection.core import model
-from object_detection.protos import pipeline_pb2
-
-if six.PY2:
- import mock # pylint: disable=g-import-not-at-top
-else:
- from unittest import mock # pylint: disable=g-import-not-at-top
-
-slim = tf.contrib.slim
-
-
-class FakeModel(model.DetectionModel):
-
- def __init__(self, add_detection_masks=False):
- self._add_detection_masks = add_detection_masks
-
- def preprocess(self, inputs):
- return tf.identity(inputs)
-
- def predict(self, preprocessed_inputs):
- return {'image': tf.layers.conv2d(preprocessed_inputs, 3, 1)}
-
- def postprocess(self, prediction_dict):
- with tf.control_dependencies(prediction_dict.values()):
- postprocessed_tensors = {
- 'detection_boxes': tf.constant([[[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 0.8, 0.8]],
- [[0.5, 0.5, 1.0, 1.0],
- [0.0, 0.0, 0.0, 0.0]]], tf.float32),
- 'detection_scores': tf.constant([[0.7, 0.6],
- [0.9, 0.0]], tf.float32),
- 'detection_classes': tf.constant([[0, 1],
- [1, 0]], tf.float32),
- 'num_detections': tf.constant([2, 1], tf.float32)
- }
- if self._add_detection_masks:
- postprocessed_tensors['detection_masks'] = tf.constant(
- np.arange(64).reshape([2, 2, 4, 4]), tf.float32)
- return postprocessed_tensors
-
- def restore_map(self, checkpoint_path, from_detection_checkpoint):
- pass
-
- def loss(self, prediction_dict):
- pass
-
-
-class ExportInferenceGraphTest(tf.test.TestCase):
-
- def _save_checkpoint_from_mock_model(self, checkpoint_path,
- use_moving_averages):
- g = tf.Graph()
- with g.as_default():
- mock_model = FakeModel()
- preprocessed_inputs = mock_model.preprocess(
- tf.placeholder(tf.float32, shape=[None, None, None, 3]))
- predictions = mock_model.predict(preprocessed_inputs)
- mock_model.postprocess(predictions)
- if use_moving_averages:
- tf.train.ExponentialMovingAverage(0.0).apply()
- slim.get_or_create_global_step()
- saver = tf.train.Saver()
- init = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init)
- saver.save(sess, checkpoint_path)
-
- def _load_inference_graph(self, inference_graph_path):
- od_graph = tf.Graph()
- with od_graph.as_default():
- od_graph_def = tf.GraphDef()
- with tf.gfile.GFile(inference_graph_path) as fid:
- serialized_graph = fid.read()
- od_graph_def.ParseFromString(serialized_graph)
- tf.import_graph_def(od_graph_def, name='')
- return od_graph
-
- def _create_tf_example(self, image_array):
- with self.test_session():
- encoded_image = tf.image.encode_jpeg(tf.constant(image_array)).eval()
- def _bytes_feature(value):
- return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
- example = tf.train.Example(features=tf.train.Features(feature={
- 'image/encoded': _bytes_feature(encoded_image),
- 'image/format': _bytes_feature('jpg'),
- 'image/source_id': _bytes_feature('image_id')
- })).SerializeToString()
- return example
-
- def test_export_graph_with_image_tensor_input(self):
- tmp_dir = self.get_temp_dir()
- trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt')
- self._save_checkpoint_from_mock_model(trained_checkpoint_prefix,
- use_moving_averages=False)
- with mock.patch.object(
- model_builder, 'build', autospec=True) as mock_builder:
- mock_builder.return_value = FakeModel()
- output_directory = os.path.join(tmp_dir, 'output')
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- pipeline_config.eval_config.use_moving_averages = False
- exporter.export_inference_graph(
- input_type='image_tensor',
- pipeline_config=pipeline_config,
- trained_checkpoint_prefix=trained_checkpoint_prefix,
- output_directory=output_directory)
- self.assertTrue(os.path.exists(os.path.join(
- output_directory, 'saved_model', 'saved_model.pb')))
-
- def test_export_graph_with_fixed_size_image_tensor_input(self):
- input_shape = [1, 320, 320, 3]
-
- tmp_dir = self.get_temp_dir()
- trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt')
- self._save_checkpoint_from_mock_model(
- trained_checkpoint_prefix, use_moving_averages=False)
- with mock.patch.object(
- model_builder, 'build', autospec=True) as mock_builder:
- mock_builder.return_value = FakeModel()
- output_directory = os.path.join(tmp_dir, 'output')
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- pipeline_config.eval_config.use_moving_averages = False
- exporter.export_inference_graph(
- input_type='image_tensor',
- pipeline_config=pipeline_config,
- trained_checkpoint_prefix=trained_checkpoint_prefix,
- output_directory=output_directory,
- input_shape=input_shape)
- saved_model_path = os.path.join(output_directory, 'saved_model')
- self.assertTrue(
- os.path.exists(os.path.join(saved_model_path, 'saved_model.pb')))
-
- with tf.Graph().as_default() as od_graph:
- with self.test_session(graph=od_graph) as sess:
- meta_graph = tf.saved_model.loader.load(
- sess, [tf.saved_model.tag_constants.SERVING], saved_model_path)
- signature = meta_graph.signature_def['serving_default']
- input_tensor_name = signature.inputs['inputs'].name
- image_tensor = od_graph.get_tensor_by_name(input_tensor_name)
- self.assertSequenceEqual(image_tensor.get_shape().as_list(),
- input_shape)
-
- def test_export_graph_with_tf_example_input(self):
- tmp_dir = self.get_temp_dir()
- trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt')
- self._save_checkpoint_from_mock_model(trained_checkpoint_prefix,
- use_moving_averages=False)
- with mock.patch.object(
- model_builder, 'build', autospec=True) as mock_builder:
- mock_builder.return_value = FakeModel()
- output_directory = os.path.join(tmp_dir, 'output')
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- pipeline_config.eval_config.use_moving_averages = False
- exporter.export_inference_graph(
- input_type='tf_example',
- pipeline_config=pipeline_config,
- trained_checkpoint_prefix=trained_checkpoint_prefix,
- output_directory=output_directory)
- self.assertTrue(os.path.exists(os.path.join(
- output_directory, 'saved_model', 'saved_model.pb')))
-
- def test_export_graph_with_encoded_image_string_input(self):
- tmp_dir = self.get_temp_dir()
- trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt')
- self._save_checkpoint_from_mock_model(trained_checkpoint_prefix,
- use_moving_averages=False)
- with mock.patch.object(
- model_builder, 'build', autospec=True) as mock_builder:
- mock_builder.return_value = FakeModel()
- output_directory = os.path.join(tmp_dir, 'output')
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- pipeline_config.eval_config.use_moving_averages = False
- exporter.export_inference_graph(
- input_type='encoded_image_string_tensor',
- pipeline_config=pipeline_config,
- trained_checkpoint_prefix=trained_checkpoint_prefix,
- output_directory=output_directory)
- self.assertTrue(os.path.exists(os.path.join(
- output_directory, 'saved_model', 'saved_model.pb')))
-
- def _get_variables_in_checkpoint(self, checkpoint_file):
- return set([
- var_name
- for var_name, _ in tf.train.list_variables(checkpoint_file)])
-
- def test_replace_variable_values_with_moving_averages(self):
- tmp_dir = self.get_temp_dir()
- trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt')
- new_checkpoint_prefix = os.path.join(tmp_dir, 'new.ckpt')
- self._save_checkpoint_from_mock_model(trained_checkpoint_prefix,
- use_moving_averages=True)
- graph = tf.Graph()
- with graph.as_default():
- fake_model = FakeModel()
- preprocessed_inputs = fake_model.preprocess(
- tf.placeholder(dtype=tf.float32, shape=[None, None, None, 3]))
- predictions = fake_model.predict(preprocessed_inputs)
- fake_model.postprocess(predictions)
- exporter.replace_variable_values_with_moving_averages(
- graph, trained_checkpoint_prefix, new_checkpoint_prefix)
-
- expected_variables = set(['conv2d/bias', 'conv2d/kernel'])
- variables_in_old_ckpt = self._get_variables_in_checkpoint(
- trained_checkpoint_prefix)
- self.assertIn('conv2d/bias/ExponentialMovingAverage',
- variables_in_old_ckpt)
- self.assertIn('conv2d/kernel/ExponentialMovingAverage',
- variables_in_old_ckpt)
- variables_in_new_ckpt = self._get_variables_in_checkpoint(
- new_checkpoint_prefix)
- self.assertTrue(expected_variables.issubset(variables_in_new_ckpt))
- self.assertNotIn('conv2d/bias/ExponentialMovingAverage',
- variables_in_new_ckpt)
- self.assertNotIn('conv2d/kernel/ExponentialMovingAverage',
- variables_in_new_ckpt)
-
- def test_export_graph_with_moving_averages(self):
- tmp_dir = self.get_temp_dir()
- trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt')
- self._save_checkpoint_from_mock_model(trained_checkpoint_prefix,
- use_moving_averages=True)
- output_directory = os.path.join(tmp_dir, 'output')
- with mock.patch.object(
- model_builder, 'build', autospec=True) as mock_builder:
- mock_builder.return_value = FakeModel()
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- pipeline_config.eval_config.use_moving_averages = True
- exporter.export_inference_graph(
- input_type='image_tensor',
- pipeline_config=pipeline_config,
- trained_checkpoint_prefix=trained_checkpoint_prefix,
- output_directory=output_directory)
- self.assertTrue(os.path.exists(os.path.join(
- output_directory, 'saved_model', 'saved_model.pb')))
- expected_variables = set(['conv2d/bias', 'conv2d/kernel', 'global_step'])
- actual_variables = set(
- [var_name for var_name, _ in tf.train.list_variables(output_directory)])
- self.assertTrue(expected_variables.issubset(actual_variables))
-
- def test_export_model_with_all_output_nodes(self):
- tmp_dir = self.get_temp_dir()
- trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt')
- self._save_checkpoint_from_mock_model(trained_checkpoint_prefix,
- use_moving_averages=True)
- output_directory = os.path.join(tmp_dir, 'output')
- inference_graph_path = os.path.join(output_directory,
- 'frozen_inference_graph.pb')
- with mock.patch.object(
- model_builder, 'build', autospec=True) as mock_builder:
- mock_builder.return_value = FakeModel(add_detection_masks=True)
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- exporter.export_inference_graph(
- input_type='image_tensor',
- pipeline_config=pipeline_config,
- trained_checkpoint_prefix=trained_checkpoint_prefix,
- output_directory=output_directory)
- inference_graph = self._load_inference_graph(inference_graph_path)
- with self.test_session(graph=inference_graph):
- inference_graph.get_tensor_by_name('image_tensor:0')
- inference_graph.get_tensor_by_name('detection_boxes:0')
- inference_graph.get_tensor_by_name('detection_scores:0')
- inference_graph.get_tensor_by_name('detection_classes:0')
- inference_graph.get_tensor_by_name('detection_masks:0')
- inference_graph.get_tensor_by_name('num_detections:0')
-
- def test_export_model_with_detection_only_nodes(self):
- tmp_dir = self.get_temp_dir()
- trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt')
- self._save_checkpoint_from_mock_model(trained_checkpoint_prefix,
- use_moving_averages=True)
- output_directory = os.path.join(tmp_dir, 'output')
- inference_graph_path = os.path.join(output_directory,
- 'frozen_inference_graph.pb')
- with mock.patch.object(
- model_builder, 'build', autospec=True) as mock_builder:
- mock_builder.return_value = FakeModel(add_detection_masks=False)
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- exporter.export_inference_graph(
- input_type='image_tensor',
- pipeline_config=pipeline_config,
- trained_checkpoint_prefix=trained_checkpoint_prefix,
- output_directory=output_directory)
- inference_graph = self._load_inference_graph(inference_graph_path)
- with self.test_session(graph=inference_graph):
- inference_graph.get_tensor_by_name('image_tensor:0')
- inference_graph.get_tensor_by_name('detection_boxes:0')
- inference_graph.get_tensor_by_name('detection_scores:0')
- inference_graph.get_tensor_by_name('detection_classes:0')
- inference_graph.get_tensor_by_name('num_detections:0')
- with self.assertRaises(KeyError):
- inference_graph.get_tensor_by_name('detection_masks:0')
-
- def test_export_and_run_inference_with_image_tensor(self):
- tmp_dir = self.get_temp_dir()
- trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt')
- self._save_checkpoint_from_mock_model(trained_checkpoint_prefix,
- use_moving_averages=True)
- output_directory = os.path.join(tmp_dir, 'output')
- inference_graph_path = os.path.join(output_directory,
- 'frozen_inference_graph.pb')
- with mock.patch.object(
- model_builder, 'build', autospec=True) as mock_builder:
- mock_builder.return_value = FakeModel(add_detection_masks=True)
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- pipeline_config.eval_config.use_moving_averages = False
- exporter.export_inference_graph(
- input_type='image_tensor',
- pipeline_config=pipeline_config,
- trained_checkpoint_prefix=trained_checkpoint_prefix,
- output_directory=output_directory)
-
- inference_graph = self._load_inference_graph(inference_graph_path)
- with self.test_session(graph=inference_graph) as sess:
- image_tensor = inference_graph.get_tensor_by_name('image_tensor:0')
- boxes = inference_graph.get_tensor_by_name('detection_boxes:0')
- scores = inference_graph.get_tensor_by_name('detection_scores:0')
- classes = inference_graph.get_tensor_by_name('detection_classes:0')
- masks = inference_graph.get_tensor_by_name('detection_masks:0')
- num_detections = inference_graph.get_tensor_by_name('num_detections:0')
- (boxes_np, scores_np, classes_np, masks_np, num_detections_np) = sess.run(
- [boxes, scores, classes, masks, num_detections],
- feed_dict={image_tensor: np.ones((2, 4, 4, 3)).astype(np.uint8)})
- self.assertAllClose(boxes_np, [[[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 0.8, 0.8]],
- [[0.5, 0.5, 1.0, 1.0],
- [0.0, 0.0, 0.0, 0.0]]])
- self.assertAllClose(scores_np, [[0.7, 0.6],
- [0.9, 0.0]])
- self.assertAllClose(classes_np, [[1, 2],
- [2, 1]])
- self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4]))
- self.assertAllClose(num_detections_np, [2, 1])
-
- def _create_encoded_image_string(self, image_array_np, encoding_format):
- od_graph = tf.Graph()
- with od_graph.as_default():
- if encoding_format == 'jpg':
- encoded_string = tf.image.encode_jpeg(image_array_np)
- elif encoding_format == 'png':
- encoded_string = tf.image.encode_png(image_array_np)
- else:
- raise ValueError('Supports only the following formats: `jpg`, `png`')
- with self.test_session(graph=od_graph):
- return encoded_string.eval()
-
- def test_export_and_run_inference_with_encoded_image_string_tensor(self):
- tmp_dir = self.get_temp_dir()
- trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt')
- self._save_checkpoint_from_mock_model(trained_checkpoint_prefix,
- use_moving_averages=True)
- output_directory = os.path.join(tmp_dir, 'output')
- inference_graph_path = os.path.join(output_directory,
- 'frozen_inference_graph.pb')
- with mock.patch.object(
- model_builder, 'build', autospec=True) as mock_builder:
- mock_builder.return_value = FakeModel(add_detection_masks=True)
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- pipeline_config.eval_config.use_moving_averages = False
- exporter.export_inference_graph(
- input_type='encoded_image_string_tensor',
- pipeline_config=pipeline_config,
- trained_checkpoint_prefix=trained_checkpoint_prefix,
- output_directory=output_directory)
-
- inference_graph = self._load_inference_graph(inference_graph_path)
- jpg_image_str = self._create_encoded_image_string(
- np.ones((4, 4, 3)).astype(np.uint8), 'jpg')
- png_image_str = self._create_encoded_image_string(
- np.ones((4, 4, 3)).astype(np.uint8), 'png')
- with self.test_session(graph=inference_graph) as sess:
- image_str_tensor = inference_graph.get_tensor_by_name(
- 'encoded_image_string_tensor:0')
- boxes = inference_graph.get_tensor_by_name('detection_boxes:0')
- scores = inference_graph.get_tensor_by_name('detection_scores:0')
- classes = inference_graph.get_tensor_by_name('detection_classes:0')
- masks = inference_graph.get_tensor_by_name('detection_masks:0')
- num_detections = inference_graph.get_tensor_by_name('num_detections:0')
- for image_str in [jpg_image_str, png_image_str]:
- image_str_batch_np = np.hstack([image_str]* 2)
- (boxes_np, scores_np, classes_np, masks_np,
- num_detections_np) = sess.run(
- [boxes, scores, classes, masks, num_detections],
- feed_dict={image_str_tensor: image_str_batch_np})
- self.assertAllClose(boxes_np, [[[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 0.8, 0.8]],
- [[0.5, 0.5, 1.0, 1.0],
- [0.0, 0.0, 0.0, 0.0]]])
- self.assertAllClose(scores_np, [[0.7, 0.6],
- [0.9, 0.0]])
- self.assertAllClose(classes_np, [[1, 2],
- [2, 1]])
- self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4]))
- self.assertAllClose(num_detections_np, [2, 1])
-
- def test_raise_runtime_error_on_images_with_different_sizes(self):
- tmp_dir = self.get_temp_dir()
- trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt')
- self._save_checkpoint_from_mock_model(trained_checkpoint_prefix,
- use_moving_averages=True)
- output_directory = os.path.join(tmp_dir, 'output')
- inference_graph_path = os.path.join(output_directory,
- 'frozen_inference_graph.pb')
- with mock.patch.object(
- model_builder, 'build', autospec=True) as mock_builder:
- mock_builder.return_value = FakeModel(add_detection_masks=True)
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- pipeline_config.eval_config.use_moving_averages = False
- exporter.export_inference_graph(
- input_type='encoded_image_string_tensor',
- pipeline_config=pipeline_config,
- trained_checkpoint_prefix=trained_checkpoint_prefix,
- output_directory=output_directory)
-
- inference_graph = self._load_inference_graph(inference_graph_path)
- large_image = self._create_encoded_image_string(
- np.ones((4, 4, 3)).astype(np.uint8), 'jpg')
- small_image = self._create_encoded_image_string(
- np.ones((2, 2, 3)).astype(np.uint8), 'jpg')
-
- image_str_batch_np = np.hstack([large_image, small_image])
- with self.test_session(graph=inference_graph) as sess:
- image_str_tensor = inference_graph.get_tensor_by_name(
- 'encoded_image_string_tensor:0')
- boxes = inference_graph.get_tensor_by_name('detection_boxes:0')
- scores = inference_graph.get_tensor_by_name('detection_scores:0')
- classes = inference_graph.get_tensor_by_name('detection_classes:0')
- masks = inference_graph.get_tensor_by_name('detection_masks:0')
- num_detections = inference_graph.get_tensor_by_name('num_detections:0')
- with self.assertRaisesRegexp(tf.errors.InvalidArgumentError,
- '^TensorArray has inconsistent shapes.'):
- sess.run([boxes, scores, classes, masks, num_detections],
- feed_dict={image_str_tensor: image_str_batch_np})
-
- def test_export_and_run_inference_with_tf_example(self):
- tmp_dir = self.get_temp_dir()
- trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt')
- self._save_checkpoint_from_mock_model(trained_checkpoint_prefix,
- use_moving_averages=True)
- output_directory = os.path.join(tmp_dir, 'output')
- inference_graph_path = os.path.join(output_directory,
- 'frozen_inference_graph.pb')
- with mock.patch.object(
- model_builder, 'build', autospec=True) as mock_builder:
- mock_builder.return_value = FakeModel(add_detection_masks=True)
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- pipeline_config.eval_config.use_moving_averages = False
- exporter.export_inference_graph(
- input_type='tf_example',
- pipeline_config=pipeline_config,
- trained_checkpoint_prefix=trained_checkpoint_prefix,
- output_directory=output_directory)
-
- inference_graph = self._load_inference_graph(inference_graph_path)
- tf_example_np = np.expand_dims(self._create_tf_example(
- np.ones((4, 4, 3)).astype(np.uint8)), axis=0)
- with self.test_session(graph=inference_graph) as sess:
- tf_example = inference_graph.get_tensor_by_name('tf_example:0')
- boxes = inference_graph.get_tensor_by_name('detection_boxes:0')
- scores = inference_graph.get_tensor_by_name('detection_scores:0')
- classes = inference_graph.get_tensor_by_name('detection_classes:0')
- masks = inference_graph.get_tensor_by_name('detection_masks:0')
- num_detections = inference_graph.get_tensor_by_name('num_detections:0')
- (boxes_np, scores_np, classes_np, masks_np, num_detections_np) = sess.run(
- [boxes, scores, classes, masks, num_detections],
- feed_dict={tf_example: tf_example_np})
- self.assertAllClose(boxes_np, [[[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 0.8, 0.8]],
- [[0.5, 0.5, 1.0, 1.0],
- [0.0, 0.0, 0.0, 0.0]]])
- self.assertAllClose(scores_np, [[0.7, 0.6],
- [0.9, 0.0]])
- self.assertAllClose(classes_np, [[1, 2],
- [2, 1]])
- self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4]))
- self.assertAllClose(num_detections_np, [2, 1])
-
- def test_export_saved_model_and_run_inference(self):
- tmp_dir = self.get_temp_dir()
- trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt')
- self._save_checkpoint_from_mock_model(trained_checkpoint_prefix,
- use_moving_averages=False)
- output_directory = os.path.join(tmp_dir, 'output')
- saved_model_path = os.path.join(output_directory, 'saved_model')
-
- with mock.patch.object(
- model_builder, 'build', autospec=True) as mock_builder:
- mock_builder.return_value = FakeModel(add_detection_masks=True)
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- pipeline_config.eval_config.use_moving_averages = False
- exporter.export_inference_graph(
- input_type='tf_example',
- pipeline_config=pipeline_config,
- trained_checkpoint_prefix=trained_checkpoint_prefix,
- output_directory=output_directory)
-
- tf_example_np = np.hstack([self._create_tf_example(
- np.ones((4, 4, 3)).astype(np.uint8))] * 2)
- with tf.Graph().as_default() as od_graph:
- with self.test_session(graph=od_graph) as sess:
- meta_graph = tf.saved_model.loader.load(
- sess, [tf.saved_model.tag_constants.SERVING], saved_model_path)
-
- signature = meta_graph.signature_def['serving_default']
- input_tensor_name = signature.inputs['inputs'].name
- tf_example = od_graph.get_tensor_by_name(input_tensor_name)
-
- boxes = od_graph.get_tensor_by_name(
- signature.outputs['detection_boxes'].name)
- scores = od_graph.get_tensor_by_name(
- signature.outputs['detection_scores'].name)
- classes = od_graph.get_tensor_by_name(
- signature.outputs['detection_classes'].name)
- masks = od_graph.get_tensor_by_name(
- signature.outputs['detection_masks'].name)
- num_detections = od_graph.get_tensor_by_name(
- signature.outputs['num_detections'].name)
-
- (boxes_np, scores_np, classes_np, masks_np,
- num_detections_np) = sess.run(
- [boxes, scores, classes, masks, num_detections],
- feed_dict={tf_example: tf_example_np})
- self.assertAllClose(boxes_np, [[[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 0.8, 0.8]],
- [[0.5, 0.5, 1.0, 1.0],
- [0.0, 0.0, 0.0, 0.0]]])
- self.assertAllClose(scores_np, [[0.7, 0.6],
- [0.9, 0.0]])
- self.assertAllClose(classes_np, [[1, 2],
- [2, 1]])
- self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4]))
- self.assertAllClose(num_detections_np, [2, 1])
-
- def test_export_checkpoint_and_run_inference(self):
- tmp_dir = self.get_temp_dir()
- trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt')
- self._save_checkpoint_from_mock_model(trained_checkpoint_prefix,
- use_moving_averages=False)
- output_directory = os.path.join(tmp_dir, 'output')
- model_path = os.path.join(output_directory, 'model.ckpt')
- meta_graph_path = model_path + '.meta'
-
- with mock.patch.object(
- model_builder, 'build', autospec=True) as mock_builder:
- mock_builder.return_value = FakeModel(add_detection_masks=True)
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- pipeline_config.eval_config.use_moving_averages = False
- exporter.export_inference_graph(
- input_type='tf_example',
- pipeline_config=pipeline_config,
- trained_checkpoint_prefix=trained_checkpoint_prefix,
- output_directory=output_directory)
-
- tf_example_np = np.hstack([self._create_tf_example(
- np.ones((4, 4, 3)).astype(np.uint8))] * 2)
- with tf.Graph().as_default() as od_graph:
- with self.test_session(graph=od_graph) as sess:
- new_saver = tf.train.import_meta_graph(meta_graph_path)
- new_saver.restore(sess, model_path)
-
- tf_example = od_graph.get_tensor_by_name('tf_example:0')
- boxes = od_graph.get_tensor_by_name('detection_boxes:0')
- scores = od_graph.get_tensor_by_name('detection_scores:0')
- classes = od_graph.get_tensor_by_name('detection_classes:0')
- masks = od_graph.get_tensor_by_name('detection_masks:0')
- num_detections = od_graph.get_tensor_by_name('num_detections:0')
- (boxes_np, scores_np, classes_np, masks_np,
- num_detections_np) = sess.run(
- [boxes, scores, classes, masks, num_detections],
- feed_dict={tf_example: tf_example_np})
- self.assertAllClose(boxes_np, [[[0.0, 0.0, 0.5, 0.5],
- [0.5, 0.5, 0.8, 0.8]],
- [[0.5, 0.5, 1.0, 1.0],
- [0.0, 0.0, 0.0, 0.0]]])
- self.assertAllClose(scores_np, [[0.7, 0.6],
- [0.9, 0.0]])
- self.assertAllClose(classes_np, [[1, 2],
- [2, 1]])
- self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4]))
- self.assertAllClose(num_detections_np, [2, 1])
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/faster_rcnn_inception_resnet_v2_atrous_oid.config b/object_detection/faster_rcnn_inception_resnet_v2_atrous_oid.config
deleted file mode 100644
index 6f9a275e..00000000
--- a/object_detection/faster_rcnn_inception_resnet_v2_atrous_oid.config
+++ /dev/null
@@ -1,146 +0,0 @@
-# Faster R-CNN with Inception Resnet v2, Atrous version;
-# Configured for Open Images Dataset.
-# Users should configure the fine_tune_checkpoint field in the train config as
-# well as the label_map_path and input_path fields in the train_input_reader and
-# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
-# should be configured.
-
-model {
- faster_rcnn {
- num_classes: 546
- image_resizer {
- keep_aspect_ratio_resizer {
- min_dimension: 600
- max_dimension: 1024
- }
- }
- feature_extractor {
- type: 'faster_rcnn_inception_resnet_v2'
- first_stage_features_stride: 8
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.25, 0.5, 1.0, 2.0]
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 8
- width_stride: 8
- }
- }
- first_stage_atrous_rate: 2
- first_stage_box_predictor_conv_hyperparams {
- op: CONV
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.01
- }
- }
- }
- first_stage_nms_score_threshold: 0.0
- first_stage_nms_iou_threshold: 0.7
- first_stage_max_proposals: 300
- first_stage_localization_loss_weight: 2.0
- first_stage_objectness_loss_weight: 1.0
- initial_crop_size: 17
- maxpool_kernel_size: 1
- maxpool_stride: 1
- second_stage_box_predictor {
- mask_rcnn_box_predictor {
- use_dropout: false
- dropout_keep_probability: 1.0
- fc_hyperparams {
- op: FC
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- variance_scaling_initializer {
- factor: 1.0
- uniform: true
- mode: FAN_AVG
- }
- }
- }
- }
- }
- second_stage_post_processing {
- batch_non_max_suppression {
- score_threshold: 0.0
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 100
- }
- score_converter: SOFTMAX
- }
- second_stage_localization_loss_weight: 2.0
- second_stage_classification_loss_weight: 1.0
- }
-}
-
-train_config: {
- batch_size: 1
- optimizer {
- momentum_optimizer: {
- learning_rate: {
- manual_step_learning_rate {
- initial_learning_rate: 0.00006
- schedule {
- step: 0
- learning_rate: .00006
- }
- schedule {
- step: 6000000
- learning_rate: .000006
- }
- schedule {
- step: 7000000
- learning_rate: .0000006
- }
- }
- }
- momentum_optimizer_value: 0.9
- }
- use_moving_average: false
- }
- gradient_clipping_by_norm: 10.0
- fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
- # Note: The below line limits the training process to 800K steps, which we
- # empirically found to be sufficient enough to train the Open Images dataset.
- # This effectively bypasses the learning rate schedule (the learning rate will
- # never decay). Remove the below line to train indefinitely.
- num_steps: 8000000
- data_augmentation_options {
- random_horizontal_flip {
- }
- }
-}
-
-train_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/oid_bbox_trainable_train.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/oid_bbox_trainable_label_map.pbtxt"
-}
-
-eval_config: {
- metrics_set: "open_images_metrics"
- num_examples: 8000
- # Note: The below line limits the evaluation process to 10 evaluations.
- # Remove the below line to evaluate indefinitely.
- max_evals: 10
-}
-
-eval_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/oid_bbox_trainable_val.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/oid_bbox_trainable_label_map.pbtxt"
- shuffle: false
- num_readers: 1
-}
diff --git a/object_detection/g3doc/configuring_jobs.md b/object_detection/g3doc/configuring_jobs.md
deleted file mode 100644
index 78f77bc4..00000000
--- a/object_detection/g3doc/configuring_jobs.md
+++ /dev/null
@@ -1,162 +0,0 @@
-# Configuring the Object Detection Training Pipeline
-
-## Overview
-
-The Tensorflow Object Detection API uses protobuf files to configure the
-training and evaluation process. The schema for the training pipeline can be
-found in object_detection/protos/pipeline.proto. At a high level, the config
-file is split into 5 parts:
-
-1. The `model` configuration. This defines what type of model will be trained
-(ie. meta-architecture, feature extractor).
-2. The `train_config`, which decides what parameters should be used to train
-model parameters (ie. SGD parameters, input preprocessing and feature extractor
-initialization values).
-3. The `eval_config`, which determines what set of metrics will be reported for
-evaluation (currently we only support the PASCAL VOC metrics).
-4. The `train_input_reader`, which defines what dataset the model should be
-trained on.
-5. The `eval_input_reader`, which defines what dataset the model will be
-evaluated on. Typically this should be different from the training input
-dataset.
-
-A skeleton configuration file is shown below:
-
-```
-model {
-(... Add model config here...)
-}
-
-train_config : {
-(... Add train_config here...)
-}
-
-train_input_reader: {
-(... Add train_input configuration here...)
-}
-
-eval_config: {
-}
-
-eval_input_reader: {
-(... Add eval_input configuration here...)
-}
-```
-
-## Picking Model Parameters
-
-There are a large number of model parameters to configure. The best settings
-will depend on your given application. Faster R-CNN models are better suited to
-cases where high accuracy is desired and latency is of lower priority.
-Conversely, if processing time is the most important factor, SSD models are
-recommended. Read [our paper](https://arxiv.org/abs/1611.10012) for a more
-detailed discussion on the speed vs accuracy tradeoff.
-
-To help new users get started, sample model configurations have been provided
-in the object_detection/samples/model_configs folder. The contents of these
-configuration files can be pasted into `model` field of the skeleton
-configuration. Users should note that the `num_classes` field should be changed
-to a value suited for the dataset the user is training on.
-
-## Defining Inputs
-
-The Tensorflow Object Detection API accepts inputs in the TFRecord file format.
-Users must specify the locations of both the training and evaluation files.
-Additionally, users should also specify a label map, which defines the mapping
-between a class id and class name. The label map should be identical between
-training and evaluation datasets.
-
-An example input configuration looks as follows:
-
-```
-tf_record_input_reader {
- input_path: "/usr/home/username/data/train.record"
-}
-label_map_path: "/usr/home/username/data/label_map.pbtxt"
-```
-
-Users should substitute the `input_path` and `label_map_path` arguments and
-insert the input configuration into the `train_input_reader` and
-`eval_input_reader` fields in the skeleton configuration. Note that the paths
-can also point to Google Cloud Storage buckets (ie.
-"gs://project_bucket/train.record") for use on Google Cloud.
-
-## Configuring the Trainer
-
-The `train_config` defines parts of the training process:
-
-1. Model parameter initialization.
-2. Input preprocessing.
-3. SGD parameters.
-
-A sample `train_config` is below:
-
-```
-batch_size: 1
-optimizer {
- momentum_optimizer: {
- learning_rate: {
- manual_step_learning_rate {
- initial_learning_rate: 0.0002
- schedule {
- step: 0
- learning_rate: .0002
- }
- schedule {
- step: 900000
- learning_rate: .00002
- }
- schedule {
- step: 1200000
- learning_rate: .000002
- }
- }
- }
- momentum_optimizer_value: 0.9
- }
- use_moving_average: false
-}
-fine_tune_checkpoint: "/usr/home/username/tmp/model.ckpt-#####"
-from_detection_checkpoint: true
-gradient_clipping_by_norm: 10.0
-data_augmentation_options {
- random_horizontal_flip {
- }
-}
-```
-
-### Model Parameter Initialization
-
-While optional, it is highly recommended that users utilize other object
-detection checkpoints. Training an object detector from scratch can take days.
-To speed up the training process, it is recommended that users re-use the
-feature extractor parameters from a pre-existing object classification or
-detection checkpoint. `train_config` provides two fields to specify
-pre-existing checkpoints: `fine_tune_checkpoint` and
-`from_detection_checkpoint`. `fine_tune_checkpoint` should provide a path to
-the pre-existing checkpoint
-(ie:"/usr/home/username/checkpoint/model.ckpt-#####").
-`from_detection_checkpoint` is a boolean value. If false, it assumes the
-checkpoint was from an object classification checkpoint. Note that starting
-from a detection checkpoint will usually result in a faster training job than
-a classification checkpoint.
-
-The list of provided checkpoints can be found [here](detection_model_zoo.md).
-
-### Input Preprocessing
-
-The `data_augmentation_options` in `train_config` can be used to specify
-how training data can be modified. This field is optional.
-
-### SGD Parameters
-
-The remaining parameters in `train_config` are hyperparameters for gradient
-descent. Please note that the optimal learning rates provided in these
-configuration files may depend on the specifics of the training setup (e.g.
-number of workers, gpu type).
-
-## Configuring the Evaluator
-
-Currently evaluation is fixed to generating metrics as defined by the PASCAL VOC
-challenge. The parameters for `eval_config` are set to reasonable defaults and
-typically do not need to be configured.
diff --git a/object_detection/g3doc/defining_your_own_model.md b/object_detection/g3doc/defining_your_own_model.md
deleted file mode 100644
index 865f6af1..00000000
--- a/object_detection/g3doc/defining_your_own_model.md
+++ /dev/null
@@ -1,137 +0,0 @@
-# So you want to create a new model!
-
-In this section, we discuss some of the abstractions that we use
-for defining detection models. If you would like to define a new model
-architecture for detection and use it in the Tensorflow Detection API,
-then this section should also serve as a high level guide to the files that you
-will need to edit to get your new model working.
-
-## DetectionModels (`object_detection/core/model.py`)
-
-In order to be trained, evaluated, and exported for serving using our
-provided binaries, all models under the Tensorflow Object Detection API must
-implement the `DetectionModel` interface (see the full definition in `object_detection/core/model.py`). In particular,
-each of these models is responsible for implementing 5 functions:
-
-* `preprocess`: Run any preprocessing (e.g., scaling/shifting/reshaping) of
- input values that is necessary prior to running the detector on an input
- image.
-* `predict`: Produce “raw” prediction tensors that can be passed to loss or
- postprocess functions.
-* `postprocess`: Convert predicted output tensors to final detections.
-* `loss`: Compute scalar loss tensors with respect to provided groundtruth.
-* `restore`: Load a checkpoint into the Tensorflow graph.
-
-Given a `DetectionModel` at training time, we pass each image batch through
-the following sequence of functions to compute a loss which can be optimized via
-SGD:
-
-```
-inputs (images tensor) -> preprocess -> predict -> loss -> outputs (loss tensor)
-```
-
-And at eval time, we pass each image batch through the following sequence of
-functions to produce a set of detections:
-
-```
-inputs (images tensor) -> preprocess -> predict -> postprocess ->
- outputs (boxes tensor, scores tensor, classes tensor, num_detections tensor)
-```
-
-Some conventions to be aware of:
-
-* `DetectionModel`s should make no assumptions about the input size or aspect
- ratio --- they are responsible for doing any resize/reshaping necessary
- (see docstring for the `preprocess` function).
-* Output classes are always integers in the range `[0, num_classes)`.
- Any mapping of these integers to semantic labels is to be handled outside
- of this class. We never explicitly emit a “background class” --- thus 0 is
- the first non-background class and any logic of predicting and removing
- implicit background classes must be handled internally by the implementation.
-* Detected boxes are to be interpreted as being in
- `[y_min, x_min, y_max, x_max]` format and normalized relative to the
- image window.
-* We do not specifically assume any kind of probabilistic interpretation of the
- scores --- the only important thing is their relative ordering. Thus
- implementations of the postprocess function are free to output logits,
- probabilities, calibrated probabilities, or anything else.
-
-## Defining a new Faster R-CNN or SSD Feature Extractor
-
-In most cases, you probably will not implement a `DetectionModel` from scratch
---- instead you might create a new feature extractor to be used by one of the
-SSD or Faster R-CNN meta-architectures. (We think of meta-architectures as
-classes that define entire families of models using the `DetectionModel`
-abstraction).
-
-Note: For the following discussion to make sense, we recommend first becoming
-familiar with the [Faster R-CNN](https://arxiv.org/abs/1506.01497) paper.
-
-Let’s now imagine that you have invented a brand new network architecture
-(say, “InceptionV100”) for classification and want to see how InceptionV100
-would behave as a feature extractor for detection (say, with Faster R-CNN).
-A similar procedure would hold for SSD models, but we’ll discuss Faster R-CNN.
-
-To use InceptionV100, we will have to define a new
-`FasterRCNNFeatureExtractor` and pass it to our `FasterRCNNMetaArch`
-constructor as input. See
-`object_detection/meta_architectures/faster_rcnn_meta_arch.py` for definitions
-of `FasterRCNNFeatureExtractor` and `FasterRCNNMetaArch`, respectively.
-A `FasterRCNNFeatureExtractor` must define a few
-functions:
-
-* `preprocess`: Run any preprocessing of input values that is necessary prior
- to running the detector on an input image.
-* `_extract_proposal_features`: Extract first stage Region Proposal Network
- (RPN) features.
-* `_extract_box_classifier_features`: Extract second stage Box Classifier
- features.
-* `restore_from_classification_checkpoint_fn`: Load a checkpoint into the
- Tensorflow graph.
-
-See the `object_detection/models/faster_rcnn_resnet_v1_feature_extractor.py`
-definition as one example. Some remarks:
-
-* We typically initialize the weights of this feature extractor
- using those from the
- [Slim Resnet-101 classification checkpoint](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models),
- and we know
- that images were preprocessed when training this checkpoint
- by subtracting a channel mean from each input
- image. Thus, we implement the preprocess function to replicate the same
- channel mean subtraction behavior.
-* The “full” resnet classification network defined in slim is cut into two
- parts --- all but the last “resnet block” is put into the
- `_extract_proposal_features` function and the final block is separately
- defined in the `_extract_box_classifier_features` function. In general,
- some experimentation may be required to decide on an optimal layer at
- which to “cut” your feature extractor into these two pieces for Faster R-CNN.
-
-## Register your model for configuration
-
-Assuming that your new feature extractor does not require nonstandard
-configuration, you will want to ideally be able to simply change the
-“feature_extractor.type” fields in your configuration protos to point to a
-new feature extractor. In order for our API to know how to understand this
-new type though, you will first have to register your new feature
-extractor with the model builder (`object_detection/builders/model_builder.py`),
-whose job is to create models from config protos.
-
-Registration is simple --- just add a pointer to the new Feature Extractor
-class that you have defined in one of the SSD or Faster R-CNN Feature
-Extractor Class maps at the top of the
-`object_detection/builders/model_builder.py` file.
-We recommend adding a test in `object_detection/builders/model_builder_test.py`
-to make sure that parsing your proto will work as expected.
-
-## Taking your new model for a spin
-
-After registration you are ready to go with your model! Some final tips:
-
-* To save time debugging, try running your configuration file locally first
- (both training and evaluation).
-* Do a sweep of learning rates to figure out which learning rate is best
- for your model.
-* A small but often important detail: you may find it necessary to disable
- batchnorm training (that is, load the batch norm parameters from the
- classification checkpoint, but do not update them during gradient descent).
diff --git a/object_detection/g3doc/detection_model_zoo.md b/object_detection/g3doc/detection_model_zoo.md
deleted file mode 100644
index d6d31e0d..00000000
--- a/object_detection/g3doc/detection_model_zoo.md
+++ /dev/null
@@ -1,101 +0,0 @@
-# Tensorflow detection model zoo
-
-We provide a collection of detection models pre-trained on the [COCO
-dataset](http://mscoco.org), the [Kitti dataset](http://www.cvlibs.net/datasets/kitti/), and the
-[Open Images dataset](https://github.com/openimages/dataset). These models can
-be useful for
-out-of-the-box inference if you are interested in categories already in COCO
-(e.g., humans, cars, etc) or in Open Images (e.g.,
-surfboard, jacuzzi, etc). They are also useful for initializing your models when
-training on novel datasets.
-
-In the table below, we list each such pre-trained model including:
-
-* a model name that corresponds to a config file that was used to train this
- model in the `samples/configs` directory,
-* a download link to a tar.gz file containing the pre-trained model,
-* model speed --- we report running time in ms per 600x600 image (including all
- pre and post-processing), but please be
- aware that these timings depend highly on one's specific hardware
- configuration (these timings were performed using an Nvidia
- GeForce GTX TITAN X card) and should be treated more as relative timings in
- many cases.
-* detector performance on subset of the COCO validation set or Open Images test split as measured by the dataset-specific mAP measure.
- Here, higher is better, and we only report bounding box mAP rounded to the
- nearest integer.
-* Output types (currently only `Boxes`)
-
-You can un-tar each tar.gz file via, e.g.,:
-
-```
-tar -xzvf ssd_mobilenet_v1_coco.tar.gz
-```
-
-Inside the un-tar'ed directory, you will find:
-
-* a graph proto (`graph.pbtxt`)
-* a checkpoint
- (`model.ckpt.data-00000-of-00001`, `model.ckpt.index`, `model.ckpt.meta`)
-* a frozen graph proto with weights baked into the graph as constants
- (`frozen_inference_graph.pb`) to be used for out of the box inference
- (try this out in the Jupyter notebook!)
-* a config file (`pipeline.config`) which was used to generate the graph. These
- directly correspond to a config file in the
- [samples/configs](https://github.com/tensorflow/models/tree/master/research/object_detection/samples/configs) directory but often with a modified score threshold. In the case
- of the heavier Faster R-CNN models, we also provide a version of the model
- that uses a highly reduced number of proposals for speed.
-
-Some remarks on frozen inference graphs:
-
-* If you try to evaluate the frozen graph, you may find performance numbers for
- some of the models to be slightly lower than what we report in the below
- tables. This is because we discard detections with scores below a
- threshold (typically 0.3) when creating the frozen graph. This corresponds
- effectively to picking a point on the precision recall curve of
- a detector (and discarding the part past that point), which negatively impacts
- standard mAP metrics.
-* Our frozen inference graphs are generated using the
- [v1.4.0](https://github.com/tensorflow/tensorflow/tree/v1.4.0)
- release version of Tensorflow and we do not guarantee that these will work
- with other versions; this being said, each frozen inference graph can be
- regenerated using your current version of Tensorflow by re-running the
- [exporter](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/exporting_models.md),
- pointing it at the model directory as well as the config file inside of it.
-
-
-## COCO-trained models {#coco-models}
-
-| Model name | Speed (ms) | COCO mAP[^1] | Outputs |
-| ------------ | :--------------: | :--------------: | :-------------: |
-| [ssd_mobilenet_v1_coco](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2017_11_17.tar.gz) | 30 | 21 | Boxes |
-| [ssd_inception_v2_coco](http://download.tensorflow.org/models/object_detection/ssd_inception_v2_coco_2017_11_17.tar.gz) | 42 | 24 | Boxes |
-| [faster_rcnn_inception_v2_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_v2_coco_2017_11_08.tar.gz) | 58 | 28 | Boxes |
-| [faster_rcnn_resnet50_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_coco_2017_11_08.tar.gz) | 89 | 30 | Boxes |
-| [faster_rcnn_resnet50_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_lowproposals_coco_2017_11_08.tar.gz) | 64 | | Boxes |
-| [rfcn_resnet101_coco](http://download.tensorflow.org/models/object_detection/rfcn_resnet101_coco_2017_11_08.tar.gz) | 92 | 30 | Boxes |
-| [faster_rcnn_resnet101_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_coco_2017_11_08.tar.gz) | 106 | 32 | Boxes |
-| [faster_rcnn_resnet101_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_lowproposals_coco_2017_11_08.tar.gz) | 82 | | Boxes |
-| [faster_rcnn_inception_resnet_v2_atrous_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_coco_2017_11_08.tar.gz) | 620 | 37 | Boxes |
-| [faster_rcnn_inception_resnet_v2_atrous_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_lowproposals_coco_2017_11_08.tar.gz) | 241 | | Boxes |
-| [faster_rcnn_nas](http://download.tensorflow.org/models/object_detection/faster_rcnn_nas_coco_2017_11_08.tar.gz) | 1833 | 43 | Boxes |
-| [faster_rcnn_nas_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_nas_lowproposals_coco_2017_11_08.tar.gz) | 540 | | Boxes |
-
-
-
-## Kitti-trained models {#kitti-models}
-
-Model name | Speed (ms) | Pascal mAP@0.5 | Outputs
------------------------------------------------------------------------------------------------------------------------------------------------------------------ | :---: | :-------------: | :-----:
-[faster_rcnn_resnet101_kitti](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_kitti_2017_11_08.tar.gz) | 79 | 87 | Boxes
-
-## Open Images-trained models {#open-images-models}
-
-Model name | Speed (ms) | Open Images mAP@0.5[^2] | Outputs
------------------------------------------------------------------------------------------------------------------------------------------------------------------ | :---: | :-------------: | :-----:
-[faster_rcnn_inception_resnet_v2_atrous_oid](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_oid_2017_11_08.tar.gz) | 727 | 37 | Boxes
-[faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid_2017_11_08.tar.gz) | 347 | | Boxes
-
-
-[^1]: See [MSCOCO evaluation protocol](http://cocodataset.org/#detections-eval).
-[^2]: This is PASCAL mAP with a slightly different way of true positives computation: see [Open Images evaluation protocol](evaluation_protocols.md#open-images).
-
diff --git a/object_detection/g3doc/evaluation_protocols.md b/object_detection/g3doc/evaluation_protocols.md
deleted file mode 100644
index 033a1adf..00000000
--- a/object_detection/g3doc/evaluation_protocols.md
+++ /dev/null
@@ -1,94 +0,0 @@
-# Supported object detection evaluation protocols
-
-The Tensorflow Object Detection API currently supports three evaluation protocols
-that can be configured in `EvalConfig` by setting `metrics_set` to the
-corresponding value.
-
-## PASCAL VOC 2007 metric
-
-`EvalConfig.metrics_set='pascal_voc_metrics'`
-
-The commonly used mAP metric for evaluating the quality of object detectors, computed according to the protocol of the PASCAL VOC Challenge 2007.
-The protocol is available [here](http://host.robots.ox.ac.uk/pascal/VOC/voc2007/devkit_doc_07-Jun-2007.pdf).
-
-
-## Weighted PASCAL VOC metric
-
-`EvalConfig.metrics_set='weighted_pascal_voc_metrics'`
-
-The weighted PASCAL metric computes the mean average precision as the average
-precision when treating all classes as a single class. In comparison,
-the PASCAL metric computes the mean average precision as the mean of the
-per-class average precisions.
-
-For example, the test set consists of two classes, "cat" and "dog", and there are ten times more boxes of "cat" than those of "dog".
-According to PASCAL VOC 2007 metric, performance on each of the two classes would contribute equally towards the final mAP value,
-while for the Weighted PASCAL VOC metric the final mAP value will be influenced by frequency of each class.
-
-## Open Images metric {#open-images}
-
-`EvalConfig.metrics_set='open_images_metrics'`
-
-This metric is defined originally for evaluating detector performance on [Open Images V2 dataset](https://github.com/openimages/dataset)
-and is fairly similar to the PASCAL VOC 2007 metric mentioned above.
-It computes interpolated average precision (AP) for each class and averages it among all classes (mAP).
-
-The difference to the PASCAL VOC 2007 metric is the following: Open Images
-annotations contain `group-of` ground-truth boxes (see [Open Images data
-description](https://github.com/openimages/dataset#annotations-human-bboxcsv)),
-that are treated differently for the purpose of deciding whether detections are
-"true positives", "ignored", "false positives". Here we define these three
-cases:
-
-A detection is a "true positive" if there is a non-group-of ground-truth box,
-such that:
-
-* The detection box and the ground-truth box are of the same class, and
- intersection-over-union (IoU) between the detection box and the ground-truth
- box is greater than the IoU threshold (default value 0.5). \
- Illustration of handling non-group-of boxes: \
- {width="500" height="270"}
-
- * yellow box - ground-truth box;
- * green box - true positive;
- * red boxes - false positives.
-
-* This is the highest scoring detection for this ground truth box that
- satisfies the criteria above.
-
-A detection is "ignored" if it is not a true positive, and there is a `group-of`
-ground-truth box such that:
-
-* The detection box and the ground-truth box are of the same class, and the
- area of intersection between the detection box and the ground-truth box
- divided by the area of the detection is greater than 0.5. This is intended
- to measure whether the detection box is approximately inside the group-of
- ground-truth box. \
- Illustration of handling `group-of` boxes: \
- {width="500" height="270"}
-
- * yellow box - ground-truth box;
- * grey boxes - two detections on cars, that are ignored;
- * red box - false positive.
-
-A detection is a "false positive" if it is neither a "true positive" nor
-"ignored".
-
-Precision and recall are defined as:
-
-* Precision = number-of-true-positives/(number-of-true-positives + number-of-false-positives)
-* Recall = number-of-true-positives/number-of-non-group-of-boxes
-
-Note that detections ignored as firing on a `group-of` ground-truth box do not
-contribute to the number of true positives.
-
-The labels in Open Images are organized in a
-[hierarchy](https://storage.googleapis.com/openimages/2017_07/bbox_labels_vis/bbox_labels_vis.html).
-Ground-truth bounding-boxes are annotated with the most specific class available
-in the hierarchy. For example, "car" has two children "limousine" and "van". Any
-other kind of car is annotated as "car" (for example, a sedan). Given this
-convention, the evaluation software treats all classes independently, ignoring
-the hierarchy. To achieve high performance values, object detectors should
-output bounding-boxes labelled in the same manner.
diff --git a/object_detection/g3doc/exporting_models.md b/object_detection/g3doc/exporting_models.md
deleted file mode 100644
index 2da97908..00000000
--- a/object_detection/g3doc/exporting_models.md
+++ /dev/null
@@ -1,22 +0,0 @@
-# Exporting a trained model for inference
-
-After your model has been trained, you should export it to a Tensorflow
-graph proto. A checkpoint will typically consist of three files:
-
-* model.ckpt-${CHECKPOINT_NUMBER}.data-00000-of-00001,
-* model.ckpt-${CHECKPOINT_NUMBER}.index
-* model.ckpt-${CHECKPOINT_NUMBER}.meta
-
-After you've identified a candidate checkpoint to export, run the following
-command from tensorflow/models/research/:
-
-``` bash
-# From tensorflow/models/research/
-python object_detection/export_inference_graph.py \
- --input_type image_tensor \
- --pipeline_config_path ${PIPELINE_CONFIG_PATH} \
- --trained_checkpoint_prefix ${TRAIN_PATH} \
- --output_directory output_inference_graph.pb
-```
-
-Afterwards, you should see a graph named output_inference_graph.pb.
diff --git a/object_detection/g3doc/img/dogs_detections_output.jpg b/object_detection/g3doc/img/dogs_detections_output.jpg
deleted file mode 100644
index 9e88a701..00000000
Binary files a/object_detection/g3doc/img/dogs_detections_output.jpg and /dev/null differ
diff --git a/object_detection/g3doc/img/example_cat.jpg b/object_detection/g3doc/img/example_cat.jpg
deleted file mode 100644
index 74c7ef4b..00000000
Binary files a/object_detection/g3doc/img/example_cat.jpg and /dev/null differ
diff --git a/object_detection/g3doc/img/groupof_case_eval.png b/object_detection/g3doc/img/groupof_case_eval.png
deleted file mode 100644
index 5abc9b69..00000000
Binary files a/object_detection/g3doc/img/groupof_case_eval.png and /dev/null differ
diff --git a/object_detection/g3doc/img/kites_detections_output.jpg b/object_detection/g3doc/img/kites_detections_output.jpg
deleted file mode 100644
index 7c0f3364..00000000
Binary files a/object_detection/g3doc/img/kites_detections_output.jpg and /dev/null differ
diff --git a/object_detection/g3doc/img/nongroupof_case_eval.png b/object_detection/g3doc/img/nongroupof_case_eval.png
deleted file mode 100644
index cbb76f49..00000000
Binary files a/object_detection/g3doc/img/nongroupof_case_eval.png and /dev/null differ
diff --git a/object_detection/g3doc/img/oid_bus_72e19c28aac34ed8.jpg b/object_detection/g3doc/img/oid_bus_72e19c28aac34ed8.jpg
deleted file mode 100644
index 1e9412ad..00000000
Binary files a/object_detection/g3doc/img/oid_bus_72e19c28aac34ed8.jpg and /dev/null differ
diff --git a/object_detection/g3doc/img/oid_monkey_3b4168c89cecbc5b.jpg b/object_detection/g3doc/img/oid_monkey_3b4168c89cecbc5b.jpg
deleted file mode 100644
index 46b1fb28..00000000
Binary files a/object_detection/g3doc/img/oid_monkey_3b4168c89cecbc5b.jpg and /dev/null differ
diff --git a/object_detection/g3doc/img/oxford_pet.png b/object_detection/g3doc/img/oxford_pet.png
deleted file mode 100644
index ddac415f..00000000
Binary files a/object_detection/g3doc/img/oxford_pet.png and /dev/null differ
diff --git a/object_detection/g3doc/img/tensorboard.png b/object_detection/g3doc/img/tensorboard.png
deleted file mode 100644
index fbcdbeb3..00000000
Binary files a/object_detection/g3doc/img/tensorboard.png and /dev/null differ
diff --git a/object_detection/g3doc/img/tensorboard2.png b/object_detection/g3doc/img/tensorboard2.png
deleted file mode 100644
index 97ad22da..00000000
Binary files a/object_detection/g3doc/img/tensorboard2.png and /dev/null differ
diff --git a/object_detection/g3doc/img/tf-od-api-logo.png b/object_detection/g3doc/img/tf-od-api-logo.png
deleted file mode 100644
index 9fa9cc9d..00000000
Binary files a/object_detection/g3doc/img/tf-od-api-logo.png and /dev/null differ
diff --git a/object_detection/g3doc/installation.md b/object_detection/g3doc/installation.md
deleted file mode 100644
index 81b7503b..00000000
--- a/object_detection/g3doc/installation.md
+++ /dev/null
@@ -1,79 +0,0 @@
-# Installation
-
-## Dependencies
-
-Tensorflow Object Detection API depends on the following libraries:
-
-* Protobuf 2.6
-* Pillow 1.0
-* lxml
-* tf Slim (which is included in the "tensorflow/models/research/" checkout)
-* Jupyter notebook
-* Matplotlib
-* Tensorflow
-
-For detailed steps to install Tensorflow, follow the [Tensorflow installation
-instructions](https://www.tensorflow.org/install/). A typical user can install
-Tensorflow using one of the following commands:
-
-``` bash
-# For CPU
-pip install tensorflow
-# For GPU
-pip install tensorflow-gpu
-```
-
-The remaining libraries can be installed on Ubuntu 16.04 via apt-get:
-
-``` bash
-sudo apt-get install protobuf-compiler python-pil python-lxml
-sudo pip install jupyter
-sudo pip install matplotlib
-```
-
-Alternatively, users can install dependencies using pip:
-
-``` bash
-sudo pip install pillow
-sudo pip install lxml
-sudo pip install jupyter
-sudo pip install matplotlib
-```
-
-## Protobuf Compilation
-
-The Tensorflow Object Detection API uses Protobufs to configure model and
-training parameters. Before the framework can be used, the Protobuf libraries
-must be compiled. This should be done by running the following command from
-the tensorflow/models/research/ directory:
-
-
-``` bash
-# From tensorflow/models/research/
-protoc object_detection/protos/*.proto --python_out=.
-```
-
-## Add Libraries to PYTHONPATH
-
-When running locally, the tensorflow/models/research/ and slim directories
-should be appended to PYTHONPATH. This can be done by running the following from
-tensorflow/models/research/:
-
-
-``` bash
-# From tensorflow/models/research/
-export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim
-```
-
-Note: This command needs to run from every new terminal you start. If you wish
-to avoid running this manually, you can add it as a new line to the end of your
-~/.bashrc file.
-
-# Testing the Installation
-
-You can test that you have correctly installed the Tensorflow Object Detection
-API by running the following command:
-
-```bash
-python object_detection/builders/model_builder_test.py
-```
diff --git a/object_detection/g3doc/oid_inference_and_evaluation.md b/object_detection/g3doc/oid_inference_and_evaluation.md
deleted file mode 100644
index 164fdc2c..00000000
--- a/object_detection/g3doc/oid_inference_and_evaluation.md
+++ /dev/null
@@ -1,255 +0,0 @@
-# Inference and evaluation on the Open Images dataset
-
-This page presents a tutorial for running object detector inference and
-evaluation measure computations on the [Open Images
-dataset](https://github.com/openimages/dataset), using tools from the
-[TensorFlow Object Detection
-API](https://github.com/tensorflow/models/tree/master/research/object_detection).
-It shows how to download the images and annotations for the validation and test
-sets of Open Images; how to package the downloaded data in a format understood
-by the Object Detection API; where to find a trained object detector model for
-Open Images; how to run inference; and how to compute evaluation measures on the
-inferred detections.
-
-Inferred detections will look like the following:
-
-{height="300"}
-{height="300"}
-
-On the validation set of Open Images, this tutorial requires 27GB of free disk
-space and the inference step takes approximately 9 hours on a single NVIDIA
-Tesla P100 GPU. On the test set -- 75GB and 27 hours respectively. All other
-steps require less than two hours in total on both sets.
-
-## Installing TensorFlow, the Object Detection API, and Google Cloud SDK
-
-Please run through the [installation instructions](installation.md) to install
-TensorFlow and all its dependencies. Ensure the Protobuf libraries are compiled
-and the library directories are added to `PYTHONPATH`. You will also need to
-`pip` install `pandas` and `contextlib2`.
-
-Some of the data used in this tutorial lives in Google Cloud buckets. To access
-it, you will have to [install the Google Cloud
-SDK](https://cloud.google.com/sdk/downloads) on your workstation or laptop.
-
-## Preparing the Open Images validation and test sets
-
-In order to run inference and subsequent evaluation measure computations, we
-require a dataset of images and ground truth boxes, packaged as TFRecords of
-TFExamples. To create such a dataset for Open Images, you will need to first
-download ground truth boxes from the [Open Images
-website](https://github.com/openimages/dataset):
-
-```bash
-# From tensorflow/models/research
-mkdir oid
-cd oid
-wget https://storage.googleapis.com/openimages/2017_07/annotations_human_bbox_2017_07.tar.gz
-tar -xvf annotations_human_bbox_2017_07.tar.gz
-```
-
-Next, download the images. In this tutorial, we will use lower resolution images
-provided by [CVDF](http://www.cvdfoundation.org). Please follow the instructions
-on [CVDF's Open Images repository
-page](https://github.com/cvdfoundation/open-images-dataset) in order to gain
-access to the cloud bucket with the images. Then run:
-
-```bash
-# From tensorflow/models/research/oid
-SPLIT=validation # Set SPLIT to "test" to download the images in the test set
-mkdir raw_images_${SPLIT}
-gsutil -m rsync -r gs://open-images-dataset/$SPLIT raw_images_${SPLIT}
-```
-
-Another option for downloading the images is to follow the URLs contained in the
-[image URLs and metadata CSV
-files](https://storage.googleapis.com/openimages/2017_07/images_2017_07.tar.gz)
-on the Open Images website.
-
-At this point, your `tensorflow/models/research/oid` directory should appear as
-follows:
-
-```lang-none
-|-- 2017_07
-| |-- test
-| | `-- annotations-human-bbox.csv
-| |-- train
-| | `-- annotations-human-bbox.csv
-| `-- validation
-| `-- annotations-human-bbox.csv
-|-- raw_images_validation (if you downloaded the validation split)
-| `-- ... (41,620 files matching regex "[0-9a-f]{16}.jpg")
-|-- raw_images_test (if you downloaded the test split)
-| `-- ... (125,436 files matching regex "[0-9a-f]{16}.jpg")
-`-- annotations_human_bbox_2017_07.tar.gz
-```
-
-Next, package the data into TFRecords of TFExamples by running:
-
-```bash
-# From tensorflow/models/research/oid
-SPLIT=validation # Set SPLIT to "test" to create TFRecords for the test split
-mkdir ${SPLIT}_tfrecords
-
-PYTHONPATH=$PYTHONPATH:$(readlink -f ..) \
-python -m object_detection/dataset_tools/create_oid_tf_record \
- --input_annotations_csv 2017_07/$SPLIT/annotations-human-bbox.csv \
- --input_images_directory raw_images_${SPLIT} \
- --input_label_map ../object_detection/data/oid_bbox_trainable_label_map.pbtxt \
- --output_tf_record_path_prefix ${SPLIT}_tfrecords/$SPLIT.tfrecord \
- --num_shards=100
-```
-
-This results in 100 TFRecord files (shards), written to
-`oid/${SPLIT}_tfrecords`, with filenames matching
-`${SPLIT}.tfrecord-000[0-9][0-9]-of-00100`. Each shard contains approximately
-the same number of images and is de facto a representative random sample of the
-input data. [This enables](#accelerating_inference) a straightforward work
-division scheme for distributing inference and also approximate measure
-computations on subsets of the validation and test sets.
-
-## Inferring detections
-
-Inference requires a trained object detection model. In this tutorial we will
-use a model from the [detections model zoo](detection_model_zoo.md), which can
-be downloaded and unpacked by running the commands below. More information about
-the model, such as its architecture and how it was trained, is available in the
-[model zoo page](detection_model_zoo.md).
-
-```bash
-# From tensorflow/models/research/oid
-wget http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_oid_14_10_2017.tar.gz
-tar -zxvf faster_rcnn_inception_resnet_v2_atrous_oid_14_10_2017.tar.gz
-```
-
-At this point, data is packed into TFRecords and we have an object detector
-model. We can run inference using:
-
-```bash
-# From tensorflow/models/research/oid
-SPLIT=validation # or test
-TF_RECORD_FILES=$(ls -1 ${SPLIT}_tfrecords/* | tr '\n' ',')
-
-PYTHONPATH=$PYTHONPATH:$(readlink -f ..) \
-python -m object_detection/inference/infer_detections \
- --input_tfrecord_paths=$TF_RECORD_FILES \
- --output_tfrecord_path=${SPLIT}_detections.tfrecord-00000-of-00001 \
- --inference_graph=faster_rcnn_inception_resnet_v2_atrous_oid/frozen_inference_graph.pb \
- --discard_image_pixels
-```
-
-Inference preserves all fields of the input TFExamples, and adds new fields to
-store the inferred detections. This allows [computing evaluation
-measures](#compute_evaluation_measures) on the output TFRecord alone, as ground
-truth boxes are preserved as well. Since measure computations don't require
-access to the images, `infer_detections` can optionally discard them with the
-`--discard_image_pixels` flag. Discarding the images drastically reduces the
-size of the output TFRecord.
-
-### Accelerating inference {#accelerating_inference}
-
-Running inference on the whole validation or test set can take a long time to
-complete due to the large number of images present in these sets (41,620 and
-125,436 respectively). For quick but approximate evaluation, inference and the
-subsequent measure computations can be run on a small number of shards. To run
-for example on 2% of all the data, it is enough to set `TF_RECORD_FILES` as
-shown below before running `infer_detections`:
-
-```bash
-TF_RECORD_FILES=$(ls ${SPLIT}_tfrecords/${SPLIT}.tfrecord-0000[0-1]-of-00100 | tr '\n' ',')
-```
-
-Please note that computing evaluation measures on a small subset of the data
-introduces variance and bias, since some classes of objects won't be seen during
-evaluation. In the example above, this leads to 13.2% higher mAP on the first
-two shards of the validation set compared to the mAP for the full set ([see mAP
-results](#expected-maps)).
-
-Another way to accelerate inference is to run it in parallel on multiple
-TensorFlow devices on possibly multiple machines. The script below uses
-[tmux](https://github.com/tmux/tmux/wiki) to run a separate `infer_detections`
-process for each GPU on a different partition of the input data.
-
-```bash
-# From tensorflow/models/research/oid
-SPLIT=validation # or test
-NUM_GPUS=4
-NUM_SHARDS=100
-
-tmux new-session -d -s "inference"
-function tmux_start { tmux new-window -d -n "inference:GPU$1" "${*:2}; exec bash"; }
-for gpu_index in $(seq 0 $(($NUM_GPUS-1))); do
- start_shard=$(( $gpu_index * $NUM_SHARDS / $NUM_GPUS ))
- end_shard=$(( ($gpu_index + 1) * $NUM_SHARDS / $NUM_GPUS - 1))
- TF_RECORD_FILES=$(seq -s, -f "${SPLIT}_tfrecords/${SPLIT}.tfrecord-%05.0f-of-$(printf '%05d' $NUM_SHARDS)" $start_shard $end_shard)
- tmux_start ${gpu_index} \
- PYTHONPATH=$PYTHONPATH:$(readlink -f ..) CUDA_VISIBLE_DEVICES=$gpu_index \
- python -m object_detection/inference/infer_detections \
- --input_tfrecord_paths=$TF_RECORD_FILES \
- --output_tfrecord_path=${SPLIT}_detections.tfrecord-$(printf "%05d" $gpu_index)-of-$(printf "%05d" $NUM_GPUS) \
- --inference_graph=faster_rcnn_inception_resnet_v2_atrous_oid/frozen_inference_graph.pb \
- --discard_image_pixels
-done
-```
-
-After all `infer_detections` processes finish, `tensorflow/models/research/oid`
-will contain one output TFRecord from each process, with name matching
-`validation_detections.tfrecord-0000[0-3]-of-00004`.
-
-## Computing evaluation measures {#compute_evaluation_measures}
-
-To compute evaluation measures on the inferred detections you first need to
-create the appropriate configuration files:
-
-```bash
-# From tensorflow/models/research/oid
-SPLIT=validation # or test
-NUM_SHARDS=1 # Set to NUM_GPUS if using the parallel evaluation script above
-
-mkdir -p ${SPLIT}_eval_metrics
-
-echo "
-label_map_path: '../object_detection/data/oid_bbox_trainable_label_map.pbtxt'
-tf_record_input_reader: { input_path: '${SPLIT}_detections.tfrecord@${NUM_SHARDS}' }
-" > ${SPLIT}_eval_metrics/${SPLIT}_input_config.pbtxt
-
-echo "
-metrics_set: 'open_images_metrics'
-" > ${SPLIT}_eval_metrics/${SPLIT}_eval_config.pbtxt
-```
-
-And then run:
-
-```bash
-# From tensorflow/models/research/oid
-SPLIT=validation # or test
-
-PYTHONPATH=$PYTHONPATH:$(readlink -f ..) \
-python -m object_detection/metrics/offline_eval_map_corloc \
- --eval_dir=${SPLIT}_eval_metrics \
- --eval_config_path=${SPLIT}_eval_metrics/${SPLIT}_eval_config.pbtxt \
- --input_config_path=${SPLIT}_eval_metrics/${SPLIT}_input_config.pbtxt
-```
-
-The first configuration file contains an `object_detection.protos.InputReader`
-message that describes the location of the necessary input files. The second
-file contains an `object_detection.protos.EvalConfig` message that describes the
-evaluation metric. For more information about these protos see the corresponding
-source files.
-
-### Expected mAPs {#expected-maps}
-
-The result of running `offline_eval_map_corloc` is a CSV file located at
-`${SPLIT}_eval_metrics/metrics.csv`. With the above configuration, the file will
-contain average precision at IoU≥0.5 for each of the classes present in the
-dataset. It will also contain the mAP@IoU≥0.5. Both the per-class average
-precisions and the mAP are computed according to the [Open Images evaluation
-protocol](evaluation_protocols.md). The expected mAPs for the validation and
-test sets of Open Images in this case are:
-
-Set | Fraction of data | Images | mAP@IoU≥0.5
----------: | :--------------: | :-----: | -----------
-validation | everything | 41,620 | 39.2%
-validation | first 2 shards | 884 | 52.4%
-test | everything | 125,436 | 37.7%
-test | first 2 shards | 2,476 | 50.8%
diff --git a/object_detection/g3doc/preparing_inputs.md b/object_detection/g3doc/preparing_inputs.md
deleted file mode 100644
index d9d290d2..00000000
--- a/object_detection/g3doc/preparing_inputs.md
+++ /dev/null
@@ -1,57 +0,0 @@
-# Preparing Inputs
-
-Tensorflow Object Detection API reads data using the TFRecord file format. Two
-sample scripts (`create_pascal_tf_record.py` and `create_pet_tf_record.py`) are
-provided to convert from the PASCAL VOC dataset and Oxford-IIIT Pet dataset to
-TFRecords.
-
-## Generating the PASCAL VOC TFRecord files.
-
-The raw 2012 PASCAL VOC data set is located
-[here](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar).
-To download, extract and convert it to TFRecords, run the following commands
-below:
-
-```bash
-# From tensorflow/models/research/
-wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
-tar -xvf VOCtrainval_11-May-2012.tar
-python object_detection/dataset_tools/create_pascal_tf_record.py \
- --label_map_path=object_detection/data/pascal_label_map.pbtxt \
- --data_dir=VOCdevkit --year=VOC2012 --set=train \
- --output_path=pascal_train.record
-python object_detection/dataset_tools/create_pascal_tf_record.py \
- --label_map_path=object_detection/data/pascal_label_map.pbtxt \
- --data_dir=VOCdevkit --year=VOC2012 --set=val \
- --output_path=pascal_val.record
-```
-
-You should end up with two TFRecord files named `pascal_train.record` and
-`pascal_val.record` in the `tensorflow/models/research/` directory.
-
-The label map for the PASCAL VOC data set can be found at
-`object_detection/data/pascal_label_map.pbtxt`.
-
-## Generating the Oxford-IIIT Pet TFRecord files.
-
-The Oxford-IIIT Pet data set is located
-[here](http://www.robots.ox.ac.uk/~vgg/data/pets/). To download, extract and
-convert it to TFRecords, run the following commands:
-
-```bash
-# From tensorflow/models/research/
-wget http://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz
-wget http://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz
-tar -xvf annotations.tar.gz
-tar -xvf images.tar.gz
-python object_detection/dataset_tools/create_pet_tf_record.py \
- --label_map_path=object_detection/data/pet_label_map.pbtxt \
- --data_dir=`pwd` \
- --output_dir=`pwd`
-```
-
-You should end up with two TFRecord files named `pet_train.record` and
-`pet_val.record` in the `tensorflow/models/research/` directory.
-
-The label map for the Pet dataset can be found at
-`object_detection/data/pet_label_map.pbtxt`.
diff --git a/object_detection/g3doc/running_locally.md b/object_detection/g3doc/running_locally.md
deleted file mode 100644
index b143a9b7..00000000
--- a/object_detection/g3doc/running_locally.md
+++ /dev/null
@@ -1,81 +0,0 @@
-# Running Locally
-
-This page walks through the steps required to train an object detection model
-on a local machine. It assumes the reader has completed the
-following prerequisites:
-
-1. The Tensorflow Object Detection API has been installed as documented in the
-[installation instructions](installation.md). This includes installing library
-dependencies, compiling the configuration protobufs and setting up the Python
-environment.
-2. A valid data set has been created. See [this page](preparing_inputs.md) for
-instructions on how to generate a dataset for the PASCAL VOC challenge or the
-Oxford-IIIT Pet dataset.
-3. An Object Detection pipeline configuration has been written. See
-[this page](configuring_jobs.md) for details on how to write a pipeline configuration.
-
-## Recommended Directory Structure for Training and Evaluation
-
-```
-+data
- -label_map file
- -train TFRecord file
- -eval TFRecord file
-+models
- + model
- -pipeline config file
- +train
- +eval
-```
-
-## Running the Training Job
-
-A local training job can be run with the following command:
-
-```bash
-# From the tensorflow/models/research/ directory
-python object_detection/train.py \
- --logtostderr \
- --pipeline_config_path=${PATH_TO_YOUR_PIPELINE_CONFIG} \
- --train_dir=${PATH_TO_TRAIN_DIR}
-```
-
-where `${PATH_TO_YOUR_PIPELINE_CONFIG}` points to the pipeline config and
-`${PATH_TO_TRAIN_DIR}` points to the directory in which training checkpoints
-and events will be written to. By default, the training job will
-run indefinitely until the user kills it.
-
-## Running the Evaluation Job
-
-Evaluation is run as a separate job. The eval job will periodically poll the
-train directory for new checkpoints and evaluate them on a test dataset. The
-job can be run using the following command:
-
-```bash
-# From the tensorflow/models/research/ directory
-python object_detection/eval.py \
- --logtostderr \
- --pipeline_config_path=${PATH_TO_YOUR_PIPELINE_CONFIG} \
- --checkpoint_dir=${PATH_TO_TRAIN_DIR} \
- --eval_dir=${PATH_TO_EVAL_DIR}
-```
-
-where `${PATH_TO_YOUR_PIPELINE_CONFIG}` points to the pipeline config,
-`${PATH_TO_TRAIN_DIR}` points to the directory in which training checkpoints
-were saved (same as the training job) and `${PATH_TO_EVAL_DIR}` points to the
-directory in which evaluation events will be saved. As with the training job,
-the eval job runs until terminated by default.
-
-## Running Tensorboard
-
-Progress for training and eval jobs can be inspected using Tensorboard. If
-using the recommended directory structure, Tensorboard can be run using the
-following command:
-
-```bash
-tensorboard --logdir=${PATH_TO_MODEL_DIRECTORY}
-```
-
-where `${PATH_TO_MODEL_DIRECTORY}` points to the directory that contains the
-train and eval directories. Please note it may take Tensorboard a couple minutes
-to populate with data.
diff --git a/object_detection/g3doc/running_notebook.md b/object_detection/g3doc/running_notebook.md
deleted file mode 100644
index c2b8ad18..00000000
--- a/object_detection/g3doc/running_notebook.md
+++ /dev/null
@@ -1,15 +0,0 @@
-# Quick Start: Jupyter notebook for off-the-shelf inference
-
-If you'd like to hit the ground running and run detection on a few example
-images right out of the box, we recommend trying out the Jupyter notebook demo.
-To run the Jupyter notebook, run the following command from
-`tensorflow/models/research/object_detection`:
-
-```
-# From tensorflow/models/research/object_detection
-jupyter notebook
-```
-
-The notebook should open in your favorite web browser. Click the
-[`object_detection_tutorial.ipynb`](../object_detection_tutorial.ipynb) link to
-open the demo.
diff --git a/object_detection/g3doc/running_on_cloud.md b/object_detection/g3doc/running_on_cloud.md
deleted file mode 100644
index 3cb2885e..00000000
--- a/object_detection/g3doc/running_on_cloud.md
+++ /dev/null
@@ -1,128 +0,0 @@
-# Running on Google Cloud Platform
-
-The Tensorflow Object Detection API supports distributed training on Google
-Cloud ML Engine. This section documents instructions on how to train and
-evaluate your model using Cloud ML. The reader should complete the following
-prerequisites:
-
-1. The reader has created and configured a project on Google Cloud Platform.
-See [the Cloud ML quick start guide](https://cloud.google.com/ml-engine/docs/quickstarts/command-line).
-2. The reader has installed the Tensorflow Object Detection API as documented
-in the [installation instructions](installation.md).
-3. The reader has a valid data set and stored it in a Google Cloud Storage
-bucket. See [this page](preparing_inputs.md) for instructions on how to generate
-a dataset for the PASCAL VOC challenge or the Oxford-IIIT Pet dataset.
-4. The reader has configured a valid Object Detection pipeline, and stored it
-in a Google Cloud Storage bucket. See [this page](configuring_jobs.md) for
-details on how to write a pipeline configuration.
-
-Additionally, it is recommended users test their job by running training and
-evaluation jobs for a few iterations
-[locally on their own machines](running_locally.md).
-
-## Packaging
-
-In order to run the Tensorflow Object Detection API on Cloud ML, it must be
-packaged (along with its TF-Slim dependency). The required packages can be
-created with the following command:
-
-``` bash
-# From tensorflow/models/research/
-python setup.py sdist
-(cd slim && python setup.py sdist)
-```
-
-This will create python packages in dist/object_detection-0.1.tar.gz and
-slim/dist/slim-0.1.tar.gz.
-
-## Running a Multiworker Training Job
-
-Google Cloud ML requires a YAML configuration file for a multiworker training
-job using GPUs. A sample YAML file is given below:
-
-```
-trainingInput:
- runtimeVersion: "1.0"
- scaleTier: CUSTOM
- masterType: standard_gpu
- workerCount: 9
- workerType: standard_gpu
- parameterServerCount: 3
- parameterServerType: standard
-
-
-```
-
-Please keep the following guidelines in mind when writing the YAML
-configuration:
-
-* A job with n workers will have n + 1 training machines (n workers + 1 master).
-* The number of parameter servers used should be an odd number to prevent
- a parameter server from storing only weight variables or only bias variables
- (due to round robin parameter scheduling).
-* The learning rate in the training config should be decreased when using a
- larger number of workers. Some experimentation is required to find the
- optimal learning rate.
-
-The YAML file should be saved on the local machine (not on GCP). Once it has
-been written, a user can start a training job on Cloud ML Engine using the
-following command:
-
-``` bash
-# From tensorflow/models/research/
-gcloud ml-engine jobs submit training object_detection_`date +%s` \
- --job-dir=gs://${TRAIN_DIR} \
- --packages dist/object_detection-0.1.tar.gz,slim/dist/slim-0.1.tar.gz \
- --module-name object_detection.train \
- --region us-central1 \
- --config ${PATH_TO_LOCAL_YAML_FILE} \
- -- \
- --train_dir=gs://${TRAIN_DIR} \
- --pipeline_config_path=gs://${PIPELINE_CONFIG_PATH}
-```
-
-Where `${PATH_TO_LOCAL_YAML_FILE}` is the local path to the YAML configuration,
-`gs://${TRAIN_DIR}` specifies the directory on Google Cloud Storage where the
-training checkpoints and events will be written to and
-`gs://${PIPELINE_CONFIG_PATH}` points to the pipeline configuration stored on
-Google Cloud Storage.
-
-Users can monitor the progress of their training job on the [ML Engine
-Dashboard](https://console.cloud.google.com/mlengine/jobs).
-
-## Running an Evaluation Job on Cloud
-
-Evaluation jobs run on a single machine, so it is not necessary to write a YAML
-configuration for evaluation. Run the following command to start the evaluation
-job:
-
-``` bash
-gcloud ml-engine jobs submit training object_detection_eval_`date +%s` \
- --job-dir=gs://${TRAIN_DIR} \
- --packages dist/object_detection-0.1.tar.gz,slim/dist/slim-0.1.tar.gz \
- --module-name object_detection.eval \
- --region us-central1 \
- --scale-tier BASIC_GPU \
- -- \
- --checkpoint_dir=gs://${TRAIN_DIR} \
- --eval_dir=gs://${EVAL_DIR} \
- --pipeline_config_path=gs://${PIPELINE_CONFIG_PATH}
-```
-
-Where `gs://${TRAIN_DIR}` points to the directory on Google Cloud Storage where
-training checkpoints are saved (same as the training job), `gs://${EVAL_DIR}`
-points to where evaluation events will be saved on Google Cloud Storage and
-`gs://${PIPELINE_CONFIG_PATH}` points to where the pipeline configuration is
-stored on Google Cloud Storage.
-
-## Running Tensorboard
-
-You can run Tensorboard locally on your own machine to view progress of your
-training and eval jobs on Google Cloud ML. Run the following command to start
-Tensorboard:
-
-``` bash
-tensorboard --logdir=gs://${YOUR_CLOUD_BUCKET}
-```
-
-Note it may take Tensorboard a few minutes to populate with results.
diff --git a/object_detection/g3doc/running_pets.md b/object_detection/g3doc/running_pets.md
deleted file mode 100644
index a82bc521..00000000
--- a/object_detection/g3doc/running_pets.md
+++ /dev/null
@@ -1,314 +0,0 @@
-# Quick Start: Distributed Training on the Oxford-IIIT Pets Dataset on Google Cloud
-
-This page is a walkthrough for training an object detector using the Tensorflow
-Object Detection API. In this tutorial, we'll be training on the Oxford-IIIT Pets
-dataset to build a system to detect various breeds of cats and dogs. The output
-of the detector will look like the following:
-
-
-
-## Setting up a Project on Google Cloud
-
-To accelerate the process, we'll run training and evaluation on [Google Cloud
-ML Engine](https://cloud.google.com/ml-engine/) to leverage multiple GPUs. To
-begin, you will have to set up Google Cloud via the following steps (if you have
-already done this, feel free to skip to the next section):
-
-1. [Create a GCP project](https://cloud.google.com/resource-manager/docs/creating-managing-projects).
-2. [Install the Google Cloud SDK](https://cloud.google.com/sdk/downloads) on
-your workstation or laptop.
-This will provide the tools you need to upload files to Google Cloud Storage and
-start ML training jobs.
-3. [Enable the ML Engine
-APIs](https://console.cloud.google.com/flows/enableapi?apiid=ml.googleapis.com,compute_component&_ga=1.73374291.1570145678.1496689256).
-By default, a new GCP project does not enable APIs to start ML Engine training
-jobs. Use the above link to explicitly enable them.
-4. [Set up a Google Cloud Storage (GCS)
-bucket](https://cloud.google.com/storage/docs/creating-buckets). ML Engine
-training jobs can only access files on a Google Cloud Storage bucket. In this
-tutorial, we'll be required to upload our dataset and configuration to GCS.
-
-Please remember the name of your GCS bucket, as we will reference it multiple
-times in this document. Substitute `${YOUR_GCS_BUCKET}` with the name of
-your bucket in this document. For your convenience, you should define the
-environment variable below:
-
-``` bash
-export YOUR_GCS_BUCKET=${YOUR_GCS_BUCKET}
-```
-
-## Installing Tensorflow and the Tensorflow Object Detection API
-
-Please run through the [installation instructions](installation.md) to install
-Tensorflow and all its dependencies. Ensure the Protobuf libraries are
-compiled and the library directories are added to `PYTHONPATH`.
-
-## Getting the Oxford-IIIT Pets Dataset and Uploading it to Google Cloud Storage
-
-In order to train a detector, we require a dataset of images, bounding boxes and
-classifications. For this demo, we'll use the Oxford-IIIT Pets dataset. The raw
-dataset for Oxford-IIIT Pets lives
-[here](http://www.robots.ox.ac.uk/~vgg/data/pets/). You will need to download
-both the image dataset [`images.tar.gz`](http://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz)
-and the groundtruth data [`annotations.tar.gz`](http://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz)
-to the `tensorflow/models/research/` directory and unzip them. This may take
-some time.
-
-``` bash
-# From tensorflow/models/research/
-wget http://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz
-wget http://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz
-tar -xvf images.tar.gz
-tar -xvf annotations.tar.gz
-```
-
-After downloading the tarballs, your `tensorflow/models/research/` directory
-should appear as follows:
-
-```lang-none
-- images.tar.gz
-- annotations.tar.gz
-+ images/
-+ annotations/
-+ object_detection/
-... other files and directories
-```
-
-The Tensorflow Object Detection API expects data to be in the TFRecord format,
-so we'll now run the `create_pet_tf_record` script to convert from the raw
-Oxford-IIIT Pet dataset into TFRecords. Run the following commands from the
-`tensorflow/models/research/` directory:
-
-``` bash
-# From tensorflow/models/research/
-python object_detection/dataset_tools/create_pet_tf_record.py \
- --label_map_path=object_detection/data/pet_label_map.pbtxt \
- --data_dir=`pwd` \
- --output_dir=`pwd`
-```
-
-Note: It is normal to see some warnings when running this script. You may ignore
-them.
-
-Two TFRecord files named `pet_train.record` and `pet_val.record` should be
-generated in the `tensorflow/models/research/` directory.
-
-Now that the data has been generated, we'll need to upload it to Google Cloud
-Storage so the data can be accessed by ML Engine. Run the following command to
-copy the files into your GCS bucket (substituting `${YOUR_GCS_BUCKET}`):
-
-``` bash
-# From tensorflow/models/research/
-gsutil cp pet_train.record gs://${YOUR_GCS_BUCKET}/data/pet_train.record
-gsutil cp pet_val.record gs://${YOUR_GCS_BUCKET}/data/pet_val.record
-gsutil cp object_detection/data/pet_label_map.pbtxt gs://${YOUR_GCS_BUCKET}/data/pet_label_map.pbtxt
-```
-
-Please remember the path where you upload the data to, as we will need this
-information when configuring the pipeline in a following step.
-
-## Downloading a COCO-pretrained Model for Transfer Learning
-
-Training a state of the art object detector from scratch can take days, even
-when using multiple GPUs! In order to speed up training, we'll take an object
-detector trained on a different dataset (COCO), and reuse some of its
-parameters to initialize our new model.
-
-Download our [COCO-pretrained Faster R-CNN with Resnet-101
-model](http://storage.googleapis.com/download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_coco_11_06_2017.tar.gz).
-Unzip the contents of the folder and copy the `model.ckpt*` files into your GCS
-Bucket.
-
-``` bash
-wget http://storage.googleapis.com/download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_coco_11_06_2017.tar.gz
-tar -xvf faster_rcnn_resnet101_coco_11_06_2017.tar.gz
-gsutil cp faster_rcnn_resnet101_coco_11_06_2017/model.ckpt.* gs://${YOUR_GCS_BUCKET}/data/
-```
-
-Remember the path where you uploaded the model checkpoint to, as we will need it
-in the following step.
-
-## Configuring the Object Detection Pipeline
-
-In the Tensorflow Object Detection API, the model parameters, training
-parameters and eval parameters are all defined by a config file. More details
-can be found [here](configuring_jobs.md). For this tutorial, we will use some
-predefined templates provided with the source code. In the
-`object_detection/samples/configs` folder, there are skeleton object_detection
-configuration files. We will use `faster_rcnn_resnet101_pets.config` as a
-starting point for configuring the pipeline. Open the file with your favourite
-text editor.
-
-We'll need to configure some paths in order for the template to work. Search the
-file for instances of `PATH_TO_BE_CONFIGURED` and replace them with the
-appropriate value (typically `gs://${YOUR_GCS_BUCKET}/data/`). Afterwards
-upload your edited file onto GCS, making note of the path it was uploaded to
-(we'll need it when starting the training/eval jobs).
-
-``` bash
-# From tensorflow/models/research/
-
-# Edit the faster_rcnn_resnet101_pets.config template. Please note that there
-# are multiple places where PATH_TO_BE_CONFIGURED needs to be set.
-sed -i "s|PATH_TO_BE_CONFIGURED|"gs://${YOUR_GCS_BUCKET}"/data|g" \
- object_detection/samples/configs/faster_rcnn_resnet101_pets.config
-
-# Copy edited template to cloud.
-gsutil cp object_detection/samples/configs/faster_rcnn_resnet101_pets.config \
- gs://${YOUR_GCS_BUCKET}/data/faster_rcnn_resnet101_pets.config
-```
-
-## Checking Your Google Cloud Storage Bucket
-
-At this point in the tutorial, you should have uploaded the training/validation
-datasets (including label map), our COCO trained FasterRCNN finetune checkpoint and your job
-configuration to your Google Cloud Storage Bucket. Your bucket should look like
-the following:
-
-```lang-none
-+ ${YOUR_GCS_BUCKET}/
- + data/
- - faster_rcnn_resnet101_pets.config
- - model.ckpt.index
- - model.ckpt.meta
- - model.ckpt.data-00000-of-00001
- - pet_label_map.pbtxt
- - pet_train.record
- - pet_val.record
-```
-
-You can inspect your bucket using the [Google Cloud Storage
-browser](https://console.cloud.google.com/storage/browser).
-
-## Starting Training and Evaluation Jobs on Google Cloud ML Engine
-
-Before we can start a job on Google Cloud ML Engine, we must:
-
-1. Package the Tensorflow Object Detection code.
-2. Write a cluster configuration for our Google Cloud ML job.
-
-To package the Tensorflow Object Detection code, run the following commands from
-the `tensorflow/models/research/` directory:
-
-``` bash
-# From tensorflow/models/research/
-python setup.py sdist
-(cd slim && python setup.py sdist)
-```
-
-You should see two tar.gz files created at `dist/object_detection-0.1.tar.gz`
-and `slim/dist/slim-0.1.tar.gz`.
-
-For running the training Cloud ML job, we'll configure the cluster to use 10
-training jobs (1 master + 9 workers) and three parameter servers. The
-configuration file can be found at `object_detection/samples/cloud/cloud.yml`.
-
-To start training, execute the following command from the
-`tensorflow/models/research/` directory:
-
-``` bash
-# From tensorflow/models/research/
-gcloud ml-engine jobs submit training `whoami`_object_detection_`date +%s` \
- --job-dir=gs://${YOUR_GCS_BUCKET}/train \
- --packages dist/object_detection-0.1.tar.gz,slim/dist/slim-0.1.tar.gz \
- --module-name object_detection.train \
- --region us-central1 \
- --config object_detection/samples/cloud/cloud.yml \
- -- \
- --train_dir=gs://${YOUR_GCS_BUCKET}/train \
- --pipeline_config_path=gs://${YOUR_GCS_BUCKET}/data/faster_rcnn_resnet101_pets.config
-```
-
-Once training has started, we can run an evaluation concurrently:
-
-``` bash
-# From tensorflow/models/research/
-gcloud ml-engine jobs submit training `whoami`_object_detection_eval_`date +%s` \
- --job-dir=gs://${YOUR_GCS_BUCKET}/train \
- --packages dist/object_detection-0.1.tar.gz,slim/dist/slim-0.1.tar.gz \
- --module-name object_detection.eval \
- --region us-central1 \
- --scale-tier BASIC_GPU \
- -- \
- --checkpoint_dir=gs://${YOUR_GCS_BUCKET}/train \
- --eval_dir=gs://${YOUR_GCS_BUCKET}/eval \
- --pipeline_config_path=gs://${YOUR_GCS_BUCKET}/data/faster_rcnn_resnet101_pets.config
-```
-
-Note: Even though we're running an evaluation job, the `gcloud ml-engine jobs
-submit training` command is correct. ML Engine does not distinguish between
-training and evaluation jobs.
-
-Users can monitor and stop training and evaluation jobs on the [ML Engine
-Dashboard](https://console.cloud.google.com/mlengine/jobs).
-
-## Monitoring Progress with Tensorboard
-
-You can monitor progress of the training and eval jobs by running Tensorboard on
-your local machine:
-
-``` bash
-# This command needs to be run once to allow your local machine to access your
-# GCS bucket.
-gcloud auth application-default login
-
-tensorboard --logdir=gs://${YOUR_GCS_BUCKET}
-```
-
-Once Tensorboard is running, navigate to `localhost:6006` from your favourite
-web browser. You should see something similar to the following:
-
-
-
-You will also want to click on the images tab to see example detections made by
-the model while it trains. After about an hour and a half of training, you can
-expect to see something like this:
-
-
-
-Note: It takes roughly 10 minutes for a job to get started on ML Engine, and
-roughly an hour for the system to evaluate the validation dataset. It may take
-some time to populate the dashboards. If you do not see any entries after half
-an hour, check the logs from the [ML Engine
-Dashboard](https://console.cloud.google.com/mlengine/jobs). Note that by default
-the training jobs are configured to go for much longer than is necessary for
-convergence. To save money, we recommend killing your jobs once you've seen
-that they've converged.
-
-## Exporting the Tensorflow Graph
-
-After your model has been trained, you should export it to a Tensorflow
-graph proto. First, you need to identify a candidate checkpoint to export. You
-can search your bucket using the [Google Cloud Storage
-Browser](https://console.cloud.google.com/storage/browser). The file should be
-stored under `${YOUR_GCS_BUCKET}/train`. The checkpoint will typically consist of
-three files:
-
-* `model.ckpt-${CHECKPOINT_NUMBER}.data-00000-of-00001`
-* `model.ckpt-${CHECKPOINT_NUMBER}.index`
-* `model.ckpt-${CHECKPOINT_NUMBER}.meta`
-
-After you've identified a candidate checkpoint to export, run the following
-command from `tensorflow/models/research/`:
-
-``` bash
-# From tensorflow/models/research/
-gsutil cp gs://${YOUR_GCS_BUCKET}/train/model.ckpt-${CHECKPOINT_NUMBER}.* .
-python object_detection/export_inference_graph.py \
- --input_type image_tensor \
- --pipeline_config_path object_detection/samples/configs/faster_rcnn_resnet101_pets.config \
- --trained_checkpoint_prefix model.ckpt-${CHECKPOINT_NUMBER} \
- --output_directory exported_graphs
-```
-
-Afterwards, you should see a directory named `exported_graphs` containing the
-SavedModel and frozen graph.
-
-## What's Next
-
-Congratulations, you have now trained an object detector for various cats and
-dogs! There different things you can do now:
-
-1. [Test your exported model using the provided Jupyter notebook.](running_notebook.md)
-2. [Experiment with different model configurations.](configuring_jobs.md)
-3. Train an object detector using your own data.
diff --git a/object_detection/g3doc/using_your_own_dataset.md b/object_detection/g3doc/using_your_own_dataset.md
deleted file mode 100644
index c403930e..00000000
--- a/object_detection/g3doc/using_your_own_dataset.md
+++ /dev/null
@@ -1,157 +0,0 @@
-# Preparing Inputs
-
-To use your own dataset in Tensorflow Object Detection API, you must convert it
-into the [TFRecord file format](https://www.tensorflow.org/api_guides/python/python_io#tfrecords_format_details).
-This document outlines how to write a script to generate the TFRecord file.
-
-## Label Maps
-
-Each dataset is required to have a label map associated with it. This label map
-defines a mapping from string class names to integer class Ids. The label map
-should be a `StringIntLabelMap` text protobuf. Sample label maps can be found in
-object_detection/data. Label maps should always start from id 1.
-
-## Dataset Requirements
-
-For every example in your dataset, you should have the following information:
-
-1. An RGB image for the dataset encoded as jpeg or png.
-2. A list of bounding boxes for the image. Each bounding box should contain:
- 1. A bounding box coordinates (with origin in top left corner) defined by 4
- floating point numbers [ymin, xmin, ymax, xmax]. Note that we store the
- _normalized_ coordinates (x / width, y / height) in the TFRecord dataset.
- 2. The class of the object in the bounding box.
-
-# Example Image
-
-Consider the following image:
-
-
-
-with the following label map:
-
-```
-item {
- id: 1
- name: 'Cat'
-}
-
-
-item {
- id: 2
- name: 'Dog'
-}
-```
-
-We can generate a tf.Example proto for this image using the following code:
-
-```python
-
-def create_cat_tf_example(encoded_cat_image_data):
- """Creates a tf.Example proto from sample cat image.
-
- Args:
- encoded_cat_image_data: The jpg encoded data of the cat image.
-
- Returns:
- example: The created tf.Example.
- """
-
- height = 1032.0
- width = 1200.0
- filename = 'example_cat.jpg'
- image_format = b'jpg'
-
- xmins = [322.0 / 1200.0]
- xmaxs = [1062.0 / 1200.0]
- ymins = [174.0 / 1032.0]
- ymaxs = [761.0 / 1032.0]
- classes_text = ['Cat']
- classes = [1]
-
- tf_example = tf.train.Example(features=tf.train.Features(feature={
- 'image/height': dataset_util.int64_feature(height),
- 'image/width': dataset_util.int64_feature(width),
- 'image/filename': dataset_util.bytes_feature(filename),
- 'image/source_id': dataset_util.bytes_feature(filename),
- 'image/encoded': dataset_util.bytes_feature(encoded_image_data),
- 'image/format': dataset_util.bytes_feature(image_format),
- 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
- 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
- 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
- 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
- 'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
- 'image/object/class/label': dataset_util.int64_list_feature(classes),
- }))
- return tf_example
-```
-
-## Conversion Script Outline
-
-A typical conversion script will look like the following:
-
-```python
-
-import tensorflow as tf
-
-from object_detection.utils import dataset_util
-
-
-flags = tf.app.flags
-flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
-FLAGS = flags.FLAGS
-
-
-def create_tf_example(example):
- # TODO(user): Populate the following variables from your example.
- height = None # Image height
- width = None # Image width
- filename = None # Filename of the image. Empty if image is not from file
- encoded_image_data = None # Encoded image bytes
- image_format = None # b'jpeg' or b'png'
-
- xmins = [] # List of normalized left x coordinates in bounding box (1 per box)
- xmaxs = [] # List of normalized right x coordinates in bounding box
- # (1 per box)
- ymins = [] # List of normalized top y coordinates in bounding box (1 per box)
- ymaxs = [] # List of normalized bottom y coordinates in bounding box
- # (1 per box)
- classes_text = [] # List of string class name of bounding box (1 per box)
- classes = [] # List of integer class id of bounding box (1 per box)
-
- tf_example = tf.train.Example(features=tf.train.Features(feature={
- 'image/height': dataset_util.int64_feature(height),
- 'image/width': dataset_util.int64_feature(width),
- 'image/filename': dataset_util.bytes_feature(filename),
- 'image/source_id': dataset_util.bytes_feature(filename),
- 'image/encoded': dataset_util.bytes_feature(encoded_image_data),
- 'image/format': dataset_util.bytes_feature(image_format),
- 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
- 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
- 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
- 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
- 'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
- 'image/object/class/label': dataset_util.int64_list_feature(classes),
- }))
- return tf_example
-
-
-def main(_):
- writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
-
- # TODO(user): Write code to read in your dataset to examples variable
-
- for example in examples:
- tf_example = create_tf_example(example)
- writer.write(tf_example.SerializeToString())
-
- writer.close()
-
-
-if __name__ == '__main__':
- tf.app.run()
-
-```
-
-Note: You may notice additional fields in some other datasets. They are
-currently unused by the API and are optional.
diff --git a/object_detection/inference.py b/object_detection/inference.py
deleted file mode 100644
index 7af17191..00000000
--- a/object_detection/inference.py
+++ /dev/null
@@ -1,209 +0,0 @@
-import numpy as np
-import os
-import six.moves.urllib as urllib
-import sys
-import tarfile
-import tensorflow as tf
-import zipfile
-import cv2
-import glob
-import time
-import argparse
-from multiprocessing import Process, Queue, Event
-
-from collections import defaultdict
-from io import StringIO
-from matplotlib import pyplot as plt
-from PIL import Image
-from utils import label_map_util
-from utils import visualization_utils as vis_util
-
-def load_image_into_numpy_array(image):
- (im_width, im_height) = image.size
- return np.array(image.getdata()).reshape((im_height, im_width, 3)).astype(np.uint8)
-
-
-def load_details(args):
-
- PATH_TO_CKPT = args.frozen_graph
- PATH_TO_LABELS = args.label_map
- NUM_CLASSES = args.num_output_classes
- PATH_TO_TEST_IMAGES_DIR = args.input_dir
- PATH_TO_RESULT_IMAGES_DIR = args.output_dir
-
- if not os.path.exists(args.output_dir):
- os.mkdir(args.output_dir)
-
- TEST_IMAGE_PATHS = sorted(glob.glob(os.path.join(PATH_TO_TEST_IMAGES_DIR, '*.jpg')))
- JPG_PATHS = [ os.path.basename(path) for path in TEST_IMAGE_PATHS ]
- RESULT_IMAGE_PATHS = [ os.path.join(PATH_TO_RESULT_IMAGES_DIR, jpg_path) for jpg_path in JPG_PATHS ]
-
- label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
- categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
- category_index = label_map_util.create_category_index(categories)
-
- return TEST_IMAGE_PATHS, RESULT_IMAGE_PATHS, category_index
-
-
-def feed(queue, args):
-
- """
- Queue that reads images from disk.
- All GPU worker processes poll from this queue for input.
- """
-
- TEST_IMAGE_PATHS, RESULT_IMAGE_PATHS, _ = load_details(args)
- key = 0
- for image_path, result_path in zip(TEST_IMAGE_PATHS, RESULT_IMAGE_PATHS):
- key+=1
- image_np = cv2.imread(image_path, 1)
- image_np_expanded = np.expand_dims(image_np, axis=0)
- queue.put((image_np, image_np_expanded, result_path, key))
-
-
-def infer(args, feed_queue, stitch_queue, completed, gpu_id):
-
- """
- Binds a process to a GPU and uses it for inference
- """
-
- config = tf.ConfigProto(allow_soft_placement = True)
- config.gpu_options.allow_growth = False
- config.gpu_options.per_process_gpu_memory_fraction = 0.75 / args.n_jobs
-
- # Scaling 0.75 down by args.n_jobs is required because total GPU memory is sum of
- # memory of all available GPUs. Since, we need each GPU to use 75% of a single GPU
- # memory, we have to multiple total memory by (0.75/args.n_jobs)
-
- detection_graph = tf.Graph()
- with detection_graph.device('/gpu:' + str(gpu_id)):
- with detection_graph.as_default():
- od_graph_def = tf.GraphDef()
- with tf.gfile.GFile(args.frozen_graph, 'rb') as fid:
- serialized_graph = fid.read()
- od_graph_def.ParseFromString(serialized_graph)
- tf.import_graph_def(od_graph_def, name='')
-
- with tf.Session(graph=detection_graph, config=config) as sess:
-
- # Fetching tensors from the graph
- image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
- detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
- detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
- detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
- num_detections = detection_graph.get_tensor_by_name('num_detections:0')
-
- while True:
- if not feed_queue.empty():
- begin = time.time()
- image_np, image_np_expanded, result_path, key = feed_queue.get()
-
- (boxes, scores, classes, num) = sess.run(
- [detection_boxes, detection_scores, detection_classes, num_detections],
- feed_dict={image_tensor: image_np_expanded})
-
- FPS2 = 1/(time.time() - begin)
- stitch_queue.put((boxes, scores, classes, num, image_np, result_path, FPS2, key))
-
- if completed.is_set():
- break
-
- print('Done')
-
-
-def stitch(queue, completed, args):
-
- """
- Stitches frames inorder
- """
-
- TEST_IMAGE_PATHS, RESULT_IMAGE_PATHS, category_index = load_details(args)
- SQ = lambda x: np.squeeze(x)
- total_frames = len(RESULT_IMAGE_PATHS)
- first_frame = time.time()
- process_buffer = {}
- current_frame = 1
-
- print('Processing...')
- while True:
- if not queue.empty():
- boxes, scores, classes, count, image_np, result_path, FPS2, key = queue.get()
- process_buffer[key] = (boxes, scores, classes, count, image_np, result_path, FPS2)
-
- # Keeps polling for the next frame
- current_objects = process_buffer.pop(current_frame, None)
-
- if current_objects is not None:
-
- begin = time.time()
- (boxes, scores, classes, count, image_np, result_path, FPS2) = current_objects
- boxes, classes, scores = SQ(boxes), SQ(classes).astype(np.int32), SQ(scores)
-
- vis_util.visualize_boxes_and_labels_on_image_array(
- image_np,
- boxes,
- classes,
- scores,
- category_index,
- use_normalized_coordinates=True,
- line_thickness=8)
-
- cv2.imwrite(result_path, image_np)
-
- FPS = 1 / (time.time() - begin)
- log = 'Images Processed: %d Count: %d Process+Stitch_FPS: %.2f Process_FPS: %.2f ' % (key, count, FPS, FPS2)
-
- with open(os.path.join('logs' + str(args.n_jobs) + '.txt'), 'w') as file:
- file.write(log + '\n')
- if key == total_frames-1:
- file.write("Time Taken -> %.2f \n" % (time.time() - first_frame))
-
- if key == total_frames-1:
- print("Time Taken -> ", time.time() - first_frame)
-
- current_frame += 1
-
- if current_frame == total_frames:
- completed.set()
- break
-
-
-if __name__ == "__main__":
-
- parser = argparse.ArgumentParser()
-
- parser.add_argument("--input_dir", help = "Path of the input images directory")
- parser.add_argument("--frozen_graph", help = "Path of the frozen graph model")
- parser.add_argument("--label_map", help = "Path of the label map file")
- parser.add_argument("--output_dir", help = "Path of the output directory")
- parser.add_argument("--num_output_classes", help="Defines the number of output classes", type=int)
- parser.add_argument("--n_jobs", help="Number of GPU jobs in parallel", type=int)
- parser.add_argument("--delay", help="Delay for queue in seconds", type=int, default=0)
-
- args = parser.parse_args()
-
- # Initializing queues and events
- stitch_queue = Queue()
- feed_queue = Queue()
- completed = Event()
-
- gpu_workers = []
-
- # Creating processes for GPU inference, loading data and stitching data
- for gpu_id in range(args.n_jobs):
- gpu_workers.append(Process(target=infer, args=(args, feed_queue, stitch_queue, completed, gpu_id)))
- stitch_cpu = Process(target=stitch, args=(stitch_queue, completed, args))
- feed_cpu = Process(target=feed, args=(feed_queue, args))
-
- # Optional delay to give imread a head start
- feed_cpu.start()
- time.sleep(args.delay)
-
- stitch_cpu.start()
- for gpu in gpu_workers:
- gpu.start()
-
- feed_cpu.join()
- stitch_cpu.join()
- for gpu in gpu_workers:
- gpu.join()
diff --git a/object_detection/inference/BUILD b/object_detection/inference/BUILD
deleted file mode 100644
index c36df0d0..00000000
--- a/object_detection/inference/BUILD
+++ /dev/null
@@ -1,40 +0,0 @@
-# Tensorflow Object Detection API: main runnables.
-
-package(
- default_visibility = ["//visibility:public"],
-)
-
-licenses(["notice"])
-
-# Apache 2.0
-
-py_library(
- name = "detection_inference",
- srcs = ["detection_inference.py"],
- deps = [
- "//tensorflow",
- "//tensorflow_models/object_detection/core:standard_fields",
- ],
-)
-
-py_test(
- name = "detection_inference_test",
- srcs = ["detection_inference_test.py"],
- deps = [
- ":detection_inference",
- "//third_party/py/PIL:pil",
- "//third_party/py/numpy",
- "//tensorflow",
- "//tensorflow_models/object_detection/core:standard_fields",
- "//tensorflow_models/object_detection/utils:dataset_util",
- ],
-)
-
-py_binary(
- name = "infer_detections",
- srcs = ["infer_detections.py"],
- deps = [
- ":detection_inference",
- "//tensorflow",
- ],
-)
diff --git a/object_detection/inference/detection_inference.py b/object_detection/inference/detection_inference.py
deleted file mode 100644
index dc66686f..00000000
--- a/object_detection/inference/detection_inference.py
+++ /dev/null
@@ -1,141 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Utility functions for detection inference."""
-from __future__ import division
-
-import tensorflow as tf
-
-from object_detection.core import standard_fields
-
-
-def build_input(tfrecord_paths):
- """Builds the graph's input.
-
- Args:
- tfrecord_paths: List of paths to the input TFRecords
-
- Returns:
- serialized_example_tensor: The next serialized example. String scalar Tensor
- image_tensor: The decoded image of the example. Uint8 tensor,
- shape=[1, None, None,3]
- """
- filename_queue = tf.train.string_input_producer(
- tfrecord_paths, shuffle=False, num_epochs=1)
-
- tf_record_reader = tf.TFRecordReader()
- _, serialized_example_tensor = tf_record_reader.read(filename_queue)
- features = tf.parse_single_example(
- serialized_example_tensor,
- features={
- standard_fields.TfExampleFields.image_encoded:
- tf.FixedLenFeature([], tf.string),
- })
- encoded_image = features[standard_fields.TfExampleFields.image_encoded]
- image_tensor = tf.image.decode_image(encoded_image, channels=3)
- image_tensor.set_shape([None, None, 3])
- image_tensor = tf.expand_dims(image_tensor, 0)
-
- return serialized_example_tensor, image_tensor
-
-
-def build_inference_graph(image_tensor, inference_graph_path):
- """Loads the inference graph and connects it to the input image.
-
- Args:
- image_tensor: The input image. uint8 tensor, shape=[1, None, None, 3]
- inference_graph_path: Path to the inference graph with embedded weights
-
- Returns:
- detected_boxes_tensor: Detected boxes. Float tensor,
- shape=[num_detections, 4]
- detected_scores_tensor: Detected scores. Float tensor,
- shape=[num_detections]
- detected_labels_tensor: Detected labels. Int64 tensor,
- shape=[num_detections]
- """
- with tf.gfile.Open(inference_graph_path, 'r') as graph_def_file:
- graph_content = graph_def_file.read()
- graph_def = tf.GraphDef()
- graph_def.MergeFromString(graph_content)
-
- tf.import_graph_def(
- graph_def, name='', input_map={'image_tensor': image_tensor})
-
- g = tf.get_default_graph()
-
- num_detections_tensor = tf.squeeze(
- g.get_tensor_by_name('num_detections:0'), 0)
- num_detections_tensor = tf.cast(num_detections_tensor, tf.int32)
-
- detected_boxes_tensor = tf.squeeze(
- g.get_tensor_by_name('detection_boxes:0'), 0)
- detected_boxes_tensor = detected_boxes_tensor[:num_detections_tensor]
-
- detected_scores_tensor = tf.squeeze(
- g.get_tensor_by_name('detection_scores:0'), 0)
- detected_scores_tensor = detected_scores_tensor[:num_detections_tensor]
-
- detected_labels_tensor = tf.squeeze(
- g.get_tensor_by_name('detection_classes:0'), 0)
- detected_labels_tensor = tf.cast(detected_labels_tensor, tf.int64)
- detected_labels_tensor = detected_labels_tensor[:num_detections_tensor]
-
- return detected_boxes_tensor, detected_scores_tensor, detected_labels_tensor
-
-
-def infer_detections_and_add_to_example(
- serialized_example_tensor, detected_boxes_tensor, detected_scores_tensor,
- detected_labels_tensor, discard_image_pixels):
- """Runs the supplied tensors and adds the inferred detections to the example.
-
- Args:
- serialized_example_tensor: Serialized TF example. Scalar string tensor
- detected_boxes_tensor: Detected boxes. Float tensor,
- shape=[num_detections, 4]
- detected_scores_tensor: Detected scores. Float tensor,
- shape=[num_detections]
- detected_labels_tensor: Detected labels. Int64 tensor,
- shape=[num_detections]
- discard_image_pixels: If true, discards the image from the result
- Returns:
- The de-serialized TF example augmented with the inferred detections.
- """
- tf_example = tf.train.Example()
- (serialized_example, detected_boxes, detected_scores,
- detected_classes) = tf.get_default_session().run([
- serialized_example_tensor, detected_boxes_tensor, detected_scores_tensor,
- detected_labels_tensor
- ])
- detected_boxes = detected_boxes.T
-
- tf_example.ParseFromString(serialized_example)
- feature = tf_example.features.feature
- feature[standard_fields.TfExampleFields.
- detection_score].float_list.value[:] = detected_scores
- feature[standard_fields.TfExampleFields.
- detection_bbox_ymin].float_list.value[:] = detected_boxes[0]
- feature[standard_fields.TfExampleFields.
- detection_bbox_xmin].float_list.value[:] = detected_boxes[1]
- feature[standard_fields.TfExampleFields.
- detection_bbox_ymax].float_list.value[:] = detected_boxes[2]
- feature[standard_fields.TfExampleFields.
- detection_bbox_xmax].float_list.value[:] = detected_boxes[3]
- feature[standard_fields.TfExampleFields.
- detection_class_label].int64_list.value[:] = detected_classes
-
- if discard_image_pixels:
- del feature[standard_fields.TfExampleFields.image_encoded]
-
- return tf_example
diff --git a/object_detection/inference/detection_inference_test.py b/object_detection/inference/detection_inference_test.py
deleted file mode 100644
index eabb6b47..00000000
--- a/object_detection/inference/detection_inference_test.py
+++ /dev/null
@@ -1,176 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-r"""Tests for detection_inference.py."""
-
-import os
-import StringIO
-
-import numpy as np
-from PIL import Image
-import tensorflow as tf
-
-from object_detection.core import standard_fields
-from object_detection.inference import detection_inference
-from object_detection.utils import dataset_util
-
-
-def get_mock_tfrecord_path():
- return os.path.join(tf.test.get_temp_dir(), 'mock.tfrec')
-
-
-def create_mock_tfrecord():
- pil_image = Image.fromarray(np.array([[[123, 0, 0]]], dtype=np.uint8), 'RGB')
- image_output_stream = StringIO.StringIO()
- pil_image.save(image_output_stream, format='png')
- encoded_image = image_output_stream.getvalue()
-
- feature_map = {
- 'test_field':
- dataset_util.float_list_feature([1, 2, 3, 4]),
- standard_fields.TfExampleFields.image_encoded:
- dataset_util.bytes_feature(encoded_image),
- }
-
- tf_example = tf.train.Example(features=tf.train.Features(feature=feature_map))
- with tf.python_io.TFRecordWriter(get_mock_tfrecord_path()) as writer:
- writer.write(tf_example.SerializeToString())
-
-
-def get_mock_graph_path():
- return os.path.join(tf.test.get_temp_dir(), 'mock_graph.pb')
-
-
-def create_mock_graph():
- g = tf.Graph()
- with g.as_default():
- in_image_tensor = tf.placeholder(
- tf.uint8, shape=[1, None, None, 3], name='image_tensor')
- tf.constant([2.0], name='num_detections')
- tf.constant(
- [[[0, 0.8, 0.7, 1], [0.1, 0.2, 0.8, 0.9], [0.2, 0.3, 0.4, 0.5]]],
- name='detection_boxes')
- tf.constant([[0.1, 0.2, 0.3]], name='detection_scores')
- tf.identity(
- tf.constant([[1.0, 2.0, 3.0]]) *
- tf.reduce_sum(tf.cast(in_image_tensor, dtype=tf.float32)),
- name='detection_classes')
- graph_def = g.as_graph_def()
-
- with tf.gfile.Open(get_mock_graph_path(), 'w') as fl:
- fl.write(graph_def.SerializeToString())
-
-
-class InferDetectionsTests(tf.test.TestCase):
-
- def test_simple(self):
- create_mock_graph()
- create_mock_tfrecord()
-
- serialized_example_tensor, image_tensor = detection_inference.build_input(
- [get_mock_tfrecord_path()])
- self.assertAllEqual(image_tensor.get_shape().as_list(), [1, None, None, 3])
-
- (detected_boxes_tensor, detected_scores_tensor,
- detected_labels_tensor) = detection_inference.build_inference_graph(
- image_tensor, get_mock_graph_path())
-
- with self.test_session(use_gpu=False) as sess:
- sess.run(tf.global_variables_initializer())
- sess.run(tf.local_variables_initializer())
- tf.train.start_queue_runners()
-
- tf_example = detection_inference.infer_detections_and_add_to_example(
- serialized_example_tensor, detected_boxes_tensor,
- detected_scores_tensor, detected_labels_tensor, False)
-
- self.assertProtoEquals(r"""
- features {
- feature {
- key: "image/detection/bbox/ymin"
- value { float_list { value: [0.0, 0.1] } } }
- feature {
- key: "image/detection/bbox/xmin"
- value { float_list { value: [0.8, 0.2] } } }
- feature {
- key: "image/detection/bbox/ymax"
- value { float_list { value: [0.7, 0.8] } } }
- feature {
- key: "image/detection/bbox/xmax"
- value { float_list { value: [1.0, 0.9] } } }
- feature {
- key: "image/detection/label"
- value { int64_list { value: [123, 246] } } }
- feature {
- key: "image/detection/score"
- value { float_list { value: [0.1, 0.2] } } }
- feature {
- key: "image/encoded"
- value { bytes_list { value:
- "\211PNG\r\n\032\n\000\000\000\rIHDR\000\000\000\001\000\000"
- "\000\001\010\002\000\000\000\220wS\336\000\000\000\022IDATx"
- "\234b\250f`\000\000\000\000\377\377\003\000\001u\000|gO\242"
- "\213\000\000\000\000IEND\256B`\202" } } }
- feature {
- key: "test_field"
- value { float_list { value: [1.0, 2.0, 3.0, 4.0] } } } }
- """, tf_example)
-
- def test_discard_image(self):
- create_mock_graph()
- create_mock_tfrecord()
-
- serialized_example_tensor, image_tensor = detection_inference.build_input(
- [get_mock_tfrecord_path()])
- (detected_boxes_tensor, detected_scores_tensor,
- detected_labels_tensor) = detection_inference.build_inference_graph(
- image_tensor, get_mock_graph_path())
-
- with self.test_session(use_gpu=False) as sess:
- sess.run(tf.global_variables_initializer())
- sess.run(tf.local_variables_initializer())
- tf.train.start_queue_runners()
-
- tf_example = detection_inference.infer_detections_and_add_to_example(
- serialized_example_tensor, detected_boxes_tensor,
- detected_scores_tensor, detected_labels_tensor, True)
-
- self.assertProtoEquals(r"""
- features {
- feature {
- key: "image/detection/bbox/ymin"
- value { float_list { value: [0.0, 0.1] } } }
- feature {
- key: "image/detection/bbox/xmin"
- value { float_list { value: [0.8, 0.2] } } }
- feature {
- key: "image/detection/bbox/ymax"
- value { float_list { value: [0.7, 0.8] } } }
- feature {
- key: "image/detection/bbox/xmax"
- value { float_list { value: [1.0, 0.9] } } }
- feature {
- key: "image/detection/label"
- value { int64_list { value: [123, 246] } } }
- feature {
- key: "image/detection/score"
- value { float_list { value: [0.1, 0.2] } } }
- feature {
- key: "test_field"
- value { float_list { value: [1.0, 2.0, 3.0, 4.0] } } } }
- """, tf_example)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/inference/infer_detections.py b/object_detection/inference/infer_detections.py
deleted file mode 100644
index a251009e..00000000
--- a/object_detection/inference/infer_detections.py
+++ /dev/null
@@ -1,96 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-r"""Infers detections on a TFRecord of TFExamples given an inference graph.
-
-Example usage:
- ./infer_detections \
- --input_tfrecord_paths=/path/to/input/tfrecord1,/path/to/input/tfrecord2 \
- --output_tfrecord_path_prefix=/path/to/output/detections.tfrecord \
- --inference_graph=/path/to/frozen_weights_inference_graph.pb
-
-The output is a TFRecord of TFExamples. Each TFExample from the input is first
-augmented with detections from the inference graph and then copied to the
-output.
-
-The input and output nodes of the inference graph are expected to have the same
-types, shapes, and semantics, as the input and output nodes of graphs produced
-by export_inference_graph.py, when run with --input_type=image_tensor.
-
-The script can also discard the image pixels in the output. This greatly
-reduces the output size and can potentially accelerate reading data in
-subsequent processing steps that don't require the images (e.g. computing
-metrics).
-"""
-
-import itertools
-import tensorflow as tf
-from object_detection.inference import detection_inference
-
-tf.flags.DEFINE_string('input_tfrecord_paths', None,
- 'A comma separated list of paths to input TFRecords.')
-tf.flags.DEFINE_string('output_tfrecord_path', None,
- 'Path to the output TFRecord.')
-tf.flags.DEFINE_string('inference_graph', None,
- 'Path to the inference graph with embedded weights.')
-tf.flags.DEFINE_boolean('discard_image_pixels', False,
- 'Discards the images in the output TFExamples. This'
- ' significantly reduces the output size and is useful'
- ' if the subsequent tools don\'t need access to the'
- ' images (e.g. when computing evaluation measures).')
-
-FLAGS = tf.flags.FLAGS
-
-
-def main(_):
- tf.logging.set_verbosity(tf.logging.INFO)
-
- required_flags = ['input_tfrecord_paths', 'output_tfrecord_path',
- 'inference_graph']
- for flag_name in required_flags:
- if not getattr(FLAGS, flag_name):
- raise ValueError('Flag --{} is required'.format(flag_name))
-
- with tf.Session() as sess:
- input_tfrecord_paths = [
- v for v in FLAGS.input_tfrecord_paths.split(',') if v]
- tf.logging.info('Reading input from %d files', len(input_tfrecord_paths))
- serialized_example_tensor, image_tensor = detection_inference.build_input(
- input_tfrecord_paths)
- tf.logging.info('Reading graph and building model...')
- (detected_boxes_tensor, detected_scores_tensor,
- detected_labels_tensor) = detection_inference.build_inference_graph(
- image_tensor, FLAGS.inference_graph)
-
- tf.logging.info('Running inference and writing output to {}'.format(
- FLAGS.output_tfrecord_path))
- sess.run(tf.local_variables_initializer())
- tf.train.start_queue_runners()
- with tf.python_io.TFRecordWriter(
- FLAGS.output_tfrecord_path) as tf_record_writer:
- try:
- for counter in itertools.count():
- tf.logging.log_every_n(tf.logging.INFO, 'Processed %d images...', 10,
- counter)
- tf_example = detection_inference.infer_detections_and_add_to_example(
- serialized_example_tensor, detected_boxes_tensor,
- detected_scores_tensor, detected_labels_tensor,
- FLAGS.discard_image_pixels)
- tf_record_writer.write(tf_example.SerializeToString())
- except tf.errors.OutOfRangeError:
- tf.logging.info('Finished processing records')
-
-
-if __name__ == '__main__':
- tf.app.run()
diff --git a/object_detection/matchers/BUILD b/object_detection/matchers/BUILD
deleted file mode 100644
index 1bc5992f..00000000
--- a/object_detection/matchers/BUILD
+++ /dev/null
@@ -1,51 +0,0 @@
-# Tensorflow Object Detection API: Matcher implementations.
-
-package(
- default_visibility = ["//visibility:public"],
-)
-
-licenses(["notice"])
-
-# Apache 2.0
-py_library(
- name = "argmax_matcher",
- srcs = [
- "argmax_matcher.py",
- ],
- deps = [
- "//tensorflow",
- "//tensorflow_models/object_detection/core:matcher",
- ],
-)
-
-py_test(
- name = "argmax_matcher_test",
- srcs = ["argmax_matcher_test.py"],
- deps = [
- ":argmax_matcher",
- "//tensorflow",
- ],
-)
-
-py_library(
- name = "bipartite_matcher",
- srcs = [
- "bipartite_matcher.py",
- ],
- deps = [
- "//tensorflow",
- "//tensorflow/contrib/image:image_py",
- "//tensorflow_models/object_detection/core:matcher",
- ],
-)
-
-py_test(
- name = "bipartite_matcher_test",
- srcs = [
- "bipartite_matcher_test.py",
- ],
- deps = [
- ":bipartite_matcher",
- "//tensorflow",
- ],
-)
diff --git a/object_detection/matchers/__init__.py b/object_detection/matchers/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/object_detection/matchers/__pycache__/__init__.cpython-35.pyc b/object_detection/matchers/__pycache__/__init__.cpython-35.pyc
deleted file mode 100644
index 949e5d44..00000000
Binary files a/object_detection/matchers/__pycache__/__init__.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/matchers/__pycache__/argmax_matcher.cpython-35.pyc b/object_detection/matchers/__pycache__/argmax_matcher.cpython-35.pyc
deleted file mode 100644
index 405b0cad..00000000
Binary files a/object_detection/matchers/__pycache__/argmax_matcher.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/matchers/__pycache__/bipartite_matcher.cpython-35.pyc b/object_detection/matchers/__pycache__/bipartite_matcher.cpython-35.pyc
deleted file mode 100644
index 7371d354..00000000
Binary files a/object_detection/matchers/__pycache__/bipartite_matcher.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/matchers/argmax_matcher.py b/object_detection/matchers/argmax_matcher.py
deleted file mode 100644
index 97d85185..00000000
--- a/object_detection/matchers/argmax_matcher.py
+++ /dev/null
@@ -1,189 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Argmax matcher implementation.
-
-This class takes a similarity matrix and matches columns to rows based on the
-maximum value per column. One can specify matched_thresholds and
-to prevent columns from matching to rows (generally resulting in a negative
-training example) and unmatched_theshold to ignore the match (generally
-resulting in neither a positive or negative training example).
-
-This matcher is used in Fast(er)-RCNN.
-
-Note: matchers are used in TargetAssigners. There is a create_target_assigner
-factory function for popular implementations.
-"""
-
-import tensorflow as tf
-
-from object_detection.core import matcher
-
-
-class ArgMaxMatcher(matcher.Matcher):
- """Matcher based on highest value.
-
- This class computes matches from a similarity matrix. Each column is matched
- to a single row.
-
- To support object detection target assignment this class enables setting both
- matched_threshold (upper threshold) and unmatched_threshold (lower thresholds)
- defining three categories of similarity which define whether examples are
- positive, negative, or ignored:
- (1) similarity >= matched_threshold: Highest similarity. Matched/Positive!
- (2) matched_threshold > similarity >= unmatched_threshold: Medium similarity.
- Depending on negatives_lower_than_unmatched, this is either
- Unmatched/Negative OR Ignore.
- (3) unmatched_threshold > similarity: Lowest similarity. Depending on flag
- negatives_lower_than_unmatched, either Unmatched/Negative OR Ignore.
- For ignored matches this class sets the values in the Match object to -2.
- """
-
- def __init__(self,
- matched_threshold,
- unmatched_threshold=None,
- negatives_lower_than_unmatched=True,
- force_match_for_each_row=False):
- """Construct ArgMaxMatcher.
-
- Args:
- matched_threshold: Threshold for positive matches. Positive if
- sim >= matched_threshold, where sim is the maximum value of the
- similarity matrix for a given column. Set to None for no threshold.
- unmatched_threshold: Threshold for negative matches. Negative if
- sim < unmatched_threshold. Defaults to matched_threshold
- when set to None.
- negatives_lower_than_unmatched: Boolean which defaults to True. If True
- then negative matches are the ones below the unmatched_threshold,
- whereas ignored matches are in between the matched and umatched
- threshold. If False, then negative matches are in between the matched
- and unmatched threshold, and everything lower than unmatched is ignored.
- force_match_for_each_row: If True, ensures that each row is matched to
- at least one column (which is not guaranteed otherwise if the
- matched_threshold is high). Defaults to False. See
- argmax_matcher_test.testMatcherForceMatch() for an example.
-
- Raises:
- ValueError: if unmatched_threshold is set but matched_threshold is not set
- or if unmatched_threshold > matched_threshold.
- """
- if (matched_threshold is None) and (unmatched_threshold is not None):
- raise ValueError('Need to also define matched_threshold when'
- 'unmatched_threshold is defined')
- self._matched_threshold = matched_threshold
- if unmatched_threshold is None:
- self._unmatched_threshold = matched_threshold
- else:
- if unmatched_threshold > matched_threshold:
- raise ValueError('unmatched_threshold needs to be smaller or equal'
- 'to matched_threshold')
- self._unmatched_threshold = unmatched_threshold
- if not negatives_lower_than_unmatched:
- if self._unmatched_threshold == self._matched_threshold:
- raise ValueError('When negatives are in between matched and '
- 'unmatched thresholds, these cannot be of equal '
- 'value. matched: %s, unmatched: %s',
- self._matched_threshold, self._unmatched_threshold)
- self._force_match_for_each_row = force_match_for_each_row
- self._negatives_lower_than_unmatched = negatives_lower_than_unmatched
-
- def _match(self, similarity_matrix):
- """Tries to match each column of the similarity matrix to a row.
-
- Args:
- similarity_matrix: tensor of shape [N, M] representing any similarity
- metric.
-
- Returns:
- Match object with corresponding matches for each of M columns.
- """
-
- def _match_when_rows_are_empty():
- """Performs matching when the rows of similarity matrix are empty.
-
- When the rows are empty, all detections are false positives. So we return
- a tensor of -1's to indicate that the columns do not match to any rows.
-
- Returns:
- matches: int32 tensor indicating the row each column matches to.
- """
- return -1 * tf.ones([tf.shape(similarity_matrix)[1]], dtype=tf.int32)
-
- def _match_when_rows_are_non_empty():
- """Performs matching when the rows of similarity matrix are non empty.
-
- Returns:
- matches: int32 tensor indicating the row each column matches to.
- """
- # Matches for each column
- matches = tf.argmax(similarity_matrix, 0)
-
- # Deal with matched and unmatched threshold
- if self._matched_threshold is not None:
- # Get logical indices of ignored and unmatched columns as tf.int64
- matched_vals = tf.reduce_max(similarity_matrix, 0)
- below_unmatched_threshold = tf.greater(self._unmatched_threshold,
- matched_vals)
- between_thresholds = tf.logical_and(
- tf.greater_equal(matched_vals, self._unmatched_threshold),
- tf.greater(self._matched_threshold, matched_vals))
-
- if self._negatives_lower_than_unmatched:
- matches = self._set_values_using_indicator(matches,
- below_unmatched_threshold,
- -1)
- matches = self._set_values_using_indicator(matches,
- between_thresholds,
- -2)
- else:
- matches = self._set_values_using_indicator(matches,
- below_unmatched_threshold,
- -2)
- matches = self._set_values_using_indicator(matches,
- between_thresholds,
- -1)
-
- if self._force_match_for_each_row:
- forced_matches_ids = tf.cast(tf.argmax(similarity_matrix, 1), tf.int32)
-
- # Set matches[forced_matches_ids] = [0, ..., R], R is number of rows.
- row_range = tf.range(tf.shape(similarity_matrix)[0])
- col_range = tf.range(tf.shape(similarity_matrix)[1])
- forced_matches_values = tf.cast(row_range, matches.dtype)
- keep_matches_ids, _ = tf.setdiff1d(col_range, forced_matches_ids)
- keep_matches_values = tf.gather(matches, keep_matches_ids)
- matches = tf.dynamic_stitch(
- [forced_matches_ids,
- keep_matches_ids], [forced_matches_values, keep_matches_values])
-
- return tf.cast(matches, tf.int32)
-
- return tf.cond(
- tf.greater(tf.shape(similarity_matrix)[0], 0),
- _match_when_rows_are_non_empty, _match_when_rows_are_empty)
-
- def _set_values_using_indicator(self, x, indicator, val):
- """Set the indicated fields of x to val.
-
- Args:
- x: tensor.
- indicator: boolean with same shape as x.
- val: scalar with value to set.
-
- Returns:
- modified tensor.
- """
- indicator = tf.cast(indicator, x.dtype)
- return tf.add(tf.multiply(x, 1 - indicator), val * indicator)
diff --git a/object_detection/matchers/argmax_matcher_test.py b/object_detection/matchers/argmax_matcher_test.py
deleted file mode 100644
index 36740f4b..00000000
--- a/object_detection/matchers/argmax_matcher_test.py
+++ /dev/null
@@ -1,237 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.matchers.argmax_matcher."""
-
-import numpy as np
-import tensorflow as tf
-
-from object_detection.matchers import argmax_matcher
-
-
-class ArgMaxMatcherTest(tf.test.TestCase):
-
- def test_return_correct_matches_with_default_thresholds(self):
- similarity = np.array([[1., 1, 1, 3, 1],
- [2, -1, 2, 0, 4],
- [3, 0, -1, 0, 0]])
-
- matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=None)
- expected_matched_rows = np.array([2, 0, 1, 0, 1])
-
- sim = tf.constant(similarity)
- match = matcher.match(sim)
- matched_cols = match.matched_column_indices()
- matched_rows = match.matched_row_indices()
- unmatched_cols = match.unmatched_column_indices()
-
- with self.test_session() as sess:
- res_matched_cols = sess.run(matched_cols)
- res_matched_rows = sess.run(matched_rows)
- res_unmatched_cols = sess.run(unmatched_cols)
-
- self.assertAllEqual(res_matched_rows, expected_matched_rows)
- self.assertAllEqual(res_matched_cols, np.arange(similarity.shape[1]))
- self.assertEmpty(res_unmatched_cols)
-
- def test_return_correct_matches_with_empty_rows(self):
-
- matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=None)
- sim = 0.2*tf.ones([0, 5])
- match = matcher.match(sim)
- unmatched_cols = match.unmatched_column_indices()
-
- with self.test_session() as sess:
- res_unmatched_cols = sess.run(unmatched_cols)
- self.assertAllEqual(res_unmatched_cols, np.arange(5))
-
- def test_return_correct_matches_with_matched_threshold(self):
- similarity = np.array([[1, 1, 1, 3, 1],
- [2, -1, 2, 0, 4],
- [3, 0, -1, 0, 0]], dtype=np.int32)
-
- matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=3)
- expected_matched_cols = np.array([0, 3, 4])
- expected_matched_rows = np.array([2, 0, 1])
- expected_unmatched_cols = np.array([1, 2])
-
- sim = tf.constant(similarity)
- match = matcher.match(sim)
- matched_cols = match.matched_column_indices()
- matched_rows = match.matched_row_indices()
- unmatched_cols = match.unmatched_column_indices()
-
- init_op = tf.global_variables_initializer()
-
- with self.test_session() as sess:
- sess.run(init_op)
- res_matched_cols = sess.run(matched_cols)
- res_matched_rows = sess.run(matched_rows)
- res_unmatched_cols = sess.run(unmatched_cols)
-
- self.assertAllEqual(res_matched_rows, expected_matched_rows)
- self.assertAllEqual(res_matched_cols, expected_matched_cols)
- self.assertAllEqual(res_unmatched_cols, expected_unmatched_cols)
-
- def test_return_correct_matches_with_matched_and_unmatched_threshold(self):
- similarity = np.array([[1, 1, 1, 3, 1],
- [2, -1, 2, 0, 4],
- [3, 0, -1, 0, 0]], dtype=np.int32)
-
- matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=3,
- unmatched_threshold=2)
- expected_matched_cols = np.array([0, 3, 4])
- expected_matched_rows = np.array([2, 0, 1])
- expected_unmatched_cols = np.array([1]) # col 2 has too high maximum val
-
- sim = tf.constant(similarity)
- match = matcher.match(sim)
- matched_cols = match.matched_column_indices()
- matched_rows = match.matched_row_indices()
- unmatched_cols = match.unmatched_column_indices()
-
- with self.test_session() as sess:
- res_matched_cols = sess.run(matched_cols)
- res_matched_rows = sess.run(matched_rows)
- res_unmatched_cols = sess.run(unmatched_cols)
-
- self.assertAllEqual(res_matched_rows, expected_matched_rows)
- self.assertAllEqual(res_matched_cols, expected_matched_cols)
- self.assertAllEqual(res_unmatched_cols, expected_unmatched_cols)
-
- def test_return_correct_matches_negatives_lower_than_unmatched_false(self):
- similarity = np.array([[1, 1, 1, 3, 1],
- [2, -1, 2, 0, 4],
- [3, 0, -1, 0, 0]], dtype=np.int32)
-
- matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=3,
- unmatched_threshold=2,
- negatives_lower_than_unmatched=False)
- expected_matched_cols = np.array([0, 3, 4])
- expected_matched_rows = np.array([2, 0, 1])
- expected_unmatched_cols = np.array([2]) # col 1 has too low maximum val
-
- sim = tf.constant(similarity)
- match = matcher.match(sim)
- matched_cols = match.matched_column_indices()
- matched_rows = match.matched_row_indices()
- unmatched_cols = match.unmatched_column_indices()
-
- with self.test_session() as sess:
- res_matched_cols = sess.run(matched_cols)
- res_matched_rows = sess.run(matched_rows)
- res_unmatched_cols = sess.run(unmatched_cols)
-
- self.assertAllEqual(res_matched_rows, expected_matched_rows)
- self.assertAllEqual(res_matched_cols, expected_matched_cols)
- self.assertAllEqual(res_unmatched_cols, expected_unmatched_cols)
-
- def test_return_correct_matches_unmatched_row_not_using_force_match(self):
- similarity = np.array([[1, 1, 1, 3, 1],
- [-1, 0, -2, -2, -1],
- [3, 0, -1, 2, 0]], dtype=np.int32)
-
- matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=3,
- unmatched_threshold=2)
- expected_matched_cols = np.array([0, 3])
- expected_matched_rows = np.array([2, 0])
- expected_unmatched_cols = np.array([1, 2, 4])
-
- sim = tf.constant(similarity)
- match = matcher.match(sim)
- matched_cols = match.matched_column_indices()
- matched_rows = match.matched_row_indices()
- unmatched_cols = match.unmatched_column_indices()
-
- with self.test_session() as sess:
- res_matched_cols = sess.run(matched_cols)
- res_matched_rows = sess.run(matched_rows)
- res_unmatched_cols = sess.run(unmatched_cols)
-
- self.assertAllEqual(res_matched_rows, expected_matched_rows)
- self.assertAllEqual(res_matched_cols, expected_matched_cols)
- self.assertAllEqual(res_unmatched_cols, expected_unmatched_cols)
-
- def test_return_correct_matches_unmatched_row_while_using_force_match(self):
- similarity = np.array([[1, 1, 1, 3, 1],
- [-1, 0, -2, -2, -1],
- [3, 0, -1, 2, 0]], dtype=np.int32)
-
- matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=3,
- unmatched_threshold=2,
- force_match_for_each_row=True)
- expected_matched_cols = np.array([0, 1, 3])
- expected_matched_rows = np.array([2, 1, 0])
- expected_unmatched_cols = np.array([2, 4]) # col 2 has too high max val
-
- sim = tf.constant(similarity)
- match = matcher.match(sim)
- matched_cols = match.matched_column_indices()
- matched_rows = match.matched_row_indices()
- unmatched_cols = match.unmatched_column_indices()
-
- with self.test_session() as sess:
- res_matched_cols = sess.run(matched_cols)
- res_matched_rows = sess.run(matched_rows)
- res_unmatched_cols = sess.run(unmatched_cols)
-
- self.assertAllEqual(res_matched_rows, expected_matched_rows)
- self.assertAllEqual(res_matched_cols, expected_matched_cols)
- self.assertAllEqual(res_unmatched_cols, expected_unmatched_cols)
-
- def test_valid_arguments_corner_case(self):
- argmax_matcher.ArgMaxMatcher(matched_threshold=1,
- unmatched_threshold=1)
-
- def test_invalid_arguments_corner_case_negatives_lower_than_thres_false(self):
- with self.assertRaises(ValueError):
- argmax_matcher.ArgMaxMatcher(matched_threshold=1,
- unmatched_threshold=1,
- negatives_lower_than_unmatched=False)
-
- def test_invalid_arguments_no_matched_threshold(self):
- with self.assertRaises(ValueError):
- argmax_matcher.ArgMaxMatcher(matched_threshold=None,
- unmatched_threshold=4)
-
- def test_invalid_arguments_unmatched_thres_larger_than_matched_thres(self):
- with self.assertRaises(ValueError):
- argmax_matcher.ArgMaxMatcher(matched_threshold=1,
- unmatched_threshold=2)
-
- def test_set_values_using_indicator(self):
- input_a = np.array([3, 4, 5, 1, 4, 3, 2])
- expected_b = np.array([3, 0, 0, 1, 0, 3, 2]) # Set a>3 to 0
- expected_c = np.array(
- [3., 4., 5., -1., 4., 3., -1.]) # Set a<3 to -1. Float32
- idxb_ = input_a > 3
- idxc_ = input_a < 3
-
- matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=None)
-
- a = tf.constant(input_a)
- idxb = tf.constant(idxb_)
- idxc = tf.constant(idxc_)
- b = matcher._set_values_using_indicator(a, idxb, 0)
- c = matcher._set_values_using_indicator(tf.cast(a, tf.float32), idxc, -1)
- with self.test_session() as sess:
- res_b = sess.run(b)
- res_c = sess.run(c)
- self.assertAllEqual(res_b, expected_b)
- self.assertAllEqual(res_c, expected_c)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/matchers/bipartite_matcher.py b/object_detection/matchers/bipartite_matcher.py
deleted file mode 100644
index 3d717d12..00000000
--- a/object_detection/matchers/bipartite_matcher.py
+++ /dev/null
@@ -1,53 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Bipartite matcher implementation."""
-
-import tensorflow as tf
-
-from tensorflow.contrib.image.python.ops import image_ops
-from object_detection.core import matcher
-
-
-class GreedyBipartiteMatcher(matcher.Matcher):
- """Wraps a Tensorflow greedy bipartite matcher."""
-
- def _match(self, similarity_matrix, num_valid_rows=-1):
- """Bipartite matches a collection rows and columns. A greedy bi-partite.
-
- TODO: Add num_valid_columns options to match only that many columns with
- all the rows.
-
- Args:
- similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
- where higher values mean more similar.
- num_valid_rows: A scalar or a 1-D tensor with one element describing the
- number of valid rows of similarity_matrix to consider for the bipartite
- matching. If set to be negative, then all rows from similarity_matrix
- are used.
-
- Returns:
- match_results: int32 tensor of shape [M] with match_results[i]=-1
- meaning that column i is not matched and otherwise that it is matched to
- row match_results[i].
- """
- # Convert similarity matrix to distance matrix as tf.image.bipartite tries
- # to find minimum distance matches.
- distance_matrix = -1 * similarity_matrix
- _, match_results = image_ops.bipartite_match(
- distance_matrix, num_valid_rows)
- match_results = tf.reshape(match_results, [-1])
- match_results = tf.cast(match_results, tf.int32)
- return match_results
diff --git a/object_detection/matchers/bipartite_matcher_test.py b/object_detection/matchers/bipartite_matcher_test.py
deleted file mode 100644
index 2ee45a80..00000000
--- a/object_detection/matchers/bipartite_matcher_test.py
+++ /dev/null
@@ -1,71 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.core.bipartite_matcher."""
-
-import tensorflow as tf
-
-from object_detection.matchers import bipartite_matcher
-
-
-class GreedyBipartiteMatcherTest(tf.test.TestCase):
-
- def test_get_expected_matches_when_all_rows_are_valid(self):
- similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]])
- num_valid_rows = 2
- expected_match_results = [-1, 1, 0]
-
- matcher = bipartite_matcher.GreedyBipartiteMatcher()
- match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows)
- with self.test_session() as sess:
- match_results_out = sess.run(match._match_results)
- self.assertAllEqual(match_results_out, expected_match_results)
-
- def test_get_expected_matches_with_valid_rows_set_to_minus_one(self):
- similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]])
- num_valid_rows = -1
- expected_match_results = [-1, 1, 0]
-
- matcher = bipartite_matcher.GreedyBipartiteMatcher()
- match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows)
- with self.test_session() as sess:
- match_results_out = sess.run(match._match_results)
- self.assertAllEqual(match_results_out, expected_match_results)
-
- def test_get_no_matches_with_zero_valid_rows(self):
- similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]])
- num_valid_rows = 0
- expected_match_results = [-1, -1, -1]
-
- matcher = bipartite_matcher.GreedyBipartiteMatcher()
- match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows)
- with self.test_session() as sess:
- match_results_out = sess.run(match._match_results)
- self.assertAllEqual(match_results_out, expected_match_results)
-
- def test_get_expected_matches_with_only_one_valid_row(self):
- similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]])
- num_valid_rows = 1
- expected_match_results = [-1, -1, 0]
-
- matcher = bipartite_matcher.GreedyBipartiteMatcher()
- match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows)
- with self.test_session() as sess:
- match_results_out = sess.run(match._match_results)
- self.assertAllEqual(match_results_out, expected_match_results)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/meta_architectures/BUILD b/object_detection/meta_architectures/BUILD
deleted file mode 100644
index 0172a9c0..00000000
--- a/object_detection/meta_architectures/BUILD
+++ /dev/null
@@ -1,109 +0,0 @@
-# Tensorflow Object Detection API: Meta-architectures.
-
-package(
- default_visibility = ["//visibility:public"],
-)
-
-licenses(["notice"])
-
-# Apache 2.0
-
-py_library(
- name = "ssd_meta_arch",
- srcs = ["ssd_meta_arch.py"],
- deps = [
- "//tensorflow",
- "//tensorflow_models/object_detection/core:box_list",
- "//tensorflow_models/object_detection/core:box_predictor",
- "//tensorflow_models/object_detection/core:model",
- "//tensorflow_models/object_detection/core:target_assigner",
- "//tensorflow_models/object_detection/utils:shape_utils",
- "//tensorflow_models/object_detection/utils:visualization_utils",
- ],
-)
-
-py_test(
- name = "ssd_meta_arch_test",
- srcs = ["ssd_meta_arch_test.py"],
- deps = [
- ":ssd_meta_arch",
- "//tensorflow",
- "//tensorflow/python:training",
- "//tensorflow_models/object_detection/core:anchor_generator",
- "//tensorflow_models/object_detection/core:box_list",
- "//tensorflow_models/object_detection/core:losses",
- "//tensorflow_models/object_detection/core:post_processing",
- "//tensorflow_models/object_detection/core:region_similarity_calculator",
- "//tensorflow_models/object_detection/utils:test_utils",
- ],
-)
-
-py_library(
- name = "faster_rcnn_meta_arch",
- srcs = [
- "faster_rcnn_meta_arch.py",
- ],
- deps = [
- "//tensorflow",
- "//tensorflow_models/object_detection/anchor_generators:grid_anchor_generator",
- "//tensorflow_models/object_detection/core:balanced_positive_negative_sampler",
- "//tensorflow_models/object_detection/core:box_list",
- "//tensorflow_models/object_detection/core:box_list_ops",
- "//tensorflow_models/object_detection/core:box_predictor",
- "//tensorflow_models/object_detection/core:losses",
- "//tensorflow_models/object_detection/core:model",
- "//tensorflow_models/object_detection/core:post_processing",
- "//tensorflow_models/object_detection/core:standard_fields",
- "//tensorflow_models/object_detection/core:target_assigner",
- "//tensorflow_models/object_detection/utils:ops",
- "//tensorflow_models/object_detection/utils:shape_utils",
- ],
-)
-
-py_library(
- name = "faster_rcnn_meta_arch_test_lib",
- srcs = [
- "faster_rcnn_meta_arch_test_lib.py",
- ],
- deps = [
- ":faster_rcnn_meta_arch",
- "//tensorflow",
- "//tensorflow_models/object_detection/anchor_generators:grid_anchor_generator",
- "//tensorflow_models/object_detection/builders:box_predictor_builder",
- "//tensorflow_models/object_detection/builders:hyperparams_builder",
- "//tensorflow_models/object_detection/builders:post_processing_builder",
- "//tensorflow_models/object_detection/core:losses",
- "//tensorflow_models/object_detection/protos:box_predictor_py_pb2",
- "//tensorflow_models/object_detection/protos:hyperparams_py_pb2",
- "//tensorflow_models/object_detection/protos:post_processing_py_pb2",
- ],
-)
-
-py_test(
- name = "faster_rcnn_meta_arch_test",
- srcs = ["faster_rcnn_meta_arch_test.py"],
- deps = [
- ":faster_rcnn_meta_arch_test_lib",
- ],
-)
-
-py_library(
- name = "rfcn_meta_arch",
- srcs = ["rfcn_meta_arch.py"],
- deps = [
- ":faster_rcnn_meta_arch",
- "//tensorflow",
- "//tensorflow_models/object_detection/core:box_predictor",
- "//tensorflow_models/object_detection/utils:ops",
- ],
-)
-
-py_test(
- name = "rfcn_meta_arch_test",
- srcs = ["rfcn_meta_arch_test.py"],
- deps = [
- ":faster_rcnn_meta_arch_test_lib",
- ":rfcn_meta_arch",
- "//tensorflow",
- ],
-)
diff --git a/object_detection/meta_architectures/__init__.py b/object_detection/meta_architectures/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/object_detection/meta_architectures/__pycache__/__init__.cpython-35.pyc b/object_detection/meta_architectures/__pycache__/__init__.cpython-35.pyc
deleted file mode 100644
index 188b1fa1..00000000
Binary files a/object_detection/meta_architectures/__pycache__/__init__.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/meta_architectures/__pycache__/faster_rcnn_meta_arch.cpython-35.pyc b/object_detection/meta_architectures/__pycache__/faster_rcnn_meta_arch.cpython-35.pyc
deleted file mode 100644
index 70a966dd..00000000
Binary files a/object_detection/meta_architectures/__pycache__/faster_rcnn_meta_arch.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/meta_architectures/__pycache__/rfcn_meta_arch.cpython-35.pyc b/object_detection/meta_architectures/__pycache__/rfcn_meta_arch.cpython-35.pyc
deleted file mode 100644
index 5d67872e..00000000
Binary files a/object_detection/meta_architectures/__pycache__/rfcn_meta_arch.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/meta_architectures/__pycache__/ssd_meta_arch.cpython-35.pyc b/object_detection/meta_architectures/__pycache__/ssd_meta_arch.cpython-35.pyc
deleted file mode 100644
index 45168a64..00000000
Binary files a/object_detection/meta_architectures/__pycache__/ssd_meta_arch.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/meta_architectures/faster_rcnn_meta_arch.py b/object_detection/meta_architectures/faster_rcnn_meta_arch.py
deleted file mode 100644
index ae878b93..00000000
--- a/object_detection/meta_architectures/faster_rcnn_meta_arch.py
+++ /dev/null
@@ -1,1677 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Faster R-CNN meta-architecture definition.
-
-General tensorflow implementation of Faster R-CNN detection models.
-
-See Faster R-CNN: Ren, Shaoqing, et al.
-"Faster R-CNN: Towards real-time object detection with region proposal
-networks." Advances in neural information processing systems. 2015.
-
-We allow for two modes: first_stage_only=True and first_stage_only=False. In
-the former setting, all of the user facing methods (e.g., predict, postprocess,
-loss) can be used as if the model consisted only of the RPN, returning class
-agnostic proposals (these can be thought of as approximate detections with no
-associated class information). In the latter setting, proposals are computed,
-then passed through a second stage "box classifier" to yield (multi-class)
-detections.
-
-Implementations of Faster R-CNN models must define a new
-FasterRCNNFeatureExtractor and override three methods: `preprocess`,
-`_extract_proposal_features` (the first stage of the model), and
-`_extract_box_classifier_features` (the second stage of the model). Optionally,
-the `restore_fn` method can be overridden. See tests for an example.
-
-A few important notes:
-+ Batching conventions: We support batched inference and training where
-all images within a batch have the same resolution. Batch sizes are determined
-dynamically via the shape of the input tensors (rather than being specified
-directly as, e.g., a model constructor).
-
-A complication is that due to non-max suppression, we are not guaranteed to get
-the same number of proposals from the first stage RPN (region proposal network)
-for each image (though in practice, we should often get the same number of
-proposals). For this reason we pad to a max number of proposals per image
-within a batch. This `self.max_num_proposals` property is set to the
-`first_stage_max_proposals` parameter at inference time and the
-`second_stage_batch_size` at training time since we subsample the batch to
-be sent through the box classifier during training.
-
-For the second stage of the pipeline, we arrange the proposals for all images
-within the batch along a single batch dimension. For example, the input to
-_extract_box_classifier_features is a tensor of shape
-`[total_num_proposals, crop_height, crop_width, depth]` where
-total_num_proposals is batch_size * self.max_num_proposals. (And note that per
-the above comment, a subset of these entries correspond to zero paddings.)
-
-+ Coordinate representations:
-Following the API (see model.DetectionModel definition), our outputs after
-postprocessing operations are always normalized boxes however, internally, we
-sometimes convert to absolute --- e.g. for loss computation. In particular,
-anchors and proposal_boxes are both represented as absolute coordinates.
-"""
-from abc import abstractmethod
-from functools import partial
-import tensorflow as tf
-
-from object_detection.anchor_generators import grid_anchor_generator
-from object_detection.core import balanced_positive_negative_sampler as sampler
-from object_detection.core import box_list
-from object_detection.core import box_list_ops
-from object_detection.core import box_predictor
-from object_detection.core import losses
-from object_detection.core import model
-from object_detection.core import post_processing
-from object_detection.core import standard_fields as fields
-from object_detection.core import target_assigner
-from object_detection.utils import ops
-from object_detection.utils import shape_utils
-
-slim = tf.contrib.slim
-
-
-class FasterRCNNFeatureExtractor(object):
- """Faster R-CNN Feature Extractor definition."""
-
- def __init__(self,
- is_training,
- first_stage_features_stride,
- batch_norm_trainable=False,
- reuse_weights=None,
- weight_decay=0.0):
- """Constructor.
-
- Args:
- is_training: A boolean indicating whether the training version of the
- computation graph should be constructed.
- first_stage_features_stride: Output stride of extracted RPN feature map.
- batch_norm_trainable: Whether to update batch norm parameters during
- training or not. When training with a relative large batch size
- (e.g. 8), it could be desirable to enable batch norm update.
- reuse_weights: Whether to reuse variables. Default is None.
- weight_decay: float weight decay for feature extractor (default: 0.0).
- """
- self._is_training = is_training
- self._first_stage_features_stride = first_stage_features_stride
- self._train_batch_norm = (batch_norm_trainable and is_training)
- self._reuse_weights = reuse_weights
- self._weight_decay = weight_decay
-
- @abstractmethod
- def preprocess(self, resized_inputs):
- """Feature-extractor specific preprocessing (minus image resizing)."""
- pass
-
- def extract_proposal_features(self, preprocessed_inputs, scope):
- """Extracts first stage RPN features.
-
- This function is responsible for extracting feature maps from preprocessed
- images. These features are used by the region proposal network (RPN) to
- predict proposals.
-
- Args:
- preprocessed_inputs: A [batch, height, width, channels] float tensor
- representing a batch of images.
- scope: A scope name.
-
- Returns:
- rpn_feature_map: A tensor with shape [batch, height, width, depth]
- """
- with tf.variable_scope(scope, values=[preprocessed_inputs]):
- return self._extract_proposal_features(preprocessed_inputs, scope)
-
- @abstractmethod
- def _extract_proposal_features(self, preprocessed_inputs, scope):
- """Extracts first stage RPN features, to be overridden."""
- pass
-
- def extract_box_classifier_features(self, proposal_feature_maps, scope):
- """Extracts second stage box classifier features.
-
- Args:
- proposal_feature_maps: A 4-D float tensor with shape
- [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
- representing the feature map cropped to each proposal.
- scope: A scope name.
-
- Returns:
- proposal_classifier_features: A 4-D float tensor with shape
- [batch_size * self.max_num_proposals, height, width, depth]
- representing box classifier features for each proposal.
- """
- with tf.variable_scope(scope, values=[proposal_feature_maps]):
- return self._extract_box_classifier_features(proposal_feature_maps, scope)
-
- @abstractmethod
- def _extract_box_classifier_features(self, proposal_feature_maps, scope):
- """Extracts second stage box classifier features, to be overridden."""
- pass
-
- def restore_from_classification_checkpoint_fn(
- self,
- first_stage_feature_extractor_scope,
- second_stage_feature_extractor_scope):
- """Returns a map of variables to load from a foreign checkpoint.
-
- Args:
- first_stage_feature_extractor_scope: A scope name for the first stage
- feature extractor.
- second_stage_feature_extractor_scope: A scope name for the second stage
- feature extractor.
-
- Returns:
- A dict mapping variable names (to load from a checkpoint) to variables in
- the model graph.
- """
- variables_to_restore = {}
- for variable in tf.global_variables():
- for scope_name in [first_stage_feature_extractor_scope,
- second_stage_feature_extractor_scope]:
- if variable.op.name.startswith(scope_name):
- var_name = variable.op.name.replace(scope_name + '/', '')
- variables_to_restore[var_name] = variable
- return variables_to_restore
-
-
-class FasterRCNNMetaArch(model.DetectionModel):
- """Faster R-CNN Meta-architecture definition."""
-
- def __init__(self,
- is_training,
- num_classes,
- image_resizer_fn,
- feature_extractor,
- first_stage_only,
- first_stage_anchor_generator,
- first_stage_atrous_rate,
- first_stage_box_predictor_arg_scope,
- first_stage_box_predictor_kernel_size,
- first_stage_box_predictor_depth,
- first_stage_minibatch_size,
- first_stage_positive_balance_fraction,
- first_stage_nms_score_threshold,
- first_stage_nms_iou_threshold,
- first_stage_max_proposals,
- first_stage_localization_loss_weight,
- first_stage_objectness_loss_weight,
- initial_crop_size,
- maxpool_kernel_size,
- maxpool_stride,
- second_stage_mask_rcnn_box_predictor,
- second_stage_batch_size,
- second_stage_balance_fraction,
- second_stage_non_max_suppression_fn,
- second_stage_score_conversion_fn,
- second_stage_localization_loss_weight,
- second_stage_classification_loss_weight,
- second_stage_classification_loss,
- second_stage_mask_prediction_loss_weight=1.0,
- hard_example_miner=None,
- parallel_iterations=16):
- """FasterRCNNMetaArch Constructor.
-
- Args:
- is_training: A boolean indicating whether the training version of the
- computation graph should be constructed.
- num_classes: Number of classes. Note that num_classes *does not*
- include the background category, so if groundtruth labels take values
- in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
- assigned classification targets can range from {0,... K}).
- image_resizer_fn: A callable for image resizing. This callable
- takes a rank-3 image tensor of shape [height, width, channels]
- (corresponding to a single image) and returns a rank-3 image tensor,
- possibly with new spatial dimensions. See
- builders/image_resizer_builder.py.
- feature_extractor: A FasterRCNNFeatureExtractor object.
- first_stage_only: Whether to construct only the Region Proposal Network
- (RPN) part of the model.
- first_stage_anchor_generator: An anchor_generator.AnchorGenerator object
- (note that currently we only support
- grid_anchor_generator.GridAnchorGenerator objects)
- first_stage_atrous_rate: A single integer indicating the atrous rate for
- the single convolution op which is applied to the `rpn_features_to_crop`
- tensor to obtain a tensor to be used for box prediction. Some feature
- extractors optionally allow for producing feature maps computed at
- denser resolutions. The atrous rate is used to compensate for the
- denser feature maps by using an effectively larger receptive field.
- (This should typically be set to 1).
- first_stage_box_predictor_arg_scope: Slim arg_scope for conv2d,
- separable_conv2d and fully_connected ops for the RPN box predictor.
- first_stage_box_predictor_kernel_size: Kernel size to use for the
- convolution op just prior to RPN box predictions.
- first_stage_box_predictor_depth: Output depth for the convolution op
- just prior to RPN box predictions.
- first_stage_minibatch_size: The "batch size" to use for computing the
- objectness and location loss of the region proposal network. This
- "batch size" refers to the number of anchors selected as contributing
- to the loss function for any given image within the image batch and is
- only called "batch_size" due to terminology from the Faster R-CNN paper.
- first_stage_positive_balance_fraction: Fraction of positive examples
- per image for the RPN. The recommended value for Faster RCNN is 0.5.
- first_stage_nms_score_threshold: Score threshold for non max suppression
- for the Region Proposal Network (RPN). This value is expected to be in
- [0, 1] as it is applied directly after a softmax transformation. The
- recommended value for Faster R-CNN is 0.
- first_stage_nms_iou_threshold: The Intersection Over Union (IOU) threshold
- for performing Non-Max Suppression (NMS) on the boxes predicted by the
- Region Proposal Network (RPN).
- first_stage_max_proposals: Maximum number of boxes to retain after
- performing Non-Max Suppression (NMS) on the boxes predicted by the
- Region Proposal Network (RPN).
- first_stage_localization_loss_weight: A float
- first_stage_objectness_loss_weight: A float
- initial_crop_size: A single integer indicating the output size
- (width and height are set to be the same) of the initial bilinear
- interpolation based cropping during ROI pooling.
- maxpool_kernel_size: A single integer indicating the kernel size of the
- max pool op on the cropped feature map during ROI pooling.
- maxpool_stride: A single integer indicating the stride of the max pool
- op on the cropped feature map during ROI pooling.
- second_stage_mask_rcnn_box_predictor: Mask R-CNN box predictor to use for
- the second stage.
- second_stage_batch_size: The batch size used for computing the
- classification and refined location loss of the box classifier. This
- "batch size" refers to the number of proposals selected as contributing
- to the loss function for any given image within the image batch and is
- only called "batch_size" due to terminology from the Faster R-CNN paper.
- second_stage_balance_fraction: Fraction of positive examples to use
- per image for the box classifier. The recommended value for Faster RCNN
- is 0.25.
- second_stage_non_max_suppression_fn: batch_multiclass_non_max_suppression
- callable that takes `boxes`, `scores`, optional `clip_window` and
- optional (kwarg) `mask` inputs (with all other inputs already set)
- and returns a dictionary containing tensors with keys:
- `detection_boxes`, `detection_scores`, `detection_classes`,
- `num_detections`, and (optionally) `detection_masks`. See
- `post_processing.batch_multiclass_non_max_suppression` for the type and
- shape of these tensors.
- second_stage_score_conversion_fn: Callable elementwise nonlinearity
- (that takes tensors as inputs and returns tensors). This is usually
- used to convert logits to probabilities.
- second_stage_localization_loss_weight: A float indicating the scale factor
- for second stage localization loss.
- second_stage_classification_loss_weight: A float indicating the scale
- factor for second stage classification loss.
- second_stage_classification_loss: Classification loss used by the second
- stage classifier. Either losses.WeightedSigmoidClassificationLoss or
- losses.WeightedSoftmaxClassificationLoss.
- second_stage_mask_prediction_loss_weight: A float indicating the scale
- factor for second stage mask prediction loss. This is applicable only if
- second stage box predictor is configured to predict masks.
- hard_example_miner: A losses.HardExampleMiner object (can be None).
- parallel_iterations: (Optional) The number of iterations allowed to run
- in parallel for calls to tf.map_fn.
- Raises:
- ValueError: If `second_stage_batch_size` > `first_stage_max_proposals` at
- training time.
- ValueError: If first_stage_anchor_generator is not of type
- grid_anchor_generator.GridAnchorGenerator.
- """
- super(FasterRCNNMetaArch, self).__init__(num_classes=num_classes)
-
- if is_training and second_stage_batch_size > first_stage_max_proposals:
- raise ValueError('second_stage_batch_size should be no greater than '
- 'first_stage_max_proposals.')
- if not isinstance(first_stage_anchor_generator,
- grid_anchor_generator.GridAnchorGenerator):
- raise ValueError('first_stage_anchor_generator must be of type '
- 'grid_anchor_generator.GridAnchorGenerator.')
-
- self._is_training = is_training
- self._image_resizer_fn = image_resizer_fn
- self._feature_extractor = feature_extractor
- self._first_stage_only = first_stage_only
-
- # The first class is reserved as background.
- unmatched_cls_target = tf.constant(
- [1] + self._num_classes * [0], dtype=tf.float32)
- self._proposal_target_assigner = target_assigner.create_target_assigner(
- 'FasterRCNN', 'proposal')
- self._detector_target_assigner = target_assigner.create_target_assigner(
- 'FasterRCNN', 'detection', unmatched_cls_target=unmatched_cls_target)
- # Both proposal and detector target assigners use the same box coder
- self._box_coder = self._proposal_target_assigner.box_coder
-
- # (First stage) Region proposal network parameters
- self._first_stage_anchor_generator = first_stage_anchor_generator
- self._first_stage_atrous_rate = first_stage_atrous_rate
- self._first_stage_box_predictor_arg_scope = (
- first_stage_box_predictor_arg_scope)
- self._first_stage_box_predictor_kernel_size = (
- first_stage_box_predictor_kernel_size)
- self._first_stage_box_predictor_depth = first_stage_box_predictor_depth
- self._first_stage_minibatch_size = first_stage_minibatch_size
- self._first_stage_sampler = sampler.BalancedPositiveNegativeSampler(
- positive_fraction=first_stage_positive_balance_fraction)
- self._first_stage_box_predictor = box_predictor.ConvolutionalBoxPredictor(
- self._is_training, num_classes=1,
- conv_hyperparams=self._first_stage_box_predictor_arg_scope,
- min_depth=0, max_depth=0, num_layers_before_predictor=0,
- use_dropout=False, dropout_keep_prob=1.0, kernel_size=1,
- box_code_size=self._box_coder.code_size)
-
- self._first_stage_nms_score_threshold = first_stage_nms_score_threshold
- self._first_stage_nms_iou_threshold = first_stage_nms_iou_threshold
- self._first_stage_max_proposals = first_stage_max_proposals
-
- self._first_stage_localization_loss = (
- losses.WeightedSmoothL1LocalizationLoss(anchorwise_output=True))
- self._first_stage_objectness_loss = (
- losses.WeightedSoftmaxClassificationLoss(anchorwise_output=True))
- self._first_stage_loc_loss_weight = first_stage_localization_loss_weight
- self._first_stage_obj_loss_weight = first_stage_objectness_loss_weight
-
- # Per-region cropping parameters
- self._initial_crop_size = initial_crop_size
- self._maxpool_kernel_size = maxpool_kernel_size
- self._maxpool_stride = maxpool_stride
-
- self._mask_rcnn_box_predictor = second_stage_mask_rcnn_box_predictor
-
- self._second_stage_batch_size = second_stage_batch_size
- self._second_stage_sampler = sampler.BalancedPositiveNegativeSampler(
- positive_fraction=second_stage_balance_fraction)
-
- self._second_stage_nms_fn = second_stage_non_max_suppression_fn
- self._second_stage_score_conversion_fn = second_stage_score_conversion_fn
-
- self._second_stage_localization_loss = (
- losses.WeightedSmoothL1LocalizationLoss(anchorwise_output=True))
- self._second_stage_classification_loss = second_stage_classification_loss
- self._second_stage_mask_loss = (
- losses.WeightedSigmoidClassificationLoss(anchorwise_output=True))
- self._second_stage_loc_loss_weight = second_stage_localization_loss_weight
- self._second_stage_cls_loss_weight = second_stage_classification_loss_weight
- self._second_stage_mask_loss_weight = (
- second_stage_mask_prediction_loss_weight)
- self._hard_example_miner = hard_example_miner
- self._parallel_iterations = parallel_iterations
-
- @property
- def first_stage_feature_extractor_scope(self):
- return 'FirstStageFeatureExtractor'
-
- @property
- def second_stage_feature_extractor_scope(self):
- return 'SecondStageFeatureExtractor'
-
- @property
- def first_stage_box_predictor_scope(self):
- return 'FirstStageBoxPredictor'
-
- @property
- def second_stage_box_predictor_scope(self):
- return 'SecondStageBoxPredictor'
-
- @property
- def max_num_proposals(self):
- """Max number of proposals (to pad to) for each image in the input batch.
-
- At training time, this is set to be the `second_stage_batch_size` if hard
- example miner is not configured, else it is set to
- `first_stage_max_proposals`. At inference time, this is always set to
- `first_stage_max_proposals`.
-
- Returns:
- A positive integer.
- """
- if self._is_training and not self._hard_example_miner:
- return self._second_stage_batch_size
- return self._first_stage_max_proposals
-
- def preprocess(self, inputs):
- """Feature-extractor specific preprocessing.
-
- See base class.
-
- For Faster R-CNN, we perform image resizing in the base class --- each
- class subclassing FasterRCNNMetaArch is responsible for any additional
- preprocessing (e.g., scaling pixel values to be in [-1, 1]).
-
- Args:
- inputs: a [batch, height_in, width_in, channels] float tensor representing
- a batch of images with values between 0 and 255.0.
-
- Returns:
- preprocessed_inputs: a [batch, height_out, width_out, channels] float
- tensor representing a batch of images.
- Raises:
- ValueError: if inputs tensor does not have type tf.float32
- """
- if inputs.dtype is not tf.float32:
- raise ValueError('`preprocess` expects a tf.float32 tensor')
- with tf.name_scope('Preprocessor'):
- resized_inputs = tf.map_fn(self._image_resizer_fn,
- elems=inputs,
- dtype=tf.float32,
- parallel_iterations=self._parallel_iterations)
- return self._feature_extractor.preprocess(resized_inputs)
-
- def predict(self, preprocessed_inputs):
- """Predicts unpostprocessed tensors from input tensor.
-
- This function takes an input batch of images and runs it through the
- forward pass of the network to yield "raw" un-postprocessed predictions.
- If `first_stage_only` is True, this function only returns first stage
- RPN predictions (un-postprocessed). Otherwise it returns both
- first stage RPN predictions as well as second stage box classifier
- predictions.
-
- Other remarks:
- + Anchor pruning vs. clipping: following the recommendation of the Faster
- R-CNN paper, we prune anchors that venture outside the image window at
- training time and clip anchors to the image window at inference time.
- + Proposal padding: as described at the top of the file, proposals are
- padded to self._max_num_proposals and flattened so that proposals from all
- images within the input batch are arranged along the same batch dimension.
-
- Args:
- preprocessed_inputs: a [batch, height, width, channels] float tensor
- representing a batch of images.
-
- Returns:
- prediction_dict: a dictionary holding "raw" prediction tensors:
- 1) rpn_box_predictor_features: A 4-D float32 tensor with shape
- [batch_size, height, width, depth] to be used for predicting proposal
- boxes and corresponding objectness scores.
- 2) rpn_features_to_crop: A 4-D float32 tensor with shape
- [batch_size, height, width, depth] representing image features to crop
- using the proposal boxes predicted by the RPN.
- 3) image_shape: a 1-D tensor of shape [4] representing the input
- image shape.
- 4) rpn_box_encodings: 3-D float tensor of shape
- [batch_size, num_anchors, self._box_coder.code_size] containing
- predicted boxes.
- 5) rpn_objectness_predictions_with_background: 3-D float tensor of shape
- [batch_size, num_anchors, 2] containing class
- predictions (logits) for each of the anchors. Note that this
- tensor *includes* background class predictions (at class index 0).
- 6) anchors: A 2-D tensor of shape [num_anchors, 4] representing anchors
- for the first stage RPN (in absolute coordinates). Note that
- `num_anchors` can differ depending on whether the model is created in
- training or inference mode.
-
- (and if first_stage_only=False):
- 7) refined_box_encodings: a 3-D tensor with shape
- [total_num_proposals, num_classes, 4] representing predicted
- (final) refined box encodings, where
- total_num_proposals=batch_size*self._max_num_proposals
- 8) class_predictions_with_background: a 3-D tensor with shape
- [total_num_proposals, num_classes + 1] containing class
- predictions (logits) for each of the anchors, where
- total_num_proposals=batch_size*self._max_num_proposals.
- Note that this tensor *includes* background class predictions
- (at class index 0).
- 9) num_proposals: An int32 tensor of shape [batch_size] representing the
- number of proposals generated by the RPN. `num_proposals` allows us
- to keep track of which entries are to be treated as zero paddings and
- which are not since we always pad the number of proposals to be
- `self.max_num_proposals` for each image.
- 10) proposal_boxes: A float32 tensor of shape
- [batch_size, self.max_num_proposals, 4] representing
- decoded proposal bounding boxes in absolute coordinates.
- 11) mask_predictions: (optional) a 4-D tensor with shape
- [total_num_padded_proposals, num_classes, mask_height, mask_width]
- containing instance mask predictions.
- """
- (rpn_box_predictor_features, rpn_features_to_crop, anchors_boxlist,
- image_shape) = self._extract_rpn_feature_maps(preprocessed_inputs)
- (rpn_box_encodings, rpn_objectness_predictions_with_background
- ) = self._predict_rpn_proposals(rpn_box_predictor_features)
-
- # The Faster R-CNN paper recommends pruning anchors that venture outside
- # the image window at training time and clipping at inference time.
- clip_window = tf.to_float(tf.stack([0, 0, image_shape[1], image_shape[2]]))
- if self._is_training:
- (rpn_box_encodings, rpn_objectness_predictions_with_background,
- anchors_boxlist) = self._remove_invalid_anchors_and_predictions(
- rpn_box_encodings, rpn_objectness_predictions_with_background,
- anchors_boxlist, clip_window)
- else:
- anchors_boxlist = box_list_ops.clip_to_window(
- anchors_boxlist, clip_window)
-
- anchors = anchors_boxlist.get()
- prediction_dict = {
- 'rpn_box_predictor_features': rpn_box_predictor_features,
- 'rpn_features_to_crop': rpn_features_to_crop,
- 'image_shape': image_shape,
- 'rpn_box_encodings': rpn_box_encodings,
- 'rpn_objectness_predictions_with_background':
- rpn_objectness_predictions_with_background,
- 'anchors': anchors
- }
-
- if not self._first_stage_only:
- prediction_dict.update(self._predict_second_stage(
- rpn_box_encodings,
- rpn_objectness_predictions_with_background,
- rpn_features_to_crop,
- anchors, image_shape))
- return prediction_dict
-
- def _predict_second_stage(self, rpn_box_encodings,
- rpn_objectness_predictions_with_background,
- rpn_features_to_crop,
- anchors,
- image_shape):
- """Predicts the output tensors from second stage of Faster R-CNN.
-
- Args:
- rpn_box_encodings: 4-D float tensor of shape
- [batch_size, num_valid_anchors, self._box_coder.code_size] containing
- predicted boxes.
- rpn_objectness_predictions_with_background: 2-D float tensor of shape
- [batch_size, num_valid_anchors, 2] containing class
- predictions (logits) for each of the anchors. Note that this
- tensor *includes* background class predictions (at class index 0).
- rpn_features_to_crop: A 4-D float32 tensor with shape
- [batch_size, height, width, depth] representing image features to crop
- using the proposal boxes predicted by the RPN.
- anchors: 2-D float tensor of shape
- [num_anchors, self._box_coder.code_size].
- image_shape: A 1D int32 tensors of size [4] containing the image shape.
-
- Returns:
- prediction_dict: a dictionary holding "raw" prediction tensors:
- 1) refined_box_encodings: a 3-D tensor with shape
- [total_num_proposals, num_classes, 4] representing predicted
- (final) refined box encodings, where
- total_num_proposals=batch_size*self._max_num_proposals
- 2) class_predictions_with_background: a 3-D tensor with shape
- [total_num_proposals, num_classes + 1] containing class
- predictions (logits) for each of the anchors, where
- total_num_proposals=batch_size*self._max_num_proposals.
- Note that this tensor *includes* background class predictions
- (at class index 0).
- 3) num_proposals: An int32 tensor of shape [batch_size] representing the
- number of proposals generated by the RPN. `num_proposals` allows us
- to keep track of which entries are to be treated as zero paddings and
- which are not since we always pad the number of proposals to be
- `self.max_num_proposals` for each image.
- 4) proposal_boxes: A float32 tensor of shape
- [batch_size, self.max_num_proposals, 4] representing
- decoded proposal bounding boxes in absolute coordinates.
- 5) proposal_boxes_normalized: A float32 tensor of shape
- [batch_size, self.max_num_proposals, 4] representing decoded proposal
- bounding boxes in normalized coordinates. Can be used to override the
- boxes proposed by the RPN, thus enabling one to extract features and
- get box classification and prediction for externally selected areas
- of the image.
- 6) box_classifier_features: a 4-D float32 tensor representing the
- features for each proposal.
- 7) mask_predictions: (optional) a 4-D tensor with shape
- [total_num_padded_proposals, num_classes, mask_height, mask_width]
- containing instance mask predictions.
- """
- proposal_boxes_normalized, _, num_proposals = self._postprocess_rpn(
- rpn_box_encodings, rpn_objectness_predictions_with_background,
- anchors, image_shape)
-
- flattened_proposal_feature_maps = (
- self._compute_second_stage_input_feature_maps(
- rpn_features_to_crop, proposal_boxes_normalized))
-
- box_classifier_features = (
- self._feature_extractor.extract_box_classifier_features(
- flattened_proposal_feature_maps,
- scope=self.second_stage_feature_extractor_scope))
-
- box_predictions = self._mask_rcnn_box_predictor.predict(
- box_classifier_features,
- num_predictions_per_location=1,
- scope=self.second_stage_box_predictor_scope)
- refined_box_encodings = tf.squeeze(
- box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
- class_predictions_with_background = tf.squeeze(box_predictions[
- box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1)
-
- absolute_proposal_boxes = ops.normalized_to_image_coordinates(
- proposal_boxes_normalized, image_shape, self._parallel_iterations)
-
- prediction_dict = {
- 'refined_box_encodings': refined_box_encodings,
- 'class_predictions_with_background':
- class_predictions_with_background,
- 'num_proposals': num_proposals,
- 'proposal_boxes': absolute_proposal_boxes,
- 'box_classifier_features': box_classifier_features,
- 'proposal_boxes_normalized': proposal_boxes_normalized,
- }
- if box_predictor.MASK_PREDICTIONS in box_predictions:
- mask_predictions = tf.squeeze(box_predictions[
- box_predictor.MASK_PREDICTIONS], axis=1)
- prediction_dict['mask_predictions'] = mask_predictions
-
- return prediction_dict
-
- def _extract_rpn_feature_maps(self, preprocessed_inputs):
- """Extracts RPN features.
-
- This function extracts two feature maps: a feature map to be directly
- fed to a box predictor (to predict location and objectness scores for
- proposals) and a feature map from which to crop regions which will then
- be sent to the second stage box classifier.
-
- Args:
- preprocessed_inputs: a [batch, height, width, channels] image tensor.
-
- Returns:
- rpn_box_predictor_features: A 4-D float32 tensor with shape
- [batch, height, width, depth] to be used for predicting proposal boxes
- and corresponding objectness scores.
- rpn_features_to_crop: A 4-D float32 tensor with shape
- [batch, height, width, depth] representing image features to crop using
- the proposals boxes.
- anchors: A BoxList representing anchors (for the RPN) in
- absolute coordinates.
- image_shape: A 1-D tensor representing the input image shape.
- """
- image_shape = tf.shape(preprocessed_inputs)
- rpn_features_to_crop = self._feature_extractor.extract_proposal_features(
- preprocessed_inputs, scope=self.first_stage_feature_extractor_scope)
-
- feature_map_shape = tf.shape(rpn_features_to_crop)
- anchors = self._first_stage_anchor_generator.generate(
- [(feature_map_shape[1], feature_map_shape[2])])
- with slim.arg_scope(self._first_stage_box_predictor_arg_scope):
- kernel_size = self._first_stage_box_predictor_kernel_size
- rpn_box_predictor_features = slim.conv2d(
- rpn_features_to_crop,
- self._first_stage_box_predictor_depth,
- kernel_size=[kernel_size, kernel_size],
- rate=self._first_stage_atrous_rate,
- activation_fn=tf.nn.relu6)
- return (rpn_box_predictor_features, rpn_features_to_crop,
- anchors, image_shape)
-
- def _predict_rpn_proposals(self, rpn_box_predictor_features):
- """Adds box predictors to RPN feature map to predict proposals.
-
- Note resulting tensors will not have been postprocessed.
-
- Args:
- rpn_box_predictor_features: A 4-D float32 tensor with shape
- [batch, height, width, depth] to be used for predicting proposal boxes
- and corresponding objectness scores.
-
- Returns:
- box_encodings: 3-D float tensor of shape
- [batch_size, num_anchors, self._box_coder.code_size] containing
- predicted boxes.
- objectness_predictions_with_background: 3-D float tensor of shape
- [batch_size, num_anchors, 2] containing class
- predictions (logits) for each of the anchors. Note that this
- tensor *includes* background class predictions (at class index 0).
-
- Raises:
- RuntimeError: if the anchor generator generates anchors corresponding to
- multiple feature maps. We currently assume that a single feature map
- is generated for the RPN.
- """
- num_anchors_per_location = (
- self._first_stage_anchor_generator.num_anchors_per_location())
- if len(num_anchors_per_location) != 1:
- raise RuntimeError('anchor_generator is expected to generate anchors '
- 'corresponding to a single feature map.')
- box_predictions = self._first_stage_box_predictor.predict(
- rpn_box_predictor_features,
- num_anchors_per_location[0],
- scope=self.first_stage_box_predictor_scope)
-
- box_encodings = box_predictions[box_predictor.BOX_ENCODINGS]
- objectness_predictions_with_background = box_predictions[
- box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
- return (tf.squeeze(box_encodings, axis=2),
- objectness_predictions_with_background)
-
- def _remove_invalid_anchors_and_predictions(
- self,
- box_encodings,
- objectness_predictions_with_background,
- anchors_boxlist,
- clip_window):
- """Removes anchors that (partially) fall outside an image.
-
- Also removes associated box encodings and objectness predictions.
-
- Args:
- box_encodings: 3-D float tensor of shape
- [batch_size, num_anchors, self._box_coder.code_size] containing
- predicted boxes.
- objectness_predictions_with_background: 3-D float tensor of shape
- [batch_size, num_anchors, 2] containing class
- predictions (logits) for each of the anchors. Note that this
- tensor *includes* background class predictions (at class index 0).
- anchors_boxlist: A BoxList representing num_anchors anchors (for the RPN)
- in absolute coordinates.
- clip_window: a 1-D tensor representing the [ymin, xmin, ymax, xmax]
- extent of the window to clip/prune to.
-
- Returns:
- box_encodings: 4-D float tensor of shape
- [batch_size, num_valid_anchors, self._box_coder.code_size] containing
- predicted boxes, where num_valid_anchors <= num_anchors
- objectness_predictions_with_background: 2-D float tensor of shape
- [batch_size, num_valid_anchors, 2] containing class
- predictions (logits) for each of the anchors, where
- num_valid_anchors <= num_anchors. Note that this
- tensor *includes* background class predictions (at class index 0).
- anchors: A BoxList representing num_valid_anchors anchors (for the RPN) in
- absolute coordinates.
- """
- pruned_anchors_boxlist, keep_indices = box_list_ops.prune_outside_window(
- anchors_boxlist, clip_window)
- def _batch_gather_kept_indices(predictions_tensor):
- return tf.map_fn(
- partial(tf.gather, indices=keep_indices),
- elems=predictions_tensor,
- dtype=tf.float32,
- parallel_iterations=self._parallel_iterations,
- back_prop=True)
- return (_batch_gather_kept_indices(box_encodings),
- _batch_gather_kept_indices(objectness_predictions_with_background),
- pruned_anchors_boxlist)
-
- def _flatten_first_two_dimensions(self, inputs):
- """Flattens `K-d` tensor along batch dimension to be a `(K-1)-d` tensor.
-
- Converts `inputs` with shape [A, B, ..., depth] into a tensor of shape
- [A * B, ..., depth].
-
- Args:
- inputs: A float tensor with shape [A, B, ..., depth]. Note that the first
- two and last dimensions must be statically defined.
- Returns:
- A float tensor with shape [A * B, ..., depth] (where the first and last
- dimension are statically defined.
- """
- combined_shape = shape_utils.combined_static_and_dynamic_shape(inputs)
- flattened_shape = tf.stack([combined_shape[0] * combined_shape[1]] +
- combined_shape[2:])
- return tf.reshape(inputs, flattened_shape)
-
- def postprocess(self, prediction_dict):
- """Convert prediction tensors to final detections.
-
- This function converts raw predictions tensors to final detection results.
- See base class for output format conventions. Note also that by default,
- scores are to be interpreted as logits, but if a score_converter is used,
- then scores are remapped (and may thus have a different interpretation).
-
- If first_stage_only=True, the returned results represent proposals from the
- first stage RPN and are padded to have self.max_num_proposals for each
- image; otherwise, the results can be interpreted as multiclass detections
- from the full two-stage model and are padded to self._max_detections.
-
- Args:
- prediction_dict: a dictionary holding prediction tensors (see the
- documentation for the predict method. If first_stage_only=True, we
- expect prediction_dict to contain `rpn_box_encodings`,
- `rpn_objectness_predictions_with_background`, `rpn_features_to_crop`,
- `image_shape`, and `anchors` fields. Otherwise we expect
- prediction_dict to additionally contain `refined_box_encodings`,
- `class_predictions_with_background`, `num_proposals`,
- `proposal_boxes` and, optionally, `mask_predictions` fields.
-
- Returns:
- detections: a dictionary containing the following fields
- detection_boxes: [batch, max_detection, 4]
- detection_scores: [batch, max_detections]
- detection_classes: [batch, max_detections]
- (this entry is only created if rpn_mode=False)
- num_detections: [batch]
- """
- with tf.name_scope('FirstStagePostprocessor'):
- image_shape = prediction_dict['image_shape']
- if self._first_stage_only:
- proposal_boxes, proposal_scores, num_proposals = self._postprocess_rpn(
- prediction_dict['rpn_box_encodings'],
- prediction_dict['rpn_objectness_predictions_with_background'],
- prediction_dict['anchors'],
- image_shape)
- return {
- 'detection_boxes': proposal_boxes,
- 'detection_scores': proposal_scores,
- 'num_detections': tf.to_float(num_proposals)
- }
- with tf.name_scope('SecondStagePostprocessor'):
- mask_predictions = prediction_dict.get(box_predictor.MASK_PREDICTIONS)
- detections_dict = self._postprocess_box_classifier(
- prediction_dict['refined_box_encodings'],
- prediction_dict['class_predictions_with_background'],
- prediction_dict['proposal_boxes'],
- prediction_dict['num_proposals'],
- image_shape,
- mask_predictions=mask_predictions)
- return detections_dict
-
- def _postprocess_rpn(self,
- rpn_box_encodings_batch,
- rpn_objectness_predictions_with_background_batch,
- anchors,
- image_shape):
- """Converts first stage prediction tensors from the RPN to proposals.
-
- This function decodes the raw RPN predictions, runs non-max suppression
- on the result.
-
- Note that the behavior of this function is slightly modified during
- training --- specifically, we stop the gradient from passing through the
- proposal boxes and we only return a balanced sampled subset of proposals
- with size `second_stage_batch_size`.
-
- Args:
- rpn_box_encodings_batch: A 3-D float32 tensor of shape
- [batch_size, num_anchors, self._box_coder.code_size] containing
- predicted proposal box encodings.
- rpn_objectness_predictions_with_background_batch: A 3-D float tensor of
- shape [batch_size, num_anchors, 2] containing objectness predictions
- (logits) for each of the anchors with 0 corresponding to background
- and 1 corresponding to object.
- anchors: A 2-D tensor of shape [num_anchors, 4] representing anchors
- for the first stage RPN. Note that `num_anchors` can differ depending
- on whether the model is created in training or inference mode.
- image_shape: A 1-D tensor representing the input image shape.
-
- Returns:
- proposal_boxes: A float tensor with shape
- [batch_size, max_num_proposals, 4] representing the (potentially zero
- padded) proposal boxes for all images in the batch. These boxes are
- represented as normalized coordinates.
- proposal_scores: A float tensor with shape
- [batch_size, max_num_proposals] representing the (potentially zero
- padded) proposal objectness scores for all images in the batch.
- num_proposals: A Tensor of type `int32`. A 1-D tensor of shape [batch]
- representing the number of proposals predicted for each image in
- the batch.
- """
- rpn_box_encodings_batch = tf.expand_dims(rpn_box_encodings_batch, axis=2)
- rpn_encodings_shape = shape_utils.combined_static_and_dynamic_shape(
- rpn_box_encodings_batch)
- tiled_anchor_boxes = tf.tile(
- tf.expand_dims(anchors, 0), [rpn_encodings_shape[0], 1, 1])
- proposal_boxes = self._batch_decode_boxes(rpn_box_encodings_batch,
- tiled_anchor_boxes)
- proposal_boxes = tf.squeeze(proposal_boxes, axis=2)
- rpn_objectness_softmax_without_background = tf.nn.softmax(
- rpn_objectness_predictions_with_background_batch)[:, :, 1]
- clip_window = tf.to_float(tf.stack([0, 0, image_shape[1], image_shape[2]]))
- (proposal_boxes, proposal_scores, _, _, _,
- num_proposals) = post_processing.batch_multiclass_non_max_suppression(
- tf.expand_dims(proposal_boxes, axis=2),
- tf.expand_dims(rpn_objectness_softmax_without_background,
- axis=2),
- self._first_stage_nms_score_threshold,
- self._first_stage_nms_iou_threshold,
- self._first_stage_max_proposals,
- self._first_stage_max_proposals,
- clip_window=clip_window)
- if self._is_training:
- proposal_boxes = tf.stop_gradient(proposal_boxes)
- if not self._hard_example_miner:
- (groundtruth_boxlists, groundtruth_classes_with_background_list,
- _) = self._format_groundtruth_data(image_shape)
- (proposal_boxes, proposal_scores,
- num_proposals) = self._unpad_proposals_and_sample_box_classifier_batch(
- proposal_boxes, proposal_scores, num_proposals,
- groundtruth_boxlists, groundtruth_classes_with_background_list)
- # normalize proposal boxes
- proposal_boxes_reshaped = tf.reshape(proposal_boxes, [-1, 4])
- normalized_proposal_boxes_reshaped = box_list_ops.to_normalized_coordinates(
- box_list.BoxList(proposal_boxes_reshaped),
- image_shape[1], image_shape[2], check_range=False).get()
- proposal_boxes = tf.reshape(normalized_proposal_boxes_reshaped,
- [-1, proposal_boxes.shape[1].value, 4])
- return proposal_boxes, proposal_scores, num_proposals
-
- def _unpad_proposals_and_sample_box_classifier_batch(
- self,
- proposal_boxes,
- proposal_scores,
- num_proposals,
- groundtruth_boxlists,
- groundtruth_classes_with_background_list):
- """Unpads proposals and samples a minibatch for second stage.
-
- Args:
- proposal_boxes: A float tensor with shape
- [batch_size, num_proposals, 4] representing the (potentially zero
- padded) proposal boxes for all images in the batch. These boxes are
- represented as normalized coordinates.
- proposal_scores: A float tensor with shape
- [batch_size, num_proposals] representing the (potentially zero
- padded) proposal objectness scores for all images in the batch.
- num_proposals: A Tensor of type `int32`. A 1-D tensor of shape [batch]
- representing the number of proposals predicted for each image in
- the batch.
- groundtruth_boxlists: A list of BoxLists containing (absolute) coordinates
- of the groundtruth boxes.
- groundtruth_classes_with_background_list: A list of 2-D one-hot
- (or k-hot) tensors of shape [num_boxes, num_classes+1] containing the
- class targets with the 0th index assumed to map to the background class.
-
- Returns:
- proposal_boxes: A float tensor with shape
- [batch_size, second_stage_batch_size, 4] representing the (potentially
- zero padded) proposal boxes for all images in the batch. These boxes
- are represented as normalized coordinates.
- proposal_scores: A float tensor with shape
- [batch_size, second_stage_batch_size] representing the (potentially zero
- padded) proposal objectness scores for all images in the batch.
- num_proposals: A Tensor of type `int32`. A 1-D tensor of shape [batch]
- representing the number of proposals predicted for each image in
- the batch.
- """
- single_image_proposal_box_sample = []
- single_image_proposal_score_sample = []
- single_image_num_proposals_sample = []
- for (single_image_proposal_boxes,
- single_image_proposal_scores,
- single_image_num_proposals,
- single_image_groundtruth_boxlist,
- single_image_groundtruth_classes_with_background) in zip(
- tf.unstack(proposal_boxes),
- tf.unstack(proposal_scores),
- tf.unstack(num_proposals),
- groundtruth_boxlists,
- groundtruth_classes_with_background_list):
- static_shape = single_image_proposal_boxes.get_shape()
- sliced_static_shape = tf.TensorShape([tf.Dimension(None),
- static_shape.dims[-1]])
- single_image_proposal_boxes = tf.slice(
- single_image_proposal_boxes,
- [0, 0],
- [single_image_num_proposals, -1])
- single_image_proposal_boxes.set_shape(sliced_static_shape)
-
- single_image_proposal_scores = tf.slice(single_image_proposal_scores,
- [0],
- [single_image_num_proposals])
- single_image_boxlist = box_list.BoxList(single_image_proposal_boxes)
- single_image_boxlist.add_field(fields.BoxListFields.scores,
- single_image_proposal_scores)
- sampled_boxlist = self._sample_box_classifier_minibatch(
- single_image_boxlist,
- single_image_groundtruth_boxlist,
- single_image_groundtruth_classes_with_background)
- sampled_padded_boxlist = box_list_ops.pad_or_clip_box_list(
- sampled_boxlist,
- num_boxes=self._second_stage_batch_size)
- single_image_num_proposals_sample.append(tf.minimum(
- sampled_boxlist.num_boxes(),
- self._second_stage_batch_size))
- bb = sampled_padded_boxlist.get()
- single_image_proposal_box_sample.append(bb)
- single_image_proposal_score_sample.append(
- sampled_padded_boxlist.get_field(fields.BoxListFields.scores))
- return (tf.stack(single_image_proposal_box_sample),
- tf.stack(single_image_proposal_score_sample),
- tf.stack(single_image_num_proposals_sample))
-
- def _format_groundtruth_data(self, image_shape):
- """Helper function for preparing groundtruth data for target assignment.
-
- In order to be consistent with the model.DetectionModel interface,
- groundtruth boxes are specified in normalized coordinates and classes are
- specified as label indices with no assumed background category. To prepare
- for target assignment, we:
- 1) convert boxes to absolute coordinates,
- 2) add a background class at class index 0
- 3) groundtruth instance masks, if available, are resized to match
- image_shape.
-
- Args:
- image_shape: A 1-D int32 tensor of shape [4] representing the shape of the
- input image batch.
-
- Returns:
- groundtruth_boxlists: A list of BoxLists containing (absolute) coordinates
- of the groundtruth boxes.
- groundtruth_classes_with_background_list: A list of 2-D one-hot
- (or k-hot) tensors of shape [num_boxes, num_classes+1] containing the
- class targets with the 0th index assumed to map to the background class.
- groundtruth_masks_list: If present, a list of 3-D tf.float32 tensors of
- shape [num_boxes, image_height, image_width] containing instance masks.
- This is set to None if no masks exist in the provided groundtruth.
- """
- groundtruth_boxlists = [
- box_list_ops.to_absolute_coordinates(
- box_list.BoxList(boxes), image_shape[1], image_shape[2])
- for boxes in self.groundtruth_lists(fields.BoxListFields.boxes)]
- groundtruth_classes_with_background_list = [
- tf.to_float(
- tf.pad(one_hot_encoding, [[0, 0], [1, 0]], mode='CONSTANT'))
- for one_hot_encoding in self.groundtruth_lists(
- fields.BoxListFields.classes)]
-
- groundtruth_masks_list = self._groundtruth_lists.get(
- fields.BoxListFields.masks)
- if groundtruth_masks_list is not None:
- resized_masks_list = []
- for mask in groundtruth_masks_list:
- resized_4d_mask = tf.image.resize_images(
- tf.expand_dims(mask, axis=3),
- image_shape[1:3],
- method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
- align_corners=True)
- resized_masks_list.append(tf.squeeze(resized_4d_mask, axis=3))
- groundtruth_masks_list = resized_masks_list
-
- return (groundtruth_boxlists, groundtruth_classes_with_background_list,
- groundtruth_masks_list)
-
- def _sample_box_classifier_minibatch(self,
- proposal_boxlist,
- groundtruth_boxlist,
- groundtruth_classes_with_background):
- """Samples a mini-batch of proposals to be sent to the box classifier.
-
- Helper function for self._postprocess_rpn.
-
- Args:
- proposal_boxlist: A BoxList containing K proposal boxes in absolute
- coordinates.
- groundtruth_boxlist: A Boxlist containing N groundtruth object boxes in
- absolute coordinates.
- groundtruth_classes_with_background: A tensor with shape
- `[N, self.num_classes + 1]` representing groundtruth classes. The
- classes are assumed to be k-hot encoded, and include background as the
- zero-th class.
-
- Returns:
- a BoxList contained sampled proposals.
- """
- (cls_targets, cls_weights, _, _, _) = self._detector_target_assigner.assign(
- proposal_boxlist, groundtruth_boxlist,
- groundtruth_classes_with_background)
- # Selects all boxes as candidates if none of them is selected according
- # to cls_weights. This could happen as boxes within certain IOU ranges
- # are ignored. If triggered, the selected boxes will still be ignored
- # during loss computation.
- cls_weights += tf.to_float(tf.equal(tf.reduce_sum(cls_weights), 0))
- positive_indicator = tf.greater(tf.argmax(cls_targets, axis=1), 0)
- sampled_indices = self._second_stage_sampler.subsample(
- tf.cast(cls_weights, tf.bool),
- self._second_stage_batch_size,
- positive_indicator)
- return box_list_ops.boolean_mask(proposal_boxlist, sampled_indices)
-
- def _compute_second_stage_input_feature_maps(self, features_to_crop,
- proposal_boxes_normalized):
- """Crops to a set of proposals from the feature map for a batch of images.
-
- Helper function for self._postprocess_rpn. This function calls
- `tf.image.crop_and_resize` to create the feature map to be passed to the
- second stage box classifier for each proposal.
-
- Args:
- features_to_crop: A float32 tensor with shape
- [batch_size, height, width, depth]
- proposal_boxes_normalized: A float32 tensor with shape [batch_size,
- num_proposals, box_code_size] containing proposal boxes in
- normalized coordinates.
-
- Returns:
- A float32 tensor with shape [K, new_height, new_width, depth].
- """
- def get_box_inds(proposals):
- proposals_shape = proposals.get_shape().as_list()
- if any(dim is None for dim in proposals_shape):
- proposals_shape = tf.shape(proposals)
- ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
- multiplier = tf.expand_dims(
- tf.range(start=0, limit=proposals_shape[0]), 1)
- return tf.reshape(ones_mat * multiplier, [-1])
-
- cropped_regions = tf.image.crop_and_resize(
- features_to_crop,
- self._flatten_first_two_dimensions(proposal_boxes_normalized),
- get_box_inds(proposal_boxes_normalized),
- (self._initial_crop_size, self._initial_crop_size))
- return slim.max_pool2d(
- cropped_regions,
- [self._maxpool_kernel_size, self._maxpool_kernel_size],
- stride=self._maxpool_stride)
-
- def _postprocess_box_classifier(self,
- refined_box_encodings,
- class_predictions_with_background,
- proposal_boxes,
- num_proposals,
- image_shape,
- mask_predictions=None):
- """Converts predictions from the second stage box classifier to detections.
-
- Args:
- refined_box_encodings: a 3-D float tensor with shape
- [total_num_padded_proposals, num_classes, 4] representing predicted
- (final) refined box encodings.
- class_predictions_with_background: a 3-D tensor float with shape
- [total_num_padded_proposals, num_classes + 1] containing class
- predictions (logits) for each of the proposals. Note that this tensor
- *includes* background class predictions (at class index 0).
- proposal_boxes: a 3-D float tensor with shape
- [batch_size, self.max_num_proposals, 4] representing decoded proposal
- bounding boxes in absolute coordinates.
- num_proposals: a 1-D int32 tensor of shape [batch] representing the number
- of proposals predicted for each image in the batch.
- image_shape: a 1-D int32 tensor representing the input image shape.
- mask_predictions: (optional) a 4-D float tensor with shape
- [total_num_padded_proposals, num_classes, mask_height, mask_width]
- containing instance mask prediction logits.
-
- Returns:
- A dictionary containing:
- `detection_boxes`: [batch, max_detection, 4]
- `detection_scores`: [batch, max_detections]
- `detection_classes`: [batch, max_detections]
- `num_detections`: [batch]
- `detection_masks`:
- (optional) [batch, max_detections, mask_height, mask_width]. Note
- that a pixel-wise sigmoid score converter is applied to the detection
- masks.
- """
- refined_box_encodings_batch = tf.reshape(refined_box_encodings,
- [-1, self.max_num_proposals,
- self.num_classes,
- self._box_coder.code_size])
- class_predictions_with_background_batch = tf.reshape(
- class_predictions_with_background,
- [-1, self.max_num_proposals, self.num_classes + 1]
- )
- refined_decoded_boxes_batch = self._batch_decode_boxes(
- refined_box_encodings_batch, proposal_boxes)
- class_predictions_with_background_batch = (
- self._second_stage_score_conversion_fn(
- class_predictions_with_background_batch))
- class_predictions_batch = tf.reshape(
- tf.slice(class_predictions_with_background_batch,
- [0, 0, 1], [-1, -1, -1]),
- [-1, self.max_num_proposals, self.num_classes])
- clip_window = tf.to_float(tf.stack([0, 0, image_shape[1], image_shape[2]]))
-
- mask_predictions_batch = None
- if mask_predictions is not None:
- mask_height = mask_predictions.shape[2].value
- mask_width = mask_predictions.shape[3].value
- mask_predictions = tf.sigmoid(mask_predictions)
- mask_predictions_batch = tf.reshape(
- mask_predictions, [-1, self.max_num_proposals,
- self.num_classes, mask_height, mask_width])
- (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, _,
- num_detections) = self._second_stage_nms_fn(
- refined_decoded_boxes_batch,
- class_predictions_batch,
- clip_window=clip_window,
- change_coordinate_frame=True,
- num_valid_boxes=num_proposals,
- masks=mask_predictions_batch)
- detections = {'detection_boxes': nmsed_boxes,
- 'detection_scores': nmsed_scores,
- 'detection_classes': nmsed_classes,
- 'num_detections': tf.to_float(num_detections)}
- if nmsed_masks is not None:
- detections['detection_masks'] = nmsed_masks
- return detections
-
- def _batch_decode_boxes(self, box_encodings, anchor_boxes):
- """Decodes box encodings with respect to the anchor boxes.
-
- Args:
- box_encodings: a 4-D tensor with shape
- [batch_size, num_anchors, num_classes, self._box_coder.code_size]
- representing box encodings.
- anchor_boxes: [batch_size, num_anchors, 4] representing
- decoded bounding boxes.
-
- Returns:
- decoded_boxes: a [batch_size, num_anchors, num_classes, 4]
- float tensor representing bounding box predictions
- (for each image in batch, proposal and class).
- """
- combined_shape = shape_utils.combined_static_and_dynamic_shape(
- box_encodings)
- num_classes = combined_shape[2]
- tiled_anchor_boxes = tf.tile(
- tf.expand_dims(anchor_boxes, 2), [1, 1, num_classes, 1])
- tiled_anchors_boxlist = box_list.BoxList(
- tf.reshape(tiled_anchor_boxes, [-1, 4]))
- decoded_boxes = self._box_coder.decode(
- tf.reshape(box_encodings, [-1, self._box_coder.code_size]),
- tiled_anchors_boxlist)
- return tf.reshape(decoded_boxes.get(),
- tf.stack([combined_shape[0], combined_shape[1],
- num_classes, 4]))
-
- def loss(self, prediction_dict, scope=None):
- """Compute scalar loss tensors given prediction tensors.
-
- If first_stage_only=True, only RPN related losses are computed (i.e.,
- `rpn_localization_loss` and `rpn_objectness_loss`). Otherwise all
- losses are computed.
-
- Args:
- prediction_dict: a dictionary holding prediction tensors (see the
- documentation for the predict method. If first_stage_only=True, we
- expect prediction_dict to contain `rpn_box_encodings`,
- `rpn_objectness_predictions_with_background`, `rpn_features_to_crop`,
- `image_shape`, and `anchors` fields. Otherwise we expect
- prediction_dict to additionally contain `refined_box_encodings`,
- `class_predictions_with_background`, `num_proposals`, and
- `proposal_boxes` fields.
- scope: Optional scope name.
-
- Returns:
- a dictionary mapping loss keys (`first_stage_localization_loss`,
- `first_stage_objectness_loss`, 'second_stage_localization_loss',
- 'second_stage_classification_loss') to scalar tensors representing
- corresponding loss values.
- """
- with tf.name_scope(scope, 'Loss', prediction_dict.values()):
- (groundtruth_boxlists, groundtruth_classes_with_background_list,
- groundtruth_masks_list
- ) = self._format_groundtruth_data(prediction_dict['image_shape'])
- loss_dict = self._loss_rpn(
- prediction_dict['rpn_box_encodings'],
- prediction_dict['rpn_objectness_predictions_with_background'],
- prediction_dict['anchors'],
- groundtruth_boxlists,
- groundtruth_classes_with_background_list)
- if not self._first_stage_only:
- loss_dict.update(
- self._loss_box_classifier(
- prediction_dict['refined_box_encodings'],
- prediction_dict['class_predictions_with_background'],
- prediction_dict['proposal_boxes'],
- prediction_dict['num_proposals'],
- groundtruth_boxlists,
- groundtruth_classes_with_background_list,
- prediction_dict['image_shape'],
- prediction_dict.get('mask_predictions'),
- groundtruth_masks_list,
- ))
- return loss_dict
-
- def _loss_rpn(self,
- rpn_box_encodings,
- rpn_objectness_predictions_with_background,
- anchors,
- groundtruth_boxlists,
- groundtruth_classes_with_background_list):
- """Computes scalar RPN loss tensors.
-
- Uses self._proposal_target_assigner to obtain regression and classification
- targets for the first stage RPN, samples a "minibatch" of anchors to
- participate in the loss computation, and returns the RPN losses.
-
- Args:
- rpn_box_encodings: A 4-D float tensor of shape
- [batch_size, num_anchors, self._box_coder.code_size] containing
- predicted proposal box encodings.
- rpn_objectness_predictions_with_background: A 2-D float tensor of shape
- [batch_size, num_anchors, 2] containing objectness predictions
- (logits) for each of the anchors with 0 corresponding to background
- and 1 corresponding to object.
- anchors: A 2-D tensor of shape [num_anchors, 4] representing anchors
- for the first stage RPN. Note that `num_anchors` can differ depending
- on whether the model is created in training or inference mode.
- groundtruth_boxlists: A list of BoxLists containing coordinates of the
- groundtruth boxes.
- groundtruth_classes_with_background_list: A list of 2-D one-hot
- (or k-hot) tensors of shape [num_boxes, num_classes+1] containing the
- class targets with the 0th index assumed to map to the background class.
-
- Returns:
- a dictionary mapping loss keys (`first_stage_localization_loss`,
- `first_stage_objectness_loss`) to scalar tensors representing
- corresponding loss values.
- """
- with tf.name_scope('RPNLoss'):
- (batch_cls_targets, batch_cls_weights, batch_reg_targets,
- batch_reg_weights, _) = target_assigner.batch_assign_targets(
- self._proposal_target_assigner, box_list.BoxList(anchors),
- groundtruth_boxlists, len(groundtruth_boxlists)*[None])
- batch_cls_targets = tf.squeeze(batch_cls_targets, axis=2)
-
- def _minibatch_subsample_fn(inputs):
- cls_targets, cls_weights = inputs
- return self._first_stage_sampler.subsample(
- tf.cast(cls_weights, tf.bool),
- self._first_stage_minibatch_size, tf.cast(cls_targets, tf.bool))
- batch_sampled_indices = tf.to_float(tf.map_fn(
- _minibatch_subsample_fn,
- [batch_cls_targets, batch_cls_weights],
- dtype=tf.bool,
- parallel_iterations=self._parallel_iterations,
- back_prop=True))
-
- # Normalize by number of examples in sampled minibatch
- normalizer = tf.reduce_sum(batch_sampled_indices, axis=1)
- batch_one_hot_targets = tf.one_hot(
- tf.to_int32(batch_cls_targets), depth=2)
- sampled_reg_indices = tf.multiply(batch_sampled_indices,
- batch_reg_weights)
-
- localization_losses = self._first_stage_localization_loss(
- rpn_box_encodings, batch_reg_targets, weights=sampled_reg_indices)
- objectness_losses = self._first_stage_objectness_loss(
- rpn_objectness_predictions_with_background,
- batch_one_hot_targets, weights=batch_sampled_indices)
- localization_loss = tf.reduce_mean(
- tf.reduce_sum(localization_losses, axis=1) / normalizer)
- objectness_loss = tf.reduce_mean(
- tf.reduce_sum(objectness_losses, axis=1) / normalizer)
- loss_dict = {}
-
- with tf.name_scope('localization_loss'):
- loss_dict['first_stage_localization_loss'] = (
- self._first_stage_loc_loss_weight * localization_loss)
- with tf.name_scope('objectness_loss'):
- loss_dict['first_stage_objectness_loss'] = (
- self._first_stage_obj_loss_weight * objectness_loss)
- return loss_dict
-
- def _loss_box_classifier(self,
- refined_box_encodings,
- class_predictions_with_background,
- proposal_boxes,
- num_proposals,
- groundtruth_boxlists,
- groundtruth_classes_with_background_list,
- image_shape,
- prediction_masks=None,
- groundtruth_masks_list=None):
- """Computes scalar box classifier loss tensors.
-
- Uses self._detector_target_assigner to obtain regression and classification
- targets for the second stage box classifier, optionally performs
- hard mining, and returns losses. All losses are computed independently
- for each image and then averaged across the batch.
- Please note that for boxes and masks with multiple labels, the box
- regression and mask prediction losses are only computed for one label.
-
- This function assumes that the proposal boxes in the "padded" regions are
- actually zero (and thus should not be matched to).
-
-
- Args:
- refined_box_encodings: a 3-D tensor with shape
- [total_num_proposals, num_classes, box_coder.code_size] representing
- predicted (final) refined box encodings.
- class_predictions_with_background: a 2-D tensor with shape
- [total_num_proposals, num_classes + 1] containing class
- predictions (logits) for each of the anchors. Note that this tensor
- *includes* background class predictions (at class index 0).
- proposal_boxes: [batch_size, self.max_num_proposals, 4] representing
- decoded proposal bounding boxes.
- num_proposals: A Tensor of type `int32`. A 1-D tensor of shape [batch]
- representing the number of proposals predicted for each image in
- the batch.
- groundtruth_boxlists: a list of BoxLists containing coordinates of the
- groundtruth boxes.
- groundtruth_classes_with_background_list: a list of 2-D one-hot
- (or k-hot) tensors of shape [num_boxes, num_classes + 1] containing the
- class targets with the 0th index assumed to map to the background class.
- image_shape: a 1-D tensor of shape [4] representing the image shape.
- prediction_masks: an optional 4-D tensor with shape [total_num_proposals,
- num_classes, mask_height, mask_width] containing the instance masks for
- each box.
- groundtruth_masks_list: an optional list of 3-D tensors of shape
- [num_boxes, image_height, image_width] containing the instance masks for
- each of the boxes.
-
- Returns:
- a dictionary mapping loss keys ('second_stage_localization_loss',
- 'second_stage_classification_loss') to scalar tensors representing
- corresponding loss values.
-
- Raises:
- ValueError: if `predict_instance_masks` in
- second_stage_mask_rcnn_box_predictor is True and
- `groundtruth_masks_list` is not provided.
- """
- with tf.name_scope('BoxClassifierLoss'):
- paddings_indicator = self._padded_batched_proposals_indicator(
- num_proposals, self.max_num_proposals)
- proposal_boxlists = [
- box_list.BoxList(proposal_boxes_single_image)
- for proposal_boxes_single_image in tf.unstack(proposal_boxes)]
- batch_size = len(proposal_boxlists)
-
- num_proposals_or_one = tf.to_float(tf.expand_dims(
- tf.maximum(num_proposals, tf.ones_like(num_proposals)), 1))
- normalizer = tf.tile(num_proposals_or_one,
- [1, self.max_num_proposals]) * batch_size
-
- (batch_cls_targets_with_background, batch_cls_weights, batch_reg_targets,
- batch_reg_weights, _) = target_assigner.batch_assign_targets(
- self._detector_target_assigner, proposal_boxlists,
- groundtruth_boxlists, groundtruth_classes_with_background_list)
-
- # We only predict refined location encodings for the non background
- # classes, but we now pad it to make it compatible with the class
- # predictions
- flat_cls_targets_with_background = tf.reshape(
- batch_cls_targets_with_background,
- [batch_size * self.max_num_proposals, -1])
- refined_box_encodings_with_background = tf.pad(
- refined_box_encodings, [[0, 0], [1, 0], [0, 0]])
- # For anchors with multiple labels, picks refined_location_encodings
- # for just one class to avoid over-counting for regression loss and
- # (optionally) mask loss.
- one_hot_flat_cls_targets_with_background = tf.argmax(
- flat_cls_targets_with_background, axis=1)
- one_hot_flat_cls_targets_with_background = tf.one_hot(
- one_hot_flat_cls_targets_with_background,
- flat_cls_targets_with_background.get_shape()[1])
- refined_box_encodings_masked_by_class_targets = tf.boolean_mask(
- refined_box_encodings_with_background,
- tf.greater(one_hot_flat_cls_targets_with_background, 0))
- class_predictions_with_background = tf.reshape(
- class_predictions_with_background,
- [batch_size, self.max_num_proposals, -1])
- reshaped_refined_box_encodings = tf.reshape(
- refined_box_encodings_masked_by_class_targets,
- [batch_size, -1, 4])
-
- second_stage_loc_losses = self._second_stage_localization_loss(
- reshaped_refined_box_encodings,
- batch_reg_targets, weights=batch_reg_weights) / normalizer
- second_stage_cls_losses = self._second_stage_classification_loss(
- class_predictions_with_background,
- batch_cls_targets_with_background,
- weights=batch_cls_weights) / normalizer
- second_stage_loc_loss = tf.reduce_sum(
- tf.boolean_mask(second_stage_loc_losses, paddings_indicator))
- second_stage_cls_loss = tf.reduce_sum(
- tf.boolean_mask(second_stage_cls_losses, paddings_indicator))
-
- if self._hard_example_miner:
- (second_stage_loc_loss, second_stage_cls_loss
- ) = self._unpad_proposals_and_apply_hard_mining(
- proposal_boxlists, second_stage_loc_losses,
- second_stage_cls_losses, num_proposals)
- loss_dict = {}
- with tf.name_scope('localization_loss'):
- loss_dict['second_stage_localization_loss'] = (
- self._second_stage_loc_loss_weight * second_stage_loc_loss)
-
- with tf.name_scope('classification_loss'):
- loss_dict['second_stage_classification_loss'] = (
- self._second_stage_cls_loss_weight * second_stage_cls_loss)
-
- second_stage_mask_loss = None
- if prediction_masks is not None:
- if groundtruth_masks_list is None:
- raise ValueError('Groundtruth instance masks not provided. '
- 'Please configure input reader.')
-
- # Create a new target assigner that matches the proposals to groundtruth
- # and returns the mask targets.
- # TODO: Move `unmatched_cls_target` from constructor to assign function.
- # This will enable reuse of a single target assigner for both class
- # targets and mask targets.
- mask_target_assigner = target_assigner.create_target_assigner(
- 'FasterRCNN', 'detection',
- unmatched_cls_target=tf.zeros(image_shape[1:3], dtype=tf.float32))
- (batch_mask_targets, _, _,
- batch_mask_target_weights, _) = target_assigner.batch_assign_targets(
- mask_target_assigner, proposal_boxlists,
- groundtruth_boxlists, groundtruth_masks_list)
-
- # Pad the prediction_masks with to add zeros for background class to be
- # consistent with class predictions.
- prediction_masks_with_background = tf.pad(
- prediction_masks, [[0, 0], [1, 0], [0, 0], [0, 0]])
- prediction_masks_masked_by_class_targets = tf.boolean_mask(
- prediction_masks_with_background,
- tf.greater(one_hot_flat_cls_targets_with_background, 0))
- mask_height = prediction_masks.shape[2].value
- mask_width = prediction_masks.shape[3].value
- reshaped_prediction_masks = tf.reshape(
- prediction_masks_masked_by_class_targets,
- [batch_size, -1, mask_height * mask_width])
-
- batch_mask_targets_shape = tf.shape(batch_mask_targets)
- flat_gt_masks = tf.reshape(batch_mask_targets,
- [-1, batch_mask_targets_shape[2],
- batch_mask_targets_shape[3]])
-
- # Use normalized proposals to crop mask targets from image masks.
- flat_normalized_proposals = box_list_ops.to_normalized_coordinates(
- box_list.BoxList(tf.reshape(proposal_boxes, [-1, 4])),
- image_shape[1], image_shape[2]).get()
-
- flat_cropped_gt_mask = tf.image.crop_and_resize(
- tf.expand_dims(flat_gt_masks, -1),
- flat_normalized_proposals,
- tf.range(flat_normalized_proposals.shape[0].value),
- [mask_height, mask_width])
-
- batch_cropped_gt_mask = tf.reshape(
- flat_cropped_gt_mask,
- [batch_size, -1, mask_height * mask_width])
-
- second_stage_mask_losses = self._second_stage_mask_loss(
- reshaped_prediction_masks,
- batch_cropped_gt_mask,
- weights=batch_mask_target_weights) / (
- mask_height * mask_width *
- tf.maximum(tf.reduce_sum(batch_mask_target_weights, axis=1,
- keep_dims=True),
- tf.ones((batch_size, 1))))
- second_stage_mask_loss = tf.reduce_sum(
- tf.boolean_mask(second_stage_mask_losses, paddings_indicator))
-
- if second_stage_mask_loss is not None:
- with tf.name_scope('mask_loss'):
- loss_dict['second_stage_mask_loss'] = (
- self._second_stage_mask_loss_weight * second_stage_mask_loss)
- return loss_dict
-
- def _padded_batched_proposals_indicator(self,
- num_proposals,
- max_num_proposals):
- """Creates indicator matrix of non-pad elements of padded batch proposals.
-
- Args:
- num_proposals: Tensor of type tf.int32 with shape [batch_size].
- max_num_proposals: Maximum number of proposals per image (integer).
-
- Returns:
- A Tensor of type tf.bool with shape [batch_size, max_num_proposals].
- """
- batch_size = tf.size(num_proposals)
- tiled_num_proposals = tf.tile(
- tf.expand_dims(num_proposals, 1), [1, max_num_proposals])
- tiled_proposal_index = tf.tile(
- tf.expand_dims(tf.range(max_num_proposals), 0), [batch_size, 1])
- return tf.greater(tiled_num_proposals, tiled_proposal_index)
-
- def _unpad_proposals_and_apply_hard_mining(self,
- proposal_boxlists,
- second_stage_loc_losses,
- second_stage_cls_losses,
- num_proposals):
- """Unpads proposals and applies hard mining.
-
- Args:
- proposal_boxlists: A list of `batch_size` BoxLists each representing
- `self.max_num_proposals` representing decoded proposal bounding boxes
- for each image.
- second_stage_loc_losses: A Tensor of type `float32`. A tensor of shape
- `[batch_size, self.max_num_proposals]` representing per-anchor
- second stage localization loss values.
- second_stage_cls_losses: A Tensor of type `float32`. A tensor of shape
- `[batch_size, self.max_num_proposals]` representing per-anchor
- second stage classification loss values.
- num_proposals: A Tensor of type `int32`. A 1-D tensor of shape [batch]
- representing the number of proposals predicted for each image in
- the batch.
-
- Returns:
- second_stage_loc_loss: A scalar float32 tensor representing the second
- stage localization loss.
- second_stage_cls_loss: A scalar float32 tensor representing the second
- stage classification loss.
- """
- for (proposal_boxlist, single_image_loc_loss, single_image_cls_loss,
- single_image_num_proposals) in zip(
- proposal_boxlists,
- tf.unstack(second_stage_loc_losses),
- tf.unstack(second_stage_cls_losses),
- tf.unstack(num_proposals)):
- proposal_boxlist = box_list.BoxList(
- tf.slice(proposal_boxlist.get(),
- [0, 0], [single_image_num_proposals, -1]))
- single_image_loc_loss = tf.slice(single_image_loc_loss,
- [0], [single_image_num_proposals])
- single_image_cls_loss = tf.slice(single_image_cls_loss,
- [0], [single_image_num_proposals])
- return self._hard_example_miner(
- location_losses=tf.expand_dims(single_image_loc_loss, 0),
- cls_losses=tf.expand_dims(single_image_cls_loss, 0),
- decoded_boxlist_list=[proposal_boxlist])
-
- def restore_map(self, from_detection_checkpoint=True):
- """Returns a map of variables to load from a foreign checkpoint.
-
- See parent class for details.
-
- Args:
- from_detection_checkpoint: whether to restore from a full detection
- checkpoint (with compatible variable names) or to restore from a
- classification checkpoint for initialization prior to training.
-
- Returns:
- A dict mapping variable names (to load from a checkpoint) to variables in
- the model graph.
- """
- if not from_detection_checkpoint:
- return self._feature_extractor.restore_from_classification_checkpoint_fn(
- self.first_stage_feature_extractor_scope,
- self.second_stage_feature_extractor_scope)
-
- variables_to_restore = tf.global_variables()
- variables_to_restore.append(slim.get_or_create_global_step())
- # Only load feature extractor variables to be consistent with loading from
- # a classification checkpoint.
- feature_extractor_variables = tf.contrib.framework.filter_variables(
- variables_to_restore,
- include_patterns=[self.first_stage_feature_extractor_scope,
- self.second_stage_feature_extractor_scope])
- return {var.op.name: var for var in feature_extractor_variables}
diff --git a/object_detection/meta_architectures/faster_rcnn_meta_arch_test.py b/object_detection/meta_architectures/faster_rcnn_meta_arch_test.py
deleted file mode 100644
index b31a22db..00000000
--- a/object_detection/meta_architectures/faster_rcnn_meta_arch_test.py
+++ /dev/null
@@ -1,96 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.meta_architectures.faster_rcnn_meta_arch."""
-
-import numpy as np
-import tensorflow as tf
-
-from object_detection.meta_architectures import faster_rcnn_meta_arch_test_lib
-
-
-class FasterRCNNMetaArchTest(
- faster_rcnn_meta_arch_test_lib.FasterRCNNMetaArchTestBase):
-
- def test_postprocess_second_stage_only_inference_mode_with_masks(self):
- model = self._build_model(
- is_training=False, first_stage_only=False, second_stage_batch_size=6)
-
- batch_size = 2
- total_num_padded_proposals = batch_size * model.max_num_proposals
- proposal_boxes = tf.constant(
- [[[1, 1, 2, 3],
- [0, 0, 1, 1],
- [.5, .5, .6, .6],
- 4*[0], 4*[0], 4*[0], 4*[0], 4*[0]],
- [[2, 3, 6, 8],
- [1, 2, 5, 3],
- 4*[0], 4*[0], 4*[0], 4*[0], 4*[0], 4*[0]]], dtype=tf.float32)
- num_proposals = tf.constant([3, 2], dtype=tf.int32)
- refined_box_encodings = tf.zeros(
- [total_num_padded_proposals, model.num_classes, 4], dtype=tf.float32)
- class_predictions_with_background = tf.ones(
- [total_num_padded_proposals, model.num_classes+1], dtype=tf.float32)
- image_shape = tf.constant([batch_size, 36, 48, 3], dtype=tf.int32)
-
- mask_height = 2
- mask_width = 2
- mask_predictions = 30. * tf.ones(
- [total_num_padded_proposals, model.num_classes,
- mask_height, mask_width], dtype=tf.float32)
- exp_detection_masks = np.array([[[[1, 1], [1, 1]],
- [[1, 1], [1, 1]],
- [[1, 1], [1, 1]],
- [[1, 1], [1, 1]],
- [[1, 1], [1, 1]]],
- [[[1, 1], [1, 1]],
- [[1, 1], [1, 1]],
- [[1, 1], [1, 1]],
- [[1, 1], [1, 1]],
- [[0, 0], [0, 0]]]])
-
- detections = model.postprocess({
- 'refined_box_encodings': refined_box_encodings,
- 'class_predictions_with_background': class_predictions_with_background,
- 'num_proposals': num_proposals,
- 'proposal_boxes': proposal_boxes,
- 'image_shape': image_shape,
- 'mask_predictions': mask_predictions
- })
- with self.test_session() as sess:
- detections_out = sess.run(detections)
- self.assertAllEqual(detections_out['detection_boxes'].shape, [2, 5, 4])
- self.assertAllClose(detections_out['detection_scores'],
- [[1, 1, 1, 1, 1], [1, 1, 1, 1, 0]])
- self.assertAllClose(detections_out['detection_classes'],
- [[0, 0, 0, 1, 1], [0, 0, 1, 1, 0]])
- self.assertAllClose(detections_out['num_detections'], [5, 4])
- self.assertAllClose(detections_out['detection_masks'],
- exp_detection_masks)
-
- def _get_box_classifier_features_shape(self,
- image_size,
- batch_size,
- max_num_proposals,
- initial_crop_size,
- maxpool_stride,
- num_features):
- return (batch_size * max_num_proposals,
- initial_crop_size/maxpool_stride,
- initial_crop_size/maxpool_stride,
- num_features)
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py b/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py
deleted file mode 100644
index 1e84dad3..00000000
--- a/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py
+++ /dev/null
@@ -1,1257 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.meta_architectures.faster_rcnn_meta_arch."""
-import numpy as np
-import tensorflow as tf
-from google.protobuf import text_format
-from object_detection.anchor_generators import grid_anchor_generator
-from object_detection.builders import box_predictor_builder
-from object_detection.builders import hyperparams_builder
-from object_detection.builders import post_processing_builder
-from object_detection.core import losses
-from object_detection.meta_architectures import faster_rcnn_meta_arch
-from object_detection.protos import box_predictor_pb2
-from object_detection.protos import hyperparams_pb2
-from object_detection.protos import post_processing_pb2
-
-slim = tf.contrib.slim
-BOX_CODE_SIZE = 4
-
-
-class FakeFasterRCNNFeatureExtractor(
- faster_rcnn_meta_arch.FasterRCNNFeatureExtractor):
- """Fake feature extracture to use in tests."""
-
- def __init__(self):
- super(FakeFasterRCNNFeatureExtractor, self).__init__(
- is_training=False,
- first_stage_features_stride=32,
- reuse_weights=None,
- weight_decay=0.0)
-
- def preprocess(self, resized_inputs):
- return tf.identity(resized_inputs)
-
- def _extract_proposal_features(self, preprocessed_inputs, scope):
- with tf.variable_scope('mock_model'):
- return 0 * slim.conv2d(preprocessed_inputs,
- num_outputs=3, kernel_size=1, scope='layer1')
-
- def _extract_box_classifier_features(self, proposal_feature_maps, scope):
- with tf.variable_scope('mock_model'):
- return 0 * slim.conv2d(proposal_feature_maps,
- num_outputs=3, kernel_size=1, scope='layer2')
-
-
-class FasterRCNNMetaArchTestBase(tf.test.TestCase):
- """Base class to test Faster R-CNN and R-FCN meta architectures."""
-
- def _build_arg_scope_with_hyperparams(self,
- hyperparams_text_proto,
- is_training):
- hyperparams = hyperparams_pb2.Hyperparams()
- text_format.Merge(hyperparams_text_proto, hyperparams)
- return hyperparams_builder.build(hyperparams, is_training=is_training)
-
- def _get_second_stage_box_predictor_text_proto(self):
- box_predictor_text_proto = """
- mask_rcnn_box_predictor {
- fc_hyperparams {
- op: FC
- activation: NONE
- regularizer {
- l2_regularizer {
- weight: 0.0005
- }
- }
- initializer {
- variance_scaling_initializer {
- factor: 1.0
- uniform: true
- mode: FAN_AVG
- }
- }
- }
- }
- """
- return box_predictor_text_proto
-
- def _get_second_stage_box_predictor(self, num_classes, is_training):
- box_predictor_proto = box_predictor_pb2.BoxPredictor()
- text_format.Merge(self._get_second_stage_box_predictor_text_proto(),
- box_predictor_proto)
- return box_predictor_builder.build(
- hyperparams_builder.build,
- box_predictor_proto,
- num_classes=num_classes,
- is_training=is_training)
-
- def _get_model(self, box_predictor, **common_kwargs):
- return faster_rcnn_meta_arch.FasterRCNNMetaArch(
- initial_crop_size=3,
- maxpool_kernel_size=1,
- maxpool_stride=1,
- second_stage_mask_rcnn_box_predictor=box_predictor,
- **common_kwargs)
-
- def _build_model(self,
- is_training,
- first_stage_only,
- second_stage_batch_size,
- first_stage_max_proposals=8,
- num_classes=2,
- hard_mining=False,
- softmax_second_stage_classification_loss=True):
-
- def image_resizer_fn(image):
- return tf.identity(image)
-
- # anchors in this test are designed so that a subset of anchors are inside
- # the image and a subset of anchors are outside.
- first_stage_anchor_scales = (0.001, 0.005, 0.1)
- first_stage_anchor_aspect_ratios = (0.5, 1.0, 2.0)
- first_stage_anchor_strides = (1, 1)
- first_stage_anchor_generator = grid_anchor_generator.GridAnchorGenerator(
- first_stage_anchor_scales,
- first_stage_anchor_aspect_ratios,
- anchor_stride=first_stage_anchor_strides)
-
- fake_feature_extractor = FakeFasterRCNNFeatureExtractor()
-
- first_stage_box_predictor_hyperparams_text_proto = """
- op: CONV
- activation: RELU
- regularizer {
- l2_regularizer {
- weight: 0.00004
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.03
- }
- }
- """
- first_stage_box_predictor_arg_scope = (
- self._build_arg_scope_with_hyperparams(
- first_stage_box_predictor_hyperparams_text_proto, is_training))
-
- first_stage_box_predictor_kernel_size = 3
- first_stage_atrous_rate = 1
- first_stage_box_predictor_depth = 512
- first_stage_minibatch_size = 3
- first_stage_positive_balance_fraction = .5
-
- first_stage_nms_score_threshold = -1.0
- first_stage_nms_iou_threshold = 1.0
- first_stage_max_proposals = first_stage_max_proposals
-
- first_stage_localization_loss_weight = 1.0
- first_stage_objectness_loss_weight = 1.0
-
- post_processing_text_proto = """
- batch_non_max_suppression {
- score_threshold: -20.0
- iou_threshold: 1.0
- max_detections_per_class: 5
- max_total_detections: 5
- }
- """
- post_processing_config = post_processing_pb2.PostProcessing()
- text_format.Merge(post_processing_text_proto, post_processing_config)
- second_stage_non_max_suppression_fn, _ = post_processing_builder.build(
- post_processing_config)
- second_stage_balance_fraction = 1.0
-
- second_stage_score_conversion_fn = tf.identity
- second_stage_localization_loss_weight = 1.0
- second_stage_classification_loss_weight = 1.0
- if softmax_second_stage_classification_loss:
- second_stage_classification_loss = (
- losses.WeightedSoftmaxClassificationLoss(anchorwise_output=True))
- else:
- second_stage_classification_loss = (
- losses.WeightedSigmoidClassificationLoss(anchorwise_output=True))
-
- hard_example_miner = None
- if hard_mining:
- hard_example_miner = losses.HardExampleMiner(
- num_hard_examples=1,
- iou_threshold=0.99,
- loss_type='both',
- cls_loss_weight=second_stage_classification_loss_weight,
- loc_loss_weight=second_stage_localization_loss_weight,
- max_negatives_per_positive=None)
-
- common_kwargs = {
- 'is_training': is_training,
- 'num_classes': num_classes,
- 'image_resizer_fn': image_resizer_fn,
- 'feature_extractor': fake_feature_extractor,
- 'first_stage_only': first_stage_only,
- 'first_stage_anchor_generator': first_stage_anchor_generator,
- 'first_stage_atrous_rate': first_stage_atrous_rate,
- 'first_stage_box_predictor_arg_scope':
- first_stage_box_predictor_arg_scope,
- 'first_stage_box_predictor_kernel_size':
- first_stage_box_predictor_kernel_size,
- 'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
- 'first_stage_minibatch_size': first_stage_minibatch_size,
- 'first_stage_positive_balance_fraction':
- first_stage_positive_balance_fraction,
- 'first_stage_nms_score_threshold': first_stage_nms_score_threshold,
- 'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold,
- 'first_stage_max_proposals': first_stage_max_proposals,
- 'first_stage_localization_loss_weight':
- first_stage_localization_loss_weight,
- 'first_stage_objectness_loss_weight':
- first_stage_objectness_loss_weight,
- 'second_stage_batch_size': second_stage_batch_size,
- 'second_stage_balance_fraction': second_stage_balance_fraction,
- 'second_stage_non_max_suppression_fn':
- second_stage_non_max_suppression_fn,
- 'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
- 'second_stage_localization_loss_weight':
- second_stage_localization_loss_weight,
- 'second_stage_classification_loss_weight':
- second_stage_classification_loss_weight,
- 'second_stage_classification_loss':
- second_stage_classification_loss,
- 'hard_example_miner': hard_example_miner}
-
- return self._get_model(self._get_second_stage_box_predictor(
- num_classes=num_classes, is_training=is_training), **common_kwargs)
-
- def test_predict_gives_correct_shapes_in_inference_mode_first_stage_only(
- self):
- test_graph = tf.Graph()
- with test_graph.as_default():
- model = self._build_model(
- is_training=False, first_stage_only=True, second_stage_batch_size=2)
- batch_size = 2
- height = 10
- width = 12
- input_image_shape = (batch_size, height, width, 3)
-
- preprocessed_inputs = tf.placeholder(dtype=tf.float32,
- shape=(batch_size, None, None, 3))
- prediction_dict = model.predict(preprocessed_inputs)
-
- # In inference mode, anchors are clipped to the image window, but not
- # pruned. Since MockFasterRCNN.extract_proposal_features returns a
- # tensor with the same shape as its input, the expected number of anchors
- # is height * width * the number of anchors per location (i.e. 3x3).
- expected_num_anchors = height * width * 3 * 3
- expected_output_keys = set([
- 'rpn_box_predictor_features', 'rpn_features_to_crop', 'image_shape',
- 'rpn_box_encodings', 'rpn_objectness_predictions_with_background',
- 'anchors'])
- expected_output_shapes = {
- 'rpn_box_predictor_features': (batch_size, height, width, 512),
- 'rpn_features_to_crop': (batch_size, height, width, 3),
- 'rpn_box_encodings': (batch_size, expected_num_anchors, 4),
- 'rpn_objectness_predictions_with_background':
- (batch_size, expected_num_anchors, 2),
- 'anchors': (expected_num_anchors, 4)
- }
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- prediction_out = sess.run(prediction_dict,
- feed_dict={
- preprocessed_inputs:
- np.zeros(input_image_shape)
- })
-
- self.assertEqual(set(prediction_out.keys()), expected_output_keys)
-
- self.assertAllEqual(prediction_out['image_shape'], input_image_shape)
- for output_key, expected_shape in expected_output_shapes.items():
- self.assertAllEqual(prediction_out[output_key].shape, expected_shape)
-
- # Check that anchors are clipped to window.
- anchors = prediction_out['anchors']
- self.assertTrue(np.all(np.greater_equal(anchors, 0)))
- self.assertTrue(np.all(np.less_equal(anchors[:, 0], height)))
- self.assertTrue(np.all(np.less_equal(anchors[:, 1], width)))
- self.assertTrue(np.all(np.less_equal(anchors[:, 2], height)))
- self.assertTrue(np.all(np.less_equal(anchors[:, 3], width)))
-
- def test_predict_gives_valid_anchors_in_training_mode_first_stage_only(self):
- test_graph = tf.Graph()
- with test_graph.as_default():
- model = self._build_model(
- is_training=True, first_stage_only=True, second_stage_batch_size=2)
- batch_size = 2
- height = 10
- width = 12
- input_image_shape = (batch_size, height, width, 3)
- preprocessed_inputs = tf.placeholder(dtype=tf.float32,
- shape=(batch_size, None, None, 3))
- prediction_dict = model.predict(preprocessed_inputs)
-
- expected_output_keys = set([
- 'rpn_box_predictor_features', 'rpn_features_to_crop', 'image_shape',
- 'rpn_box_encodings', 'rpn_objectness_predictions_with_background',
- 'anchors'])
- # At training time, anchors that exceed image bounds are pruned. Thus
- # the `expected_num_anchors` in the above inference mode test is now
- # a strict upper bound on the number of anchors.
- num_anchors_strict_upper_bound = height * width * 3 * 3
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- prediction_out = sess.run(prediction_dict,
- feed_dict={
- preprocessed_inputs:
- np.zeros(input_image_shape)
- })
-
- self.assertEqual(set(prediction_out.keys()), expected_output_keys)
- self.assertAllEqual(prediction_out['image_shape'], input_image_shape)
-
- # Check that anchors have less than the upper bound and
- # are clipped to window.
- anchors = prediction_out['anchors']
- self.assertTrue(len(anchors.shape) == 2 and anchors.shape[1] == 4)
- num_anchors_out = anchors.shape[0]
- self.assertTrue(num_anchors_out < num_anchors_strict_upper_bound)
-
- self.assertTrue(np.all(np.greater_equal(anchors, 0)))
- self.assertTrue(np.all(np.less_equal(anchors[:, 0], height)))
- self.assertTrue(np.all(np.less_equal(anchors[:, 1], width)))
- self.assertTrue(np.all(np.less_equal(anchors[:, 2], height)))
- self.assertTrue(np.all(np.less_equal(anchors[:, 3], width)))
-
- self.assertAllEqual(prediction_out['rpn_box_encodings'].shape,
- (batch_size, num_anchors_out, 4))
- self.assertAllEqual(
- prediction_out['rpn_objectness_predictions_with_background'].shape,
- (batch_size, num_anchors_out, 2))
-
- def test_predict_correct_shapes_in_inference_mode_both_stages(
- self):
- batch_size = 2
- image_size = 10
- max_num_proposals = 8
- initial_crop_size = 3
- maxpool_stride = 1
-
- input_shapes = [(batch_size, image_size, image_size, 3),
- (None, image_size, image_size, 3),
- (batch_size, None, None, 3),
- (None, None, None, 3)]
- expected_num_anchors = image_size * image_size * 3 * 3
- expected_shapes = {
- 'rpn_box_predictor_features':
- (2, image_size, image_size, 512),
- 'rpn_features_to_crop': (2, image_size, image_size, 3),
- 'image_shape': (4,),
- 'rpn_box_encodings': (2, expected_num_anchors, 4),
- 'rpn_objectness_predictions_with_background':
- (2, expected_num_anchors, 2),
- 'anchors': (expected_num_anchors, 4),
- 'refined_box_encodings': (2 * max_num_proposals, 2, 4),
- 'class_predictions_with_background': (2 * max_num_proposals, 2 + 1),
- 'num_proposals': (2,),
- 'proposal_boxes': (2, max_num_proposals, 4),
- 'proposal_boxes_normalized': (2, max_num_proposals, 4),
- 'box_classifier_features':
- self._get_box_classifier_features_shape(image_size,
- batch_size,
- max_num_proposals,
- initial_crop_size,
- maxpool_stride,
- 3)
- }
-
- for input_shape in input_shapes:
- test_graph = tf.Graph()
- with test_graph.as_default():
- model = self._build_model(
- is_training=False, first_stage_only=False,
- second_stage_batch_size=2)
- preprocessed_inputs = tf.placeholder(tf.float32, shape=input_shape)
- result_tensor_dict = model.predict(preprocessed_inputs)
- init_op = tf.global_variables_initializer()
- with self.test_session(graph=test_graph) as sess:
- sess.run(init_op)
- tensor_dict_out = sess.run(result_tensor_dict, feed_dict={
- preprocessed_inputs:
- np.zeros((batch_size, image_size, image_size, 3))})
- self.assertEqual(set(tensor_dict_out.keys()),
- set(expected_shapes.keys()))
- for key in expected_shapes:
- self.assertAllEqual(tensor_dict_out[key].shape, expected_shapes[key])
-
- def test_predict_gives_correct_shapes_in_train_mode_both_stages(self):
- test_graph = tf.Graph()
- with test_graph.as_default():
- model = self._build_model(
- is_training=True, first_stage_only=False, second_stage_batch_size=7)
-
- batch_size = 2
- image_size = 10
- max_num_proposals = 7
- initial_crop_size = 3
- maxpool_stride = 1
-
- image_shape = (batch_size, image_size, image_size, 3)
- preprocessed_inputs = tf.zeros(image_shape, dtype=tf.float32)
- groundtruth_boxes_list = [
- tf.constant([[0, 0, .5, .5], [.5, .5, 1, 1]], dtype=tf.float32),
- tf.constant([[0, .5, .5, 1], [.5, 0, 1, .5]], dtype=tf.float32)]
- groundtruth_classes_list = [
- tf.constant([[1, 0], [0, 1]], dtype=tf.float32),
- tf.constant([[1, 0], [1, 0]], dtype=tf.float32)]
-
- model.provide_groundtruth(groundtruth_boxes_list,
- groundtruth_classes_list)
-
- result_tensor_dict = model.predict(preprocessed_inputs)
- expected_shapes = {
- 'rpn_box_predictor_features':
- (2, image_size, image_size, 512),
- 'rpn_features_to_crop': (2, image_size, image_size, 3),
- 'image_shape': (4,),
- 'refined_box_encodings': (2 * max_num_proposals, 2, 4),
- 'class_predictions_with_background': (2 * max_num_proposals, 2 + 1),
- 'num_proposals': (2,),
- 'proposal_boxes': (2, max_num_proposals, 4),
- 'proposal_boxes_normalized': (2, max_num_proposals, 4),
- 'box_classifier_features':
- self._get_box_classifier_features_shape(image_size,
- batch_size,
- max_num_proposals,
- initial_crop_size,
- maxpool_stride,
- 3)
- }
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- tensor_dict_out = sess.run(result_tensor_dict)
- self.assertEqual(set(tensor_dict_out.keys()),
- set(expected_shapes.keys()).union(set([
- 'rpn_box_encodings',
- 'rpn_objectness_predictions_with_background',
- 'anchors'])))
- for key in expected_shapes:
- self.assertAllEqual(tensor_dict_out[key].shape, expected_shapes[key])
-
- anchors_shape_out = tensor_dict_out['anchors'].shape
- self.assertEqual(2, len(anchors_shape_out))
- self.assertEqual(4, anchors_shape_out[1])
- num_anchors_out = anchors_shape_out[0]
- self.assertAllEqual(tensor_dict_out['rpn_box_encodings'].shape,
- (2, num_anchors_out, 4))
- self.assertAllEqual(
- tensor_dict_out['rpn_objectness_predictions_with_background'].shape,
- (2, num_anchors_out, 2))
-
- def test_postprocess_first_stage_only_inference_mode(self):
- model = self._build_model(
- is_training=False, first_stage_only=True, second_stage_batch_size=6)
- batch_size = 2
- anchors = tf.constant(
- [[0, 0, 16, 16],
- [0, 16, 16, 32],
- [16, 0, 32, 16],
- [16, 16, 32, 32]], dtype=tf.float32)
- rpn_box_encodings = tf.zeros(
- [batch_size, anchors.get_shape().as_list()[0],
- BOX_CODE_SIZE], dtype=tf.float32)
- # use different numbers for the objectness category to break ties in
- # order of boxes returned by NMS
- rpn_objectness_predictions_with_background = tf.constant([
- [[-10, 13],
- [10, -10],
- [10, -11],
- [-10, 12]],
- [[10, -10],
- [-10, 13],
- [-10, 12],
- [10, -11]]], dtype=tf.float32)
- rpn_features_to_crop = tf.ones((batch_size, 8, 8, 10), dtype=tf.float32)
- image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32)
- proposals = model.postprocess({
- 'rpn_box_encodings': rpn_box_encodings,
- 'rpn_objectness_predictions_with_background':
- rpn_objectness_predictions_with_background,
- 'rpn_features_to_crop': rpn_features_to_crop,
- 'anchors': anchors,
- 'image_shape': image_shape})
- expected_proposal_boxes = [
- [[0, 0, .5, .5], [.5, .5, 1, 1], [0, .5, .5, 1], [.5, 0, 1.0, .5]]
- + 4 * [4 * [0]],
- [[0, .5, .5, 1], [.5, 0, 1.0, .5], [0, 0, .5, .5], [.5, .5, 1, 1]]
- + 4 * [4 * [0]]]
- expected_proposal_scores = [[1, 1, 0, 0, 0, 0, 0, 0],
- [1, 1, 0, 0, 0, 0, 0, 0]]
- expected_num_proposals = [4, 4]
-
- expected_output_keys = set(['detection_boxes', 'detection_scores',
- 'num_detections'])
- self.assertEqual(set(proposals.keys()), expected_output_keys)
- with self.test_session() as sess:
- proposals_out = sess.run(proposals)
- self.assertAllClose(proposals_out['detection_boxes'],
- expected_proposal_boxes)
- self.assertAllClose(proposals_out['detection_scores'],
- expected_proposal_scores)
- self.assertAllEqual(proposals_out['num_detections'],
- expected_num_proposals)
-
- def test_postprocess_first_stage_only_train_mode(self):
- model = self._build_model(
- is_training=True, first_stage_only=True, second_stage_batch_size=2)
- batch_size = 2
- anchors = tf.constant(
- [[0, 0, 16, 16],
- [0, 16, 16, 32],
- [16, 0, 32, 16],
- [16, 16, 32, 32]], dtype=tf.float32)
- rpn_box_encodings = tf.zeros(
- [batch_size, anchors.get_shape().as_list()[0],
- BOX_CODE_SIZE], dtype=tf.float32)
- # use different numbers for the objectness category to break ties in
- # order of boxes returned by NMS
- rpn_objectness_predictions_with_background = tf.constant([
- [[-10, 13],
- [-10, 12],
- [-10, 11],
- [-10, 10]],
- [[-10, 13],
- [-10, 12],
- [-10, 11],
- [-10, 10]]], dtype=tf.float32)
- rpn_features_to_crop = tf.ones((batch_size, 8, 8, 10), dtype=tf.float32)
- image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32)
- groundtruth_boxes_list = [
- tf.constant([[0, 0, .5, .5], [.5, .5, 1, 1]], dtype=tf.float32),
- tf.constant([[0, .5, .5, 1], [.5, 0, 1, .5]], dtype=tf.float32)]
- groundtruth_classes_list = [tf.constant([[1, 0], [0, 1]], dtype=tf.float32),
- tf.constant([[1, 0], [1, 0]], dtype=tf.float32)]
-
- model.provide_groundtruth(groundtruth_boxes_list,
- groundtruth_classes_list)
- proposals = model.postprocess({
- 'rpn_box_encodings': rpn_box_encodings,
- 'rpn_objectness_predictions_with_background':
- rpn_objectness_predictions_with_background,
- 'rpn_features_to_crop': rpn_features_to_crop,
- 'anchors': anchors,
- 'image_shape': image_shape})
- expected_proposal_boxes = [
- [[0, 0, .5, .5], [.5, .5, 1, 1]], [[0, .5, .5, 1], [.5, 0, 1, .5]]]
- expected_proposal_scores = [[1, 1],
- [1, 1]]
- expected_num_proposals = [2, 2]
-
- expected_output_keys = set(['detection_boxes', 'detection_scores',
- 'num_detections'])
- self.assertEqual(set(proposals.keys()), expected_output_keys)
-
- with self.test_session() as sess:
- proposals_out = sess.run(proposals)
- self.assertAllClose(proposals_out['detection_boxes'],
- expected_proposal_boxes)
- self.assertAllClose(proposals_out['detection_scores'],
- expected_proposal_scores)
- self.assertAllEqual(proposals_out['num_detections'],
- expected_num_proposals)
-
- def test_postprocess_second_stage_only_inference_mode(self):
- num_proposals_shapes = [(2), (None)]
- refined_box_encodings_shapes = [(16, 2, 4), (None, 2, 4)]
- class_predictions_with_background_shapes = [(16, 3), (None, 3)]
- proposal_boxes_shapes = [(2, 8, 4), (None, 8, 4)]
- batch_size = 2
- image_shape = np.array((2, 36, 48, 3), dtype=np.int32)
- for (num_proposals_shape, refined_box_encoding_shape,
- class_predictions_with_background_shape,
- proposal_boxes_shape) in zip(num_proposals_shapes,
- refined_box_encodings_shapes,
- class_predictions_with_background_shapes,
- proposal_boxes_shapes):
- tf_graph = tf.Graph()
- with tf_graph.as_default():
- model = self._build_model(
- is_training=False, first_stage_only=False,
- second_stage_batch_size=6)
- total_num_padded_proposals = batch_size * model.max_num_proposals
- proposal_boxes = np.array(
- [[[1, 1, 2, 3],
- [0, 0, 1, 1],
- [.5, .5, .6, .6],
- 4*[0], 4*[0], 4*[0], 4*[0], 4*[0]],
- [[2, 3, 6, 8],
- [1, 2, 5, 3],
- 4*[0], 4*[0], 4*[0], 4*[0], 4*[0], 4*[0]]])
- num_proposals = np.array([3, 2], dtype=np.int32)
- refined_box_encodings = np.zeros(
- [total_num_padded_proposals, model.num_classes, 4])
- class_predictions_with_background = np.ones(
- [total_num_padded_proposals, model.num_classes+1])
-
- num_proposals_placeholder = tf.placeholder(tf.int32,
- shape=num_proposals_shape)
- refined_box_encodings_placeholder = tf.placeholder(
- tf.float32, shape=refined_box_encoding_shape)
- class_predictions_with_background_placeholder = tf.placeholder(
- tf.float32, shape=class_predictions_with_background_shape)
- proposal_boxes_placeholder = tf.placeholder(
- tf.float32, shape=proposal_boxes_shape)
- image_shape_placeholder = tf.placeholder(tf.int32, shape=(4))
-
- detections = model.postprocess({
- 'refined_box_encodings': refined_box_encodings_placeholder,
- 'class_predictions_with_background':
- class_predictions_with_background_placeholder,
- 'num_proposals': num_proposals_placeholder,
- 'proposal_boxes': proposal_boxes_placeholder,
- 'image_shape': image_shape_placeholder,
- })
- with self.test_session(graph=tf_graph) as sess:
- detections_out = sess.run(
- detections,
- feed_dict={
- refined_box_encodings_placeholder: refined_box_encodings,
- class_predictions_with_background_placeholder:
- class_predictions_with_background,
- num_proposals_placeholder: num_proposals,
- proposal_boxes_placeholder: proposal_boxes,
- image_shape_placeholder: image_shape
- })
- self.assertAllEqual(detections_out['detection_boxes'].shape, [2, 5, 4])
- self.assertAllClose(detections_out['detection_scores'],
- [[1, 1, 1, 1, 1], [1, 1, 1, 1, 0]])
- self.assertAllClose(detections_out['detection_classes'],
- [[0, 0, 0, 1, 1], [0, 0, 1, 1, 0]])
- self.assertAllClose(detections_out['num_detections'], [5, 4])
-
- def test_preprocess_preserves_input_shapes(self):
- image_shapes = [(3, None, None, 3),
- (None, 10, 10, 3),
- (None, None, None, 3)]
- for image_shape in image_shapes:
- model = self._build_model(
- is_training=False, first_stage_only=False, second_stage_batch_size=6)
- image_placeholder = tf.placeholder(tf.float32, shape=image_shape)
- preprocessed_inputs = model.preprocess(image_placeholder)
- self.assertAllEqual(preprocessed_inputs.shape.as_list(), image_shape)
-
- # TODO: Split test into two - with and without masks.
- def test_loss_first_stage_only_mode(self):
- model = self._build_model(
- is_training=True, first_stage_only=True, second_stage_batch_size=6)
- batch_size = 2
- anchors = tf.constant(
- [[0, 0, 16, 16],
- [0, 16, 16, 32],
- [16, 0, 32, 16],
- [16, 16, 32, 32]], dtype=tf.float32)
-
- rpn_box_encodings = tf.zeros(
- [batch_size,
- anchors.get_shape().as_list()[0],
- BOX_CODE_SIZE], dtype=tf.float32)
- # use different numbers for the objectness category to break ties in
- # order of boxes returned by NMS
- rpn_objectness_predictions_with_background = tf.constant([
- [[-10, 13],
- [10, -10],
- [10, -11],
- [-10, 12]],
- [[10, -10],
- [-10, 13],
- [-10, 12],
- [10, -11]]], dtype=tf.float32)
- image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32)
-
- groundtruth_boxes_list = [
- tf.constant([[0, 0, .5, .5], [.5, .5, 1, 1]], dtype=tf.float32),
- tf.constant([[0, .5, .5, 1], [.5, 0, 1, .5]], dtype=tf.float32)]
- groundtruth_classes_list = [tf.constant([[1, 0], [0, 1]], dtype=tf.float32),
- tf.constant([[1, 0], [1, 0]], dtype=tf.float32)]
-
- prediction_dict = {
- 'rpn_box_encodings': rpn_box_encodings,
- 'rpn_objectness_predictions_with_background':
- rpn_objectness_predictions_with_background,
- 'image_shape': image_shape,
- 'anchors': anchors
- }
- model.provide_groundtruth(groundtruth_boxes_list,
- groundtruth_classes_list)
- loss_dict = model.loss(prediction_dict)
- with self.test_session() as sess:
- loss_dict_out = sess.run(loss_dict)
- self.assertAllClose(loss_dict_out['first_stage_localization_loss'], 0)
- self.assertAllClose(loss_dict_out['first_stage_objectness_loss'], 0)
- self.assertTrue('second_stage_localization_loss' not in loss_dict_out)
- self.assertTrue('second_stage_classification_loss' not in loss_dict_out)
-
- # TODO: Split test into two - with and without masks.
- def test_loss_full(self):
- model = self._build_model(
- is_training=True, first_stage_only=False, second_stage_batch_size=6)
- batch_size = 2
- anchors = tf.constant(
- [[0, 0, 16, 16],
- [0, 16, 16, 32],
- [16, 0, 32, 16],
- [16, 16, 32, 32]], dtype=tf.float32)
- rpn_box_encodings = tf.zeros(
- [batch_size,
- anchors.get_shape().as_list()[0],
- BOX_CODE_SIZE], dtype=tf.float32)
- # use different numbers for the objectness category to break ties in
- # order of boxes returned by NMS
- rpn_objectness_predictions_with_background = tf.constant([
- [[-10, 13],
- [10, -10],
- [10, -11],
- [-10, 12]],
- [[10, -10],
- [-10, 13],
- [-10, 12],
- [10, -11]]], dtype=tf.float32)
- image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32)
-
- num_proposals = tf.constant([6, 6], dtype=tf.int32)
- proposal_boxes = tf.constant(
- 2 * [[[0, 0, 16, 16],
- [0, 16, 16, 32],
- [16, 0, 32, 16],
- [16, 16, 32, 32],
- [0, 0, 16, 16],
- [0, 16, 16, 32]]], dtype=tf.float32)
- refined_box_encodings = tf.zeros(
- (batch_size * model.max_num_proposals,
- model.num_classes,
- BOX_CODE_SIZE), dtype=tf.float32)
- class_predictions_with_background = tf.constant(
- [[-10, 10, -10], # first image
- [10, -10, -10],
- [10, -10, -10],
- [-10, -10, 10],
- [-10, 10, -10],
- [10, -10, -10],
- [10, -10, -10], # second image
- [-10, 10, -10],
- [-10, 10, -10],
- [10, -10, -10],
- [10, -10, -10],
- [-10, 10, -10]], dtype=tf.float32)
-
- mask_predictions_logits = 20 * tf.ones((batch_size *
- model.max_num_proposals,
- model.num_classes,
- 14, 14),
- dtype=tf.float32)
-
- groundtruth_boxes_list = [
- tf.constant([[0, 0, .5, .5], [.5, .5, 1, 1]], dtype=tf.float32),
- tf.constant([[0, .5, .5, 1], [.5, 0, 1, .5]], dtype=tf.float32)]
- groundtruth_classes_list = [tf.constant([[1, 0], [0, 1]], dtype=tf.float32),
- tf.constant([[1, 0], [1, 0]], dtype=tf.float32)]
-
- # Set all elements of groundtruth mask to 1.0. In this case all proposal
- # crops of the groundtruth masks should return a mask that covers the entire
- # proposal. Thus, if mask_predictions_logits element values are all greater
- # than 20, the loss should be zero.
- groundtruth_masks_list = [tf.convert_to_tensor(np.ones((2, 32, 32)),
- dtype=tf.float32),
- tf.convert_to_tensor(np.ones((2, 32, 32)),
- dtype=tf.float32)]
- prediction_dict = {
- 'rpn_box_encodings': rpn_box_encodings,
- 'rpn_objectness_predictions_with_background':
- rpn_objectness_predictions_with_background,
- 'image_shape': image_shape,
- 'anchors': anchors,
- 'refined_box_encodings': refined_box_encodings,
- 'class_predictions_with_background': class_predictions_with_background,
- 'proposal_boxes': proposal_boxes,
- 'num_proposals': num_proposals,
- 'mask_predictions': mask_predictions_logits
- }
- model.provide_groundtruth(groundtruth_boxes_list,
- groundtruth_classes_list,
- groundtruth_masks_list)
- loss_dict = model.loss(prediction_dict)
-
- with self.test_session() as sess:
- loss_dict_out = sess.run(loss_dict)
- self.assertAllClose(loss_dict_out['first_stage_localization_loss'], 0)
- self.assertAllClose(loss_dict_out['first_stage_objectness_loss'], 0)
- self.assertAllClose(loss_dict_out['second_stage_localization_loss'], 0)
- self.assertAllClose(loss_dict_out['second_stage_classification_loss'], 0)
- self.assertAllClose(loss_dict_out['second_stage_mask_loss'], 0)
-
- def test_loss_full_zero_padded_proposals(self):
- model = self._build_model(
- is_training=True, first_stage_only=False, second_stage_batch_size=6)
- batch_size = 1
- anchors = tf.constant(
- [[0, 0, 16, 16],
- [0, 16, 16, 32],
- [16, 0, 32, 16],
- [16, 16, 32, 32]], dtype=tf.float32)
- rpn_box_encodings = tf.zeros(
- [batch_size,
- anchors.get_shape().as_list()[0],
- BOX_CODE_SIZE], dtype=tf.float32)
- # use different numbers for the objectness category to break ties in
- # order of boxes returned by NMS
- rpn_objectness_predictions_with_background = tf.constant([
- [[-10, 13],
- [10, -10],
- [10, -11],
- [10, -12]],], dtype=tf.float32)
- image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32)
-
- # box_classifier_batch_size is 6, but here we assume that the number of
- # actual proposals (not counting zero paddings) is fewer (3).
- num_proposals = tf.constant([3], dtype=tf.int32)
- proposal_boxes = tf.constant(
- [[[0, 0, 16, 16],
- [0, 16, 16, 32],
- [16, 0, 32, 16],
- [0, 0, 0, 0], # begin paddings
- [0, 0, 0, 0],
- [0, 0, 0, 0]]], dtype=tf.float32)
-
- refined_box_encodings = tf.zeros(
- (batch_size * model.max_num_proposals,
- model.num_classes,
- BOX_CODE_SIZE), dtype=tf.float32)
- class_predictions_with_background = tf.constant(
- [[-10, 10, -10],
- [10, -10, -10],
- [10, -10, -10],
- [0, 0, 0], # begin paddings
- [0, 0, 0],
- [0, 0, 0]], dtype=tf.float32)
-
- mask_predictions_logits = 20 * tf.ones((batch_size *
- model.max_num_proposals,
- model.num_classes,
- 14, 14),
- dtype=tf.float32)
-
- groundtruth_boxes_list = [
- tf.constant([[0, 0, .5, .5]], dtype=tf.float32)]
- groundtruth_classes_list = [tf.constant([[1, 0]], dtype=tf.float32)]
-
- # Set all elements of groundtruth mask to 1.0. In this case all proposal
- # crops of the groundtruth masks should return a mask that covers the entire
- # proposal. Thus, if mask_predictions_logits element values are all greater
- # than 20, the loss should be zero.
- groundtruth_masks_list = [tf.convert_to_tensor(np.ones((1, 32, 32)),
- dtype=tf.float32)]
-
- prediction_dict = {
- 'rpn_box_encodings': rpn_box_encodings,
- 'rpn_objectness_predictions_with_background':
- rpn_objectness_predictions_with_background,
- 'image_shape': image_shape,
- 'anchors': anchors,
- 'refined_box_encodings': refined_box_encodings,
- 'class_predictions_with_background': class_predictions_with_background,
- 'proposal_boxes': proposal_boxes,
- 'num_proposals': num_proposals,
- 'mask_predictions': mask_predictions_logits
- }
- model.provide_groundtruth(groundtruth_boxes_list,
- groundtruth_classes_list,
- groundtruth_masks_list)
- loss_dict = model.loss(prediction_dict)
-
- with self.test_session() as sess:
- loss_dict_out = sess.run(loss_dict)
- self.assertAllClose(loss_dict_out['first_stage_localization_loss'], 0)
- self.assertAllClose(loss_dict_out['first_stage_objectness_loss'], 0)
- self.assertAllClose(loss_dict_out['second_stage_localization_loss'], 0)
- self.assertAllClose(loss_dict_out['second_stage_classification_loss'], 0)
- self.assertAllClose(loss_dict_out['second_stage_mask_loss'], 0)
-
- def test_loss_full_multiple_label_groundtruth(self):
- model = self._build_model(
- is_training=True, first_stage_only=False, second_stage_batch_size=6,
- softmax_second_stage_classification_loss=False)
- batch_size = 1
- anchors = tf.constant(
- [[0, 0, 16, 16],
- [0, 16, 16, 32],
- [16, 0, 32, 16],
- [16, 16, 32, 32]], dtype=tf.float32)
- rpn_box_encodings = tf.zeros(
- [batch_size,
- anchors.get_shape().as_list()[0],
- BOX_CODE_SIZE], dtype=tf.float32)
- # use different numbers for the objectness category to break ties in
- # order of boxes returned by NMS
- rpn_objectness_predictions_with_background = tf.constant([
- [[-10, 13],
- [10, -10],
- [10, -11],
- [10, -12]],], dtype=tf.float32)
- image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32)
-
- # box_classifier_batch_size is 6, but here we assume that the number of
- # actual proposals (not counting zero paddings) is fewer (3).
- num_proposals = tf.constant([3], dtype=tf.int32)
- proposal_boxes = tf.constant(
- [[[0, 0, 16, 16],
- [0, 16, 16, 32],
- [16, 0, 32, 16],
- [0, 0, 0, 0], # begin paddings
- [0, 0, 0, 0],
- [0, 0, 0, 0]]], dtype=tf.float32)
-
- # second_stage_localization_loss should only be computed for predictions
- # that match groundtruth. For multiple label groundtruth boxes, the loss
- # should only be computed once for the label with the smaller index.
- refined_box_encodings = tf.constant(
- [[[0, 0, 0, 0], [1, 1, -1, -1]],
- [[1, 1, -1, -1], [1, 1, 1, 1]],
- [[1, 1, -1, -1], [1, 1, 1, 1]],
- [[1, 1, -1, -1], [1, 1, 1, 1]],
- [[1, 1, -1, -1], [1, 1, 1, 1]],
- [[1, 1, -1, -1], [1, 1, 1, 1]]], dtype=tf.float32)
- class_predictions_with_background = tf.constant(
- [[-100, 100, 100],
- [100, -100, -100],
- [100, -100, -100],
- [0, 0, 0], # begin paddings
- [0, 0, 0],
- [0, 0, 0]], dtype=tf.float32)
-
- mask_predictions_logits = 20 * tf.ones((batch_size *
- model.max_num_proposals,
- model.num_classes,
- 14, 14),
- dtype=tf.float32)
-
- groundtruth_boxes_list = [
- tf.constant([[0, 0, .5, .5]], dtype=tf.float32)]
- # Box contains two ground truth labels.
- groundtruth_classes_list = [tf.constant([[1, 1]], dtype=tf.float32)]
-
- # Set all elements of groundtruth mask to 1.0. In this case all proposal
- # crops of the groundtruth masks should return a mask that covers the entire
- # proposal. Thus, if mask_predictions_logits element values are all greater
- # than 20, the loss should be zero.
- groundtruth_masks_list = [tf.convert_to_tensor(np.ones((1, 32, 32)),
- dtype=tf.float32)]
-
- prediction_dict = {
- 'rpn_box_encodings': rpn_box_encodings,
- 'rpn_objectness_predictions_with_background':
- rpn_objectness_predictions_with_background,
- 'image_shape': image_shape,
- 'anchors': anchors,
- 'refined_box_encodings': refined_box_encodings,
- 'class_predictions_with_background': class_predictions_with_background,
- 'proposal_boxes': proposal_boxes,
- 'num_proposals': num_proposals,
- 'mask_predictions': mask_predictions_logits
- }
- model.provide_groundtruth(groundtruth_boxes_list,
- groundtruth_classes_list,
- groundtruth_masks_list)
- loss_dict = model.loss(prediction_dict)
-
- with self.test_session() as sess:
- loss_dict_out = sess.run(loss_dict)
- self.assertAllClose(loss_dict_out['first_stage_localization_loss'], 0)
- self.assertAllClose(loss_dict_out['first_stage_objectness_loss'], 0)
- self.assertAllClose(loss_dict_out['second_stage_localization_loss'], 0)
- self.assertAllClose(loss_dict_out['second_stage_classification_loss'], 0)
- self.assertAllClose(loss_dict_out['second_stage_mask_loss'], 0)
-
- def test_loss_full_zero_padded_proposals_nonzero_loss_with_two_images(self):
- model = self._build_model(
- is_training=True, first_stage_only=False, second_stage_batch_size=6)
- batch_size = 2
- anchors = tf.constant(
- [[0, 0, 16, 16],
- [0, 16, 16, 32],
- [16, 0, 32, 16],
- [16, 16, 32, 32]], dtype=tf.float32)
- rpn_box_encodings = tf.zeros(
- [batch_size,
- anchors.get_shape().as_list()[0],
- BOX_CODE_SIZE], dtype=tf.float32)
- # use different numbers for the objectness category to break ties in
- # order of boxes returned by NMS
- rpn_objectness_predictions_with_background = tf.constant(
- [[[-10, 13],
- [10, -10],
- [10, -11],
- [10, -12]],
- [[-10, 13],
- [10, -10],
- [10, -11],
- [10, -12]]], dtype=tf.float32)
- image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32)
-
- # box_classifier_batch_size is 6, but here we assume that the number of
- # actual proposals (not counting zero paddings) is fewer.
- num_proposals = tf.constant([3, 2], dtype=tf.int32)
- proposal_boxes = tf.constant(
- [[[0, 0, 16, 16],
- [0, 16, 16, 32],
- [16, 0, 32, 16],
- [0, 0, 0, 0], # begin paddings
- [0, 0, 0, 0],
- [0, 0, 0, 0]],
- [[0, 0, 16, 16],
- [0, 16, 16, 32],
- [0, 0, 0, 0], # begin paddings
- [0, 0, 0, 0],
- [0, 0, 0, 0],
- [0, 0, 0, 0]]], dtype=tf.float32)
-
- refined_box_encodings = tf.zeros(
- (batch_size * model.max_num_proposals,
- model.num_classes,
- BOX_CODE_SIZE), dtype=tf.float32)
- class_predictions_with_background = tf.constant(
- [[-10, 10, -10], # first image
- [10, -10, -10],
- [10, -10, -10],
- [0, 0, 0], # begin paddings
- [0, 0, 0],
- [0, 0, 0],
- [-10, -10, 10], # second image
- [10, -10, -10],
- [0, 0, 0], # begin paddings
- [0, 0, 0],
- [0, 0, 0],
- [0, 0, 0],], dtype=tf.float32)
-
- # The first groundtruth box is 4/5 of the anchor size in both directions
- # experiencing a loss of:
- # 2 * SmoothL1(5 * log(4/5)) / num_proposals
- # = 2 * (abs(5 * log(1/2)) - .5) / 3
- # The second groundtruth box is identical to the prediction and thus
- # experiences zero loss.
- # Total average loss is (abs(5 * log(1/2)) - .5) / 3.
- groundtruth_boxes_list = [
- tf.constant([[0.05, 0.05, 0.45, 0.45]], dtype=tf.float32),
- tf.constant([[0.0, 0.0, 0.5, 0.5]], dtype=tf.float32)]
- groundtruth_classes_list = [tf.constant([[1, 0]], dtype=tf.float32),
- tf.constant([[0, 1]], dtype=tf.float32)]
- exp_loc_loss = (-5 * np.log(.8) - 0.5) / 3.0
-
- prediction_dict = {
- 'rpn_box_encodings': rpn_box_encodings,
- 'rpn_objectness_predictions_with_background':
- rpn_objectness_predictions_with_background,
- 'image_shape': image_shape,
- 'anchors': anchors,
- 'refined_box_encodings': refined_box_encodings,
- 'class_predictions_with_background': class_predictions_with_background,
- 'proposal_boxes': proposal_boxes,
- 'num_proposals': num_proposals
- }
- model.provide_groundtruth(groundtruth_boxes_list,
- groundtruth_classes_list)
- loss_dict = model.loss(prediction_dict)
-
- with self.test_session() as sess:
- loss_dict_out = sess.run(loss_dict)
- self.assertAllClose(loss_dict_out['first_stage_localization_loss'],
- exp_loc_loss)
- self.assertAllClose(loss_dict_out['first_stage_objectness_loss'], 0)
- self.assertAllClose(loss_dict_out['second_stage_localization_loss'],
- exp_loc_loss)
- self.assertAllClose(loss_dict_out['second_stage_classification_loss'], 0)
-
- def test_loss_with_hard_mining(self):
- model = self._build_model(is_training=True,
- first_stage_only=False,
- second_stage_batch_size=None,
- first_stage_max_proposals=6,
- hard_mining=True)
- batch_size = 1
- anchors = tf.constant(
- [[0, 0, 16, 16],
- [0, 16, 16, 32],
- [16, 0, 32, 16],
- [16, 16, 32, 32]], dtype=tf.float32)
- rpn_box_encodings = tf.zeros(
- [batch_size,
- anchors.get_shape().as_list()[0],
- BOX_CODE_SIZE], dtype=tf.float32)
- # use different numbers for the objectness category to break ties in
- # order of boxes returned by NMS
- rpn_objectness_predictions_with_background = tf.constant(
- [[[-10, 13],
- [-10, 12],
- [10, -11],
- [10, -12]]], dtype=tf.float32)
- image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32)
-
- # box_classifier_batch_size is 6, but here we assume that the number of
- # actual proposals (not counting zero paddings) is fewer (3).
- num_proposals = tf.constant([3], dtype=tf.int32)
- proposal_boxes = tf.constant(
- [[[0, 0, 16, 16],
- [0, 16, 16, 32],
- [16, 0, 32, 16],
- [0, 0, 0, 0], # begin paddings
- [0, 0, 0, 0],
- [0, 0, 0, 0]]], dtype=tf.float32)
-
- refined_box_encodings = tf.zeros(
- (batch_size * model.max_num_proposals,
- model.num_classes,
- BOX_CODE_SIZE), dtype=tf.float32)
- class_predictions_with_background = tf.constant(
- [[-10, 10, -10], # first image
- [-10, -10, 10],
- [10, -10, -10],
- [0, 0, 0], # begin paddings
- [0, 0, 0],
- [0, 0, 0]], dtype=tf.float32)
-
- # The first groundtruth box is 4/5 of the anchor size in both directions
- # experiencing a loss of:
- # 2 * SmoothL1(5 * log(4/5)) / num_proposals
- # = 2 * (abs(5 * log(1/2)) - .5) / 3
- # The second groundtruth box is 46/50 of the anchor size in both directions
- # experiencing a loss of:
- # 2 * SmoothL1(5 * log(42/50)) / num_proposals
- # = 2 * (.5(5 * log(.92))^2 - .5) / 3.
- # Since the first groundtruth box experiences greater loss, and we have
- # set num_hard_examples=1 in the HardMiner, the final localization loss
- # corresponds to that of the first groundtruth box.
- groundtruth_boxes_list = [
- tf.constant([[0.05, 0.05, 0.45, 0.45],
- [0.02, 0.52, 0.48, 0.98],], dtype=tf.float32)]
- groundtruth_classes_list = [tf.constant([[1, 0], [0, 1]], dtype=tf.float32)]
- exp_loc_loss = 2 * (-5 * np.log(.8) - 0.5) / 3.0
-
- prediction_dict = {
- 'rpn_box_encodings': rpn_box_encodings,
- 'rpn_objectness_predictions_with_background':
- rpn_objectness_predictions_with_background,
- 'image_shape': image_shape,
- 'anchors': anchors,
- 'refined_box_encodings': refined_box_encodings,
- 'class_predictions_with_background': class_predictions_with_background,
- 'proposal_boxes': proposal_boxes,
- 'num_proposals': num_proposals
- }
- model.provide_groundtruth(groundtruth_boxes_list,
- groundtruth_classes_list)
- loss_dict = model.loss(prediction_dict)
-
- with self.test_session() as sess:
- loss_dict_out = sess.run(loss_dict)
- self.assertAllClose(loss_dict_out['second_stage_localization_loss'],
- exp_loc_loss)
- self.assertAllClose(loss_dict_out['second_stage_classification_loss'], 0)
-
- def test_restore_map_for_classification_ckpt(self):
- # Define mock tensorflow classification graph and save variables.
- test_graph_classification = tf.Graph()
- with test_graph_classification.as_default():
- image = tf.placeholder(dtype=tf.float32, shape=[1, 20, 20, 3])
- with tf.variable_scope('mock_model'):
- net = slim.conv2d(image, num_outputs=3, kernel_size=1, scope='layer1')
- slim.conv2d(net, num_outputs=3, kernel_size=1, scope='layer2')
-
- init_op = tf.global_variables_initializer()
- saver = tf.train.Saver()
- save_path = self.get_temp_dir()
- with self.test_session() as sess:
- sess.run(init_op)
- saved_model_path = saver.save(sess, save_path)
-
- # Create tensorflow detection graph and load variables from
- # classification checkpoint.
- test_graph_detection = tf.Graph()
- with test_graph_detection.as_default():
- model = self._build_model(
- is_training=False, first_stage_only=False, second_stage_batch_size=6)
-
- inputs_shape = (2, 20, 20, 3)
- inputs = tf.to_float(tf.random_uniform(
- inputs_shape, minval=0, maxval=255, dtype=tf.int32))
- preprocessed_inputs = model.preprocess(inputs)
- prediction_dict = model.predict(preprocessed_inputs)
- model.postprocess(prediction_dict)
- var_map = model.restore_map(from_detection_checkpoint=False)
- self.assertIsInstance(var_map, dict)
- saver = tf.train.Saver(var_map)
- with self.test_session() as sess:
- saver.restore(sess, saved_model_path)
- for var in sess.run(tf.report_uninitialized_variables()):
- self.assertNotIn(model.first_stage_feature_extractor_scope, var.name)
- self.assertNotIn(model.second_stage_feature_extractor_scope,
- var.name)
-
- def test_restore_map_for_detection_ckpt(self):
- # Define first detection graph and save variables.
- test_graph_detection1 = tf.Graph()
- with test_graph_detection1.as_default():
- model = self._build_model(
- is_training=False, first_stage_only=False, second_stage_batch_size=6)
- inputs_shape = (2, 20, 20, 3)
- inputs = tf.to_float(tf.random_uniform(
- inputs_shape, minval=0, maxval=255, dtype=tf.int32))
- preprocessed_inputs = model.preprocess(inputs)
- prediction_dict = model.predict(preprocessed_inputs)
- model.postprocess(prediction_dict)
- init_op = tf.global_variables_initializer()
- saver = tf.train.Saver()
- save_path = self.get_temp_dir()
- with self.test_session() as sess:
- sess.run(init_op)
- saved_model_path = saver.save(sess, save_path)
-
- # Define second detection graph and restore variables.
- test_graph_detection2 = tf.Graph()
- with test_graph_detection2.as_default():
- model2 = self._build_model(is_training=False, first_stage_only=False,
- second_stage_batch_size=6, num_classes=42)
-
- inputs_shape2 = (2, 20, 20, 3)
- inputs2 = tf.to_float(tf.random_uniform(
- inputs_shape2, minval=0, maxval=255, dtype=tf.int32))
- preprocessed_inputs2 = model2.preprocess(inputs2)
- prediction_dict2 = model2.predict(preprocessed_inputs2)
- model2.postprocess(prediction_dict2)
- var_map = model2.restore_map(from_detection_checkpoint=True)
- self.assertIsInstance(var_map, dict)
- saver = tf.train.Saver(var_map)
- with self.test_session() as sess:
- saver.restore(sess, saved_model_path)
- for var in sess.run(tf.report_uninitialized_variables()):
- self.assertNotIn(model2.first_stage_feature_extractor_scope, var.name)
- self.assertNotIn(model2.second_stage_feature_extractor_scope,
- var.name)
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/meta_architectures/rfcn_meta_arch.py b/object_detection/meta_architectures/rfcn_meta_arch.py
deleted file mode 100644
index a1154555..00000000
--- a/object_detection/meta_architectures/rfcn_meta_arch.py
+++ /dev/null
@@ -1,283 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""R-FCN meta-architecture definition.
-
-R-FCN: Dai, Jifeng, et al. "R-FCN: Object Detection via Region-based
-Fully Convolutional Networks." arXiv preprint arXiv:1605.06409 (2016).
-
-The R-FCN meta architecture is similar to Faster R-CNN and only differs in the
-second stage. Hence this class inherits FasterRCNNMetaArch and overrides only
-the `_predict_second_stage` method.
-
-Similar to Faster R-CNN we allow for two modes: first_stage_only=True and
-first_stage_only=False. In the former setting, all of the user facing methods
-(e.g., predict, postprocess, loss) can be used as if the model consisted
-only of the RPN, returning class agnostic proposals (these can be thought of as
-approximate detections with no associated class information). In the latter
-setting, proposals are computed, then passed through a second stage
-"box classifier" to yield (multi-class) detections.
-
-Implementations of R-FCN models must define a new FasterRCNNFeatureExtractor and
-override three methods: `preprocess`, `_extract_proposal_features` (the first
-stage of the model), and `_extract_box_classifier_features` (the second stage of
-the model). Optionally, the `restore_fn` method can be overridden. See tests
-for an example.
-
-See notes in the documentation of Faster R-CNN meta-architecture as they all
-apply here.
-"""
-import tensorflow as tf
-
-from object_detection.core import box_predictor
-from object_detection.meta_architectures import faster_rcnn_meta_arch
-from object_detection.utils import ops
-
-
-class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
- """R-FCN Meta-architecture definition."""
-
- def __init__(self,
- is_training,
- num_classes,
- image_resizer_fn,
- feature_extractor,
- first_stage_only,
- first_stage_anchor_generator,
- first_stage_atrous_rate,
- first_stage_box_predictor_arg_scope,
- first_stage_box_predictor_kernel_size,
- first_stage_box_predictor_depth,
- first_stage_minibatch_size,
- first_stage_positive_balance_fraction,
- first_stage_nms_score_threshold,
- first_stage_nms_iou_threshold,
- first_stage_max_proposals,
- first_stage_localization_loss_weight,
- first_stage_objectness_loss_weight,
- second_stage_rfcn_box_predictor,
- second_stage_batch_size,
- second_stage_balance_fraction,
- second_stage_non_max_suppression_fn,
- second_stage_score_conversion_fn,
- second_stage_localization_loss_weight,
- second_stage_classification_loss_weight,
- second_stage_classification_loss,
- hard_example_miner,
- parallel_iterations=16):
- """RFCNMetaArch Constructor.
-
- Args:
- is_training: A boolean indicating whether the training version of the
- computation graph should be constructed.
- num_classes: Number of classes. Note that num_classes *does not*
- include the background category, so if groundtruth labels take values
- in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
- assigned classification targets can range from {0,... K}).
- image_resizer_fn: A callable for image resizing. This callable always
- takes a rank-3 image tensor (corresponding to a single image) and
- returns a rank-3 image tensor, possibly with new spatial dimensions.
- See builders/image_resizer_builder.py.
- feature_extractor: A FasterRCNNFeatureExtractor object.
- first_stage_only: Whether to construct only the Region Proposal Network
- (RPN) part of the model.
- first_stage_anchor_generator: An anchor_generator.AnchorGenerator object
- (note that currently we only support
- grid_anchor_generator.GridAnchorGenerator objects)
- first_stage_atrous_rate: A single integer indicating the atrous rate for
- the single convolution op which is applied to the `rpn_features_to_crop`
- tensor to obtain a tensor to be used for box prediction. Some feature
- extractors optionally allow for producing feature maps computed at
- denser resolutions. The atrous rate is used to compensate for the
- denser feature maps by using an effectively larger receptive field.
- (This should typically be set to 1).
- first_stage_box_predictor_arg_scope: Slim arg_scope for conv2d,
- separable_conv2d and fully_connected ops for the RPN box predictor.
- first_stage_box_predictor_kernel_size: Kernel size to use for the
- convolution op just prior to RPN box predictions.
- first_stage_box_predictor_depth: Output depth for the convolution op
- just prior to RPN box predictions.
- first_stage_minibatch_size: The "batch size" to use for computing the
- objectness and location loss of the region proposal network. This
- "batch size" refers to the number of anchors selected as contributing
- to the loss function for any given image within the image batch and is
- only called "batch_size" due to terminology from the Faster R-CNN paper.
- first_stage_positive_balance_fraction: Fraction of positive examples
- per image for the RPN. The recommended value for Faster RCNN is 0.5.
- first_stage_nms_score_threshold: Score threshold for non max suppression
- for the Region Proposal Network (RPN). This value is expected to be in
- [0, 1] as it is applied directly after a softmax transformation. The
- recommended value for Faster R-CNN is 0.
- first_stage_nms_iou_threshold: The Intersection Over Union (IOU) threshold
- for performing Non-Max Suppression (NMS) on the boxes predicted by the
- Region Proposal Network (RPN).
- first_stage_max_proposals: Maximum number of boxes to retain after
- performing Non-Max Suppression (NMS) on the boxes predicted by the
- Region Proposal Network (RPN).
- first_stage_localization_loss_weight: A float
- first_stage_objectness_loss_weight: A float
- second_stage_rfcn_box_predictor: RFCN box predictor to use for
- second stage.
- second_stage_batch_size: The batch size used for computing the
- classification and refined location loss of the box classifier. This
- "batch size" refers to the number of proposals selected as contributing
- to the loss function for any given image within the image batch and is
- only called "batch_size" due to terminology from the Faster R-CNN paper.
- second_stage_balance_fraction: Fraction of positive examples to use
- per image for the box classifier. The recommended value for Faster RCNN
- is 0.25.
- second_stage_non_max_suppression_fn: batch_multiclass_non_max_suppression
- callable that takes `boxes`, `scores`, optional `clip_window` and
- optional (kwarg) `mask` inputs (with all other inputs already set)
- and returns a dictionary containing tensors with keys:
- `detection_boxes`, `detection_scores`, `detection_classes`,
- `num_detections`, and (optionally) `detection_masks`. See
- `post_processing.batch_multiclass_non_max_suppression` for the type and
- shape of these tensors.
- second_stage_score_conversion_fn: Callable elementwise nonlinearity
- (that takes tensors as inputs and returns tensors). This is usually
- used to convert logits to probabilities.
- second_stage_localization_loss_weight: A float
- second_stage_classification_loss_weight: A float
- second_stage_classification_loss: A string indicating which loss function
- to use, supports 'softmax' and 'sigmoid'.
- hard_example_miner: A losses.HardExampleMiner object (can be None).
- parallel_iterations: (Optional) The number of iterations allowed to run
- in parallel for calls to tf.map_fn.
- Raises:
- ValueError: If `second_stage_batch_size` > `first_stage_max_proposals`
- ValueError: If first_stage_anchor_generator is not of type
- grid_anchor_generator.GridAnchorGenerator.
- """
- super(RFCNMetaArch, self).__init__(
- is_training,
- num_classes,
- image_resizer_fn,
- feature_extractor,
- first_stage_only,
- first_stage_anchor_generator,
- first_stage_atrous_rate,
- first_stage_box_predictor_arg_scope,
- first_stage_box_predictor_kernel_size,
- first_stage_box_predictor_depth,
- first_stage_minibatch_size,
- first_stage_positive_balance_fraction,
- first_stage_nms_score_threshold,
- first_stage_nms_iou_threshold,
- first_stage_max_proposals,
- first_stage_localization_loss_weight,
- first_stage_objectness_loss_weight,
- None, # initial_crop_size is not used in R-FCN
- None, # maxpool_kernel_size is not use in R-FCN
- None, # maxpool_stride is not use in R-FCN
- None, # fully_connected_box_predictor is not used in R-FCN.
- second_stage_batch_size,
- second_stage_balance_fraction,
- second_stage_non_max_suppression_fn,
- second_stage_score_conversion_fn,
- second_stage_localization_loss_weight,
- second_stage_classification_loss_weight,
- second_stage_classification_loss,
- 1.0, # second stage mask prediction loss weight isn't used in R-FCN.
- hard_example_miner,
- parallel_iterations)
-
- self._rfcn_box_predictor = second_stage_rfcn_box_predictor
-
- def _predict_second_stage(self, rpn_box_encodings,
- rpn_objectness_predictions_with_background,
- rpn_features,
- anchors,
- image_shape):
- """Predicts the output tensors from 2nd stage of FasterRCNN.
-
- Args:
- rpn_box_encodings: 4-D float tensor of shape
- [batch_size, num_valid_anchors, self._box_coder.code_size] containing
- predicted boxes.
- rpn_objectness_predictions_with_background: 2-D float tensor of shape
- [batch_size, num_valid_anchors, 2] containing class
- predictions (logits) for each of the anchors. Note that this
- tensor *includes* background class predictions (at class index 0).
- rpn_features: A 4-D float32 tensor with shape
- [batch_size, height, width, depth] representing image features from the
- RPN.
- anchors: 2-D float tensor of shape
- [num_anchors, self._box_coder.code_size].
- image_shape: A 1D int32 tensors of size [4] containing the image shape.
-
- Returns:
- prediction_dict: a dictionary holding "raw" prediction tensors:
- 1) refined_box_encodings: a 3-D tensor with shape
- [total_num_proposals, num_classes, 4] representing predicted
- (final) refined box encodings, where
- total_num_proposals=batch_size*self._max_num_proposals
- 2) class_predictions_with_background: a 3-D tensor with shape
- [total_num_proposals, num_classes + 1] containing class
- predictions (logits) for each of the anchors, where
- total_num_proposals=batch_size*self._max_num_proposals.
- Note that this tensor *includes* background class predictions
- (at class index 0).
- 3) num_proposals: An int32 tensor of shape [batch_size] representing the
- number of proposals generated by the RPN. `num_proposals` allows us
- to keep track of which entries are to be treated as zero paddings and
- which are not since we always pad the number of proposals to be
- `self.max_num_proposals` for each image.
- 4) proposal_boxes: A float32 tensor of shape
- [batch_size, self.max_num_proposals, 4] representing
- decoded proposal bounding boxes (in absolute coordinates).
- 5) proposal_boxes_normalized: A float32 tensor of shape
- [batch_size, self.max_num_proposals, 4] representing decoded proposal
- bounding boxes (in normalized coordinates). Can be used to override
- the boxes proposed by the RPN, thus enabling one to extract box
- classification and prediction for externally selected areas of the
- image.
- 6) box_classifier_features: a 4-D float32 tensor, of shape
- [batch_size, feature_map_height, feature_map_width, depth],
- representing the box classifier features.
- """
- proposal_boxes_normalized, _, num_proposals = self._postprocess_rpn(
- rpn_box_encodings, rpn_objectness_predictions_with_background,
- anchors, image_shape)
-
- box_classifier_features = (
- self._feature_extractor.extract_box_classifier_features(
- rpn_features,
- scope=self.second_stage_feature_extractor_scope))
-
- box_predictions = self._rfcn_box_predictor.predict(
- box_classifier_features,
- num_predictions_per_location=1,
- scope=self.second_stage_box_predictor_scope,
- proposal_boxes=proposal_boxes_normalized)
- refined_box_encodings = tf.squeeze(
- box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
- class_predictions_with_background = tf.squeeze(
- box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
- axis=1)
-
- absolute_proposal_boxes = ops.normalized_to_image_coordinates(
- proposal_boxes_normalized, image_shape,
- parallel_iterations=self._parallel_iterations)
-
- prediction_dict = {
- 'refined_box_encodings': refined_box_encodings,
- 'class_predictions_with_background':
- class_predictions_with_background,
- 'num_proposals': num_proposals,
- 'proposal_boxes': absolute_proposal_boxes,
- 'box_classifier_features': box_classifier_features,
- 'proposal_boxes_normalized': proposal_boxes_normalized,
- }
- return prediction_dict
diff --git a/object_detection/meta_architectures/rfcn_meta_arch_test.py b/object_detection/meta_architectures/rfcn_meta_arch_test.py
deleted file mode 100644
index 829140ac..00000000
--- a/object_detection/meta_architectures/rfcn_meta_arch_test.py
+++ /dev/null
@@ -1,65 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.meta_architectures.rfcn_meta_arch."""
-
-import tensorflow as tf
-
-from object_detection.meta_architectures import faster_rcnn_meta_arch_test_lib
-from object_detection.meta_architectures import rfcn_meta_arch
-
-
-class RFCNMetaArchTest(
- faster_rcnn_meta_arch_test_lib.FasterRCNNMetaArchTestBase):
-
- def _get_second_stage_box_predictor_text_proto(self):
- box_predictor_text_proto = """
- rfcn_box_predictor {
- conv_hyperparams {
- op: CONV
- activation: NONE
- regularizer {
- l2_regularizer {
- weight: 0.0005
- }
- }
- initializer {
- variance_scaling_initializer {
- factor: 1.0
- uniform: true
- mode: FAN_AVG
- }
- }
- }
- }
- """
- return box_predictor_text_proto
-
- def _get_model(self, box_predictor, **common_kwargs):
- return rfcn_meta_arch.RFCNMetaArch(
- second_stage_rfcn_box_predictor=box_predictor, **common_kwargs)
-
- def _get_box_classifier_features_shape(self,
- image_size,
- batch_size,
- max_num_proposals,
- initial_crop_size,
- maxpool_stride,
- num_features):
- return (batch_size, image_size, image_size, num_features)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/meta_architectures/ssd_meta_arch.py b/object_detection/meta_architectures/ssd_meta_arch.py
deleted file mode 100644
index f15cc4af..00000000
--- a/object_detection/meta_architectures/ssd_meta_arch.py
+++ /dev/null
@@ -1,700 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""SSD Meta-architecture definition.
-
-General tensorflow implementation of convolutional Multibox/SSD detection
-models.
-"""
-from abc import abstractmethod
-
-import re
-import tensorflow as tf
-
-from object_detection.core import box_list
-from object_detection.core import box_predictor as bpredictor
-from object_detection.core import model
-from object_detection.core import standard_fields as fields
-from object_detection.core import target_assigner
-from object_detection.utils import shape_utils
-from object_detection.utils import visualization_utils
-
-slim = tf.contrib.slim
-
-
-class SSDFeatureExtractor(object):
- """SSD Feature Extractor definition."""
-
- def __init__(self,
- is_training,
- depth_multiplier,
- min_depth,
- pad_to_multiple,
- conv_hyperparams,
- batch_norm_trainable=True,
- reuse_weights=None):
- """Constructor.
-
- Args:
- is_training: whether the network is in training mode.
- depth_multiplier: float depth multiplier for feature extractor.
- min_depth: minimum feature extractor depth.
- pad_to_multiple: the nearest multiple to zero pad the input height and
- width dimensions to.
- conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
- batch_norm_trainable: Whether to update batch norm parameters during
- training or not. When training with a small batch size
- (e.g. 1), it is desirable to disable batch norm update and use
- pretrained batch norm params.
- reuse_weights: whether to reuse variables. Default is None.
- """
- self._is_training = is_training
- self._depth_multiplier = depth_multiplier
- self._min_depth = min_depth
- self._pad_to_multiple = pad_to_multiple
- self._conv_hyperparams = conv_hyperparams
- self._batch_norm_trainable = batch_norm_trainable
- self._reuse_weights = reuse_weights
-
- @abstractmethod
- def preprocess(self, resized_inputs):
- """Preprocesses images for feature extraction (minus image resizing).
-
- Args:
- resized_inputs: a [batch, height, width, channels] float tensor
- representing a batch of images.
-
- Returns:
- preprocessed_inputs: a [batch, height, width, channels] float tensor
- representing a batch of images.
- """
- pass
-
- @abstractmethod
- def extract_features(self, preprocessed_inputs):
- """Extracts features from preprocessed inputs.
-
- This function is responsible for extracting feature maps from preprocessed
- images.
-
- Args:
- preprocessed_inputs: a [batch, height, width, channels] float tensor
- representing a batch of images.
-
- Returns:
- feature_maps: a list of tensors where the ith tensor has shape
- [batch, height_i, width_i, depth_i]
- """
- pass
-
-
-class SSDMetaArch(model.DetectionModel):
- """SSD Meta-architecture definition."""
-
- def __init__(self,
- is_training,
- anchor_generator,
- box_predictor,
- box_coder,
- feature_extractor,
- matcher,
- region_similarity_calculator,
- image_resizer_fn,
- non_max_suppression_fn,
- score_conversion_fn,
- classification_loss,
- localization_loss,
- classification_loss_weight,
- localization_loss_weight,
- normalize_loss_by_num_matches,
- hard_example_miner,
- add_summaries=True):
- """SSDMetaArch Constructor.
-
- TODO: group NMS parameters + score converter into a class and loss
- parameters into a class and write config protos for postprocessing
- and losses.
-
- Args:
- is_training: A boolean indicating whether the training version of the
- computation graph should be constructed.
- anchor_generator: an anchor_generator.AnchorGenerator object.
- box_predictor: a box_predictor.BoxPredictor object.
- box_coder: a box_coder.BoxCoder object.
- feature_extractor: a SSDFeatureExtractor object.
- matcher: a matcher.Matcher object.
- region_similarity_calculator: a
- region_similarity_calculator.RegionSimilarityCalculator object.
- image_resizer_fn: a callable for image resizing. This callable always
- takes a rank-3 image tensor (corresponding to a single image) and
- returns a rank-3 image tensor, possibly with new spatial dimensions.
- See builders/image_resizer_builder.py.
- non_max_suppression_fn: batch_multiclass_non_max_suppression
- callable that takes `boxes`, `scores` and optional `clip_window`
- inputs (with all other inputs already set) and returns a dictionary
- hold tensors with keys: `detection_boxes`, `detection_scores`,
- `detection_classes` and `num_detections`. See `post_processing.
- batch_multiclass_non_max_suppression` for the type and shape of these
- tensors.
- score_conversion_fn: callable elementwise nonlinearity (that takes tensors
- as inputs and returns tensors). This is usually used to convert logits
- to probabilities.
- classification_loss: an object_detection.core.losses.Loss object.
- localization_loss: a object_detection.core.losses.Loss object.
- classification_loss_weight: float
- localization_loss_weight: float
- normalize_loss_by_num_matches: boolean
- hard_example_miner: a losses.HardExampleMiner object (can be None)
- add_summaries: boolean (default: True) controlling whether summary ops
- should be added to tensorflow graph.
- """
- super(SSDMetaArch, self).__init__(num_classes=box_predictor.num_classes)
- self._is_training = is_training
-
- # Needed for fine-tuning from classification checkpoints whose
- # variables do not have the feature extractor scope.
- self._extract_features_scope = 'FeatureExtractor'
-
- self._anchor_generator = anchor_generator
- self._box_predictor = box_predictor
-
- self._box_coder = box_coder
- self._feature_extractor = feature_extractor
- self._matcher = matcher
- self._region_similarity_calculator = region_similarity_calculator
-
- # TODO: handle agnostic mode and positive/negative class weights
- unmatched_cls_target = None
- unmatched_cls_target = tf.constant([1] + self.num_classes * [0], tf.float32)
- self._target_assigner = target_assigner.TargetAssigner(
- self._region_similarity_calculator,
- self._matcher,
- self._box_coder,
- positive_class_weight=1.0,
- negative_class_weight=1.0,
- unmatched_cls_target=unmatched_cls_target)
-
- self._classification_loss = classification_loss
- self._localization_loss = localization_loss
- self._classification_loss_weight = classification_loss_weight
- self._localization_loss_weight = localization_loss_weight
- self._normalize_loss_by_num_matches = normalize_loss_by_num_matches
- self._hard_example_miner = hard_example_miner
-
- self._image_resizer_fn = image_resizer_fn
- self._non_max_suppression_fn = non_max_suppression_fn
- self._score_conversion_fn = score_conversion_fn
-
- self._anchors = None
- self._add_summaries = add_summaries
-
- @property
- def anchors(self):
- if not self._anchors:
- raise RuntimeError('anchors have not been constructed yet!')
- if not isinstance(self._anchors, box_list.BoxList):
- raise RuntimeError('anchors should be a BoxList object, but is not.')
- return self._anchors
-
- def preprocess(self, inputs):
- """Feature-extractor specific preprocessing.
-
- See base class.
-
- Args:
- inputs: a [batch, height_in, width_in, channels] float tensor representing
- a batch of images with values between 0 and 255.0.
-
- Returns:
- preprocessed_inputs: a [batch, height_out, width_out, channels] float
- tensor representing a batch of images.
- Raises:
- ValueError: if inputs tensor does not have type tf.float32
- """
- if inputs.dtype is not tf.float32:
- raise ValueError('`preprocess` expects a tf.float32 tensor')
- with tf.name_scope('Preprocessor'):
- # TODO: revisit whether to always use batch size as the number of parallel
- # iterations vs allow for dynamic batching.
- resized_inputs = tf.map_fn(self._image_resizer_fn,
- elems=inputs,
- dtype=tf.float32)
- return self._feature_extractor.preprocess(resized_inputs)
-
- def predict(self, preprocessed_inputs):
- """Predicts unpostprocessed tensors from input tensor.
-
- This function takes an input batch of images and runs it through the forward
- pass of the network to yield unpostprocessesed predictions.
-
- A side effect of calling the predict method is that self._anchors is
- populated with a box_list.BoxList of anchors. These anchors must be
- constructed before the postprocess or loss functions can be called.
-
- Args:
- preprocessed_inputs: a [batch, height, width, channels] image tensor.
-
- Returns:
- prediction_dict: a dictionary holding "raw" prediction tensors:
- 1) box_encodings: 4-D float tensor of shape [batch_size, num_anchors,
- box_code_dimension] containing predicted boxes.
- 2) class_predictions_with_background: 3-D float tensor of shape
- [batch_size, num_anchors, num_classes+1] containing class predictions
- (logits) for each of the anchors. Note that this tensor *includes*
- background class predictions (at class index 0).
- 3) feature_maps: a list of tensors where the ith tensor has shape
- [batch, height_i, width_i, depth_i].
- 4) anchors: 2-D float tensor of shape [num_anchors, 4] containing
- the generated anchors in normalized coordinates.
- """
- with tf.variable_scope(None, self._extract_features_scope,
- [preprocessed_inputs]):
- feature_maps = self._feature_extractor.extract_features(
- preprocessed_inputs)
- feature_map_spatial_dims = self._get_feature_map_spatial_dims(feature_maps)
- image_shape = tf.shape(preprocessed_inputs)
- self._anchors = self._anchor_generator.generate(
- feature_map_spatial_dims,
- im_height=image_shape[1],
- im_width=image_shape[2])
- (box_encodings, class_predictions_with_background
- ) = self._add_box_predictions_to_feature_maps(feature_maps)
- predictions_dict = {
- 'box_encodings': box_encodings,
- 'class_predictions_with_background': class_predictions_with_background,
- 'feature_maps': feature_maps,
- 'anchors': self._anchors.get()
- }
- return predictions_dict
-
- def _add_box_predictions_to_feature_maps(self, feature_maps):
- """Adds box predictors to each feature map and returns concatenated results.
-
- Args:
- feature_maps: a list of tensors where the ith tensor has shape
- [batch, height_i, width_i, depth_i]
-
- Returns:
- box_encodings: 3-D float tensor of shape [batch_size, num_anchors,
- box_code_dimension] containing predicted boxes.
- class_predictions_with_background: 3-D float tensor of shape
- [batch_size, num_anchors, num_classes+1] containing class predictions
- (logits) for each of the anchors. Note that this tensor *includes*
- background class predictions (at class index 0).
-
- Raises:
- RuntimeError: if the number of feature maps extracted via the
- extract_features method does not match the length of the
- num_anchors_per_locations list that was passed to the constructor.
- RuntimeError: if box_encodings from the box_predictor does not have
- shape of the form [batch_size, num_anchors, 1, code_size].
- """
- num_anchors_per_location_list = (
- self._anchor_generator.num_anchors_per_location())
- if len(feature_maps) != len(num_anchors_per_location_list):
- raise RuntimeError('the number of feature maps must match the '
- 'length of self.anchors.NumAnchorsPerLocation().')
- box_encodings_list = []
- cls_predictions_with_background_list = []
- for idx, (feature_map, num_anchors_per_location
- ) in enumerate(zip(feature_maps, num_anchors_per_location_list)):
- box_predictor_scope = 'BoxPredictor_{}'.format(idx)
- box_predictions = self._box_predictor.predict(feature_map,
- num_anchors_per_location,
- box_predictor_scope)
- box_encodings = box_predictions[bpredictor.BOX_ENCODINGS]
- cls_predictions_with_background = box_predictions[
- bpredictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
-
- box_encodings_shape = box_encodings.get_shape().as_list()
- if len(box_encodings_shape) != 4 or box_encodings_shape[2] != 1:
- raise RuntimeError('box_encodings from the box_predictor must be of '
- 'shape `[batch_size, num_anchors, 1, code_size]`; '
- 'actual shape', box_encodings_shape)
- box_encodings = tf.squeeze(box_encodings, axis=2)
- box_encodings_list.append(box_encodings)
- cls_predictions_with_background_list.append(
- cls_predictions_with_background)
-
- num_predictions = sum(
- [tf.shape(box_encodings)[1] for box_encodings in box_encodings_list])
- num_anchors = self.anchors.num_boxes()
- anchors_assert = tf.assert_equal(num_anchors, num_predictions, [
- 'Mismatch: number of anchors vs number of predictions', num_anchors,
- num_predictions
- ])
- with tf.control_dependencies([anchors_assert]):
- box_encodings = tf.concat(box_encodings_list, 1)
- class_predictions_with_background = tf.concat(
- cls_predictions_with_background_list, 1)
- return box_encodings, class_predictions_with_background
-
- def _get_feature_map_spatial_dims(self, feature_maps):
- """Return list of spatial dimensions for each feature map in a list.
-
- Args:
- feature_maps: a list of tensors where the ith tensor has shape
- [batch, height_i, width_i, depth_i].
-
- Returns:
- a list of pairs (height, width) for each feature map in feature_maps
- """
- feature_map_shapes = [
- shape_utils.combined_static_and_dynamic_shape(
- feature_map) for feature_map in feature_maps
- ]
- return [(shape[1], shape[2]) for shape in feature_map_shapes]
-
- def postprocess(self, prediction_dict):
- """Converts prediction tensors to final detections.
-
- This function converts raw predictions tensors to final detection results by
- slicing off the background class, decoding box predictions and applying
- non max suppression and clipping to the image window.
-
- See base class for output format conventions. Note also that by default,
- scores are to be interpreted as logits, but if a score_conversion_fn is
- used, then scores are remapped (and may thus have a different
- interpretation).
-
- Args:
- prediction_dict: a dictionary holding prediction tensors with
- 1) box_encodings: 3-D float tensor of shape [batch_size, num_anchors,
- box_code_dimension] containing predicted boxes.
- 2) class_predictions_with_background: 3-D float tensor of shape
- [batch_size, num_anchors, num_classes+1] containing class predictions
- (logits) for each of the anchors. Note that this tensor *includes*
- background class predictions.
-
- Returns:
- detections: a dictionary containing the following fields
- detection_boxes: [batch, max_detections, 4]
- detection_scores: [batch, max_detections]
- detection_classes: [batch, max_detections]
- detection_keypoints: [batch, max_detections, num_keypoints, 2] (if
- encoded in the prediction_dict 'box_encodings')
- num_detections: [batch]
- Raises:
- ValueError: if prediction_dict does not contain `box_encodings` or
- `class_predictions_with_background` fields.
- """
- if ('box_encodings' not in prediction_dict or
- 'class_predictions_with_background' not in prediction_dict):
- raise ValueError('prediction_dict does not contain expected entries.')
- with tf.name_scope('Postprocessor'):
- box_encodings = prediction_dict['box_encodings']
- class_predictions = prediction_dict['class_predictions_with_background']
- detection_boxes, detection_keypoints = self._batch_decode(box_encodings)
- detection_boxes = tf.expand_dims(detection_boxes, axis=2)
-
- class_predictions_without_background = tf.slice(class_predictions,
- [0, 0, 1],
- [-1, -1, -1])
- detection_scores = self._score_conversion_fn(
- class_predictions_without_background)
- clip_window = tf.constant([0, 0, 1, 1], tf.float32)
- additional_fields = None
- if detection_keypoints is not None:
- additional_fields = {
- fields.BoxListFields.keypoints: detection_keypoints}
- (nmsed_boxes, nmsed_scores, nmsed_classes, _, nmsed_additional_fields,
- num_detections) = self._non_max_suppression_fn(
- detection_boxes,
- detection_scores,
- clip_window=clip_window,
- additional_fields=additional_fields)
- detection_dict = {'detection_boxes': nmsed_boxes,
- 'detection_scores': nmsed_scores,
- 'detection_classes': nmsed_classes,
- 'num_detections': tf.to_float(num_detections)}
- if (nmsed_additional_fields is not None and
- fields.BoxListFields.keypoints in nmsed_additional_fields):
- detection_dict['detection_keypoints'] = nmsed_additional_fields[
- fields.BoxListFields.keypoints]
- return detection_dict
-
- def loss(self, prediction_dict, scope=None):
- """Compute scalar loss tensors with respect to provided groundtruth.
-
- Calling this function requires that groundtruth tensors have been
- provided via the provide_groundtruth function.
-
- Args:
- prediction_dict: a dictionary holding prediction tensors with
- 1) box_encodings: 3-D float tensor of shape [batch_size, num_anchors,
- box_code_dimension] containing predicted boxes.
- 2) class_predictions_with_background: 3-D float tensor of shape
- [batch_size, num_anchors, num_classes+1] containing class predictions
- (logits) for each of the anchors. Note that this tensor *includes*
- background class predictions.
- scope: Optional scope name.
-
- Returns:
- a dictionary mapping loss keys (`localization_loss` and
- `classification_loss`) to scalar tensors representing corresponding loss
- values.
- """
- with tf.name_scope(scope, 'Loss', prediction_dict.values()):
- keypoints = None
- if self.groundtruth_has_field(fields.BoxListFields.keypoints):
- keypoints = self.groundtruth_lists(fields.BoxListFields.keypoints)
- (batch_cls_targets, batch_cls_weights, batch_reg_targets,
- batch_reg_weights, match_list) = self._assign_targets(
- self.groundtruth_lists(fields.BoxListFields.boxes),
- self.groundtruth_lists(fields.BoxListFields.classes),
- keypoints)
- if self._add_summaries:
- self._summarize_input(
- self.groundtruth_lists(fields.BoxListFields.boxes), match_list)
- num_matches = tf.stack(
- [match.num_matched_columns() for match in match_list])
- location_losses = self._localization_loss(
- prediction_dict['box_encodings'],
- batch_reg_targets,
- ignore_nan_targets=True,
- weights=batch_reg_weights)
- cls_losses = self._classification_loss(
- prediction_dict['class_predictions_with_background'],
- batch_cls_targets,
- weights=batch_cls_weights)
-
- if self._hard_example_miner:
- (localization_loss, classification_loss) = self._apply_hard_mining(
- location_losses, cls_losses, prediction_dict, match_list)
- if self._add_summaries:
- self._hard_example_miner.summarize()
- else:
- if self._add_summaries:
- class_ids = tf.argmax(batch_cls_targets, axis=2)
- flattened_class_ids = tf.reshape(class_ids, [-1])
- flattened_classification_losses = tf.reshape(cls_losses, [-1])
- self._summarize_anchor_classification_loss(
- flattened_class_ids, flattened_classification_losses)
- localization_loss = tf.reduce_sum(location_losses)
- classification_loss = tf.reduce_sum(cls_losses)
-
- # Optionally normalize by number of positive matches
- normalizer = tf.constant(1.0, dtype=tf.float32)
- if self._normalize_loss_by_num_matches:
- normalizer = tf.maximum(tf.to_float(tf.reduce_sum(num_matches)), 1.0)
-
- with tf.name_scope('localization_loss'):
- localization_loss = ((self._localization_loss_weight / normalizer) *
- localization_loss)
- with tf.name_scope('classification_loss'):
- classification_loss = ((self._classification_loss_weight / normalizer) *
- classification_loss)
-
- loss_dict = {
- 'localization_loss': localization_loss,
- 'classification_loss': classification_loss
- }
- return loss_dict
-
- def _summarize_anchor_classification_loss(self, class_ids, cls_losses):
- positive_indices = tf.where(tf.greater(class_ids, 0))
- positive_anchor_cls_loss = tf.squeeze(
- tf.gather(cls_losses, positive_indices), axis=1)
- visualization_utils.add_cdf_image_summary(positive_anchor_cls_loss,
- 'PositiveAnchorLossCDF')
- negative_indices = tf.where(tf.equal(class_ids, 0))
- negative_anchor_cls_loss = tf.squeeze(
- tf.gather(cls_losses, negative_indices), axis=1)
- visualization_utils.add_cdf_image_summary(negative_anchor_cls_loss,
- 'NegativeAnchorLossCDF')
-
- def _assign_targets(self, groundtruth_boxes_list, groundtruth_classes_list,
- groundtruth_keypoints_list=None):
- """Assign groundtruth targets.
-
- Adds a background class to each one-hot encoding of groundtruth classes
- and uses target assigner to obtain regression and classification targets.
-
- Args:
- groundtruth_boxes_list: a list of 2-D tensors of shape [num_boxes, 4]
- containing coordinates of the groundtruth boxes.
- Groundtruth boxes are provided in [y_min, x_min, y_max, x_max]
- format and assumed to be normalized and clipped
- relative to the image window with y_min <= y_max and x_min <= x_max.
- groundtruth_classes_list: a list of 2-D one-hot (or k-hot) tensors of
- shape [num_boxes, num_classes] containing the class targets with the 0th
- index assumed to map to the first non-background class.
- groundtruth_keypoints_list: (optional) a list of 3-D tensors of shape
- [num_boxes, num_keypoints, 2]
-
- Returns:
- batch_cls_targets: a tensor with shape [batch_size, num_anchors,
- num_classes],
- batch_cls_weights: a tensor with shape [batch_size, num_anchors],
- batch_reg_targets: a tensor with shape [batch_size, num_anchors,
- box_code_dimension]
- batch_reg_weights: a tensor with shape [batch_size, num_anchors],
- match_list: a list of matcher.Match objects encoding the match between
- anchors and groundtruth boxes for each image of the batch,
- with rows of the Match objects corresponding to groundtruth boxes
- and columns corresponding to anchors.
- """
- groundtruth_boxlists = [
- box_list.BoxList(boxes) for boxes in groundtruth_boxes_list
- ]
- groundtruth_classes_with_background_list = [
- tf.pad(one_hot_encoding, [[0, 0], [1, 0]], mode='CONSTANT')
- for one_hot_encoding in groundtruth_classes_list
- ]
- if groundtruth_keypoints_list is not None:
- for boxlist, keypoints in zip(
- groundtruth_boxlists, groundtruth_keypoints_list):
- boxlist.add_field(fields.BoxListFields.keypoints, keypoints)
- return target_assigner.batch_assign_targets(
- self._target_assigner, self.anchors, groundtruth_boxlists,
- groundtruth_classes_with_background_list)
-
- def _summarize_input(self, groundtruth_boxes_list, match_list):
- """Creates tensorflow summaries for the input boxes and anchors.
-
- This function creates four summaries corresponding to the average
- number (over images in a batch) of (1) groundtruth boxes, (2) anchors
- marked as positive, (3) anchors marked as negative, and (4) anchors marked
- as ignored.
-
- Args:
- groundtruth_boxes_list: a list of 2-D tensors of shape [num_boxes, 4]
- containing corners of the groundtruth boxes.
- match_list: a list of matcher.Match objects encoding the match between
- anchors and groundtruth boxes for each image of the batch,
- with rows of the Match objects corresponding to groundtruth boxes
- and columns corresponding to anchors.
- """
- num_boxes_per_image = tf.stack(
- [tf.shape(x)[0] for x in groundtruth_boxes_list])
- pos_anchors_per_image = tf.stack(
- [match.num_matched_columns() for match in match_list])
- neg_anchors_per_image = tf.stack(
- [match.num_unmatched_columns() for match in match_list])
- ignored_anchors_per_image = tf.stack(
- [match.num_ignored_columns() for match in match_list])
- tf.summary.scalar('Input/AvgNumGroundtruthBoxesPerImage',
- tf.reduce_mean(tf.to_float(num_boxes_per_image)))
- tf.summary.scalar('Input/AvgNumPositiveAnchorsPerImage',
- tf.reduce_mean(tf.to_float(pos_anchors_per_image)))
- tf.summary.scalar('Input/AvgNumNegativeAnchorsPerImage',
- tf.reduce_mean(tf.to_float(neg_anchors_per_image)))
- tf.summary.scalar('Input/AvgNumIgnoredAnchorsPerImage',
- tf.reduce_mean(tf.to_float(ignored_anchors_per_image)))
-
- def _apply_hard_mining(self, location_losses, cls_losses, prediction_dict,
- match_list):
- """Applies hard mining to anchorwise losses.
-
- Args:
- location_losses: Float tensor of shape [batch_size, num_anchors]
- representing anchorwise location losses.
- cls_losses: Float tensor of shape [batch_size, num_anchors]
- representing anchorwise classification losses.
- prediction_dict: p a dictionary holding prediction tensors with
- 1) box_encodings: 3-D float tensor of shape [batch_size, num_anchors,
- box_code_dimension] containing predicted boxes.
- 2) class_predictions_with_background: 3-D float tensor of shape
- [batch_size, num_anchors, num_classes+1] containing class predictions
- (logits) for each of the anchors. Note that this tensor *includes*
- background class predictions.
- match_list: a list of matcher.Match objects encoding the match between
- anchors and groundtruth boxes for each image of the batch,
- with rows of the Match objects corresponding to groundtruth boxes
- and columns corresponding to anchors.
-
- Returns:
- mined_location_loss: a float scalar with sum of localization losses from
- selected hard examples.
- mined_cls_loss: a float scalar with sum of classification losses from
- selected hard examples.
- """
- class_predictions = tf.slice(
- prediction_dict['class_predictions_with_background'], [0, 0,
- 1], [-1, -1, -1])
-
- decoded_boxes, _ = self._batch_decode(prediction_dict['box_encodings'])
- decoded_box_tensors_list = tf.unstack(decoded_boxes)
- class_prediction_list = tf.unstack(class_predictions)
- decoded_boxlist_list = []
- for box_location, box_score in zip(decoded_box_tensors_list,
- class_prediction_list):
- decoded_boxlist = box_list.BoxList(box_location)
- decoded_boxlist.add_field('scores', box_score)
- decoded_boxlist_list.append(decoded_boxlist)
- return self._hard_example_miner(
- location_losses=location_losses,
- cls_losses=cls_losses,
- decoded_boxlist_list=decoded_boxlist_list,
- match_list=match_list)
-
- def _batch_decode(self, box_encodings):
- """Decodes a batch of box encodings with respect to the anchors.
-
- Args:
- box_encodings: A float32 tensor of shape
- [batch_size, num_anchors, box_code_size] containing box encodings.
-
- Returns:
- decoded_boxes: A float32 tensor of shape
- [batch_size, num_anchors, 4] containing the decoded boxes.
- decoded_keypoints: A float32 tensor of shape
- [batch_size, num_anchors, num_keypoints, 2] containing the decoded
- keypoints if present in the input `box_encodings`, None otherwise.
- """
- combined_shape = shape_utils.combined_static_and_dynamic_shape(
- box_encodings)
- batch_size = combined_shape[0]
- tiled_anchor_boxes = tf.tile(
- tf.expand_dims(self.anchors.get(), 0), [batch_size, 1, 1])
- tiled_anchors_boxlist = box_list.BoxList(
- tf.reshape(tiled_anchor_boxes, [-1, 4]))
- decoded_boxes = self._box_coder.decode(
- tf.reshape(box_encodings, [-1, self._box_coder.code_size]),
- tiled_anchors_boxlist)
- decoded_keypoints = None
- if decoded_boxes.has_field(fields.BoxListFields.keypoints):
- decoded_keypoints = decoded_boxes.get_field(
- fields.BoxListFields.keypoints)
- num_keypoints = decoded_keypoints.get_shape()[1]
- decoded_keypoints = tf.reshape(
- decoded_keypoints,
- tf.stack([combined_shape[0], combined_shape[1], num_keypoints, 2]))
- decoded_boxes = tf.reshape(decoded_boxes.get(), tf.stack(
- [combined_shape[0], combined_shape[1], 4]))
- return decoded_boxes, decoded_keypoints
-
- def restore_map(self, from_detection_checkpoint=True):
- """Returns a map of variables to load from a foreign checkpoint.
-
- See parent class for details.
-
- Args:
- from_detection_checkpoint: whether to restore from a full detection
- checkpoint (with compatible variable names) or to restore from a
- classification checkpoint for initialization prior to training.
-
- Returns:
- A dict mapping variable names (to load from a checkpoint) to variables in
- the model graph.
- """
- variables_to_restore = {}
- for variable in tf.global_variables():
- if variable.op.name.startswith(self._extract_features_scope):
- var_name = variable.op.name
- if not from_detection_checkpoint:
- var_name = (re.split('^' + self._extract_features_scope + '/',
- var_name)[-1])
- variables_to_restore[var_name] = variable
- return variables_to_restore
diff --git a/object_detection/meta_architectures/ssd_meta_arch_test.py b/object_detection/meta_architectures/ssd_meta_arch_test.py
deleted file mode 100644
index 9112ed09..00000000
--- a/object_detection/meta_architectures/ssd_meta_arch_test.py
+++ /dev/null
@@ -1,298 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.meta_architectures.ssd_meta_arch."""
-import functools
-import numpy as np
-import tensorflow as tf
-
-from object_detection.core import anchor_generator
-from object_detection.core import box_list
-from object_detection.core import losses
-from object_detection.core import post_processing
-from object_detection.core import region_similarity_calculator as sim_calc
-from object_detection.meta_architectures import ssd_meta_arch
-from object_detection.utils import test_utils
-
-slim = tf.contrib.slim
-
-
-class FakeSSDFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
-
- def __init__(self):
- super(FakeSSDFeatureExtractor, self).__init__(
- is_training=True,
- depth_multiplier=0,
- min_depth=0,
- pad_to_multiple=1,
- batch_norm_trainable=True,
- conv_hyperparams=None)
-
- def preprocess(self, resized_inputs):
- return tf.identity(resized_inputs)
-
- def extract_features(self, preprocessed_inputs):
- with tf.variable_scope('mock_model'):
- features = slim.conv2d(inputs=preprocessed_inputs, num_outputs=32,
- kernel_size=[1, 1], scope='layer1')
- return [features]
-
-
-class MockAnchorGenerator2x2(anchor_generator.AnchorGenerator):
- """Sets up a simple 2x2 anchor grid on the unit square."""
-
- def name_scope(self):
- return 'MockAnchorGenerator'
-
- def num_anchors_per_location(self):
- return [1]
-
- def _generate(self, feature_map_shape_list, im_height, im_width):
- return box_list.BoxList(
- tf.constant([[0, 0, .5, .5],
- [0, .5, .5, 1],
- [.5, 0, 1, .5],
- [.5, .5, 1, 1]], tf.float32))
-
-
-class SsdMetaArchTest(tf.test.TestCase):
-
- def setUp(self):
- """Set up mock SSD model.
-
- Here we set up a simple mock SSD model that will always predict 4
- detections that happen to always be exactly the anchors that are set up
- in the above MockAnchorGenerator. Because we let max_detections=5,
- we will also always end up with an extra padded row in the detection
- results.
- """
- is_training = False
- self._num_classes = 1
- mock_anchor_generator = MockAnchorGenerator2x2()
- mock_box_predictor = test_utils.MockBoxPredictor(
- is_training, self._num_classes)
- mock_box_coder = test_utils.MockBoxCoder()
- fake_feature_extractor = FakeSSDFeatureExtractor()
- mock_matcher = test_utils.MockMatcher()
- region_similarity_calculator = sim_calc.IouSimilarity()
-
- def image_resizer_fn(image):
- return tf.identity(image)
-
- classification_loss = losses.WeightedSigmoidClassificationLoss(
- anchorwise_output=True)
- localization_loss = losses.WeightedSmoothL1LocalizationLoss(
- anchorwise_output=True)
- non_max_suppression_fn = functools.partial(
- post_processing.batch_multiclass_non_max_suppression,
- score_thresh=-20.0,
- iou_thresh=1.0,
- max_size_per_class=5,
- max_total_size=5)
- classification_loss_weight = 1.0
- localization_loss_weight = 1.0
- normalize_loss_by_num_matches = False
-
- # This hard example miner is expected to be a no-op.
- hard_example_miner = losses.HardExampleMiner(
- num_hard_examples=None,
- iou_threshold=1.0)
-
- self._num_anchors = 4
- self._code_size = 4
- self._model = ssd_meta_arch.SSDMetaArch(
- is_training, mock_anchor_generator, mock_box_predictor, mock_box_coder,
- fake_feature_extractor, mock_matcher, region_similarity_calculator,
- image_resizer_fn, non_max_suppression_fn, tf.identity,
- classification_loss, localization_loss, classification_loss_weight,
- localization_loss_weight, normalize_loss_by_num_matches,
- hard_example_miner)
-
- def test_preprocess_preserves_input_shapes(self):
- image_shapes = [(3, None, None, 3),
- (None, 10, 10, 3),
- (None, None, None, 3)]
- for image_shape in image_shapes:
- image_placeholder = tf.placeholder(tf.float32, shape=image_shape)
- preprocessed_inputs = self._model.preprocess(image_placeholder)
- self.assertAllEqual(preprocessed_inputs.shape.as_list(), image_shape)
-
- def test_predict_results_have_correct_keys_and_shapes(self):
- batch_size = 3
- image_size = 2
- input_shapes = [(batch_size, image_size, image_size, 3),
- (None, image_size, image_size, 3),
- (batch_size, None, None, 3),
- (None, None, None, 3)]
- expected_box_encodings_shape_out = (
- batch_size, self._num_anchors, self._code_size)
- expected_class_predictions_with_background_shape_out = (
- batch_size, self._num_anchors, self._num_classes+1)
-
- for input_shape in input_shapes:
- tf_graph = tf.Graph()
- with tf_graph.as_default():
- preprocessed_input_placeholder = tf.placeholder(tf.float32,
- shape=input_shape)
- prediction_dict = self._model.predict(preprocessed_input_placeholder)
-
- self.assertTrue('box_encodings' in prediction_dict)
- self.assertTrue('class_predictions_with_background' in prediction_dict)
- self.assertTrue('feature_maps' in prediction_dict)
- self.assertTrue('anchors' in prediction_dict)
-
- init_op = tf.global_variables_initializer()
- with self.test_session(graph=tf_graph) as sess:
- sess.run(init_op)
- prediction_out = sess.run(prediction_dict,
- feed_dict={
- preprocessed_input_placeholder:
- np.random.uniform(
- size=(batch_size, 2, 2, 3))})
- self.assertAllEqual(prediction_out['box_encodings'].shape,
- expected_box_encodings_shape_out)
- self.assertAllEqual(
- prediction_out['class_predictions_with_background'].shape,
- expected_class_predictions_with_background_shape_out)
-
- def test_postprocess_results_are_correct(self):
- batch_size = 2
- image_size = 2
- input_shapes = [(batch_size, image_size, image_size, 3),
- (None, image_size, image_size, 3),
- (batch_size, None, None, 3),
- (None, None, None, 3)]
-
- expected_boxes = np.array([[[0, 0, .5, .5],
- [0, .5, .5, 1],
- [.5, 0, 1, .5],
- [.5, .5, 1, 1],
- [0, 0, 0, 0]],
- [[0, 0, .5, .5],
- [0, .5, .5, 1],
- [.5, 0, 1, .5],
- [.5, .5, 1, 1],
- [0, 0, 0, 0]]])
- expected_scores = np.array([[0, 0, 0, 0, 0],
- [0, 0, 0, 0, 0]])
- expected_classes = np.array([[0, 0, 0, 0, 0],
- [0, 0, 0, 0, 0]])
- expected_num_detections = np.array([4, 4])
-
- for input_shape in input_shapes:
- tf_graph = tf.Graph()
- with tf_graph.as_default():
- preprocessed_input_placeholder = tf.placeholder(tf.float32,
- shape=input_shape)
- prediction_dict = self._model.predict(preprocessed_input_placeholder)
- detections = self._model.postprocess(prediction_dict)
- self.assertTrue('detection_boxes' in detections)
- self.assertTrue('detection_scores' in detections)
- self.assertTrue('detection_classes' in detections)
- self.assertTrue('num_detections' in detections)
- init_op = tf.global_variables_initializer()
- with self.test_session(graph=tf_graph) as sess:
- sess.run(init_op)
- detections_out = sess.run(detections,
- feed_dict={
- preprocessed_input_placeholder:
- np.random.uniform(
- size=(batch_size, 2, 2, 3))})
- self.assertAllClose(detections_out['detection_boxes'], expected_boxes)
- self.assertAllClose(detections_out['detection_scores'], expected_scores)
- self.assertAllClose(detections_out['detection_classes'], expected_classes)
- self.assertAllClose(detections_out['num_detections'],
- expected_num_detections)
-
- def test_loss_results_are_correct(self):
- batch_size = 2
- preprocessed_input = tf.random_uniform((batch_size, 2, 2, 3),
- dtype=tf.float32)
- groundtruth_boxes_list = [tf.constant([[0, 0, .5, .5]], dtype=tf.float32),
- tf.constant([[0, 0, .5, .5]], dtype=tf.float32)]
- groundtruth_classes_list = [tf.constant([[1]], dtype=tf.float32),
- tf.constant([[1]], dtype=tf.float32)]
- self._model.provide_groundtruth(groundtruth_boxes_list,
- groundtruth_classes_list)
- prediction_dict = self._model.predict(preprocessed_input)
- loss_dict = self._model.loss(prediction_dict)
-
- self.assertTrue('localization_loss' in loss_dict)
- self.assertTrue('classification_loss' in loss_dict)
-
- expected_localization_loss = 0.0
- expected_classification_loss = (batch_size * self._num_anchors
- * (self._num_classes+1) * np.log(2.0))
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- losses_out = sess.run(loss_dict)
-
- self.assertAllClose(losses_out['localization_loss'],
- expected_localization_loss)
- self.assertAllClose(losses_out['classification_loss'],
- expected_classification_loss)
-
- def test_restore_map_for_detection_ckpt(self):
- init_op = tf.global_variables_initializer()
- saver = tf.train.Saver()
- save_path = self.get_temp_dir()
- with self.test_session() as sess:
- sess.run(init_op)
- saved_model_path = saver.save(sess, save_path)
- var_map = self._model.restore_map(from_detection_checkpoint=True)
- self.assertIsInstance(var_map, dict)
- saver = tf.train.Saver(var_map)
- saver.restore(sess, saved_model_path)
- for var in sess.run(tf.report_uninitialized_variables()):
- self.assertNotIn('FeatureExtractor', var.name)
-
- def test_restore_map_for_classification_ckpt(self):
- # Define mock tensorflow classification graph and save variables.
- test_graph_classification = tf.Graph()
- with test_graph_classification.as_default():
- image = tf.placeholder(dtype=tf.float32, shape=[1, 20, 20, 3])
- with tf.variable_scope('mock_model'):
- net = slim.conv2d(image, num_outputs=32, kernel_size=1, scope='layer1')
- slim.conv2d(net, num_outputs=3, kernel_size=1, scope='layer2')
-
- init_op = tf.global_variables_initializer()
- saver = tf.train.Saver()
- save_path = self.get_temp_dir()
- with self.test_session() as sess:
- sess.run(init_op)
- saved_model_path = saver.save(sess, save_path)
-
- # Create tensorflow detection graph and load variables from
- # classification checkpoint.
- test_graph_detection = tf.Graph()
- with test_graph_detection.as_default():
- inputs_shape = [2, 2, 2, 3]
- inputs = tf.to_float(tf.random_uniform(
- inputs_shape, minval=0, maxval=255, dtype=tf.int32))
- preprocessed_inputs = self._model.preprocess(inputs)
- prediction_dict = self._model.predict(preprocessed_inputs)
- self._model.postprocess(prediction_dict)
- var_map = self._model.restore_map(from_detection_checkpoint=False)
- self.assertIsInstance(var_map, dict)
- saver = tf.train.Saver(var_map)
- with self.test_session() as sess:
- saver.restore(sess, saved_model_path)
- for var in sess.run(tf.report_uninitialized_variables()):
- self.assertNotIn('FeatureExtractor', var.name)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/metrics/BUILD b/object_detection/metrics/BUILD
deleted file mode 100644
index 878f16a6..00000000
--- a/object_detection/metrics/BUILD
+++ /dev/null
@@ -1,55 +0,0 @@
-# Tensorflow Object Detection API: main runnables.
-
-package(
- default_visibility = ["//visibility:public"],
-)
-
-licenses(["notice"])
-
-# Apache 2.0
-
-py_binary(
- name = "offline_eval_map_corloc",
- srcs = [
- "offline_eval_map_corloc.py",
- ],
- deps = [
- ":tf_example_parser",
- "//tensorflow_models/object_detection:evaluator",
- "//tensorflow_models/object_detection/builders:input_reader_builder",
- "//tensorflow_models/object_detection/core:standard_fields",
- "//tensorflow_models/object_detection/utils:config_util",
- "//tensorflow_models/object_detection/utils:label_map_util",
- ],
-)
-
-py_test(
- name = "offline_eval_map_corloc_test",
- srcs = [
- "offline_eval_map_corloc_test.py",
- ],
- deps = [
- ":offline_eval_map_corloc",
- "//tensorflow",
- ],
-)
-
-py_library(
- name = "tf_example_parser",
- srcs = ["tf_example_parser.py"],
- deps = [
- "//tensorflow",
- "//tensorflow_models/object_detection/core:data_parser",
- "//tensorflow_models/object_detection/core:standard_fields",
- ],
-)
-
-py_test(
- name = "tf_example_parser_test",
- srcs = ["tf_example_parser_test.py"],
- deps = [
- ":tf_example_parser",
- "//tensorflow",
- "//tensorflow_models/object_detection/core:standard_fields",
- ],
-)
diff --git a/object_detection/metrics/offline_eval_map_corloc.py b/object_detection/metrics/offline_eval_map_corloc.py
deleted file mode 100644
index 421b4d1f..00000000
--- a/object_detection/metrics/offline_eval_map_corloc.py
+++ /dev/null
@@ -1,173 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-r"""Evaluation executable for detection data.
-
-This executable evaluates precomputed detections produced by a detection
-model and writes the evaluation results into csv file metrics.csv, stored
-in the directory, specified by --eval_dir.
-
-The evaluation metrics set is supplied in object_detection.protos.EvalConfig
-in metrics_set field.
-Currently two set of metrics are supported:
-- pascal_voc_metrics: standard PASCAL VOC 2007 metric
-- open_images_metrics: Open Image V2 metric
-All other field of object_detection.protos.EvalConfig are ignored.
-
-Example usage:
- ./compute_metrics \
- --eval_dir=path/to/eval_dir \
- --eval_config_path=path/to/evaluation/configuration/file \
- --input_config_path=path/to/input/configuration/file
-"""
-import csv
-import os
-import re
-import tensorflow as tf
-
-from object_detection import evaluator
-from object_detection.core import standard_fields
-from object_detection.metrics import tf_example_parser
-from object_detection.utils import config_util
-from object_detection.utils import label_map_util
-
-flags = tf.app.flags
-tf.logging.set_verbosity(tf.logging.INFO)
-
-flags.DEFINE_string('eval_dir', None, 'Directory to write eval summaries to.')
-flags.DEFINE_string('eval_config_path', None,
- 'Path to an eval_pb2.EvalConfig config file.')
-flags.DEFINE_string('input_config_path', None,
- 'Path to an eval_pb2.InputConfig config file.')
-
-FLAGS = flags.FLAGS
-
-
-def _generate_sharded_filenames(filename):
- m = re.search(r'@(\d{1,})', filename)
- if m:
- num_shards = int(m.group(1))
- return [
- re.sub(r'@(\d{1,})', '-%.5d-of-%.5d' % (i, num_shards), filename)
- for i in range(num_shards)
- ]
- else:
- return [filename]
-
-
-def _generate_filenames(filenames):
- result = []
- for filename in filenames:
- result += _generate_sharded_filenames(filename)
- return result
-
-
-def read_data_and_evaluate(input_config, eval_config):
- """Reads pre-computed object detections and groundtruth from tf_record.
-
- Args:
- input_config: input config proto of type
- object_detection.protos.InputReader.
- eval_config: evaluation config proto of type
- object_detection.protos.EvalConfig.
-
- Returns:
- Evaluated detections metrics.
-
- Raises:
- ValueError: if input_reader type is not supported or metric type is unknown.
- """
- if input_config.WhichOneof('input_reader') == 'tf_record_input_reader':
- input_paths = input_config.tf_record_input_reader.input_path
-
- label_map = label_map_util.load_labelmap(input_config.label_map_path)
- max_num_classes = max([item.id for item in label_map.item])
- categories = label_map_util.convert_label_map_to_categories(
- label_map, max_num_classes)
-
- object_detection_evaluators = evaluator.get_evaluators(
- eval_config, categories)
- # Support a single evaluator
- object_detection_evaluator = object_detection_evaluators[0]
-
- skipped_images = 0
- processed_images = 0
- for input_path in _generate_filenames(input_paths):
- tf.logging.info('Processing file: {0}'.format(input_path))
-
- record_iterator = tf.python_io.tf_record_iterator(path=input_path)
- data_parser = tf_example_parser.TfExampleDetectionAndGTParser()
-
- for string_record in record_iterator:
- tf.logging.log_every_n(tf.logging.INFO, 'Processed %d images...', 1000,
- processed_images)
- processed_images += 1
-
- example = tf.train.Example()
- example.ParseFromString(string_record)
- decoded_dict = data_parser.parse(example)
-
- if decoded_dict:
- object_detection_evaluator.add_single_ground_truth_image_info(
- decoded_dict[standard_fields.DetectionResultFields.key],
- decoded_dict)
- object_detection_evaluator.add_single_detected_image_info(
- decoded_dict[standard_fields.DetectionResultFields.key],
- decoded_dict)
- else:
- skipped_images += 1
- tf.logging.info('Skipped images: {0}'.format(skipped_images))
-
- return object_detection_evaluator.evaluate()
-
- raise ValueError('Unsupported input_reader_config.')
-
-
-def write_metrics(metrics, output_dir):
- """Write metrics to the output directory.
-
- Args:
- metrics: A dictionary containing metric names and values.
- output_dir: Directory to write metrics to.
- """
- tf.logging.info('Writing metrics.')
-
- with open(os.path.join(output_dir, 'metrics.csv'), 'w') as csvfile:
- metrics_writer = csv.writer(csvfile, delimiter=',')
- for metric_name, metric_value in metrics.items():
- metrics_writer.writerow([metric_name, str(metric_value)])
-
-
-def main(argv):
- del argv
- required_flags = ['input_config_path', 'eval_config_path', 'eval_dir']
- for flag_name in required_flags:
- if not getattr(FLAGS, flag_name):
- raise ValueError('Flag --{} is required'.format(flag_name))
-
- configs = config_util.get_configs_from_multiple_files(
- eval_input_config_path=FLAGS.input_config_path,
- eval_config_path=FLAGS.eval_config_path)
-
- eval_config = configs['eval_config']
- input_config = configs['eval_input_config']
-
- metrics = read_data_and_evaluate(input_config, eval_config)
-
- # Save metrics
- write_metrics(metrics, FLAGS.eval_dir)
-
-
-if __name__ == '__main__':
- tf.app.run(main)
diff --git a/object_detection/metrics/offline_eval_map_corloc_test.py b/object_detection/metrics/offline_eval_map_corloc_test.py
deleted file mode 100644
index 68ac3893..00000000
--- a/object_detection/metrics/offline_eval_map_corloc_test.py
+++ /dev/null
@@ -1,58 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for utilities in offline_eval_map_corloc binary."""
-
-import tensorflow as tf
-
-from object_detection.metrics import offline_eval_map_corloc as offline_eval
-
-
-class OfflineEvalMapCorlocTest(tf.test.TestCase):
-
- def test_generateShardedFilenames(self):
- test_filename = '/path/to/file'
- result = offline_eval._generate_sharded_filenames(test_filename)
- self.assertEqual(result, [test_filename])
-
- test_filename = '/path/to/file-00000-of-00050'
- result = offline_eval._generate_sharded_filenames(test_filename)
- self.assertEqual(result, [test_filename])
-
- result = offline_eval._generate_sharded_filenames('/path/to/@3.record')
- self.assertEqual(result, [
- '/path/to/-00000-of-00003.record', '/path/to/-00001-of-00003.record',
- '/path/to/-00002-of-00003.record'
- ])
-
- result = offline_eval._generate_sharded_filenames('/path/to/abc@3')
- self.assertEqual(result, [
- '/path/to/abc-00000-of-00003', '/path/to/abc-00001-of-00003',
- '/path/to/abc-00002-of-00003'
- ])
-
- result = offline_eval._generate_sharded_filenames('/path/to/@1')
- self.assertEqual(result, ['/path/to/-00000-of-00001'])
-
- def test_generateFilenames(self):
- test_filenames = ['/path/to/file', '/path/to/@3.record']
- result = offline_eval._generate_filenames(test_filenames)
- self.assertEqual(result, [
- '/path/to/file', '/path/to/-00000-of-00003.record',
- '/path/to/-00001-of-00003.record', '/path/to/-00002-of-00003.record'
- ])
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/metrics/tf_example_parser.py b/object_detection/metrics/tf_example_parser.py
deleted file mode 100644
index 5b8ab7af..00000000
--- a/object_detection/metrics/tf_example_parser.py
+++ /dev/null
@@ -1,155 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tensorflow Example proto parser for data loading.
-
-A parser to decode data containing serialized tensorflow.Example
-protos into materialized tensors (numpy arrays).
-"""
-
-import numpy as np
-
-from object_detection.core import data_parser
-from object_detection.core import standard_fields as fields
-
-
-class FloatParser(data_parser.DataToNumpyParser):
- """Tensorflow Example float parser."""
-
- def __init__(self, field_name):
- self.field_name = field_name
-
- def parse(self, tf_example):
- return np.array(
- tf_example.features.feature[self.field_name].float_list.value,
- dtype=np.float).transpose() if tf_example.features.feature[
- self.field_name].HasField("float_list") else None
-
-
-class StringParser(data_parser.DataToNumpyParser):
- """Tensorflow Example string parser."""
-
- def __init__(self, field_name):
- self.field_name = field_name
-
- def parse(self, tf_example):
- return "".join(tf_example.features.feature[self.field_name]
- .bytes_list.value) if tf_example.features.feature[
- self.field_name].HasField("bytes_list") else None
-
-
-class Int64Parser(data_parser.DataToNumpyParser):
- """Tensorflow Example int64 parser."""
-
- def __init__(self, field_name):
- self.field_name = field_name
-
- def parse(self, tf_example):
- return np.array(
- tf_example.features.feature[self.field_name].int64_list.value,
- dtype=np.int64).transpose() if tf_example.features.feature[
- self.field_name].HasField("int64_list") else None
-
-
-class BoundingBoxParser(data_parser.DataToNumpyParser):
- """Tensorflow Example bounding box parser."""
-
- def __init__(self, xmin_field_name, ymin_field_name, xmax_field_name,
- ymax_field_name):
- self.field_names = [
- ymin_field_name, xmin_field_name, ymax_field_name, xmax_field_name
- ]
-
- def parse(self, tf_example):
- result = []
- parsed = True
- for field_name in self.field_names:
- result.append(tf_example.features.feature[field_name].float_list.value)
- parsed &= (
- tf_example.features.feature[field_name].HasField("float_list"))
-
- return np.array(result).transpose() if parsed else None
-
-
-class TfExampleDetectionAndGTParser(data_parser.DataToNumpyParser):
- """Tensorflow Example proto parser."""
-
- def __init__(self):
- self.items_to_handlers = {
- fields.DetectionResultFields.key:
- StringParser(fields.TfExampleFields.source_id),
- # Object ground truth boxes and classes.
- fields.InputDataFields.groundtruth_boxes: (BoundingBoxParser(
- fields.TfExampleFields.object_bbox_xmin,
- fields.TfExampleFields.object_bbox_ymin,
- fields.TfExampleFields.object_bbox_xmax,
- fields.TfExampleFields.object_bbox_ymax)),
- fields.InputDataFields.groundtruth_classes: (
- Int64Parser(fields.TfExampleFields.object_class_label)),
- # Object detections.
- fields.DetectionResultFields.detection_boxes: (BoundingBoxParser(
- fields.TfExampleFields.detection_bbox_xmin,
- fields.TfExampleFields.detection_bbox_ymin,
- fields.TfExampleFields.detection_bbox_xmax,
- fields.TfExampleFields.detection_bbox_ymax)),
- fields.DetectionResultFields.detection_classes: (
- Int64Parser(fields.TfExampleFields.detection_class_label)),
- fields.DetectionResultFields.detection_scores: (
- FloatParser(fields.TfExampleFields.detection_score)),
- }
-
- self.optional_items_to_handlers = {
- fields.InputDataFields.groundtruth_difficult:
- Int64Parser(fields.TfExampleFields.object_difficult),
- fields.InputDataFields.groundtruth_group_of:
- Int64Parser(fields.TfExampleFields.object_group_of)
- }
-
- def parse(self, tf_example):
- """Parses tensorflow example and returns a tensor dictionary.
-
- Args:
- tf_example: a tf.Example object.
-
- Returns:
- A dictionary of the following numpy arrays:
- fields.DetectionResultFields.source_id - string containing original image
- id.
- fields.InputDataFields.groundtruth_boxes - a numpy array containing
- groundtruth boxes.
- fields.InputDataFields.groundtruth_classes - a numpy array containing
- groundtruth classes.
- fields.InputDataFields.groundtruth_group_of - a numpy array containing
- groundtruth group of flag (optional, None if not specified).
- fields.InputDataFields.groundtruth_difficult - a numpy array containing
- groundtruth difficult flag (optional, None if not specified).
- fields.DetectionResultFields.detection_boxes - a numpy array containing
- detection boxes.
- fields.DetectionResultFields.detection_classes - a numpy array containing
- detection class labels.
- fields.DetectionResultFields.detection_scores - a numpy array containing
- detection scores.
- Returns None if tf.Example was not parsed or non-optional fields were not
- found.
- """
- results_dict = {}
- parsed = True
- for key, parser in self.items_to_handlers.items():
- results_dict[key] = parser.parse(tf_example)
- parsed &= (results_dict[key] is not None)
-
- for key, parser in self.optional_items_to_handlers.items():
- results_dict[key] = parser.parse(tf_example)
-
- return results_dict if parsed else None
diff --git a/object_detection/metrics/tf_example_parser_test.py b/object_detection/metrics/tf_example_parser_test.py
deleted file mode 100644
index 6d9ce748..00000000
--- a/object_detection/metrics/tf_example_parser_test.py
+++ /dev/null
@@ -1,186 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for object_detection.data_decoders.tf_example_parser."""
-
-import numpy as np
-import numpy.testing as np_testing
-import tensorflow as tf
-
-from object_detection.core import standard_fields as fields
-from object_detection.metrics import tf_example_parser
-
-
-class TfExampleDecoderTest(tf.test.TestCase):
-
- def _Int64Feature(self, value):
- return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
-
- def _FloatFeature(self, value):
- return tf.train.Feature(float_list=tf.train.FloatList(value=value))
-
- def _BytesFeature(self, value):
- return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
-
- def testParseDetectionsAndGT(self):
- source_id = 'abc.jpg'
- # y_min, x_min, y_max, x_max
- object_bb = np.array([[0.0, 0.5, 0.3], [0.0, 0.1, 0.6], [1.0, 0.6, 0.8],
- [1.0, 0.6, 0.7]]).transpose()
- detection_bb = np.array([[0.1, 0.2], [0.0, 0.8], [1.0, 0.6],
- [1.0, 0.85]]).transpose()
-
- object_class_label = [1, 1, 2]
- object_difficult = [1, 0, 0]
- object_group_of = [0, 0, 1]
- detection_class_label = [2, 1]
- detection_score = [0.5, 0.3]
- features = {
- fields.TfExampleFields.source_id:
- self._BytesFeature(source_id),
- fields.TfExampleFields.object_bbox_ymin:
- self._FloatFeature(object_bb[:, 0].tolist()),
- fields.TfExampleFields.object_bbox_xmin:
- self._FloatFeature(object_bb[:, 1].tolist()),
- fields.TfExampleFields.object_bbox_ymax:
- self._FloatFeature(object_bb[:, 2].tolist()),
- fields.TfExampleFields.object_bbox_xmax:
- self._FloatFeature(object_bb[:, 3].tolist()),
- fields.TfExampleFields.detection_bbox_ymin:
- self._FloatFeature(detection_bb[:, 0].tolist()),
- fields.TfExampleFields.detection_bbox_xmin:
- self._FloatFeature(detection_bb[:, 1].tolist()),
- fields.TfExampleFields.detection_bbox_ymax:
- self._FloatFeature(detection_bb[:, 2].tolist()),
- fields.TfExampleFields.detection_bbox_xmax:
- self._FloatFeature(detection_bb[:, 3].tolist()),
- fields.TfExampleFields.detection_class_label:
- self._Int64Feature(detection_class_label),
- fields.TfExampleFields.detection_score:
- self._FloatFeature(detection_score),
- }
-
- example = tf.train.Example(features=tf.train.Features(feature=features))
- parser = tf_example_parser.TfExampleDetectionAndGTParser()
-
- results_dict = parser.parse(example)
- self.assertIsNone(results_dict)
-
- features[fields.TfExampleFields.object_class_label] = (
- self._Int64Feature(object_class_label))
- features[fields.TfExampleFields.object_difficult] = (
- self._Int64Feature(object_difficult))
-
- example = tf.train.Example(features=tf.train.Features(feature=features))
- results_dict = parser.parse(example)
-
- self.assertIsNotNone(results_dict)
- self.assertEqual(source_id, results_dict[fields.DetectionResultFields.key])
- np_testing.assert_almost_equal(
- object_bb, results_dict[fields.InputDataFields.groundtruth_boxes])
- np_testing.assert_almost_equal(
- detection_bb,
- results_dict[fields.DetectionResultFields.detection_boxes])
- np_testing.assert_almost_equal(
- detection_score,
- results_dict[fields.DetectionResultFields.detection_scores])
- np_testing.assert_almost_equal(
- detection_class_label,
- results_dict[fields.DetectionResultFields.detection_classes])
- np_testing.assert_almost_equal(
- object_difficult,
- results_dict[fields.InputDataFields.groundtruth_difficult])
- np_testing.assert_almost_equal(
- object_class_label,
- results_dict[fields.InputDataFields.groundtruth_classes])
-
- parser = tf_example_parser.TfExampleDetectionAndGTParser()
-
- features[fields.TfExampleFields.object_group_of] = (
- self._Int64Feature(object_group_of))
-
- example = tf.train.Example(features=tf.train.Features(feature=features))
- results_dict = parser.parse(example)
- self.assertIsNotNone(results_dict)
- np_testing.assert_almost_equal(
- object_group_of,
- results_dict[fields.InputDataFields.groundtruth_group_of])
-
- def testParseString(self):
- string_val = 'abc'
- features = {'string': self._BytesFeature(string_val)}
- example = tf.train.Example(features=tf.train.Features(feature=features))
-
- parser = tf_example_parser.StringParser('string')
- result = parser.parse(example)
- self.assertIsNotNone(result)
- self.assertEqual(result, string_val)
-
- parser = tf_example_parser.StringParser('another_string')
- result = parser.parse(example)
- self.assertIsNone(result)
-
- def testParseFloat(self):
- float_array_val = [1.5, 1.4, 2.0]
- features = {'floats': self._FloatFeature(float_array_val)}
- example = tf.train.Example(features=tf.train.Features(feature=features))
-
- parser = tf_example_parser.FloatParser('floats')
- result = parser.parse(example)
- self.assertIsNotNone(result)
- np_testing.assert_almost_equal(result, float_array_val)
-
- parser = tf_example_parser.StringParser('another_floats')
- result = parser.parse(example)
- self.assertIsNone(result)
-
- def testInt64Parser(self):
- int_val = [1, 2, 3]
- features = {'ints': self._Int64Feature(int_val)}
- example = tf.train.Example(features=tf.train.Features(feature=features))
-
- parser = tf_example_parser.Int64Parser('ints')
- result = parser.parse(example)
- self.assertIsNotNone(result)
- np_testing.assert_almost_equal(result, int_val)
-
- parser = tf_example_parser.Int64Parser('another_ints')
- result = parser.parse(example)
- self.assertIsNone(result)
-
- def testBoundingBoxParser(self):
- bounding_boxes = np.array([[0.0, 0.5, 0.3], [0.0, 0.1, 0.6],
- [1.0, 0.6, 0.8], [1.0, 0.6, 0.7]]).transpose()
- features = {
- 'ymin': self._FloatFeature(bounding_boxes[:, 0]),
- 'xmin': self._FloatFeature(bounding_boxes[:, 1]),
- 'ymax': self._FloatFeature(bounding_boxes[:, 2]),
- 'xmax': self._FloatFeature(bounding_boxes[:, 3])
- }
-
- example = tf.train.Example(features=tf.train.Features(feature=features))
-
- parser = tf_example_parser.BoundingBoxParser('xmin', 'ymin', 'xmax', 'ymax')
- result = parser.parse(example)
- self.assertIsNotNone(result)
- np_testing.assert_almost_equal(result, bounding_boxes)
-
- parser = tf_example_parser.BoundingBoxParser('xmin', 'ymin', 'xmax',
- 'another_ymax')
- result = parser.parse(example)
- self.assertIsNone(result)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/models/BUILD b/object_detection/models/BUILD
deleted file mode 100644
index 36efaba5..00000000
--- a/object_detection/models/BUILD
+++ /dev/null
@@ -1,229 +0,0 @@
-# Tensorflow Object Detection API: Models.
-
-package(
- default_visibility = ["//visibility:public"],
-)
-
-licenses(["notice"])
-
-# Apache 2.0
-
-py_library(
- name = "feature_map_generators",
- srcs = [
- "feature_map_generators.py",
- ],
- deps = [
- "//tensorflow",
- ],
-)
-
-py_test(
- name = "feature_map_generators_test",
- srcs = [
- "feature_map_generators_test.py",
- ],
- deps = [
- ":feature_map_generators",
- "//tensorflow",
- ],
-)
-
-py_library(
- name = "ssd_feature_extractor_test",
- srcs = [
- "ssd_feature_extractor_test.py",
- ],
- deps = [
- "//tensorflow",
- ],
-)
-
-py_library(
- name = "ssd_inception_v2_feature_extractor",
- srcs = [
- "ssd_inception_v2_feature_extractor.py",
- ],
- deps = [
- ":feature_map_generators",
- "//tensorflow",
- "//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch",
- "//tensorflow_models/object_detection/utils:ops",
- "//tensorflow_models/slim:inception_v2",
- ],
-)
-
-py_library(
- name = "ssd_inception_v3_feature_extractor",
- srcs = [
- "ssd_inception_v3_feature_extractor.py",
- ],
- deps = [
- ":feature_map_generators",
- "//tensorflow",
- "//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch",
- "//tensorflow_models/object_detection/utils:ops",
- "//tensorflow_models/slim:inception_v3",
- ],
-)
-
-py_library(
- name = "ssd_mobilenet_v1_feature_extractor",
- srcs = ["ssd_mobilenet_v1_feature_extractor.py"],
- deps = [
- ":feature_map_generators",
- "//tensorflow",
- "//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch",
- "//tensorflow_models/object_detection/utils:ops",
- "//tensorflow_models/slim:mobilenet_v1",
- ],
-)
-
-py_library(
- name = "embedded_ssd_mobilenet_v1_feature_extractor",
- srcs = ["embedded_ssd_mobilenet_v1_feature_extractor.py"],
- deps = [
- ":feature_map_generators",
- ":ssd_mobilenet_v1_feature_extractor",
- "//tensorflow",
- "//tensorflow_models/object_detection/utils:ops",
- "//tensorflow_models/slim:mobilenet_v1",
- ],
-)
-
-py_test(
- name = "ssd_inception_v2_feature_extractor_test",
- srcs = [
- "ssd_inception_v2_feature_extractor_test.py",
- ],
- deps = [
- ":ssd_feature_extractor_test",
- ":ssd_inception_v2_feature_extractor",
- "//tensorflow",
- ],
-)
-
-py_test(
- name = "ssd_inception_v3_feature_extractor_test",
- srcs = [
- "ssd_inception_v3_feature_extractor_test.py",
- ],
- deps = [
- ":ssd_feature_extractor_test",
- ":ssd_inception_v3_feature_extractor",
- "//tensorflow",
- ],
-)
-
-py_test(
- name = "ssd_mobilenet_v1_feature_extractor_test",
- srcs = ["ssd_mobilenet_v1_feature_extractor_test.py"],
- deps = [
- ":ssd_feature_extractor_test",
- ":ssd_mobilenet_v1_feature_extractor",
- "//tensorflow",
- ],
-)
-
-py_test(
- name = "embedded_ssd_mobilenet_v1_feature_extractor_test",
- srcs = ["embedded_ssd_mobilenet_v1_feature_extractor_test.py"],
- deps = [
- ":embedded_ssd_mobilenet_v1_feature_extractor",
- ":ssd_feature_extractor_test",
- "//tensorflow",
- ],
-)
-
-py_test(
- name = "faster_rcnn_nas_feature_extractor_test",
- srcs = [
- "faster_rcnn_nas_feature_extractor_test.py",
- ],
- deps = [
- ":faster_rcnn_nas_feature_extractor",
- "//tensorflow",
- ],
-)
-
-py_library(
- name = "faster_rcnn_nas_feature_extractor",
- srcs = [
- "faster_rcnn_nas_feature_extractor.py",
- ],
- deps = [
- "//tensorflow",
- "//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch",
- "//tensorflow_models/slim:nasnet",
- ],
-)
-
-py_library(
- name = "faster_rcnn_inception_resnet_v2_feature_extractor",
- srcs = [
- "faster_rcnn_inception_resnet_v2_feature_extractor.py",
- ],
- deps = [
- "//tensorflow",
- "//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch",
- "//tensorflow_models/slim:inception_resnet_v2",
- ],
-)
-
-py_test(
- name = "faster_rcnn_inception_resnet_v2_feature_extractor_test",
- srcs = [
- "faster_rcnn_inception_resnet_v2_feature_extractor_test.py",
- ],
- deps = [
- ":faster_rcnn_inception_resnet_v2_feature_extractor",
- "//tensorflow",
- ],
-)
-
-py_library(
- name = "faster_rcnn_inception_v2_feature_extractor",
- srcs = [
- "faster_rcnn_inception_v2_feature_extractor.py",
- ],
- deps = [
- "//tensorflow",
- "//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch",
- "//tensorflow_models/slim:inception_v2",
- ],
-)
-
-py_test(
- name = "faster_rcnn_inception_v2_feature_extractor_test",
- srcs = [
- "faster_rcnn_inception_v2_feature_extractor_test.py",
- ],
- deps = [
- ":faster_rcnn_inception_v2_feature_extractor",
- "//tensorflow",
- ],
-)
-
-py_library(
- name = "faster_rcnn_resnet_v1_feature_extractor",
- srcs = [
- "faster_rcnn_resnet_v1_feature_extractor.py",
- ],
- deps = [
- "//tensorflow",
- "//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch",
- "//tensorflow_models/slim:resnet_utils",
- "//tensorflow_models/slim:resnet_v1",
- ],
-)
-
-py_test(
- name = "faster_rcnn_resnet_v1_feature_extractor_test",
- srcs = [
- "faster_rcnn_resnet_v1_feature_extractor_test.py",
- ],
- deps = [
- ":faster_rcnn_resnet_v1_feature_extractor",
- "//tensorflow",
- ],
-)
diff --git a/object_detection/models/__init__.py b/object_detection/models/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/object_detection/models/__pycache__/__init__.cpython-35.pyc b/object_detection/models/__pycache__/__init__.cpython-35.pyc
deleted file mode 100644
index 944348c8..00000000
Binary files a/object_detection/models/__pycache__/__init__.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/models/__pycache__/embedded_ssd_mobilenet_v1_feature_extractor.cpython-35.pyc b/object_detection/models/__pycache__/embedded_ssd_mobilenet_v1_feature_extractor.cpython-35.pyc
deleted file mode 100644
index 3771c902..00000000
Binary files a/object_detection/models/__pycache__/embedded_ssd_mobilenet_v1_feature_extractor.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/models/__pycache__/faster_rcnn_inception_resnet_v2_feature_extractor.cpython-35.pyc b/object_detection/models/__pycache__/faster_rcnn_inception_resnet_v2_feature_extractor.cpython-35.pyc
deleted file mode 100644
index e8afff51..00000000
Binary files a/object_detection/models/__pycache__/faster_rcnn_inception_resnet_v2_feature_extractor.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/models/__pycache__/faster_rcnn_inception_v2_feature_extractor.cpython-35.pyc b/object_detection/models/__pycache__/faster_rcnn_inception_v2_feature_extractor.cpython-35.pyc
deleted file mode 100644
index 75128279..00000000
Binary files a/object_detection/models/__pycache__/faster_rcnn_inception_v2_feature_extractor.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/models/__pycache__/faster_rcnn_nas_feature_extractor.cpython-35.pyc b/object_detection/models/__pycache__/faster_rcnn_nas_feature_extractor.cpython-35.pyc
deleted file mode 100644
index bfd986a2..00000000
Binary files a/object_detection/models/__pycache__/faster_rcnn_nas_feature_extractor.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/models/__pycache__/faster_rcnn_resnet_v1_feature_extractor.cpython-35.pyc b/object_detection/models/__pycache__/faster_rcnn_resnet_v1_feature_extractor.cpython-35.pyc
deleted file mode 100644
index a324c94c..00000000
Binary files a/object_detection/models/__pycache__/faster_rcnn_resnet_v1_feature_extractor.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/models/__pycache__/feature_map_generators.cpython-35.pyc b/object_detection/models/__pycache__/feature_map_generators.cpython-35.pyc
deleted file mode 100644
index da26f4f0..00000000
Binary files a/object_detection/models/__pycache__/feature_map_generators.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/models/__pycache__/ssd_inception_v2_feature_extractor.cpython-35.pyc b/object_detection/models/__pycache__/ssd_inception_v2_feature_extractor.cpython-35.pyc
deleted file mode 100644
index bba597f1..00000000
Binary files a/object_detection/models/__pycache__/ssd_inception_v2_feature_extractor.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/models/__pycache__/ssd_inception_v3_feature_extractor.cpython-35.pyc b/object_detection/models/__pycache__/ssd_inception_v3_feature_extractor.cpython-35.pyc
deleted file mode 100644
index 7e884ee1..00000000
Binary files a/object_detection/models/__pycache__/ssd_inception_v3_feature_extractor.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/models/__pycache__/ssd_mobilenet_v1_feature_extractor.cpython-35.pyc b/object_detection/models/__pycache__/ssd_mobilenet_v1_feature_extractor.cpython-35.pyc
deleted file mode 100644
index 3fb2c037..00000000
Binary files a/object_detection/models/__pycache__/ssd_mobilenet_v1_feature_extractor.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor.py b/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor.py
deleted file mode 100644
index a29cb84f..00000000
--- a/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor.py
+++ /dev/null
@@ -1,124 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Embedded-friendly SSDFeatureExtractor for MobilenetV1 features."""
-
-import tensorflow as tf
-
-from object_detection.models import feature_map_generators
-from object_detection.models import ssd_mobilenet_v1_feature_extractor
-from object_detection.utils import ops
-from nets import mobilenet_v1
-
-slim = tf.contrib.slim
-
-
-class EmbeddedSSDMobileNetV1FeatureExtractor(
- ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor):
- """Embedded-friendly SSD Feature Extractor using MobilenetV1 features.
-
- This feature extractor is similar to SSD MobileNetV1 feature extractor, and
- it fixes input resolution to be 256x256, reduces the number of feature maps
- used for box prediction and ensures convolution kernel to be no larger
- than input tensor in spatial dimensions.
-
- This feature extractor requires support of the following ops if used in
- embedded devices:
- - Conv
- - DepthwiseConv
- - Relu6
-
- All conv/depthwiseconv use SAME padding, and no additional spatial padding is
- needed.
- """
-
- def __init__(self,
- is_training,
- depth_multiplier,
- min_depth,
- pad_to_multiple,
- conv_hyperparams,
- batch_norm_trainable=True,
- reuse_weights=None):
- """MobileNetV1 Feature Extractor for Embedded-friendly SSD Models.
-
- Args:
- is_training: whether the network is in training mode.
- depth_multiplier: float depth multiplier for feature extractor.
- min_depth: minimum feature extractor depth.
- pad_to_multiple: the nearest multiple to zero pad the input height and
- width dimensions to. For EmbeddedSSD it must be set to 1.
- conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
- batch_norm_trainable: Whether to update batch norm parameters during
- training or not. When training with a small batch size
- (e.g. 1), it is desirable to disable batch norm update and use
- pretrained batch norm params.
- reuse_weights: Whether to reuse variables. Default is None.
-
- Raises:
- ValueError: upon invalid `pad_to_multiple` values.
- """
- if pad_to_multiple != 1:
- raise ValueError('Embedded-specific SSD only supports `pad_to_multiple` '
- 'of 1.')
-
- super(EmbeddedSSDMobileNetV1FeatureExtractor, self).__init__(
- is_training, depth_multiplier, min_depth, pad_to_multiple,
- conv_hyperparams, batch_norm_trainable, reuse_weights)
-
- def extract_features(self, preprocessed_inputs):
- """Extract features from preprocessed inputs.
-
- Args:
- preprocessed_inputs: a [batch, height, width, channels] float tensor
- representing a batch of images.
-
- Returns:
- feature_maps: a list of tensors where the ith tensor has shape
- [batch, height_i, width_i, depth_i]
- """
- preprocessed_inputs.get_shape().assert_has_rank(4)
- shape_assert = tf.Assert(
- tf.logical_and(
- tf.equal(tf.shape(preprocessed_inputs)[1], 256),
- tf.equal(tf.shape(preprocessed_inputs)[2], 256)),
- ['image size must be 256 in both height and width.'])
-
- feature_map_layout = {
- 'from_layer': [
- 'Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', ''
- ],
- 'layer_depth': [-1, -1, 512, 256, 256],
- 'conv_kernel_size': [-1, -1, 3, 3, 2],
- }
-
- with tf.control_dependencies([shape_assert]):
- with slim.arg_scope(self._conv_hyperparams):
- with tf.variable_scope('MobilenetV1',
- reuse=self._reuse_weights) as scope:
- _, image_features = mobilenet_v1.mobilenet_v1_base(
- ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
- final_endpoint='Conv2d_13_pointwise',
- min_depth=self._min_depth,
- depth_multiplier=self._depth_multiplier,
- scope=scope)
- feature_maps = feature_map_generators.multi_resolution_feature_maps(
- feature_map_layout=feature_map_layout,
- depth_multiplier=self._depth_multiplier,
- min_depth=self._min_depth,
- insert_1x1_conv=True,
- image_features=image_features)
-
- return feature_maps.values()
diff --git a/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor_test.py b/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor_test.py
deleted file mode 100644
index cef5de51..00000000
--- a/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor_test.py
+++ /dev/null
@@ -1,119 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for embedded_ssd_mobilenet_v1_feature_extractor."""
-import numpy as np
-import tensorflow as tf
-
-from object_detection.models import embedded_ssd_mobilenet_v1_feature_extractor
-from object_detection.models import ssd_feature_extractor_test
-
-
-class EmbeddedSSDMobileNetV1FeatureExtractorTest(
- ssd_feature_extractor_test.SsdFeatureExtractorTestBase, tf.test.TestCase):
-
- def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
- is_training=True, batch_norm_trainable=True):
- """Constructs a new feature extractor.
-
- Args:
- depth_multiplier: float depth multiplier for feature extractor
- pad_to_multiple: the nearest multiple to zero pad the input height and
- width dimensions to.
- is_training: whether the network is in training mode.
- batch_norm_trainable: whether to update batch norm parameters during
- training.
-
- Returns:
- an ssd_meta_arch.SSDFeatureExtractor object.
- """
- min_depth = 32
- conv_hyperparams = {}
- return (embedded_ssd_mobilenet_v1_feature_extractor.
- EmbeddedSSDMobileNetV1FeatureExtractor(
- is_training, depth_multiplier, min_depth, pad_to_multiple,
- conv_hyperparams, batch_norm_trainable))
-
- def test_extract_features_returns_correct_shapes_256(self):
- image_height = 256
- image_width = 256
- depth_multiplier = 1.0
- pad_to_multiple = 1
- expected_feature_map_shape = [(4, 16, 16, 512), (4, 8, 8, 1024),
- (4, 4, 4, 512), (4, 2, 2, 256),
- (4, 1, 1, 256)]
- self.check_extract_features_returns_correct_shape(
- image_height, image_width, depth_multiplier, pad_to_multiple,
- expected_feature_map_shape)
-
- def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
- image_height = 256
- image_width = 256
- depth_multiplier = 0.5**12
- pad_to_multiple = 1
- expected_feature_map_shape = [(4, 16, 16, 32), (4, 8, 8, 32), (4, 4, 4, 32),
- (4, 2, 2, 32), (4, 1, 1, 32)]
- self.check_extract_features_returns_correct_shape(
- image_height, image_width, depth_multiplier, pad_to_multiple,
- expected_feature_map_shape)
-
- def test_extract_features_returns_correct_shapes_with_pad_to_multiple_of_1(
- self):
- image_height = 256
- image_width = 256
- depth_multiplier = 1.0
- pad_to_multiple = 1
- expected_feature_map_shape = [(4, 16, 16, 512), (4, 8, 8, 1024),
- (4, 4, 4, 512), (4, 2, 2, 256),
- (4, 1, 1, 256)]
- self.check_extract_features_returns_correct_shape(
- image_height, image_width, depth_multiplier, pad_to_multiple,
- expected_feature_map_shape)
-
- def test_extract_features_raises_error_with_pad_to_multiple_not_1(self):
- depth_multiplier = 1.0
- pad_to_multiple = 2
- with self.assertRaises(ValueError):
- _ = self._create_feature_extractor(depth_multiplier, pad_to_multiple)
-
- def test_extract_features_raises_error_with_invalid_image_size(self):
- image_height = 128
- image_width = 128
- depth_multiplier = 1.0
- pad_to_multiple = 1
- self.check_extract_features_raises_error_with_invalid_image_size(
- image_height, image_width, depth_multiplier, pad_to_multiple)
-
- def test_preprocess_returns_correct_value_range(self):
- image_height = 256
- image_width = 256
- depth_multiplier = 1
- pad_to_multiple = 1
- test_image = np.random.rand(4, image_height, image_width, 3)
- feature_extractor = self._create_feature_extractor(depth_multiplier,
- pad_to_multiple)
- preprocessed_image = feature_extractor.preprocess(test_image)
- self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))
-
- def test_variables_only_created_in_scope(self):
- depth_multiplier = 1
- pad_to_multiple = 1
- scope_name = 'MobilenetV1'
- self.check_feature_extractor_variables_under_scope(
- depth_multiplier, pad_to_multiple, scope_name)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor.py b/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor.py
deleted file mode 100644
index 29430d86..00000000
--- a/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor.py
+++ /dev/null
@@ -1,215 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Inception Resnet v2 Faster R-CNN implementation.
-
-See "Inception-v4, Inception-ResNet and the Impact of Residual Connections on
-Learning" by Szegedy et al. (https://arxiv.org/abs/1602.07261)
-as well as
-"Speed/accuracy trade-offs for modern convolutional object detectors" by
-Huang et al. (https://arxiv.org/abs/1611.10012)
-"""
-
-import tensorflow as tf
-
-from object_detection.meta_architectures import faster_rcnn_meta_arch
-from nets import inception_resnet_v2
-
-slim = tf.contrib.slim
-
-
-class FasterRCNNInceptionResnetV2FeatureExtractor(
- faster_rcnn_meta_arch.FasterRCNNFeatureExtractor):
- """Faster R-CNN with Inception Resnet v2 feature extractor implementation."""
-
- def __init__(self,
- is_training,
- first_stage_features_stride,
- batch_norm_trainable=False,
- reuse_weights=None,
- weight_decay=0.0):
- """Constructor.
-
- Args:
- is_training: See base class.
- first_stage_features_stride: See base class.
- batch_norm_trainable: See base class.
- reuse_weights: See base class.
- weight_decay: See base class.
-
- Raises:
- ValueError: If `first_stage_features_stride` is not 8 or 16.
- """
- if first_stage_features_stride != 8 and first_stage_features_stride != 16:
- raise ValueError('`first_stage_features_stride` must be 8 or 16.')
- super(FasterRCNNInceptionResnetV2FeatureExtractor, self).__init__(
- is_training, first_stage_features_stride, batch_norm_trainable,
- reuse_weights, weight_decay)
-
- def preprocess(self, resized_inputs):
- """Faster R-CNN with Inception Resnet v2 preprocessing.
-
- Maps pixel values to the range [-1, 1].
-
- Args:
- resized_inputs: A [batch, height_in, width_in, channels] float32 tensor
- representing a batch of images with values between 0 and 255.0.
-
- Returns:
- preprocessed_inputs: A [batch, height_out, width_out, channels] float32
- tensor representing a batch of images.
-
- """
- return (2.0 / 255.0) * resized_inputs - 1.0
-
- def _extract_proposal_features(self, preprocessed_inputs, scope):
- """Extracts first stage RPN features.
-
- Extracts features using the first half of the Inception Resnet v2 network.
- We construct the network in `align_feature_maps=True` mode, which means
- that all VALID paddings in the network are changed to SAME padding so that
- the feature maps are aligned.
-
- Args:
- preprocessed_inputs: A [batch, height, width, channels] float32 tensor
- representing a batch of images.
- scope: A scope name.
-
- Returns:
- rpn_feature_map: A tensor with shape [batch, height, width, depth]
- Raises:
- InvalidArgumentError: If the spatial size of `preprocessed_inputs`
- (height or width) is less than 33.
- ValueError: If the created network is missing the required activation.
- """
- if len(preprocessed_inputs.get_shape().as_list()) != 4:
- raise ValueError('`preprocessed_inputs` must be 4 dimensional, got a '
- 'tensor of shape %s' % preprocessed_inputs.get_shape())
-
- with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope(
- weight_decay=self._weight_decay)):
- # Forces is_training to False to disable batch norm update.
- with slim.arg_scope([slim.batch_norm],
- is_training=self._train_batch_norm):
- with tf.variable_scope('InceptionResnetV2',
- reuse=self._reuse_weights) as scope:
- rpn_feature_map, _ = (
- inception_resnet_v2.inception_resnet_v2_base(
- preprocessed_inputs, final_endpoint='PreAuxLogits',
- scope=scope, output_stride=self._first_stage_features_stride,
- align_feature_maps=True))
- return rpn_feature_map
-
- def _extract_box_classifier_features(self, proposal_feature_maps, scope):
- """Extracts second stage box classifier features.
-
- This function reconstructs the "second half" of the Inception ResNet v2
- network after the part defined in `_extract_proposal_features`.
-
- Args:
- proposal_feature_maps: A 4-D float tensor with shape
- [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
- representing the feature map cropped to each proposal.
- scope: A scope name.
-
- Returns:
- proposal_classifier_features: A 4-D float tensor with shape
- [batch_size * self.max_num_proposals, height, width, depth]
- representing box classifier features for each proposal.
- """
- with tf.variable_scope('InceptionResnetV2', reuse=self._reuse_weights):
- with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope(
- weight_decay=self._weight_decay)):
- # Forces is_training to False to disable batch norm update.
- with slim.arg_scope([slim.batch_norm],
- is_training=self._train_batch_norm):
- with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
- stride=1, padding='SAME'):
- with tf.variable_scope('Mixed_7a'):
- with tf.variable_scope('Branch_0'):
- tower_conv = slim.conv2d(proposal_feature_maps,
- 256, 1, scope='Conv2d_0a_1x1')
- tower_conv_1 = slim.conv2d(
- tower_conv, 384, 3, stride=2,
- padding='VALID', scope='Conv2d_1a_3x3')
- with tf.variable_scope('Branch_1'):
- tower_conv1 = slim.conv2d(
- proposal_feature_maps, 256, 1, scope='Conv2d_0a_1x1')
- tower_conv1_1 = slim.conv2d(
- tower_conv1, 288, 3, stride=2,
- padding='VALID', scope='Conv2d_1a_3x3')
- with tf.variable_scope('Branch_2'):
- tower_conv2 = slim.conv2d(
- proposal_feature_maps, 256, 1, scope='Conv2d_0a_1x1')
- tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3,
- scope='Conv2d_0b_3x3')
- tower_conv2_2 = slim.conv2d(
- tower_conv2_1, 320, 3, stride=2,
- padding='VALID', scope='Conv2d_1a_3x3')
- with tf.variable_scope('Branch_3'):
- tower_pool = slim.max_pool2d(
- proposal_feature_maps, 3, stride=2, padding='VALID',
- scope='MaxPool_1a_3x3')
- net = tf.concat(
- [tower_conv_1, tower_conv1_1, tower_conv2_2, tower_pool], 3)
- net = slim.repeat(net, 9, inception_resnet_v2.block8, scale=0.20)
- net = inception_resnet_v2.block8(net, activation_fn=None)
- proposal_classifier_features = slim.conv2d(
- net, 1536, 1, scope='Conv2d_7b_1x1')
- return proposal_classifier_features
-
- def restore_from_classification_checkpoint_fn(
- self,
- first_stage_feature_extractor_scope,
- second_stage_feature_extractor_scope):
- """Returns a map of variables to load from a foreign checkpoint.
-
- Note that this overrides the default implementation in
- faster_rcnn_meta_arch.FasterRCNNFeatureExtractor which does not work for
- InceptionResnetV2 checkpoints.
-
- TODO: revisit whether it's possible to force the
- `Repeat` namescope as created in `_extract_box_classifier_features` to
- start counting at 2 (e.g. `Repeat_2`) so that the default restore_fn can
- be used.
-
- Args:
- first_stage_feature_extractor_scope: A scope name for the first stage
- feature extractor.
- second_stage_feature_extractor_scope: A scope name for the second stage
- feature extractor.
-
- Returns:
- A dict mapping variable names (to load from a checkpoint) to variables in
- the model graph.
- """
-
- variables_to_restore = {}
- for variable in tf.global_variables():
- if variable.op.name.startswith(
- first_stage_feature_extractor_scope):
- var_name = variable.op.name.replace(
- first_stage_feature_extractor_scope + '/', '')
- variables_to_restore[var_name] = variable
- if variable.op.name.startswith(
- second_stage_feature_extractor_scope):
- var_name = variable.op.name.replace(
- second_stage_feature_extractor_scope
- + '/InceptionResnetV2/Repeat', 'InceptionResnetV2/Repeat_2')
- var_name = var_name.replace(
- second_stage_feature_extractor_scope + '/', '')
- variables_to_restore[var_name] = variable
- return variables_to_restore
-
diff --git a/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor_test.py b/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor_test.py
deleted file mode 100644
index 600efe65..00000000
--- a/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor_test.py
+++ /dev/null
@@ -1,109 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for models.faster_rcnn_inception_resnet_v2_feature_extractor."""
-
-import tensorflow as tf
-
-from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res
-
-
-class FasterRcnnInceptionResnetV2FeatureExtractorTest(tf.test.TestCase):
-
- def _build_feature_extractor(self, first_stage_features_stride):
- return frcnn_inc_res.FasterRCNNInceptionResnetV2FeatureExtractor(
- is_training=False,
- first_stage_features_stride=first_stage_features_stride,
- batch_norm_trainable=False,
- reuse_weights=None,
- weight_decay=0.0)
-
- def test_extract_proposal_features_returns_expected_size(self):
- feature_extractor = self._build_feature_extractor(
- first_stage_features_stride=16)
- preprocessed_inputs = tf.random_uniform(
- [1, 299, 299, 3], maxval=255, dtype=tf.float32)
- rpn_feature_map = feature_extractor.extract_proposal_features(
- preprocessed_inputs, scope='TestScope')
- features_shape = tf.shape(rpn_feature_map)
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- features_shape_out = sess.run(features_shape)
- self.assertAllEqual(features_shape_out, [1, 19, 19, 1088])
-
- def test_extract_proposal_features_stride_eight(self):
- feature_extractor = self._build_feature_extractor(
- first_stage_features_stride=8)
- preprocessed_inputs = tf.random_uniform(
- [1, 224, 224, 3], maxval=255, dtype=tf.float32)
- rpn_feature_map = feature_extractor.extract_proposal_features(
- preprocessed_inputs, scope='TestScope')
- features_shape = tf.shape(rpn_feature_map)
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- features_shape_out = sess.run(features_shape)
- self.assertAllEqual(features_shape_out, [1, 28, 28, 1088])
-
- def test_extract_proposal_features_half_size_input(self):
- feature_extractor = self._build_feature_extractor(
- first_stage_features_stride=16)
- preprocessed_inputs = tf.random_uniform(
- [1, 112, 112, 3], maxval=255, dtype=tf.float32)
- rpn_feature_map = feature_extractor.extract_proposal_features(
- preprocessed_inputs, scope='TestScope')
- features_shape = tf.shape(rpn_feature_map)
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- features_shape_out = sess.run(features_shape)
- self.assertAllEqual(features_shape_out, [1, 7, 7, 1088])
-
- def test_extract_proposal_features_dies_on_invalid_stride(self):
- with self.assertRaises(ValueError):
- self._build_feature_extractor(first_stage_features_stride=99)
-
- def test_extract_proposal_features_dies_with_incorrect_rank_inputs(self):
- feature_extractor = self._build_feature_extractor(
- first_stage_features_stride=16)
- preprocessed_inputs = tf.random_uniform(
- [224, 224, 3], maxval=255, dtype=tf.float32)
- with self.assertRaises(ValueError):
- feature_extractor.extract_proposal_features(
- preprocessed_inputs, scope='TestScope')
-
- def test_extract_box_classifier_features_returns_expected_size(self):
- feature_extractor = self._build_feature_extractor(
- first_stage_features_stride=16)
- proposal_feature_maps = tf.random_uniform(
- [2, 17, 17, 1088], maxval=255, dtype=tf.float32)
- proposal_classifier_features = (
- feature_extractor.extract_box_classifier_features(
- proposal_feature_maps, scope='TestScope'))
- features_shape = tf.shape(proposal_classifier_features)
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- features_shape_out = sess.run(features_shape)
- self.assertAllEqual(features_shape_out, [2, 8, 8, 1536])
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/models/faster_rcnn_inception_v2_feature_extractor.py b/object_detection/models/faster_rcnn_inception_v2_feature_extractor.py
deleted file mode 100644
index ff00a2b3..00000000
--- a/object_detection/models/faster_rcnn_inception_v2_feature_extractor.py
+++ /dev/null
@@ -1,251 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Inception V2 Faster R-CNN implementation.
-
-See "Rethinking the Inception Architecture for Computer Vision"
-https://arxiv.org/abs/1512.00567
-"""
-import tensorflow as tf
-
-from object_detection.meta_architectures import faster_rcnn_meta_arch
-from nets import inception_v2
-
-slim = tf.contrib.slim
-
-
-def _batch_norm_arg_scope(list_ops,
- use_batch_norm=True,
- batch_norm_decay=0.9997,
- batch_norm_epsilon=0.001,
- batch_norm_scale=False,
- train_batch_norm=False):
- """Slim arg scope for InceptionV2 batch norm."""
- if use_batch_norm:
- batch_norm_params = {
- 'is_training': train_batch_norm,
- 'scale': batch_norm_scale,
- 'decay': batch_norm_decay,
- 'epsilon': batch_norm_epsilon
- }
- normalizer_fn = slim.batch_norm
- else:
- normalizer_fn = None
- batch_norm_params = None
-
- return slim.arg_scope(list_ops,
- normalizer_fn=normalizer_fn,
- normalizer_params=batch_norm_params)
-
-
-class FasterRCNNInceptionV2FeatureExtractor(
- faster_rcnn_meta_arch.FasterRCNNFeatureExtractor):
- """Faster R-CNN Inception V2 feature extractor implementation."""
-
- def __init__(self,
- is_training,
- first_stage_features_stride,
- batch_norm_trainable=False,
- reuse_weights=None,
- weight_decay=0.0,
- depth_multiplier=1.0,
- min_depth=16):
- """Constructor.
-
- Args:
- is_training: See base class.
- first_stage_features_stride: See base class.
- batch_norm_trainable: See base class.
- reuse_weights: See base class.
- weight_decay: See base class.
- depth_multiplier: float depth multiplier for feature extractor.
- min_depth: minimum feature extractor depth.
-
- Raises:
- ValueError: If `first_stage_features_stride` is not 8 or 16.
- """
- if first_stage_features_stride != 8 and first_stage_features_stride != 16:
- raise ValueError('`first_stage_features_stride` must be 8 or 16.')
- self._depth_multiplier = depth_multiplier
- self._min_depth = min_depth
- super(FasterRCNNInceptionV2FeatureExtractor, self).__init__(
- is_training, first_stage_features_stride, batch_norm_trainable,
- reuse_weights, weight_decay)
-
- def preprocess(self, resized_inputs):
- """Faster R-CNN Inception V2 preprocessing.
-
- Maps pixel values to the range [-1, 1].
-
- Args:
- resized_inputs: a [batch, height, width, channels] float tensor
- representing a batch of images.
-
- Returns:
- preprocessed_inputs: a [batch, height, width, channels] float tensor
- representing a batch of images.
- """
- return (2.0 / 255.0) * resized_inputs - 1.0
-
- def _extract_proposal_features(self, preprocessed_inputs, scope):
- """Extracts first stage RPN features.
-
- Args:
- preprocessed_inputs: A [batch, height, width, channels] float32 tensor
- representing a batch of images.
- scope: A scope name.
-
- Returns:
- rpn_feature_map: A tensor with shape [batch, height, width, depth]
- Raises:
- InvalidArgumentError: If the spatial size of `preprocessed_inputs`
- (height or width) is less than 33.
- ValueError: If the created network is missing the required activation.
- """
-
- preprocessed_inputs.get_shape().assert_has_rank(4)
- shape_assert = tf.Assert(
- tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
- tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
- ['image size must at least be 33 in both height and width.'])
-
- with tf.control_dependencies([shape_assert]):
- with tf.variable_scope('InceptionV2',
- reuse=self._reuse_weights) as scope:
- with _batch_norm_arg_scope([slim.conv2d, slim.separable_conv2d],
- batch_norm_scale=True,
- train_batch_norm=self._train_batch_norm):
- _, activations = inception_v2.inception_v2_base(
- preprocessed_inputs,
- final_endpoint='Mixed_4e',
- min_depth=self._min_depth,
- depth_multiplier=self._depth_multiplier,
- scope=scope)
-
- return activations['Mixed_4e']
-
- def _extract_box_classifier_features(self, proposal_feature_maps, scope):
- """Extracts second stage box classifier features.
-
- Args:
- proposal_feature_maps: A 4-D float tensor with shape
- [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
- representing the feature map cropped to each proposal.
- scope: A scope name (unused).
-
- Returns:
- proposal_classifier_features: A 4-D float tensor with shape
- [batch_size * self.max_num_proposals, height, width, depth]
- representing box classifier features for each proposal.
- """
- net = proposal_feature_maps
-
- depth = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
- trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)
-
- data_format = 'NHWC'
- concat_dim = 3 if data_format == 'NHWC' else 1
-
- with tf.variable_scope('InceptionV2', reuse=self._reuse_weights):
- with slim.arg_scope(
- [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
- stride=1,
- padding='SAME',
- data_format=data_format):
- with _batch_norm_arg_scope([slim.conv2d, slim.separable_conv2d],
- batch_norm_scale=True,
- train_batch_norm=self._train_batch_norm):
-
- with tf.variable_scope('Mixed_5a'):
- with tf.variable_scope('Branch_0'):
- branch_0 = slim.conv2d(
- net, depth(128), [1, 1],
- weights_initializer=trunc_normal(0.09),
- scope='Conv2d_0a_1x1')
- branch_0 = slim.conv2d(branch_0, depth(192), [3, 3], stride=2,
- scope='Conv2d_1a_3x3')
- with tf.variable_scope('Branch_1'):
- branch_1 = slim.conv2d(
- net, depth(192), [1, 1],
- weights_initializer=trunc_normal(0.09),
- scope='Conv2d_0a_1x1')
- branch_1 = slim.conv2d(branch_1, depth(256), [3, 3],
- scope='Conv2d_0b_3x3')
- branch_1 = slim.conv2d(branch_1, depth(256), [3, 3], stride=2,
- scope='Conv2d_1a_3x3')
- with tf.variable_scope('Branch_2'):
- branch_2 = slim.max_pool2d(net, [3, 3], stride=2,
- scope='MaxPool_1a_3x3')
- net = tf.concat([branch_0, branch_1, branch_2], concat_dim)
-
- with tf.variable_scope('Mixed_5b'):
- with tf.variable_scope('Branch_0'):
- branch_0 = slim.conv2d(net, depth(352), [1, 1],
- scope='Conv2d_0a_1x1')
- with tf.variable_scope('Branch_1'):
- branch_1 = slim.conv2d(
- net, depth(192), [1, 1],
- weights_initializer=trunc_normal(0.09),
- scope='Conv2d_0a_1x1')
- branch_1 = slim.conv2d(branch_1, depth(320), [3, 3],
- scope='Conv2d_0b_3x3')
- with tf.variable_scope('Branch_2'):
- branch_2 = slim.conv2d(
- net, depth(160), [1, 1],
- weights_initializer=trunc_normal(0.09),
- scope='Conv2d_0a_1x1')
- branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
- scope='Conv2d_0b_3x3')
- branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
- scope='Conv2d_0c_3x3')
- with tf.variable_scope('Branch_3'):
- branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
- branch_3 = slim.conv2d(
- branch_3, depth(128), [1, 1],
- weights_initializer=trunc_normal(0.1),
- scope='Conv2d_0b_1x1')
- net = tf.concat([branch_0, branch_1, branch_2, branch_3],
- concat_dim)
-
- with tf.variable_scope('Mixed_5c'):
- with tf.variable_scope('Branch_0'):
- branch_0 = slim.conv2d(net, depth(352), [1, 1],
- scope='Conv2d_0a_1x1')
- with tf.variable_scope('Branch_1'):
- branch_1 = slim.conv2d(
- net, depth(192), [1, 1],
- weights_initializer=trunc_normal(0.09),
- scope='Conv2d_0a_1x1')
- branch_1 = slim.conv2d(branch_1, depth(320), [3, 3],
- scope='Conv2d_0b_3x3')
- with tf.variable_scope('Branch_2'):
- branch_2 = slim.conv2d(
- net, depth(192), [1, 1],
- weights_initializer=trunc_normal(0.09),
- scope='Conv2d_0a_1x1')
- branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
- scope='Conv2d_0b_3x3')
- branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
- scope='Conv2d_0c_3x3')
- with tf.variable_scope('Branch_3'):
- branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
- branch_3 = slim.conv2d(
- branch_3, depth(128), [1, 1],
- weights_initializer=trunc_normal(0.1),
- scope='Conv2d_0b_1x1')
- proposal_classifier_features = tf.concat(
- [branch_0, branch_1, branch_2, branch_3], concat_dim)
-
- return proposal_classifier_features
diff --git a/object_detection/models/faster_rcnn_inception_v2_feature_extractor_test.py b/object_detection/models/faster_rcnn_inception_v2_feature_extractor_test.py
deleted file mode 100644
index 06b7478f..00000000
--- a/object_detection/models/faster_rcnn_inception_v2_feature_extractor_test.py
+++ /dev/null
@@ -1,126 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for faster_rcnn_inception_v2_feature_extractor."""
-
-import numpy as np
-import tensorflow as tf
-
-from object_detection.models import faster_rcnn_inception_v2_feature_extractor as faster_rcnn_inception_v2
-
-
-class FasterRcnnInceptionV2FeatureExtractorTest(tf.test.TestCase):
-
- def _build_feature_extractor(self, first_stage_features_stride):
- return faster_rcnn_inception_v2.FasterRCNNInceptionV2FeatureExtractor(
- is_training=False,
- first_stage_features_stride=first_stage_features_stride,
- batch_norm_trainable=False,
- reuse_weights=None,
- weight_decay=0.0)
-
- def test_extract_proposal_features_returns_expected_size(self):
- feature_extractor = self._build_feature_extractor(
- first_stage_features_stride=16)
- preprocessed_inputs = tf.random_uniform(
- [4, 224, 224, 3], maxval=255, dtype=tf.float32)
- rpn_feature_map = feature_extractor.extract_proposal_features(
- preprocessed_inputs, scope='TestScope')
- features_shape = tf.shape(rpn_feature_map)
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- features_shape_out = sess.run(features_shape)
- self.assertAllEqual(features_shape_out, [4, 14, 14, 576])
-
- def test_extract_proposal_features_stride_eight(self):
- feature_extractor = self._build_feature_extractor(
- first_stage_features_stride=8)
- preprocessed_inputs = tf.random_uniform(
- [4, 224, 224, 3], maxval=255, dtype=tf.float32)
- rpn_feature_map = feature_extractor.extract_proposal_features(
- preprocessed_inputs, scope='TestScope')
- features_shape = tf.shape(rpn_feature_map)
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- features_shape_out = sess.run(features_shape)
- self.assertAllEqual(features_shape_out, [4, 14, 14, 576])
-
- def test_extract_proposal_features_half_size_input(self):
- feature_extractor = self._build_feature_extractor(
- first_stage_features_stride=16)
- preprocessed_inputs = tf.random_uniform(
- [1, 112, 112, 3], maxval=255, dtype=tf.float32)
- rpn_feature_map = feature_extractor.extract_proposal_features(
- preprocessed_inputs, scope='TestScope')
- features_shape = tf.shape(rpn_feature_map)
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- features_shape_out = sess.run(features_shape)
- self.assertAllEqual(features_shape_out, [1, 7, 7, 576])
-
- def test_extract_proposal_features_dies_on_invalid_stride(self):
- with self.assertRaises(ValueError):
- self._build_feature_extractor(first_stage_features_stride=99)
-
- def test_extract_proposal_features_dies_on_very_small_images(self):
- feature_extractor = self._build_feature_extractor(
- first_stage_features_stride=16)
- preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
- rpn_feature_map = feature_extractor.extract_proposal_features(
- preprocessed_inputs, scope='TestScope')
- features_shape = tf.shape(rpn_feature_map)
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- with self.assertRaises(tf.errors.InvalidArgumentError):
- sess.run(
- features_shape,
- feed_dict={preprocessed_inputs: np.random.rand(4, 32, 32, 3)})
-
- def test_extract_proposal_features_dies_with_incorrect_rank_inputs(self):
- feature_extractor = self._build_feature_extractor(
- first_stage_features_stride=16)
- preprocessed_inputs = tf.random_uniform(
- [224, 224, 3], maxval=255, dtype=tf.float32)
- with self.assertRaises(ValueError):
- feature_extractor.extract_proposal_features(
- preprocessed_inputs, scope='TestScope')
-
- def test_extract_box_classifier_features_returns_expected_size(self):
- feature_extractor = self._build_feature_extractor(
- first_stage_features_stride=16)
- proposal_feature_maps = tf.random_uniform(
- [3, 14, 14, 576], maxval=255, dtype=tf.float32)
- proposal_classifier_features = (
- feature_extractor.extract_box_classifier_features(
- proposal_feature_maps, scope='TestScope'))
- features_shape = tf.shape(proposal_classifier_features)
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- features_shape_out = sess.run(features_shape)
- self.assertAllEqual(features_shape_out, [3, 7, 7, 1024])
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/models/faster_rcnn_nas_feature_extractor.py b/object_detection/models/faster_rcnn_nas_feature_extractor.py
deleted file mode 100644
index 5abedebd..00000000
--- a/object_detection/models/faster_rcnn_nas_feature_extractor.py
+++ /dev/null
@@ -1,316 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""NASNet Faster R-CNN implementation.
-
-Learning Transferable Architectures for Scalable Image Recognition
-Barret Zoph, Vijay Vasudevan, Jonathon Shlens, Quoc V. Le
-https://arxiv.org/abs/1707.07012
-"""
-
-import tensorflow as tf
-
-from object_detection.meta_architectures import faster_rcnn_meta_arch
-from nets.nasnet import nasnet
-from nets.nasnet import nasnet_utils
-
-arg_scope = tf.contrib.framework.arg_scope
-slim = tf.contrib.slim
-
-
-def nasnet_large_arg_scope_for_detection(is_batch_norm_training=False):
- """Defines the default arg scope for the NASNet-A Large for object detection.
-
- This provides a small edit to switch batch norm training on and off.
-
- Args:
- is_batch_norm_training: Boolean indicating whether to train with batch norm.
-
- Returns:
- An `arg_scope` to use for the NASNet Large Model.
- """
- imagenet_scope = nasnet.nasnet_large_arg_scope()
- with arg_scope(imagenet_scope):
- with arg_scope([slim.batch_norm], is_training=is_batch_norm_training) as sc:
- return sc
-
-
-# Note: This is largely a copy of _build_nasnet_base inside nasnet.py but
-# with special edits to remove instantiation of the stem and the special
-# ability to receive as input a pair of hidden states.
-def _build_nasnet_base(hidden_previous,
- hidden,
- normal_cell,
- reduction_cell,
- hparams,
- true_cell_num,
- start_cell_num):
- """Constructs a NASNet image model."""
-
- # Find where to place the reduction cells or stride normal cells
- reduction_indices = nasnet_utils.calc_reduction_layers(
- hparams.num_cells, hparams.num_reduction_layers)
-
- # Note: The None is prepended to match the behavior of _imagenet_stem()
- cell_outputs = [None, hidden_previous, hidden]
- net = hidden
-
- # NOTE: In the nasnet.py code, filter_scaling starts at 1.0. We instead
- # start at 2.0 because 1 reduction cell has been created which would
- # update the filter_scaling to 2.0.
- filter_scaling = 2.0
-
- # Run the cells
- for cell_num in range(start_cell_num, hparams.num_cells):
- stride = 1
- if hparams.skip_reduction_layer_input:
- prev_layer = cell_outputs[-2]
- if cell_num in reduction_indices:
- filter_scaling *= hparams.filter_scaling_rate
- net = reduction_cell(
- net,
- scope='reduction_cell_{}'.format(reduction_indices.index(cell_num)),
- filter_scaling=filter_scaling,
- stride=2,
- prev_layer=cell_outputs[-2],
- cell_num=true_cell_num)
- true_cell_num += 1
- cell_outputs.append(net)
- if not hparams.skip_reduction_layer_input:
- prev_layer = cell_outputs[-2]
- net = normal_cell(
- net,
- scope='cell_{}'.format(cell_num),
- filter_scaling=filter_scaling,
- stride=stride,
- prev_layer=prev_layer,
- cell_num=true_cell_num)
- true_cell_num += 1
- cell_outputs.append(net)
-
- # Final nonlinearity.
- # Note that we have dropped the final pooling, dropout and softmax layers
- # from the default nasnet version.
- with tf.variable_scope('final_layer'):
- net = tf.nn.relu(net)
- return net
-
-
-# TODO: Only fixed_shape_resizer is currently supported for NASNet
-# featurization. The reason for this is that nasnet.py only supports
-# inputs with fully known shapes. We need to update nasnet.py to handle
-# shapes not known at compile time.
-class FasterRCNNNASFeatureExtractor(
- faster_rcnn_meta_arch.FasterRCNNFeatureExtractor):
- """Faster R-CNN with NASNet-A feature extractor implementation."""
-
- def __init__(self,
- is_training,
- first_stage_features_stride,
- batch_norm_trainable=False,
- reuse_weights=None,
- weight_decay=0.0):
- """Constructor.
-
- Args:
- is_training: See base class.
- first_stage_features_stride: See base class.
- batch_norm_trainable: See base class.
- reuse_weights: See base class.
- weight_decay: See base class.
-
- Raises:
- ValueError: If `first_stage_features_stride` is not 16.
- """
- if first_stage_features_stride != 16:
- raise ValueError('`first_stage_features_stride` must be 16.')
- super(FasterRCNNNASFeatureExtractor, self).__init__(
- is_training, first_stage_features_stride, batch_norm_trainable,
- reuse_weights, weight_decay)
-
- def preprocess(self, resized_inputs):
- """Faster R-CNN with NAS preprocessing.
-
- Maps pixel values to the range [-1, 1].
-
- Args:
- resized_inputs: A [batch, height_in, width_in, channels] float32 tensor
- representing a batch of images with values between 0 and 255.0.
-
- Returns:
- preprocessed_inputs: A [batch, height_out, width_out, channels] float32
- tensor representing a batch of images.
-
- """
- return (2.0 / 255.0) * resized_inputs - 1.0
-
- def _extract_proposal_features(self, preprocessed_inputs, scope):
- """Extracts first stage RPN features.
-
- Extracts features using the first half of the NASNet network.
- We construct the network in `align_feature_maps=True` mode, which means
- that all VALID paddings in the network are changed to SAME padding so that
- the feature maps are aligned.
-
- Args:
- preprocessed_inputs: A [batch, height, width, channels] float32 tensor
- representing a batch of images.
- scope: A scope name.
-
- Returns:
- rpn_feature_map: A tensor with shape [batch, height, width, depth]
- Raises:
- ValueError: If the created network is missing the required activation.
- """
- del scope
-
- if len(preprocessed_inputs.get_shape().as_list()) != 4:
- raise ValueError('`preprocessed_inputs` must be 4 dimensional, got a '
- 'tensor of shape %s' % preprocessed_inputs.get_shape())
-
- with slim.arg_scope(nasnet_large_arg_scope_for_detection(
- is_batch_norm_training=self._train_batch_norm)):
- _, end_points = nasnet.build_nasnet_large(
- preprocessed_inputs, num_classes=None,
- is_training=self._is_training,
- final_endpoint='Cell_11')
-
- # Note that both 'Cell_10' and 'Cell_11' have equal depth = 2016.
- rpn_feature_map = tf.concat([end_points['Cell_10'],
- end_points['Cell_11']], 3)
-
- # nasnet.py does not maintain the batch size in the first dimension.
- # This work around permits us retaining the batch for below.
- batch = preprocessed_inputs.get_shape().as_list()[0]
- shape_without_batch = rpn_feature_map.get_shape().as_list()[1:]
- rpn_feature_map_shape = [batch] + shape_without_batch
- rpn_feature_map.set_shape(rpn_feature_map_shape)
-
- return rpn_feature_map
-
- def _extract_box_classifier_features(self, proposal_feature_maps, scope):
- """Extracts second stage box classifier features.
-
- This function reconstructs the "second half" of the NASNet-A
- network after the part defined in `_extract_proposal_features`.
-
- Args:
- proposal_feature_maps: A 4-D float tensor with shape
- [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
- representing the feature map cropped to each proposal.
- scope: A scope name.
-
- Returns:
- proposal_classifier_features: A 4-D float tensor with shape
- [batch_size * self.max_num_proposals, height, width, depth]
- representing box classifier features for each proposal.
- """
- del scope
-
- # Note that we always feed into 2 layers of equal depth
- # where the first N channels corresponds to previous hidden layer
- # and the second N channels correspond to the final hidden layer.
- hidden_previous, hidden = tf.split(proposal_feature_maps, 2, axis=3)
-
- # Note that what follows is largely a copy of build_nasnet_large() within
- # nasnet.py. We are copying to minimize code pollution in slim.
-
- # pylint: disable=protected-access
- hparams = nasnet._large_imagenet_config(is_training=self._is_training)
- # pylint: enable=protected-access
-
- # Calculate the total number of cells in the network
- # -- Add 2 for the reduction cells.
- total_num_cells = hparams.num_cells + 2
- # -- And add 2 for the stem cells for ImageNet training.
- total_num_cells += 2
-
- normal_cell = nasnet_utils.NasNetANormalCell(
- hparams.num_conv_filters, hparams.drop_path_keep_prob,
- total_num_cells, hparams.total_training_steps)
- reduction_cell = nasnet_utils.NasNetAReductionCell(
- hparams.num_conv_filters, hparams.drop_path_keep_prob,
- total_num_cells, hparams.total_training_steps)
- with arg_scope([slim.dropout, nasnet_utils.drop_path],
- is_training=self._is_training):
- with arg_scope([slim.batch_norm], is_training=self._train_batch_norm):
- with arg_scope([slim.avg_pool2d,
- slim.max_pool2d,
- slim.conv2d,
- slim.batch_norm,
- slim.separable_conv2d,
- nasnet_utils.factorized_reduction,
- nasnet_utils.global_avg_pool,
- nasnet_utils.get_channel_index,
- nasnet_utils.get_channel_dim],
- data_format=hparams.data_format):
-
- # This corresponds to the cell number just past 'Cell_11' used by
- # by _extract_proposal_features().
- start_cell_num = 12
- # Note that this number equals:
- # start_cell_num + 2 stem cells + 1 reduction cell
- true_cell_num = 15
-
- with slim.arg_scope(nasnet.nasnet_large_arg_scope()):
- net = _build_nasnet_base(hidden_previous,
- hidden,
- normal_cell=normal_cell,
- reduction_cell=reduction_cell,
- hparams=hparams,
- true_cell_num=true_cell_num,
- start_cell_num=start_cell_num)
-
- proposal_classifier_features = net
- return proposal_classifier_features
-
- def restore_from_classification_checkpoint_fn(
- self,
- first_stage_feature_extractor_scope,
- second_stage_feature_extractor_scope):
- """Returns a map of variables to load from a foreign checkpoint.
-
- Note that this overrides the default implementation in
- faster_rcnn_meta_arch.FasterRCNNFeatureExtractor which does not work for
- NASNet-A checkpoints.
-
- Args:
- first_stage_feature_extractor_scope: A scope name for the first stage
- feature extractor.
- second_stage_feature_extractor_scope: A scope name for the second stage
- feature extractor.
-
- Returns:
- A dict mapping variable names (to load from a checkpoint) to variables in
- the model graph.
- """
- # Note that the NAS checkpoint only contains the moving average version of
- # the Variables so we need to generate an appropriate dictionary mapping.
- variables_to_restore = {}
- for variable in tf.global_variables():
- if variable.op.name.startswith(
- first_stage_feature_extractor_scope):
- var_name = variable.op.name.replace(
- first_stage_feature_extractor_scope + '/', '')
- var_name += '/ExponentialMovingAverage'
- variables_to_restore[var_name] = variable
- if variable.op.name.startswith(
- second_stage_feature_extractor_scope):
- var_name = variable.op.name.replace(
- second_stage_feature_extractor_scope + '/', '')
- var_name += '/ExponentialMovingAverage'
- variables_to_restore[var_name] = variable
- return variables_to_restore
-
diff --git a/object_detection/models/faster_rcnn_nas_feature_extractor_test.py b/object_detection/models/faster_rcnn_nas_feature_extractor_test.py
deleted file mode 100644
index 84268d55..00000000
--- a/object_detection/models/faster_rcnn_nas_feature_extractor_test.py
+++ /dev/null
@@ -1,109 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for models.faster_rcnn_nas_feature_extractor."""
-
-import tensorflow as tf
-
-from object_detection.models import faster_rcnn_nas_feature_extractor as frcnn_nas
-
-
-class FasterRcnnNASFeatureExtractorTest(tf.test.TestCase):
-
- def _build_feature_extractor(self, first_stage_features_stride):
- return frcnn_nas.FasterRCNNNASFeatureExtractor(
- is_training=False,
- first_stage_features_stride=first_stage_features_stride,
- batch_norm_trainable=False,
- reuse_weights=None,
- weight_decay=0.0)
-
- def test_extract_proposal_features_returns_expected_size(self):
- feature_extractor = self._build_feature_extractor(
- first_stage_features_stride=16)
- preprocessed_inputs = tf.random_uniform(
- [1, 299, 299, 3], maxval=255, dtype=tf.float32)
- rpn_feature_map = feature_extractor.extract_proposal_features(
- preprocessed_inputs, scope='TestScope')
- features_shape = tf.shape(rpn_feature_map)
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- features_shape_out = sess.run(features_shape)
- self.assertAllEqual(features_shape_out, [1, 19, 19, 4032])
-
- def test_extract_proposal_features_input_size_224(self):
- feature_extractor = self._build_feature_extractor(
- first_stage_features_stride=16)
- preprocessed_inputs = tf.random_uniform(
- [1, 224, 224, 3], maxval=255, dtype=tf.float32)
- rpn_feature_map = feature_extractor.extract_proposal_features(
- preprocessed_inputs, scope='TestScope')
- features_shape = tf.shape(rpn_feature_map)
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- features_shape_out = sess.run(features_shape)
- self.assertAllEqual(features_shape_out, [1, 14, 14, 4032])
-
- def test_extract_proposal_features_input_size_112(self):
- feature_extractor = self._build_feature_extractor(
- first_stage_features_stride=16)
- preprocessed_inputs = tf.random_uniform(
- [1, 112, 112, 3], maxval=255, dtype=tf.float32)
- rpn_feature_map = feature_extractor.extract_proposal_features(
- preprocessed_inputs, scope='TestScope')
- features_shape = tf.shape(rpn_feature_map)
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- features_shape_out = sess.run(features_shape)
- self.assertAllEqual(features_shape_out, [1, 7, 7, 4032])
-
- def test_extract_proposal_features_dies_on_invalid_stride(self):
- with self.assertRaises(ValueError):
- self._build_feature_extractor(first_stage_features_stride=99)
-
- def test_extract_proposal_features_dies_with_incorrect_rank_inputs(self):
- feature_extractor = self._build_feature_extractor(
- first_stage_features_stride=16)
- preprocessed_inputs = tf.random_uniform(
- [224, 224, 3], maxval=255, dtype=tf.float32)
- with self.assertRaises(ValueError):
- feature_extractor.extract_proposal_features(
- preprocessed_inputs, scope='TestScope')
-
- def test_extract_box_classifier_features_returns_expected_size(self):
- feature_extractor = self._build_feature_extractor(
- first_stage_features_stride=16)
- proposal_feature_maps = tf.random_uniform(
- [2, 17, 17, 1088], maxval=255, dtype=tf.float32)
- proposal_classifier_features = (
- feature_extractor.extract_box_classifier_features(
- proposal_feature_maps, scope='TestScope'))
- features_shape = tf.shape(proposal_classifier_features)
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- features_shape_out = sess.run(features_shape)
- self.assertAllEqual(features_shape_out, [2, 9, 9, 4032])
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/models/faster_rcnn_resnet_v1_feature_extractor.py b/object_detection/models/faster_rcnn_resnet_v1_feature_extractor.py
deleted file mode 100644
index 8575bf33..00000000
--- a/object_detection/models/faster_rcnn_resnet_v1_feature_extractor.py
+++ /dev/null
@@ -1,249 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Resnet V1 Faster R-CNN implementation.
-
-See "Deep Residual Learning for Image Recognition" by He et al., 2015.
-https://arxiv.org/abs/1512.03385
-
-Note: this implementation assumes that the classification checkpoint used
-to finetune this model is trained using the same configuration as that of
-the MSRA provided checkpoints
-(see https://github.com/KaimingHe/deep-residual-networks), e.g., with
-same preprocessing, batch norm scaling, etc.
-"""
-import tensorflow as tf
-
-from object_detection.meta_architectures import faster_rcnn_meta_arch
-from nets import resnet_utils
-from nets import resnet_v1
-
-slim = tf.contrib.slim
-
-
-class FasterRCNNResnetV1FeatureExtractor(
- faster_rcnn_meta_arch.FasterRCNNFeatureExtractor):
- """Faster R-CNN Resnet V1 feature extractor implementation."""
-
- def __init__(self,
- architecture,
- resnet_model,
- is_training,
- first_stage_features_stride,
- batch_norm_trainable=False,
- reuse_weights=None,
- weight_decay=0.0):
- """Constructor.
-
- Args:
- architecture: Architecture name of the Resnet V1 model.
- resnet_model: Definition of the Resnet V1 model.
- is_training: See base class.
- first_stage_features_stride: See base class.
- batch_norm_trainable: See base class.
- reuse_weights: See base class.
- weight_decay: See base class.
-
- Raises:
- ValueError: If `first_stage_features_stride` is not 8 or 16.
- """
- if first_stage_features_stride != 8 and first_stage_features_stride != 16:
- raise ValueError('`first_stage_features_stride` must be 8 or 16.')
- self._architecture = architecture
- self._resnet_model = resnet_model
- super(FasterRCNNResnetV1FeatureExtractor, self).__init__(
- is_training, first_stage_features_stride, batch_norm_trainable,
- reuse_weights, weight_decay)
-
- def preprocess(self, resized_inputs):
- """Faster R-CNN Resnet V1 preprocessing.
-
- VGG style channel mean subtraction as described here:
- https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-md
-
- Args:
- resized_inputs: A [batch, height_in, width_in, channels] float32 tensor
- representing a batch of images with values between 0 and 255.0.
-
- Returns:
- preprocessed_inputs: A [batch, height_out, width_out, channels] float32
- tensor representing a batch of images.
-
- """
- channel_means = [123.68, 116.779, 103.939]
- return resized_inputs - [[channel_means]]
-
- def _extract_proposal_features(self, preprocessed_inputs, scope):
- """Extracts first stage RPN features.
-
- Args:
- preprocessed_inputs: A [batch, height, width, channels] float32 tensor
- representing a batch of images.
- scope: A scope name.
-
- Returns:
- rpn_feature_map: A tensor with shape [batch, height, width, depth]
- Raises:
- InvalidArgumentError: If the spatial size of `preprocessed_inputs`
- (height or width) is less than 33.
- ValueError: If the created network is missing the required activation.
- """
- if len(preprocessed_inputs.get_shape().as_list()) != 4:
- raise ValueError('`preprocessed_inputs` must be 4 dimensional, got a '
- 'tensor of shape %s' % preprocessed_inputs.get_shape())
- shape_assert = tf.Assert(
- tf.logical_and(
- tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
- tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
- ['image size must at least be 33 in both height and width.'])
-
- with tf.control_dependencies([shape_assert]):
- # Disables batchnorm for fine-tuning with smaller batch sizes.
- # TODO: Figure out if it is needed when image batch size is bigger.
- with slim.arg_scope(
- resnet_utils.resnet_arg_scope(
- batch_norm_epsilon=1e-5,
- batch_norm_scale=True,
- weight_decay=self._weight_decay)):
- with tf.variable_scope(
- self._architecture, reuse=self._reuse_weights) as var_scope:
- _, activations = self._resnet_model(
- preprocessed_inputs,
- num_classes=None,
- is_training=self._train_batch_norm,
- global_pool=False,
- output_stride=self._first_stage_features_stride,
- spatial_squeeze=False,
- scope=var_scope)
-
- handle = scope + '/%s/block3' % self._architecture
- return activations[handle]
-
- def _extract_box_classifier_features(self, proposal_feature_maps, scope):
- """Extracts second stage box classifier features.
-
- Args:
- proposal_feature_maps: A 4-D float tensor with shape
- [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
- representing the feature map cropped to each proposal.
- scope: A scope name (unused).
-
- Returns:
- proposal_classifier_features: A 4-D float tensor with shape
- [batch_size * self.max_num_proposals, height, width, depth]
- representing box classifier features for each proposal.
- """
- with tf.variable_scope(self._architecture, reuse=self._reuse_weights):
- with slim.arg_scope(
- resnet_utils.resnet_arg_scope(
- batch_norm_epsilon=1e-5,
- batch_norm_scale=True,
- weight_decay=self._weight_decay)):
- with slim.arg_scope([slim.batch_norm],
- is_training=self._train_batch_norm):
- blocks = [
- resnet_utils.Block('block4', resnet_v1.bottleneck, [{
- 'depth': 2048,
- 'depth_bottleneck': 512,
- 'stride': 1
- }] * 3)
- ]
- proposal_classifier_features = resnet_utils.stack_blocks_dense(
- proposal_feature_maps, blocks)
- return proposal_classifier_features
-
-
-class FasterRCNNResnet50FeatureExtractor(FasterRCNNResnetV1FeatureExtractor):
- """Faster R-CNN Resnet 50 feature extractor implementation."""
-
- def __init__(self,
- is_training,
- first_stage_features_stride,
- batch_norm_trainable=False,
- reuse_weights=None,
- weight_decay=0.0):
- """Constructor.
-
- Args:
- is_training: See base class.
- first_stage_features_stride: See base class.
- batch_norm_trainable: See base class.
- reuse_weights: See base class.
- weight_decay: See base class.
-
- Raises:
- ValueError: If `first_stage_features_stride` is not 8 or 16,
- or if `architecture` is not supported.
- """
- super(FasterRCNNResnet50FeatureExtractor, self).__init__(
- 'resnet_v1_50', resnet_v1.resnet_v1_50, is_training,
- first_stage_features_stride, batch_norm_trainable,
- reuse_weights, weight_decay)
-
-
-class FasterRCNNResnet101FeatureExtractor(FasterRCNNResnetV1FeatureExtractor):
- """Faster R-CNN Resnet 101 feature extractor implementation."""
-
- def __init__(self,
- is_training,
- first_stage_features_stride,
- batch_norm_trainable=False,
- reuse_weights=None,
- weight_decay=0.0):
- """Constructor.
-
- Args:
- is_training: See base class.
- first_stage_features_stride: See base class.
- batch_norm_trainable: See base class.
- reuse_weights: See base class.
- weight_decay: See base class.
-
- Raises:
- ValueError: If `first_stage_features_stride` is not 8 or 16,
- or if `architecture` is not supported.
- """
- super(FasterRCNNResnet101FeatureExtractor, self).__init__(
- 'resnet_v1_101', resnet_v1.resnet_v1_101, is_training,
- first_stage_features_stride, batch_norm_trainable,
- reuse_weights, weight_decay)
-
-
-class FasterRCNNResnet152FeatureExtractor(FasterRCNNResnetV1FeatureExtractor):
- """Faster R-CNN Resnet 152 feature extractor implementation."""
-
- def __init__(self,
- is_training,
- first_stage_features_stride,
- batch_norm_trainable=False,
- reuse_weights=None,
- weight_decay=0.0):
- """Constructor.
-
- Args:
- is_training: See base class.
- first_stage_features_stride: See base class.
- batch_norm_trainable: See base class.
- reuse_weights: See base class.
- weight_decay: See base class.
-
- Raises:
- ValueError: If `first_stage_features_stride` is not 8 or 16,
- or if `architecture` is not supported.
- """
- super(FasterRCNNResnet152FeatureExtractor, self).__init__(
- 'resnet_v1_152', resnet_v1.resnet_v1_152, is_training,
- first_stage_features_stride, batch_norm_trainable,
- reuse_weights, weight_decay)
diff --git a/object_detection/models/faster_rcnn_resnet_v1_feature_extractor_test.py b/object_detection/models/faster_rcnn_resnet_v1_feature_extractor_test.py
deleted file mode 100644
index 8f77ee26..00000000
--- a/object_detection/models/faster_rcnn_resnet_v1_feature_extractor_test.py
+++ /dev/null
@@ -1,137 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.models.faster_rcnn_resnet_v1_feature_extractor."""
-
-import numpy as np
-import tensorflow as tf
-
-from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as faster_rcnn_resnet_v1
-
-
-class FasterRcnnResnetV1FeatureExtractorTest(tf.test.TestCase):
-
- def _build_feature_extractor(self,
- first_stage_features_stride,
- architecture='resnet_v1_101'):
- feature_extractor_map = {
- 'resnet_v1_50':
- faster_rcnn_resnet_v1.FasterRCNNResnet50FeatureExtractor,
- 'resnet_v1_101':
- faster_rcnn_resnet_v1.FasterRCNNResnet101FeatureExtractor,
- 'resnet_v1_152':
- faster_rcnn_resnet_v1.FasterRCNNResnet152FeatureExtractor
- }
- return feature_extractor_map[architecture](
- is_training=False,
- first_stage_features_stride=first_stage_features_stride,
- batch_norm_trainable=False,
- reuse_weights=None,
- weight_decay=0.0)
-
- def test_extract_proposal_features_returns_expected_size(self):
- for architecture in ['resnet_v1_50', 'resnet_v1_101', 'resnet_v1_152']:
- feature_extractor = self._build_feature_extractor(
- first_stage_features_stride=16, architecture=architecture)
- preprocessed_inputs = tf.random_uniform(
- [4, 224, 224, 3], maxval=255, dtype=tf.float32)
- rpn_feature_map = feature_extractor.extract_proposal_features(
- preprocessed_inputs, scope='TestScope')
- features_shape = tf.shape(rpn_feature_map)
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- features_shape_out = sess.run(features_shape)
- self.assertAllEqual(features_shape_out, [4, 14, 14, 1024])
-
- def test_extract_proposal_features_stride_eight(self):
- feature_extractor = self._build_feature_extractor(
- first_stage_features_stride=8)
- preprocessed_inputs = tf.random_uniform(
- [4, 224, 224, 3], maxval=255, dtype=tf.float32)
- rpn_feature_map = feature_extractor.extract_proposal_features(
- preprocessed_inputs, scope='TestScope')
- features_shape = tf.shape(rpn_feature_map)
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- features_shape_out = sess.run(features_shape)
- self.assertAllEqual(features_shape_out, [4, 28, 28, 1024])
-
- def test_extract_proposal_features_half_size_input(self):
- feature_extractor = self._build_feature_extractor(
- first_stage_features_stride=16)
- preprocessed_inputs = tf.random_uniform(
- [1, 112, 112, 3], maxval=255, dtype=tf.float32)
- rpn_feature_map = feature_extractor.extract_proposal_features(
- preprocessed_inputs, scope='TestScope')
- features_shape = tf.shape(rpn_feature_map)
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- features_shape_out = sess.run(features_shape)
- self.assertAllEqual(features_shape_out, [1, 7, 7, 1024])
-
- def test_extract_proposal_features_dies_on_invalid_stride(self):
- with self.assertRaises(ValueError):
- self._build_feature_extractor(first_stage_features_stride=99)
-
- def test_extract_proposal_features_dies_on_very_small_images(self):
- feature_extractor = self._build_feature_extractor(
- first_stage_features_stride=16)
- preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
- rpn_feature_map = feature_extractor.extract_proposal_features(
- preprocessed_inputs, scope='TestScope')
- features_shape = tf.shape(rpn_feature_map)
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- with self.assertRaises(tf.errors.InvalidArgumentError):
- sess.run(
- features_shape,
- feed_dict={preprocessed_inputs: np.random.rand(4, 32, 32, 3)})
-
- def test_extract_proposal_features_dies_with_incorrect_rank_inputs(self):
- feature_extractor = self._build_feature_extractor(
- first_stage_features_stride=16)
- preprocessed_inputs = tf.random_uniform(
- [224, 224, 3], maxval=255, dtype=tf.float32)
- with self.assertRaises(ValueError):
- feature_extractor.extract_proposal_features(
- preprocessed_inputs, scope='TestScope')
-
- def test_extract_box_classifier_features_returns_expected_size(self):
- feature_extractor = self._build_feature_extractor(
- first_stage_features_stride=16)
- proposal_feature_maps = tf.random_uniform(
- [3, 7, 7, 1024], maxval=255, dtype=tf.float32)
- proposal_classifier_features = (
- feature_extractor.extract_box_classifier_features(
- proposal_feature_maps, scope='TestScope'))
- features_shape = tf.shape(proposal_classifier_features)
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- features_shape_out = sess.run(features_shape)
- self.assertAllEqual(features_shape_out, [3, 7, 7, 2048])
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/models/feature_map_generators.py b/object_detection/models/feature_map_generators.py
deleted file mode 100644
index 8eb7e621..00000000
--- a/object_detection/models/feature_map_generators.py
+++ /dev/null
@@ -1,170 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Functions to generate a list of feature maps based on image features.
-
-Provides several feature map generators that can be used to build object
-detection feature extractors.
-
-Object detection feature extractors usually are built by stacking two components
-- A base feature extractor such as Inception V3 and a feature map generator.
-Feature map generators build on the base feature extractors and produce a list
-of final feature maps.
-"""
-import collections
-import tensorflow as tf
-slim = tf.contrib.slim
-
-
-def get_depth_fn(depth_multiplier, min_depth):
- """Builds a callable to compute depth (output channels) of conv filters.
-
- Args:
- depth_multiplier: a multiplier for the nominal depth.
- min_depth: a lower bound on the depth of filters.
-
- Returns:
- A callable that takes in a nominal depth and returns the depth to use.
- """
- def multiply_depth(depth):
- new_depth = int(depth * depth_multiplier)
- return max(new_depth, min_depth)
- return multiply_depth
-
-
-def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
- min_depth, insert_1x1_conv, image_features):
- """Generates multi resolution feature maps from input image features.
-
- Generates multi-scale feature maps for detection as in the SSD papers by
- Liu et al: https://arxiv.org/pdf/1512.02325v2.pdf, See Sec 2.1.
-
- More specifically, it performs the following two tasks:
- 1) If a layer name is provided in the configuration, returns that layer as a
- feature map.
- 2) If a layer name is left as an empty string, constructs a new feature map
- based on the spatial shape and depth configuration. Note that the current
- implementation only supports generating new layers using convolution of
- stride 2 resulting in a spatial resolution reduction by a factor of 2.
- By default convolution kernel size is set to 3, and it can be customized
- by caller.
-
- An example of the configuration for Inception V3:
- {
- 'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
- 'layer_depth': [-1, -1, -1, 512, 256, 128]
- }
-
- Args:
- feature_map_layout: Dictionary of specifications for the feature map
- layouts in the following format (Inception V2/V3 respectively):
- {
- 'from_layer': ['Mixed_3c', 'Mixed_4c', 'Mixed_5c', '', '', ''],
- 'layer_depth': [-1, -1, -1, 512, 256, 128]
- }
- or
- {
- 'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', '', ''],
- 'layer_depth': [-1, -1, -1, 512, 256, 128]
- }
- If 'from_layer' is specified, the specified feature map is directly used
- as a box predictor layer, and the layer_depth is directly infered from the
- feature map (instead of using the provided 'layer_depth' parameter). In
- this case, our convention is to set 'layer_depth' to -1 for clarity.
- Otherwise, if 'from_layer' is an empty string, then the box predictor
- layer will be built from the previous layer using convolution operations.
- Note that the current implementation only supports generating new layers
- using convolutions of stride 2 (resulting in a spatial resolution
- reduction by a factor of 2), and will be extended to a more flexible
- design. Convolution kernel size is set to 3 by default, and can be
- customized by 'conv_kernel_size' parameter (similarily, 'conv_kernel_size'
- should be set to -1 if 'from_layer' is specified). The created convolution
- operation will be a normal 2D convolution by default, and a depthwise
- convolution followed by 1x1 convolution if 'use_depthwise' is set to True.
- depth_multiplier: Depth multiplier for convolutional layers.
- min_depth: Minimum depth for convolutional layers.
- insert_1x1_conv: A boolean indicating whether an additional 1x1 convolution
- should be inserted before shrinking the feature map.
- image_features: A dictionary of handles to activation tensors from the
- base feature extractor.
-
- Returns:
- feature_maps: an OrderedDict mapping keys (feature map names) to
- tensors where each tensor has shape [batch, height_i, width_i, depth_i].
-
- Raises:
- ValueError: if the number entries in 'from_layer' and
- 'layer_depth' do not match.
- ValueError: if the generated layer does not have the same resolution
- as specified.
- """
- depth_fn = get_depth_fn(depth_multiplier, min_depth)
-
- feature_map_keys = []
- feature_maps = []
- base_from_layer = ''
- use_depthwise = False
- if 'use_depthwise' in feature_map_layout:
- use_depthwise = feature_map_layout['use_depthwise']
- for index, from_layer in enumerate(feature_map_layout['from_layer']):
- layer_depth = feature_map_layout['layer_depth'][index]
- conv_kernel_size = 3
- if 'conv_kernel_size' in feature_map_layout:
- conv_kernel_size = feature_map_layout['conv_kernel_size'][index]
- if from_layer:
- feature_map = image_features[from_layer]
- base_from_layer = from_layer
- feature_map_keys.append(from_layer)
- else:
- pre_layer = feature_maps[-1]
- intermediate_layer = pre_layer
- if insert_1x1_conv:
- layer_name = '{}_1_Conv2d_{}_1x1_{}'.format(
- base_from_layer, index, depth_fn(layer_depth / 2))
- intermediate_layer = slim.conv2d(
- pre_layer,
- depth_fn(layer_depth / 2), [1, 1],
- padding='SAME',
- stride=1,
- scope=layer_name)
- stride = 2
- layer_name = '{}_2_Conv2d_{}_{}x{}_s2_{}'.format(
- base_from_layer, index, conv_kernel_size, conv_kernel_size,
- depth_fn(layer_depth))
- if use_depthwise:
- feature_map = slim.separable_conv2d(
- intermediate_layer,
- None, [conv_kernel_size, conv_kernel_size],
- depth_multiplier=1,
- padding='SAME',
- stride=stride,
- scope=layer_name + '_depthwise')
- feature_map = slim.conv2d(
- feature_map,
- depth_fn(layer_depth), [1, 1],
- padding='SAME',
- stride=1,
- scope=layer_name)
- else:
- feature_map = slim.conv2d(
- intermediate_layer,
- depth_fn(layer_depth), [conv_kernel_size, conv_kernel_size],
- padding='SAME',
- stride=stride,
- scope=layer_name)
- feature_map_keys.append(layer_name)
- feature_maps.append(feature_map)
- return collections.OrderedDict(
- [(x, y) for (x, y) in zip(feature_map_keys, feature_maps)])
diff --git a/object_detection/models/feature_map_generators_test.py b/object_detection/models/feature_map_generators_test.py
deleted file mode 100644
index cb69f0e4..00000000
--- a/object_detection/models/feature_map_generators_test.py
+++ /dev/null
@@ -1,151 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for feature map generators."""
-
-import tensorflow as tf
-
-from object_detection.models import feature_map_generators
-
-INCEPTION_V2_LAYOUT = {
- 'from_layer': ['Mixed_3c', 'Mixed_4c', 'Mixed_5c', '', '', ''],
- 'layer_depth': [-1, -1, -1, 512, 256, 256],
- 'anchor_strides': [16, 32, 64, -1, -1, -1],
- 'layer_target_norm': [20.0, -1, -1, -1, -1, -1],
-}
-
-INCEPTION_V3_LAYOUT = {
- 'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
- 'layer_depth': [-1, -1, -1, 512, 256, 128],
- 'anchor_strides': [16, 32, 64, -1, -1, -1],
- 'aspect_ratios': [1.0, 2.0, 1.0/2, 3.0, 1.0/3]
-}
-
-EMBEDDED_SSD_MOBILENET_V1_LAYOUT = {
- 'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', ''],
- 'layer_depth': [-1, -1, 512, 256, 256],
- 'conv_kernel_size': [-1, -1, 3, 3, 2],
-}
-
-
-# TODO(rathodv): add tests with different anchor strides.
-class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
-
- def test_get_expected_feature_map_shapes_with_inception_v2(self):
- image_features = {
- 'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
- 'Mixed_4c': tf.random_uniform([4, 14, 14, 576], dtype=tf.float32),
- 'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32)
- }
- feature_maps = feature_map_generators.multi_resolution_feature_maps(
- feature_map_layout=INCEPTION_V2_LAYOUT,
- depth_multiplier=1,
- min_depth=32,
- insert_1x1_conv=True,
- image_features=image_features)
-
- expected_feature_map_shapes = {
- 'Mixed_3c': (4, 28, 28, 256),
- 'Mixed_4c': (4, 14, 14, 576),
- 'Mixed_5c': (4, 7, 7, 1024),
- 'Mixed_5c_2_Conv2d_3_3x3_s2_512': (4, 4, 4, 512),
- 'Mixed_5c_2_Conv2d_4_3x3_s2_256': (4, 2, 2, 256),
- 'Mixed_5c_2_Conv2d_5_3x3_s2_256': (4, 1, 1, 256)}
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- out_feature_maps = sess.run(feature_maps)
- out_feature_map_shapes = dict(
- (key, value.shape) for key, value in out_feature_maps.items())
- self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
-
- def test_get_expected_feature_map_shapes_with_inception_v3(self):
- image_features = {
- 'Mixed_5d': tf.random_uniform([4, 35, 35, 256], dtype=tf.float32),
- 'Mixed_6e': tf.random_uniform([4, 17, 17, 576], dtype=tf.float32),
- 'Mixed_7c': tf.random_uniform([4, 8, 8, 1024], dtype=tf.float32)
- }
-
- feature_maps = feature_map_generators.multi_resolution_feature_maps(
- feature_map_layout=INCEPTION_V3_LAYOUT,
- depth_multiplier=1,
- min_depth=32,
- insert_1x1_conv=True,
- image_features=image_features)
-
- expected_feature_map_shapes = {
- 'Mixed_5d': (4, 35, 35, 256),
- 'Mixed_6e': (4, 17, 17, 576),
- 'Mixed_7c': (4, 8, 8, 1024),
- 'Mixed_7c_2_Conv2d_3_3x3_s2_512': (4, 4, 4, 512),
- 'Mixed_7c_2_Conv2d_4_3x3_s2_256': (4, 2, 2, 256),
- 'Mixed_7c_2_Conv2d_5_3x3_s2_128': (4, 1, 1, 128)}
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- out_feature_maps = sess.run(feature_maps)
- out_feature_map_shapes = dict(
- (key, value.shape) for key, value in out_feature_maps.items())
- self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
-
- def test_get_expected_feature_map_shapes_with_embedded_ssd_mobilenet_v1(
- self):
- image_features = {
- 'Conv2d_11_pointwise': tf.random_uniform([4, 16, 16, 512],
- dtype=tf.float32),
- 'Conv2d_13_pointwise': tf.random_uniform([4, 8, 8, 1024],
- dtype=tf.float32),
- }
-
- feature_maps = feature_map_generators.multi_resolution_feature_maps(
- feature_map_layout=EMBEDDED_SSD_MOBILENET_V1_LAYOUT,
- depth_multiplier=1,
- min_depth=32,
- insert_1x1_conv=True,
- image_features=image_features)
-
- expected_feature_map_shapes = {
- 'Conv2d_11_pointwise': (4, 16, 16, 512),
- 'Conv2d_13_pointwise': (4, 8, 8, 1024),
- 'Conv2d_13_pointwise_2_Conv2d_2_3x3_s2_512': (4, 4, 4, 512),
- 'Conv2d_13_pointwise_2_Conv2d_3_3x3_s2_256': (4, 2, 2, 256),
- 'Conv2d_13_pointwise_2_Conv2d_4_2x2_s2_256': (4, 1, 1, 256)}
-
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- out_feature_maps = sess.run(feature_maps)
- out_feature_map_shapes = dict(
- (key, value.shape) for key, value in out_feature_maps.items())
- self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
-
-
-class GetDepthFunctionTest(tf.test.TestCase):
-
- def test_return_min_depth_when_multiplier_is_small(self):
- depth_fn = feature_map_generators.get_depth_fn(depth_multiplier=0.5,
- min_depth=16)
- self.assertEqual(depth_fn(16), 16)
-
- def test_return_correct_depth_with_multiplier(self):
- depth_fn = feature_map_generators.get_depth_fn(depth_multiplier=0.5,
- min_depth=16)
- self.assertEqual(depth_fn(64), 32)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/models/ssd_feature_extractor_test.py b/object_detection/models/ssd_feature_extractor_test.py
deleted file mode 100644
index 0b3da468..00000000
--- a/object_detection/models/ssd_feature_extractor_test.py
+++ /dev/null
@@ -1,94 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Base test class SSDFeatureExtractors."""
-
-from abc import abstractmethod
-
-import numpy as np
-import tensorflow as tf
-
-
-class SsdFeatureExtractorTestBase(object):
-
- def _validate_features_shape(self,
- feature_extractor,
- preprocessed_inputs,
- expected_feature_map_shapes):
- """Checks the extracted features are of correct shape.
-
- Args:
- feature_extractor: The feature extractor to test.
- preprocessed_inputs: A [batch, height, width, 3] tensor to extract
- features with.
- expected_feature_map_shapes: The expected shape of the extracted features.
- """
- feature_maps = feature_extractor.extract_features(preprocessed_inputs)
- feature_map_shapes = [tf.shape(feature_map) for feature_map in feature_maps]
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- feature_map_shapes_out = sess.run(feature_map_shapes)
- for shape_out, exp_shape_out in zip(
- feature_map_shapes_out, expected_feature_map_shapes):
- self.assertAllEqual(shape_out, exp_shape_out)
-
- @abstractmethod
- def _create_feature_extractor(self, depth_multiplier, pad_to_multiple):
- """Constructs a new feature extractor.
-
- Args:
- depth_multiplier: float depth multiplier for feature extractor
- pad_to_multiple: the nearest multiple to zero pad the input height and
- width dimensions to.
- Returns:
- an ssd_meta_arch.SSDFeatureExtractor object.
- """
- pass
-
- def check_extract_features_returns_correct_shape(
- self, image_height, image_width, depth_multiplier, pad_to_multiple,
- expected_feature_map_shapes_out):
- feature_extractor = self._create_feature_extractor(depth_multiplier,
- pad_to_multiple)
- preprocessed_inputs = tf.random_uniform(
- [4, image_height, image_width, 3], dtype=tf.float32)
- self._validate_features_shape(
- feature_extractor, preprocessed_inputs, expected_feature_map_shapes_out)
-
- def check_extract_features_raises_error_with_invalid_image_size(
- self, image_height, image_width, depth_multiplier, pad_to_multiple):
- feature_extractor = self._create_feature_extractor(depth_multiplier,
- pad_to_multiple)
- preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
- feature_maps = feature_extractor.extract_features(preprocessed_inputs)
- test_preprocessed_image = np.random.rand(4, image_height, image_width, 3)
- with self.test_session() as sess:
- sess.run(tf.global_variables_initializer())
- with self.assertRaises(tf.errors.InvalidArgumentError):
- sess.run(feature_maps,
- feed_dict={preprocessed_inputs: test_preprocessed_image})
-
- def check_feature_extractor_variables_under_scope(
- self, depth_multiplier, pad_to_multiple, scope_name):
- g = tf.Graph()
- with g.as_default():
- feature_extractor = self._create_feature_extractor(
- depth_multiplier, pad_to_multiple)
- preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
- feature_extractor.extract_features(preprocessed_inputs)
- variables = g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
- for variable in variables:
- self.assertTrue(variable.name.startswith(scope_name))
diff --git a/object_detection/models/ssd_inception_v2_feature_extractor.py b/object_detection/models/ssd_inception_v2_feature_extractor.py
deleted file mode 100644
index d1685d7f..00000000
--- a/object_detection/models/ssd_inception_v2_feature_extractor.py
+++ /dev/null
@@ -1,111 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""SSDFeatureExtractor for InceptionV2 features."""
-import tensorflow as tf
-
-from object_detection.meta_architectures import ssd_meta_arch
-from object_detection.models import feature_map_generators
-from object_detection.utils import ops
-from nets import inception_v2
-
-slim = tf.contrib.slim
-
-
-class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
- """SSD Feature Extractor using InceptionV2 features."""
-
- def __init__(self,
- is_training,
- depth_multiplier,
- min_depth,
- pad_to_multiple,
- conv_hyperparams,
- batch_norm_trainable=True,
- reuse_weights=None):
- """InceptionV2 Feature Extractor for SSD Models.
-
- Args:
- is_training: whether the network is in training mode.
- depth_multiplier: float depth multiplier for feature extractor.
- min_depth: minimum feature extractor depth.
- pad_to_multiple: the nearest multiple to zero pad the input height and
- width dimensions to.
- conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
- batch_norm_trainable: Whether to update batch norm parameters during
- training or not. When training with a small batch size
- (e.g. 1), it is desirable to disable batch norm update and use
- pretrained batch norm params.
- reuse_weights: Whether to reuse variables. Default is None.
- """
- super(SSDInceptionV2FeatureExtractor, self).__init__(
- is_training, depth_multiplier, min_depth, pad_to_multiple,
- conv_hyperparams, batch_norm_trainable, reuse_weights)
-
- def preprocess(self, resized_inputs):
- """SSD preprocessing.
-
- Maps pixel values to the range [-1, 1].
-
- Args:
- resized_inputs: a [batch, height, width, channels] float tensor
- representing a batch of images.
-
- Returns:
- preprocessed_inputs: a [batch, height, width, channels] float tensor
- representing a batch of images.
- """
- return (2.0 / 255.0) * resized_inputs - 1.0
-
- def extract_features(self, preprocessed_inputs):
- """Extract features from preprocessed inputs.
-
- Args:
- preprocessed_inputs: a [batch, height, width, channels] float tensor
- representing a batch of images.
-
- Returns:
- feature_maps: a list of tensors where the ith tensor has shape
- [batch, height_i, width_i, depth_i]
- """
- preprocessed_inputs.get_shape().assert_has_rank(4)
- shape_assert = tf.Assert(
- tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
- tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
- ['image size must at least be 33 in both height and width.'])
-
- feature_map_layout = {
- 'from_layer': ['Mixed_4c', 'Mixed_5c', '', '', '', ''],
- 'layer_depth': [-1, -1, 512, 256, 256, 128],
- }
-
- with tf.control_dependencies([shape_assert]):
- with slim.arg_scope(self._conv_hyperparams):
- with tf.variable_scope('InceptionV2',
- reuse=self._reuse_weights) as scope:
- _, image_features = inception_v2.inception_v2_base(
- ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
- final_endpoint='Mixed_5c',
- min_depth=self._min_depth,
- depth_multiplier=self._depth_multiplier,
- scope=scope)
- feature_maps = feature_map_generators.multi_resolution_feature_maps(
- feature_map_layout=feature_map_layout,
- depth_multiplier=self._depth_multiplier,
- min_depth=self._min_depth,
- insert_1x1_conv=True,
- image_features=image_features)
-
- return feature_maps.values()
diff --git a/object_detection/models/ssd_inception_v2_feature_extractor_test.py b/object_detection/models/ssd_inception_v2_feature_extractor_test.py
deleted file mode 100644
index b265ccb0..00000000
--- a/object_detection/models/ssd_inception_v2_feature_extractor_test.py
+++ /dev/null
@@ -1,123 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.models.ssd_inception_v2_feature_extractor."""
-import numpy as np
-import tensorflow as tf
-
-from object_detection.models import ssd_feature_extractor_test
-from object_detection.models import ssd_inception_v2_feature_extractor
-
-
-class SsdInceptionV2FeatureExtractorTest(
- ssd_feature_extractor_test.SsdFeatureExtractorTestBase, tf.test.TestCase):
-
- def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
- is_training=True, batch_norm_trainable=True):
- """Constructs a SsdInceptionV2FeatureExtractor.
-
- Args:
- depth_multiplier: float depth multiplier for feature extractor
- pad_to_multiple: the nearest multiple to zero pad the input height and
- width dimensions to.
- is_training: whether the network is in training mode.
- batch_norm_trainable: Whether to update batch norm parameters during
- training or not
- Returns:
- an ssd_inception_v2_feature_extractor.SsdInceptionV2FeatureExtractor.
- """
- min_depth = 32
- conv_hyperparams = {}
- return ssd_inception_v2_feature_extractor.SSDInceptionV2FeatureExtractor(
- is_training, depth_multiplier, min_depth, pad_to_multiple,
- conv_hyperparams, batch_norm_trainable)
-
- def test_extract_features_returns_correct_shapes_128(self):
- image_height = 128
- image_width = 128
- depth_multiplier = 1.0
- pad_to_multiple = 1
- expected_feature_map_shape = [(4, 8, 8, 576), (4, 4, 4, 1024),
- (4, 2, 2, 512), (4, 1, 1, 256),
- (4, 1, 1, 256), (4, 1, 1, 128)]
- self.check_extract_features_returns_correct_shape(
- image_height, image_width, depth_multiplier, pad_to_multiple,
- expected_feature_map_shape)
-
- def test_extract_features_returns_correct_shapes_299(self):
- image_height = 299
- image_width = 299
- depth_multiplier = 1.0
- pad_to_multiple = 1
- expected_feature_map_shape = [(4, 19, 19, 576), (4, 10, 10, 1024),
- (4, 5, 5, 512), (4, 3, 3, 256),
- (4, 2, 2, 256), (4, 1, 1, 128)]
- self.check_extract_features_returns_correct_shape(
- image_height, image_width, depth_multiplier, pad_to_multiple,
- expected_feature_map_shape)
-
- def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
- image_height = 299
- image_width = 299
- depth_multiplier = 0.5**12
- pad_to_multiple = 1
- expected_feature_map_shape = [(4, 19, 19, 128), (4, 10, 10, 128),
- (4, 5, 5, 32), (4, 3, 3, 32),
- (4, 2, 2, 32), (4, 1, 1, 32)]
- self.check_extract_features_returns_correct_shape(
- image_height, image_width, depth_multiplier, pad_to_multiple,
- expected_feature_map_shape)
-
- def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
- image_height = 299
- image_width = 299
- depth_multiplier = 1.0
- pad_to_multiple = 32
- expected_feature_map_shape = [(4, 20, 20, 576), (4, 10, 10, 1024),
- (4, 5, 5, 512), (4, 3, 3, 256),
- (4, 2, 2, 256), (4, 1, 1, 128)]
- self.check_extract_features_returns_correct_shape(
- image_height, image_width, depth_multiplier, pad_to_multiple,
- expected_feature_map_shape)
-
- def test_extract_features_raises_error_with_invalid_image_size(self):
- image_height = 32
- image_width = 32
- depth_multiplier = 1.0
- pad_to_multiple = 1
- self.check_extract_features_raises_error_with_invalid_image_size(
- image_height, image_width, depth_multiplier, pad_to_multiple)
-
- def test_preprocess_returns_correct_value_range(self):
- image_height = 128
- image_width = 128
- depth_multiplier = 1
- pad_to_multiple = 1
- test_image = np.random.rand(4, image_height, image_width, 3)
- feature_extractor = self._create_feature_extractor(depth_multiplier,
- pad_to_multiple)
- preprocessed_image = feature_extractor.preprocess(test_image)
- self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))
-
- def test_variables_only_created_in_scope(self):
- depth_multiplier = 1
- pad_to_multiple = 1
- scope_name = 'InceptionV2'
- self.check_feature_extractor_variables_under_scope(
- depth_multiplier, pad_to_multiple, scope_name)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/models/ssd_inception_v3_feature_extractor.py b/object_detection/models/ssd_inception_v3_feature_extractor.py
deleted file mode 100644
index 3a782eb2..00000000
--- a/object_detection/models/ssd_inception_v3_feature_extractor.py
+++ /dev/null
@@ -1,111 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""SSDFeatureExtractor for InceptionV3 features."""
-import tensorflow as tf
-
-from object_detection.meta_architectures import ssd_meta_arch
-from object_detection.models import feature_map_generators
-from object_detection.utils import ops
-from nets import inception_v3
-
-slim = tf.contrib.slim
-
-
-class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
- """SSD Feature Extractor using InceptionV3 features."""
-
- def __init__(self,
- is_training,
- depth_multiplier,
- min_depth,
- pad_to_multiple,
- conv_hyperparams,
- batch_norm_trainable=True,
- reuse_weights=None):
- """InceptionV3 Feature Extractor for SSD Models.
-
- Args:
- is_training: whether the network is in training mode.
- depth_multiplier: float depth multiplier for feature extractor.
- min_depth: minimum feature extractor depth.
- pad_to_multiple: the nearest multiple to zero pad the input height and
- width dimensions to.
- conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
- batch_norm_trainable: Whether to update batch norm parameters during
- training or not. When training with a small batch size
- (e.g. 1), it is desirable to disable batch norm update and use
- pretrained batch norm params.
- reuse_weights: Whether to reuse variables. Default is None.
- """
- super(SSDInceptionV3FeatureExtractor, self).__init__(
- is_training, depth_multiplier, min_depth, pad_to_multiple,
- conv_hyperparams, batch_norm_trainable, reuse_weights)
-
- def preprocess(self, resized_inputs):
- """SSD preprocessing.
-
- Maps pixel values to the range [-1, 1].
-
- Args:
- resized_inputs: a [batch, height, width, channels] float tensor
- representing a batch of images.
-
- Returns:
- preprocessed_inputs: a [batch, height, width, channels] float tensor
- representing a batch of images.
- """
- return (2.0 / 255.0) * resized_inputs - 1.0
-
- def extract_features(self, preprocessed_inputs):
- """Extract features from preprocessed inputs.
-
- Args:
- preprocessed_inputs: a [batch, height, width, channels] float tensor
- representing a batch of images.
-
- Returns:
- feature_maps: a list of tensors where the ith tensor has shape
- [batch, height_i, width_i, depth_i]
- """
- preprocessed_inputs.get_shape().assert_has_rank(4)
- shape_assert = tf.Assert(
- tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
- tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
- ['image size must at least be 33 in both height and width.'])
-
- feature_map_layout = {
- 'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
- 'layer_depth': [-1, -1, -1, 512, 256, 128],
- }
-
- with tf.control_dependencies([shape_assert]):
- with slim.arg_scope(self._conv_hyperparams):
- with tf.variable_scope('InceptionV3',
- reuse=self._reuse_weights) as scope:
- _, image_features = inception_v3.inception_v3_base(
- ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
- final_endpoint='Mixed_7c',
- min_depth=self._min_depth,
- depth_multiplier=self._depth_multiplier,
- scope=scope)
- feature_maps = feature_map_generators.multi_resolution_feature_maps(
- feature_map_layout=feature_map_layout,
- depth_multiplier=self._depth_multiplier,
- min_depth=self._min_depth,
- insert_1x1_conv=True,
- image_features=image_features)
-
- return feature_maps.values()
diff --git a/object_detection/models/ssd_inception_v3_feature_extractor_test.py b/object_detection/models/ssd_inception_v3_feature_extractor_test.py
deleted file mode 100644
index 89c1a288..00000000
--- a/object_detection/models/ssd_inception_v3_feature_extractor_test.py
+++ /dev/null
@@ -1,123 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.models.ssd_inception_v3_feature_extractor."""
-import numpy as np
-import tensorflow as tf
-
-from object_detection.models import ssd_feature_extractor_test
-from object_detection.models import ssd_inception_v3_feature_extractor
-
-
-class SsdInceptionV3FeatureExtractorTest(
- ssd_feature_extractor_test.SsdFeatureExtractorTestBase, tf.test.TestCase):
-
- def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
- is_training=True, batch_norm_trainable=True):
- """Constructs a SsdInceptionV3FeatureExtractor.
-
- Args:
- depth_multiplier: float depth multiplier for feature extractor
- pad_to_multiple: the nearest multiple to zero pad the input height and
- width dimensions to.
- is_training: whether the network is in training mode.
- batch_norm_trainable: Whether to update batch norm parameters during
- training or not
- Returns:
- an ssd_inception_v3_feature_extractor.SsdInceptionV3FeatureExtractor.
- """
- min_depth = 32
- conv_hyperparams = {}
- return ssd_inception_v3_feature_extractor.SSDInceptionV3FeatureExtractor(
- is_training, depth_multiplier, min_depth, pad_to_multiple,
- conv_hyperparams, batch_norm_trainable)
-
- def test_extract_features_returns_correct_shapes_128(self):
- image_height = 128
- image_width = 128
- depth_multiplier = 1.0
- pad_to_multiple = 1
- expected_feature_map_shape = [(4, 13, 13, 288), (4, 6, 6, 768),
- (4, 2, 2, 2048), (4, 1, 1, 512),
- (4, 1, 1, 256), (4, 1, 1, 128)]
- self.check_extract_features_returns_correct_shape(
- image_height, image_width, depth_multiplier, pad_to_multiple,
- expected_feature_map_shape)
-
- def test_extract_features_returns_correct_shapes_299(self):
- image_height = 299
- image_width = 299
- depth_multiplier = 1.0
- pad_to_multiple = 1
- expected_feature_map_shape = [(4, 35, 35, 288), (4, 17, 17, 768),
- (4, 8, 8, 2048), (4, 4, 4, 512),
- (4, 2, 2, 256), (4, 1, 1, 128)]
- self.check_extract_features_returns_correct_shape(
- image_height, image_width, depth_multiplier, pad_to_multiple,
- expected_feature_map_shape)
-
- def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
- image_height = 299
- image_width = 299
- depth_multiplier = 0.5**12
- pad_to_multiple = 1
- expected_feature_map_shape = [(4, 35, 35, 128), (4, 17, 17, 128),
- (4, 8, 8, 192), (4, 4, 4, 32),
- (4, 2, 2, 32), (4, 1, 1, 32)]
- self.check_extract_features_returns_correct_shape(
- image_height, image_width, depth_multiplier, pad_to_multiple,
- expected_feature_map_shape)
-
- def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
- image_height = 299
- image_width = 299
- depth_multiplier = 1.0
- pad_to_multiple = 32
- expected_feature_map_shape = [(4, 37, 37, 288), (4, 18, 18, 768),
- (4, 8, 8, 2048), (4, 4, 4, 512),
- (4, 2, 2, 256), (4, 1, 1, 128)]
- self.check_extract_features_returns_correct_shape(
- image_height, image_width, depth_multiplier, pad_to_multiple,
- expected_feature_map_shape)
-
- def test_extract_features_raises_error_with_invalid_image_size(self):
- image_height = 32
- image_width = 32
- depth_multiplier = 1.0
- pad_to_multiple = 1
- self.check_extract_features_raises_error_with_invalid_image_size(
- image_height, image_width, depth_multiplier, pad_to_multiple)
-
- def test_preprocess_returns_correct_value_range(self):
- image_height = 128
- image_width = 128
- depth_multiplier = 1
- pad_to_multiple = 1
- test_image = np.random.rand(4, image_height, image_width, 3)
- feature_extractor = self._create_feature_extractor(depth_multiplier,
- pad_to_multiple)
- preprocessed_image = feature_extractor.preprocess(test_image)
- self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))
-
- def test_variables_only_created_in_scope(self):
- depth_multiplier = 1
- pad_to_multiple = 1
- scope_name = 'InceptionV3'
- self.check_feature_extractor_variables_under_scope(
- depth_multiplier, pad_to_multiple, scope_name)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/models/ssd_mobilenet_v1_feature_extractor.py b/object_detection/models/ssd_mobilenet_v1_feature_extractor.py
deleted file mode 100644
index 456e2d1d..00000000
--- a/object_detection/models/ssd_mobilenet_v1_feature_extractor.py
+++ /dev/null
@@ -1,114 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""SSDFeatureExtractor for MobilenetV1 features."""
-
-import tensorflow as tf
-
-from object_detection.meta_architectures import ssd_meta_arch
-from object_detection.models import feature_map_generators
-from object_detection.utils import ops
-from nets import mobilenet_v1
-
-slim = tf.contrib.slim
-
-
-class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
- """SSD Feature Extractor using MobilenetV1 features."""
-
- def __init__(self,
- is_training,
- depth_multiplier,
- min_depth,
- pad_to_multiple,
- conv_hyperparams,
- batch_norm_trainable=True,
- reuse_weights=None):
- """MobileNetV1 Feature Extractor for SSD Models.
-
- Args:
- is_training: whether the network is in training mode.
- depth_multiplier: float depth multiplier for feature extractor.
- min_depth: minimum feature extractor depth.
- pad_to_multiple: the nearest multiple to zero pad the input height and
- width dimensions to.
- conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
- batch_norm_trainable: Whether to update batch norm parameters during
- training or not. When training with a small batch size
- (e.g. 1), it is desirable to disable batch norm update and use
- pretrained batch norm params.
- reuse_weights: Whether to reuse variables. Default is None.
- """
- super(SSDMobileNetV1FeatureExtractor, self).__init__(
- is_training, depth_multiplier, min_depth, pad_to_multiple,
- conv_hyperparams, batch_norm_trainable, reuse_weights)
-
- def preprocess(self, resized_inputs):
- """SSD preprocessing.
-
- Maps pixel values to the range [-1, 1].
-
- Args:
- resized_inputs: a [batch, height, width, channels] float tensor
- representing a batch of images.
-
- Returns:
- preprocessed_inputs: a [batch, height, width, channels] float tensor
- representing a batch of images.
- """
- return (2.0 / 255.0) * resized_inputs - 1.0
-
- def extract_features(self, preprocessed_inputs):
- """Extract features from preprocessed inputs.
-
- Args:
- preprocessed_inputs: a [batch, height, width, channels] float tensor
- representing a batch of images.
-
- Returns:
- feature_maps: a list of tensors where the ith tensor has shape
- [batch, height_i, width_i, depth_i]
- """
- preprocessed_inputs.get_shape().assert_has_rank(4)
- shape_assert = tf.Assert(
- tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
- tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
- ['image size must at least be 33 in both height and width.'])
-
- feature_map_layout = {
- 'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '',
- '', ''],
- 'layer_depth': [-1, -1, 512, 256, 256, 128],
- }
-
- with tf.control_dependencies([shape_assert]):
- with slim.arg_scope(self._conv_hyperparams):
- with slim.arg_scope([slim.batch_norm], fused=False):
- with tf.variable_scope('MobilenetV1',
- reuse=self._reuse_weights) as scope:
- _, image_features = mobilenet_v1.mobilenet_v1_base(
- ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
- final_endpoint='Conv2d_13_pointwise',
- min_depth=self._min_depth,
- depth_multiplier=self._depth_multiplier,
- scope=scope)
- feature_maps = feature_map_generators.multi_resolution_feature_maps(
- feature_map_layout=feature_map_layout,
- depth_multiplier=self._depth_multiplier,
- min_depth=self._min_depth,
- insert_1x1_conv=True,
- image_features=image_features)
-
- return feature_maps.values()
diff --git a/object_detection/models/ssd_mobilenet_v1_feature_extractor_test.py b/object_detection/models/ssd_mobilenet_v1_feature_extractor_test.py
deleted file mode 100644
index 9159ceb1..00000000
--- a/object_detection/models/ssd_mobilenet_v1_feature_extractor_test.py
+++ /dev/null
@@ -1,139 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for ssd_mobilenet_v1_feature_extractor."""
-import numpy as np
-import tensorflow as tf
-
-from object_detection.models import ssd_feature_extractor_test
-from object_detection.models import ssd_mobilenet_v1_feature_extractor
-
-slim = tf.contrib.slim
-
-
-class SsdMobilenetV1FeatureExtractorTest(
- ssd_feature_extractor_test.SsdFeatureExtractorTestBase, tf.test.TestCase):
-
- def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
- is_training=True, batch_norm_trainable=True):
- """Constructs a new feature extractor.
-
- Args:
- depth_multiplier: float depth multiplier for feature extractor
- pad_to_multiple: the nearest multiple to zero pad the input height and
- width dimensions to.
- is_training: whether the network is in training mode.
- batch_norm_trainable: Whether to update batch norm parameters during
- training or not.
- Returns:
- an ssd_meta_arch.SSDFeatureExtractor object.
- """
- min_depth = 32
- with slim.arg_scope([slim.conv2d], normalizer_fn=slim.batch_norm) as sc:
- conv_hyperparams = sc
- return ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor(
- is_training, depth_multiplier, min_depth, pad_to_multiple,
- conv_hyperparams, batch_norm_trainable)
-
- def test_extract_features_returns_correct_shapes_128(self):
- image_height = 128
- image_width = 128
- depth_multiplier = 1.0
- pad_to_multiple = 1
- expected_feature_map_shape = [(4, 8, 8, 512), (4, 4, 4, 1024),
- (4, 2, 2, 512), (4, 1, 1, 256),
- (4, 1, 1, 256), (4, 1, 1, 128)]
- self.check_extract_features_returns_correct_shape(
- image_height, image_width, depth_multiplier, pad_to_multiple,
- expected_feature_map_shape)
-
- def test_extract_features_returns_correct_shapes_299(self):
- image_height = 299
- image_width = 299
- depth_multiplier = 1.0
- pad_to_multiple = 1
- expected_feature_map_shape = [(4, 19, 19, 512), (4, 10, 10, 1024),
- (4, 5, 5, 512), (4, 3, 3, 256),
- (4, 2, 2, 256), (4, 1, 1, 128)]
- self.check_extract_features_returns_correct_shape(
- image_height, image_width, depth_multiplier, pad_to_multiple,
- expected_feature_map_shape)
-
- def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
- image_height = 299
- image_width = 299
- depth_multiplier = 0.5**12
- pad_to_multiple = 1
- expected_feature_map_shape = [(4, 19, 19, 32), (4, 10, 10, 32),
- (4, 5, 5, 32), (4, 3, 3, 32),
- (4, 2, 2, 32), (4, 1, 1, 32)]
- self.check_extract_features_returns_correct_shape(
- image_height, image_width, depth_multiplier, pad_to_multiple,
- expected_feature_map_shape)
-
- def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
- image_height = 299
- image_width = 299
- depth_multiplier = 1.0
- pad_to_multiple = 32
- expected_feature_map_shape = [(4, 20, 20, 512), (4, 10, 10, 1024),
- (4, 5, 5, 512), (4, 3, 3, 256),
- (4, 2, 2, 256), (4, 1, 1, 128)]
- self.check_extract_features_returns_correct_shape(
- image_height, image_width, depth_multiplier, pad_to_multiple,
- expected_feature_map_shape)
-
- def test_extract_features_raises_error_with_invalid_image_size(self):
- image_height = 32
- image_width = 32
- depth_multiplier = 1.0
- pad_to_multiple = 1
- self.check_extract_features_raises_error_with_invalid_image_size(
- image_height, image_width, depth_multiplier, pad_to_multiple)
-
- def test_preprocess_returns_correct_value_range(self):
- image_height = 128
- image_width = 128
- depth_multiplier = 1
- pad_to_multiple = 1
- test_image = np.random.rand(4, image_height, image_width, 3)
- feature_extractor = self._create_feature_extractor(depth_multiplier,
- pad_to_multiple)
- preprocessed_image = feature_extractor.preprocess(test_image)
- self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))
-
- def test_variables_only_created_in_scope(self):
- depth_multiplier = 1
- pad_to_multiple = 1
- scope_name = 'MobilenetV1'
- self.check_feature_extractor_variables_under_scope(
- depth_multiplier, pad_to_multiple, scope_name)
-
- def test_nofused_batchnorm(self):
- image_height = 40
- image_width = 40
- depth_multiplier = 1
- pad_to_multiple = 1
- image_placeholder = tf.placeholder(tf.float32,
- [1, image_height, image_width, 3])
- feature_extractor = self._create_feature_extractor(depth_multiplier,
- pad_to_multiple)
- preprocessed_image = feature_extractor.preprocess(image_placeholder)
- _ = feature_extractor.extract_features(preprocessed_image)
- self.assertFalse(any(op.type == 'FusedBatchNorm'
- for op in tf.get_default_graph().get_operations()))
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/object_detection_tutorial.ipynb b/object_detection/object_detection_tutorial.ipynb
deleted file mode 100644
index 6e251ff4..00000000
--- a/object_detection/object_detection_tutorial.ipynb
+++ /dev/null
@@ -1,298 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Object Detection Demo\n",
- "Welcome to the object detection inference walkthrough! This notebook will walk you step by step through the process of using a pre-trained model to detect objects in an image. Make sure to follow the [installation instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md) before you start."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Imports"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "scrolled": true
- },
- "outputs": [],
- "source": [
- "import numpy as np\n",
- "import os\n",
- "import six.moves.urllib as urllib\n",
- "import sys\n",
- "import tarfile\n",
- "import tensorflow as tf\n",
- "import zipfile\n",
- "\n",
- "from collections import defaultdict\n",
- "from io import StringIO\n",
- "from matplotlib import pyplot as plt\n",
- "from PIL import Image\n",
- "\n",
- "if tf.__version__ < '1.4.0':\n",
- " raise ImportError('Please upgrade your tensorflow installation to v1.4.* or later!')\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Env setup"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# This is needed to display the images.\n",
- "%matplotlib inline\n",
- "\n",
- "# This is needed since the notebook is stored in the object_detection folder.\n",
- "sys.path.append(\"..\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Object detection imports\n",
- "Here are the imports from the object detection module."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from utils import label_map_util\n",
- "\n",
- "from utils import visualization_utils as vis_util"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Model preparation "
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Variables\n",
- "\n",
- "Any model exported using the `export_inference_graph.py` tool can be loaded here simply by changing `PATH_TO_CKPT` to point to a new .pb file. \n",
- "\n",
- "By default we use an \"SSD with Mobilenet\" model here. See the [detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md) for a list of other models that can be run out-of-the-box with varying speeds and accuracies."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# What model to download.\n",
- "MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'\n",
- "MODEL_FILE = MODEL_NAME + '.tar.gz'\n",
- "DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'\n",
- "\n",
- "# Path to frozen detection graph. This is the actual model that is used for the object detection.\n",
- "PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'\n",
- "\n",
- "# List of the strings that is used to add correct label for each box.\n",
- "PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')\n",
- "\n",
- "NUM_CLASSES = 90"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Download Model"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "opener = urllib.request.URLopener()\n",
- "opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)\n",
- "tar_file = tarfile.open(MODEL_FILE)\n",
- "for file in tar_file.getmembers():\n",
- " file_name = os.path.basename(file.name)\n",
- " if 'frozen_inference_graph.pb' in file_name:\n",
- " tar_file.extract(file, os.getcwd())"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Load a (frozen) Tensorflow model into memory."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "detection_graph = tf.Graph()\n",
- "with detection_graph.as_default():\n",
- " od_graph_def = tf.GraphDef()\n",
- " with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:\n",
- " serialized_graph = fid.read()\n",
- " od_graph_def.ParseFromString(serialized_graph)\n",
- " tf.import_graph_def(od_graph_def, name='')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Loading label map\n",
- "Label maps map indices to category names, so that when our convolution network predicts `5`, we know that this corresponds to `airplane`. Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "label_map = label_map_util.load_labelmap(PATH_TO_LABELS)\n",
- "categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)\n",
- "category_index = label_map_util.create_category_index(categories)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Helper code"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "def load_image_into_numpy_array(image):\n",
- " (im_width, im_height) = image.size\n",
- " return np.array(image.getdata()).reshape(\n",
- " (im_height, im_width, 3)).astype(np.uint8)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Detection"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# For the sake of simplicity we will use only 2 images:\n",
- "# image1.jpg\n",
- "# image2.jpg\n",
- "# If you want to test the code with your images, just add path to the images to the TEST_IMAGE_PATHS.\n",
- "PATH_TO_TEST_IMAGES_DIR = 'test_images'\n",
- "TEST_IMAGE_PATHS = [ os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i)) for i in range(1, 3) ]\n",
- "\n",
- "# Size, in inches, of the output images.\n",
- "IMAGE_SIZE = (12, 8)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "scrolled": true
- },
- "outputs": [],
- "source": [
- "with detection_graph.as_default():\n",
- " with tf.Session(graph=detection_graph) as sess:\n",
- " # Definite input and output Tensors for detection_graph\n",
- " image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')\n",
- " # Each box represents a part of the image where a particular object was detected.\n",
- " detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')\n",
- " # Each score represent how level of confidence for each of the objects.\n",
- " # Score is shown on the result image, together with the class label.\n",
- " detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')\n",
- " detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')\n",
- " num_detections = detection_graph.get_tensor_by_name('num_detections:0')\n",
- " for image_path in TEST_IMAGE_PATHS:\n",
- " image = Image.open(image_path)\n",
- " # the array based representation of the image will be used later in order to prepare the\n",
- " # result image with boxes and labels on it.\n",
- " image_np = load_image_into_numpy_array(image)\n",
- " # Expand dimensions since the model expects images to have shape: [1, None, None, 3]\n",
- " image_np_expanded = np.expand_dims(image_np, axis=0)\n",
- " # Actual detection.\n",
- " (boxes, scores, classes, num) = sess.run(\n",
- " [detection_boxes, detection_scores, detection_classes, num_detections],\n",
- " feed_dict={image_tensor: image_np_expanded})\n",
- " # Visualization of the results of a detection.\n",
- " vis_util.visualize_boxes_and_labels_on_image_array(\n",
- " image_np,\n",
- " np.squeeze(boxes),\n",
- " np.squeeze(classes).astype(np.int32),\n",
- " np.squeeze(scores),\n",
- " category_index,\n",
- " use_normalized_coordinates=True,\n",
- " line_thickness=8)\n",
- " plt.figure(figsize=IMAGE_SIZE)\n",
- " plt.imshow(image_np)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 2",
- "language": "python",
- "name": "python2"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 2
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython2",
- "version": "2.7.10"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/object_detection/protos/BUILD b/object_detection/protos/BUILD
deleted file mode 100644
index 1b7eb148..00000000
--- a/object_detection/protos/BUILD
+++ /dev/null
@@ -1,341 +0,0 @@
-# Tensorflow Object Detection API: Configuration protos.
-
-package(
- default_visibility = ["//visibility:public"],
-)
-
-licenses(["notice"])
-
-proto_library(
- name = "argmax_matcher_proto",
- srcs = ["argmax_matcher.proto"],
-)
-
-py_proto_library(
- name = "argmax_matcher_py_pb2",
- api_version = 2,
- deps = [":argmax_matcher_proto"],
-)
-
-proto_library(
- name = "bipartite_matcher_proto",
- srcs = ["bipartite_matcher.proto"],
-)
-
-py_proto_library(
- name = "bipartite_matcher_py_pb2",
- api_version = 2,
- deps = [":bipartite_matcher_proto"],
-)
-
-proto_library(
- name = "matcher_proto",
- srcs = ["matcher.proto"],
- deps = [
- ":argmax_matcher_proto",
- ":bipartite_matcher_proto",
- ],
-)
-
-py_proto_library(
- name = "matcher_py_pb2",
- api_version = 2,
- deps = [":matcher_proto"],
-)
-
-proto_library(
- name = "faster_rcnn_box_coder_proto",
- srcs = ["faster_rcnn_box_coder.proto"],
-)
-
-py_proto_library(
- name = "faster_rcnn_box_coder_py_pb2",
- api_version = 2,
- deps = [":faster_rcnn_box_coder_proto"],
-)
-
-proto_library(
- name = "keypoint_box_coder_proto",
- srcs = ["keypoint_box_coder.proto"],
-)
-
-py_proto_library(
- name = "keypoint_box_coder_py_pb2",
- api_version = 2,
- deps = [":keypoint_box_coder_proto"],
-)
-
-proto_library(
- name = "mean_stddev_box_coder_proto",
- srcs = ["mean_stddev_box_coder.proto"],
-)
-
-py_proto_library(
- name = "mean_stddev_box_coder_py_pb2",
- api_version = 2,
- deps = [":mean_stddev_box_coder_proto"],
-)
-
-proto_library(
- name = "square_box_coder_proto",
- srcs = ["square_box_coder.proto"],
-)
-
-py_proto_library(
- name = "square_box_coder_py_pb2",
- api_version = 2,
- deps = [":square_box_coder_proto"],
-)
-
-proto_library(
- name = "box_coder_proto",
- srcs = ["box_coder.proto"],
- deps = [
- ":faster_rcnn_box_coder_proto",
- ":keypoint_box_coder_proto",
- ":mean_stddev_box_coder_proto",
- ":square_box_coder_proto",
- ],
-)
-
-py_proto_library(
- name = "box_coder_py_pb2",
- api_version = 2,
- deps = [":box_coder_proto"],
-)
-
-proto_library(
- name = "grid_anchor_generator_proto",
- srcs = ["grid_anchor_generator.proto"],
-)
-
-py_proto_library(
- name = "grid_anchor_generator_py_pb2",
- api_version = 2,
- deps = [":grid_anchor_generator_proto"],
-)
-
-proto_library(
- name = "ssd_anchor_generator_proto",
- srcs = ["ssd_anchor_generator.proto"],
-)
-
-py_proto_library(
- name = "ssd_anchor_generator_py_pb2",
- api_version = 2,
- deps = [":ssd_anchor_generator_proto"],
-)
-
-proto_library(
- name = "anchor_generator_proto",
- srcs = ["anchor_generator.proto"],
- deps = [
- ":grid_anchor_generator_proto",
- ":ssd_anchor_generator_proto",
- ],
-)
-
-py_proto_library(
- name = "anchor_generator_py_pb2",
- api_version = 2,
- deps = [":anchor_generator_proto"],
-)
-
-proto_library(
- name = "input_reader_proto",
- srcs = ["input_reader.proto"],
-)
-
-py_proto_library(
- name = "input_reader_py_pb2",
- api_version = 2,
- deps = [":input_reader_proto"],
-)
-
-proto_library(
- name = "losses_proto",
- srcs = ["losses.proto"],
-)
-
-py_proto_library(
- name = "losses_py_pb2",
- api_version = 2,
- deps = [":losses_proto"],
-)
-
-proto_library(
- name = "optimizer_proto",
- srcs = ["optimizer.proto"],
-)
-
-py_proto_library(
- name = "optimizer_py_pb2",
- api_version = 2,
- deps = [":optimizer_proto"],
-)
-
-proto_library(
- name = "post_processing_proto",
- srcs = ["post_processing.proto"],
-)
-
-py_proto_library(
- name = "post_processing_py_pb2",
- api_version = 2,
- deps = [":post_processing_proto"],
-)
-
-proto_library(
- name = "hyperparams_proto",
- srcs = ["hyperparams.proto"],
-)
-
-py_proto_library(
- name = "hyperparams_py_pb2",
- api_version = 2,
- deps = [":hyperparams_proto"],
-)
-
-proto_library(
- name = "box_predictor_proto",
- srcs = ["box_predictor.proto"],
- deps = [":hyperparams_proto"],
-)
-
-py_proto_library(
- name = "box_predictor_py_pb2",
- api_version = 2,
- deps = [":box_predictor_proto"],
-)
-
-proto_library(
- name = "region_similarity_calculator_proto",
- srcs = ["region_similarity_calculator.proto"],
- deps = [],
-)
-
-py_proto_library(
- name = "region_similarity_calculator_py_pb2",
- api_version = 2,
- deps = [":region_similarity_calculator_proto"],
-)
-
-proto_library(
- name = "preprocessor_proto",
- srcs = ["preprocessor.proto"],
-)
-
-py_proto_library(
- name = "preprocessor_py_pb2",
- api_version = 2,
- deps = [":preprocessor_proto"],
-)
-
-proto_library(
- name = "train_proto",
- srcs = ["train.proto"],
- deps = [
- ":optimizer_proto",
- ":preprocessor_proto",
- ],
-)
-
-py_proto_library(
- name = "train_py_pb2",
- api_version = 2,
- deps = [":train_proto"],
-)
-
-proto_library(
- name = "eval_proto",
- srcs = ["eval.proto"],
-)
-
-py_proto_library(
- name = "eval_py_pb2",
- api_version = 2,
- deps = [":eval_proto"],
-)
-
-proto_library(
- name = "image_resizer_proto",
- srcs = ["image_resizer.proto"],
-)
-
-py_proto_library(
- name = "image_resizer_py_pb2",
- api_version = 2,
- deps = [":image_resizer_proto"],
-)
-
-proto_library(
- name = "faster_rcnn_proto",
- srcs = ["faster_rcnn.proto"],
- deps = [
- ":box_predictor_proto",
- "//object_detection/protos:anchor_generator_proto",
- "//object_detection/protos:hyperparams_proto",
- "//object_detection/protos:image_resizer_proto",
- "//object_detection/protos:losses_proto",
- "//object_detection/protos:post_processing_proto",
- ],
-)
-
-proto_library(
- name = "ssd_proto",
- srcs = ["ssd.proto"],
- deps = [
- ":anchor_generator_proto",
- ":box_coder_proto",
- ":box_predictor_proto",
- ":hyperparams_proto",
- ":image_resizer_proto",
- ":losses_proto",
- ":matcher_proto",
- ":post_processing_proto",
- ":region_similarity_calculator_proto",
- ],
-)
-
-proto_library(
- name = "model_proto",
- srcs = ["model.proto"],
- deps = [
- ":faster_rcnn_proto",
- ":ssd_proto",
- ],
-)
-
-py_proto_library(
- name = "model_py_pb2",
- api_version = 2,
- deps = [":model_proto"],
-)
-
-proto_library(
- name = "pipeline_proto",
- srcs = ["pipeline.proto"],
- deps = [
- ":eval_proto",
- ":input_reader_proto",
- ":model_proto",
- ":train_proto",
- ],
-)
-
-py_proto_library(
- name = "pipeline_py_pb2",
- api_version = 2,
- deps = [":pipeline_proto"],
-)
-
-proto_library(
- name = "string_int_label_map_proto",
- srcs = ["string_int_label_map.proto"],
-)
-
-py_proto_library(
- name = "string_int_label_map_py_pb2",
- api_version = 2,
- deps = [":string_int_label_map_proto"],
-)
diff --git a/object_detection/protos/__init__.py b/object_detection/protos/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/object_detection/protos/__pycache__/__init__.cpython-35.pyc b/object_detection/protos/__pycache__/__init__.cpython-35.pyc
deleted file mode 100644
index 4feb7bdd..00000000
Binary files a/object_detection/protos/__pycache__/__init__.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/protos/__pycache__/anchor_generator_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/anchor_generator_pb2.cpython-35.pyc
deleted file mode 100644
index e2124087..00000000
Binary files a/object_detection/protos/__pycache__/anchor_generator_pb2.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/protos/__pycache__/argmax_matcher_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/argmax_matcher_pb2.cpython-35.pyc
deleted file mode 100644
index 9bf3b1ca..00000000
Binary files a/object_detection/protos/__pycache__/argmax_matcher_pb2.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/protos/__pycache__/bipartite_matcher_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/bipartite_matcher_pb2.cpython-35.pyc
deleted file mode 100644
index 50042354..00000000
Binary files a/object_detection/protos/__pycache__/bipartite_matcher_pb2.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/protos/__pycache__/box_coder_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/box_coder_pb2.cpython-35.pyc
deleted file mode 100644
index 59ba53c1..00000000
Binary files a/object_detection/protos/__pycache__/box_coder_pb2.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/protos/__pycache__/box_predictor_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/box_predictor_pb2.cpython-35.pyc
deleted file mode 100644
index 5fa2b6d0..00000000
Binary files a/object_detection/protos/__pycache__/box_predictor_pb2.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/protos/__pycache__/eval_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/eval_pb2.cpython-35.pyc
deleted file mode 100644
index 204e286b..00000000
Binary files a/object_detection/protos/__pycache__/eval_pb2.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/protos/__pycache__/faster_rcnn_box_coder_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/faster_rcnn_box_coder_pb2.cpython-35.pyc
deleted file mode 100644
index 373d5d2d..00000000
Binary files a/object_detection/protos/__pycache__/faster_rcnn_box_coder_pb2.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/protos/__pycache__/faster_rcnn_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/faster_rcnn_pb2.cpython-35.pyc
deleted file mode 100644
index ab8f8fa6..00000000
Binary files a/object_detection/protos/__pycache__/faster_rcnn_pb2.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/protos/__pycache__/grid_anchor_generator_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/grid_anchor_generator_pb2.cpython-35.pyc
deleted file mode 100644
index 39621fcb..00000000
Binary files a/object_detection/protos/__pycache__/grid_anchor_generator_pb2.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/protos/__pycache__/hyperparams_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/hyperparams_pb2.cpython-35.pyc
deleted file mode 100644
index be194d05..00000000
Binary files a/object_detection/protos/__pycache__/hyperparams_pb2.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/protos/__pycache__/image_resizer_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/image_resizer_pb2.cpython-35.pyc
deleted file mode 100644
index cafe3f70..00000000
Binary files a/object_detection/protos/__pycache__/image_resizer_pb2.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/protos/__pycache__/input_reader_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/input_reader_pb2.cpython-35.pyc
deleted file mode 100644
index c09e601b..00000000
Binary files a/object_detection/protos/__pycache__/input_reader_pb2.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/protos/__pycache__/keypoint_box_coder_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/keypoint_box_coder_pb2.cpython-35.pyc
deleted file mode 100644
index 805f3829..00000000
Binary files a/object_detection/protos/__pycache__/keypoint_box_coder_pb2.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/protos/__pycache__/losses_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/losses_pb2.cpython-35.pyc
deleted file mode 100644
index 030d1a3b..00000000
Binary files a/object_detection/protos/__pycache__/losses_pb2.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/protos/__pycache__/matcher_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/matcher_pb2.cpython-35.pyc
deleted file mode 100644
index c7983bf9..00000000
Binary files a/object_detection/protos/__pycache__/matcher_pb2.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/protos/__pycache__/mean_stddev_box_coder_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/mean_stddev_box_coder_pb2.cpython-35.pyc
deleted file mode 100644
index 1154fa6b..00000000
Binary files a/object_detection/protos/__pycache__/mean_stddev_box_coder_pb2.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/protos/__pycache__/model_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/model_pb2.cpython-35.pyc
deleted file mode 100644
index 80e7e0e4..00000000
Binary files a/object_detection/protos/__pycache__/model_pb2.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/protos/__pycache__/optimizer_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/optimizer_pb2.cpython-35.pyc
deleted file mode 100644
index ce52d0f8..00000000
Binary files a/object_detection/protos/__pycache__/optimizer_pb2.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/protos/__pycache__/pipeline_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/pipeline_pb2.cpython-35.pyc
deleted file mode 100644
index 3c1c062c..00000000
Binary files a/object_detection/protos/__pycache__/pipeline_pb2.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/protos/__pycache__/post_processing_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/post_processing_pb2.cpython-35.pyc
deleted file mode 100644
index 9091ba12..00000000
Binary files a/object_detection/protos/__pycache__/post_processing_pb2.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/protos/__pycache__/preprocessor_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/preprocessor_pb2.cpython-35.pyc
deleted file mode 100644
index dd4491c8..00000000
Binary files a/object_detection/protos/__pycache__/preprocessor_pb2.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/protos/__pycache__/region_similarity_calculator_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/region_similarity_calculator_pb2.cpython-35.pyc
deleted file mode 100644
index bf9f25f2..00000000
Binary files a/object_detection/protos/__pycache__/region_similarity_calculator_pb2.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/protos/__pycache__/square_box_coder_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/square_box_coder_pb2.cpython-35.pyc
deleted file mode 100644
index 1c38e2ee..00000000
Binary files a/object_detection/protos/__pycache__/square_box_coder_pb2.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/protos/__pycache__/ssd_anchor_generator_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/ssd_anchor_generator_pb2.cpython-35.pyc
deleted file mode 100644
index 19f6c085..00000000
Binary files a/object_detection/protos/__pycache__/ssd_anchor_generator_pb2.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/protos/__pycache__/ssd_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/ssd_pb2.cpython-35.pyc
deleted file mode 100644
index 9a4405d9..00000000
Binary files a/object_detection/protos/__pycache__/ssd_pb2.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/protos/__pycache__/string_int_label_map_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/string_int_label_map_pb2.cpython-35.pyc
deleted file mode 100644
index 5c608546..00000000
Binary files a/object_detection/protos/__pycache__/string_int_label_map_pb2.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/protos/__pycache__/train_pb2.cpython-35.pyc b/object_detection/protos/__pycache__/train_pb2.cpython-35.pyc
deleted file mode 100644
index aa7ae131..00000000
Binary files a/object_detection/protos/__pycache__/train_pb2.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/protos/anchor_generator.proto b/object_detection/protos/anchor_generator.proto
deleted file mode 100644
index 4b7b1d62..00000000
--- a/object_detection/protos/anchor_generator.proto
+++ /dev/null
@@ -1,15 +0,0 @@
-syntax = "proto2";
-
-package object_detection.protos;
-
-import "object_detection/protos/grid_anchor_generator.proto";
-import "object_detection/protos/ssd_anchor_generator.proto";
-
-// Configuration proto for the anchor generator to use in the object detection
-// pipeline. See core/anchor_generator.py for details.
-message AnchorGenerator {
- oneof anchor_generator_oneof {
- GridAnchorGenerator grid_anchor_generator = 1;
- SsdAnchorGenerator ssd_anchor_generator = 2;
- }
-}
diff --git a/object_detection/protos/anchor_generator_pb2.py b/object_detection/protos/anchor_generator_pb2.py
deleted file mode 100644
index 748848a9..00000000
--- a/object_detection/protos/anchor_generator_pb2.py
+++ /dev/null
@@ -1,90 +0,0 @@
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: object_detection/protos/anchor_generator.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf import descriptor_pb2
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-from object_detection.protos import grid_anchor_generator_pb2 as object__detection_dot_protos_dot_grid__anchor__generator__pb2
-from object_detection.protos import ssd_anchor_generator_pb2 as object__detection_dot_protos_dot_ssd__anchor__generator__pb2
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='object_detection/protos/anchor_generator.proto',
- package='object_detection.protos',
- syntax='proto2',
- serialized_pb=_b('\n.object_detection/protos/anchor_generator.proto\x12\x17object_detection.protos\x1a\x33object_detection/protos/grid_anchor_generator.proto\x1a\x32object_detection/protos/ssd_anchor_generator.proto\"\xc7\x01\n\x0f\x41nchorGenerator\x12M\n\x15grid_anchor_generator\x18\x01 \x01(\x0b\x32,.object_detection.protos.GridAnchorGeneratorH\x00\x12K\n\x14ssd_anchor_generator\x18\x02 \x01(\x0b\x32+.object_detection.protos.SsdAnchorGeneratorH\x00\x42\x18\n\x16\x61nchor_generator_oneof')
- ,
- dependencies=[object__detection_dot_protos_dot_grid__anchor__generator__pb2.DESCRIPTOR,object__detection_dot_protos_dot_ssd__anchor__generator__pb2.DESCRIPTOR,])
-
-
-
-
-_ANCHORGENERATOR = _descriptor.Descriptor(
- name='AnchorGenerator',
- full_name='object_detection.protos.AnchorGenerator',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='grid_anchor_generator', full_name='object_detection.protos.AnchorGenerator.grid_anchor_generator', index=0,
- number=1, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='ssd_anchor_generator', full_name='object_detection.protos.AnchorGenerator.ssd_anchor_generator', index=1,
- number=2, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- _descriptor.OneofDescriptor(
- name='anchor_generator_oneof', full_name='object_detection.protos.AnchorGenerator.anchor_generator_oneof',
- index=0, containing_type=None, fields=[]),
- ],
- serialized_start=181,
- serialized_end=380,
-)
-
-_ANCHORGENERATOR.fields_by_name['grid_anchor_generator'].message_type = object__detection_dot_protos_dot_grid__anchor__generator__pb2._GRIDANCHORGENERATOR
-_ANCHORGENERATOR.fields_by_name['ssd_anchor_generator'].message_type = object__detection_dot_protos_dot_ssd__anchor__generator__pb2._SSDANCHORGENERATOR
-_ANCHORGENERATOR.oneofs_by_name['anchor_generator_oneof'].fields.append(
- _ANCHORGENERATOR.fields_by_name['grid_anchor_generator'])
-_ANCHORGENERATOR.fields_by_name['grid_anchor_generator'].containing_oneof = _ANCHORGENERATOR.oneofs_by_name['anchor_generator_oneof']
-_ANCHORGENERATOR.oneofs_by_name['anchor_generator_oneof'].fields.append(
- _ANCHORGENERATOR.fields_by_name['ssd_anchor_generator'])
-_ANCHORGENERATOR.fields_by_name['ssd_anchor_generator'].containing_oneof = _ANCHORGENERATOR.oneofs_by_name['anchor_generator_oneof']
-DESCRIPTOR.message_types_by_name['AnchorGenerator'] = _ANCHORGENERATOR
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-AnchorGenerator = _reflection.GeneratedProtocolMessageType('AnchorGenerator', (_message.Message,), dict(
- DESCRIPTOR = _ANCHORGENERATOR,
- __module__ = 'object_detection.protos.anchor_generator_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.AnchorGenerator)
- ))
-_sym_db.RegisterMessage(AnchorGenerator)
-
-
-# @@protoc_insertion_point(module_scope)
diff --git a/object_detection/protos/argmax_matcher.proto b/object_detection/protos/argmax_matcher.proto
deleted file mode 100644
index 88c50318..00000000
--- a/object_detection/protos/argmax_matcher.proto
+++ /dev/null
@@ -1,25 +0,0 @@
-syntax = "proto2";
-
-package object_detection.protos;
-
-// Configuration proto for ArgMaxMatcher. See
-// matchers/argmax_matcher.py for details.
-message ArgMaxMatcher {
- // Threshold for positive matches.
- optional float matched_threshold = 1 [default = 0.5];
-
- // Threshold for negative matches.
- optional float unmatched_threshold = 2 [default = 0.5];
-
- // Whether to construct ArgMaxMatcher without thresholds.
- optional bool ignore_thresholds = 3 [default = false];
-
- // If True then negative matches are the ones below the unmatched_threshold,
- // whereas ignored matches are in between the matched and umatched
- // threshold. If False, then negative matches are in between the matched
- // and unmatched threshold, and everything lower than unmatched is ignored.
- optional bool negatives_lower_than_unmatched = 4 [default = true];
-
- // Whether to ensure each row is matched to at least one column.
- optional bool force_match_for_each_row = 5 [default = false];
-}
diff --git a/object_detection/protos/argmax_matcher_pb2.py b/object_detection/protos/argmax_matcher_pb2.py
deleted file mode 100644
index 8c78f3ab..00000000
--- a/object_detection/protos/argmax_matcher_pb2.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: object_detection/protos/argmax_matcher.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf import descriptor_pb2
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='object_detection/protos/argmax_matcher.proto',
- package='object_detection.protos',
- syntax='proto2',
- serialized_pb=_b('\n,object_detection/protos/argmax_matcher.proto\x12\x17object_detection.protos\"\xca\x01\n\rArgMaxMatcher\x12\x1e\n\x11matched_threshold\x18\x01 \x01(\x02:\x03\x30.5\x12 \n\x13unmatched_threshold\x18\x02 \x01(\x02:\x03\x30.5\x12 \n\x11ignore_thresholds\x18\x03 \x01(\x08:\x05\x66\x61lse\x12,\n\x1enegatives_lower_than_unmatched\x18\x04 \x01(\x08:\x04true\x12\'\n\x18\x66orce_match_for_each_row\x18\x05 \x01(\x08:\x05\x66\x61lse')
-)
-
-
-
-
-_ARGMAXMATCHER = _descriptor.Descriptor(
- name='ArgMaxMatcher',
- full_name='object_detection.protos.ArgMaxMatcher',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='matched_threshold', full_name='object_detection.protos.ArgMaxMatcher.matched_threshold', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.5),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='unmatched_threshold', full_name='object_detection.protos.ArgMaxMatcher.unmatched_threshold', index=1,
- number=2, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.5),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='ignore_thresholds', full_name='object_detection.protos.ArgMaxMatcher.ignore_thresholds', index=2,
- number=3, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=False,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='negatives_lower_than_unmatched', full_name='object_detection.protos.ArgMaxMatcher.negatives_lower_than_unmatched', index=3,
- number=4, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=True,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='force_match_for_each_row', full_name='object_detection.protos.ArgMaxMatcher.force_match_for_each_row', index=4,
- number=5, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=False,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=74,
- serialized_end=276,
-)
-
-DESCRIPTOR.message_types_by_name['ArgMaxMatcher'] = _ARGMAXMATCHER
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-ArgMaxMatcher = _reflection.GeneratedProtocolMessageType('ArgMaxMatcher', (_message.Message,), dict(
- DESCRIPTOR = _ARGMAXMATCHER,
- __module__ = 'object_detection.protos.argmax_matcher_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.ArgMaxMatcher)
- ))
-_sym_db.RegisterMessage(ArgMaxMatcher)
-
-
-# @@protoc_insertion_point(module_scope)
diff --git a/object_detection/protos/bipartite_matcher.proto b/object_detection/protos/bipartite_matcher.proto
deleted file mode 100644
index 7e5a9e5c..00000000
--- a/object_detection/protos/bipartite_matcher.proto
+++ /dev/null
@@ -1,8 +0,0 @@
-syntax = "proto2";
-
-package object_detection.protos;
-
-// Configuration proto for bipartite matcher. See
-// matchers/bipartite_matcher.py for details.
-message BipartiteMatcher {
-}
diff --git a/object_detection/protos/bipartite_matcher_pb2.py b/object_detection/protos/bipartite_matcher_pb2.py
deleted file mode 100644
index dc258ec8..00000000
--- a/object_detection/protos/bipartite_matcher_pb2.py
+++ /dev/null
@@ -1,62 +0,0 @@
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: object_detection/protos/bipartite_matcher.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf import descriptor_pb2
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='object_detection/protos/bipartite_matcher.proto',
- package='object_detection.protos',
- syntax='proto2',
- serialized_pb=_b('\n/object_detection/protos/bipartite_matcher.proto\x12\x17object_detection.protos\"\x12\n\x10\x42ipartiteMatcher')
-)
-
-
-
-
-_BIPARTITEMATCHER = _descriptor.Descriptor(
- name='BipartiteMatcher',
- full_name='object_detection.protos.BipartiteMatcher',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=76,
- serialized_end=94,
-)
-
-DESCRIPTOR.message_types_by_name['BipartiteMatcher'] = _BIPARTITEMATCHER
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-BipartiteMatcher = _reflection.GeneratedProtocolMessageType('BipartiteMatcher', (_message.Message,), dict(
- DESCRIPTOR = _BIPARTITEMATCHER,
- __module__ = 'object_detection.protos.bipartite_matcher_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.BipartiteMatcher)
- ))
-_sym_db.RegisterMessage(BipartiteMatcher)
-
-
-# @@protoc_insertion_point(module_scope)
diff --git a/object_detection/protos/box_coder.proto b/object_detection/protos/box_coder.proto
deleted file mode 100644
index 79b81812..00000000
--- a/object_detection/protos/box_coder.proto
+++ /dev/null
@@ -1,19 +0,0 @@
-syntax = "proto2";
-
-package object_detection.protos;
-
-import "object_detection/protos/faster_rcnn_box_coder.proto";
-import "object_detection/protos/keypoint_box_coder.proto";
-import "object_detection/protos/mean_stddev_box_coder.proto";
-import "object_detection/protos/square_box_coder.proto";
-
-// Configuration proto for the box coder to be used in the object detection
-// pipeline. See core/box_coder.py for details.
-message BoxCoder {
- oneof box_coder_oneof {
- FasterRcnnBoxCoder faster_rcnn_box_coder = 1;
- MeanStddevBoxCoder mean_stddev_box_coder = 2;
- SquareBoxCoder square_box_coder = 3;
- KeypointBoxCoder keypoint_box_coder = 4;
- }
-}
diff --git a/object_detection/protos/box_coder_pb2.py b/object_detection/protos/box_coder_pb2.py
deleted file mode 100644
index 6e91d095..00000000
--- a/object_detection/protos/box_coder_pb2.py
+++ /dev/null
@@ -1,114 +0,0 @@
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: object_detection/protos/box_coder.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf import descriptor_pb2
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-from object_detection.protos import faster_rcnn_box_coder_pb2 as object__detection_dot_protos_dot_faster__rcnn__box__coder__pb2
-from object_detection.protos import keypoint_box_coder_pb2 as object__detection_dot_protos_dot_keypoint__box__coder__pb2
-from object_detection.protos import mean_stddev_box_coder_pb2 as object__detection_dot_protos_dot_mean__stddev__box__coder__pb2
-from object_detection.protos import square_box_coder_pb2 as object__detection_dot_protos_dot_square__box__coder__pb2
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='object_detection/protos/box_coder.proto',
- package='object_detection.protos',
- syntax='proto2',
- serialized_pb=_b('\n\'object_detection/protos/box_coder.proto\x12\x17object_detection.protos\x1a\x33object_detection/protos/faster_rcnn_box_coder.proto\x1a\x30object_detection/protos/keypoint_box_coder.proto\x1a\x33object_detection/protos/mean_stddev_box_coder.proto\x1a.object_detection/protos/square_box_coder.proto\"\xc7\x02\n\x08\x42oxCoder\x12L\n\x15\x66\x61ster_rcnn_box_coder\x18\x01 \x01(\x0b\x32+.object_detection.protos.FasterRcnnBoxCoderH\x00\x12L\n\x15mean_stddev_box_coder\x18\x02 \x01(\x0b\x32+.object_detection.protos.MeanStddevBoxCoderH\x00\x12\x43\n\x10square_box_coder\x18\x03 \x01(\x0b\x32\'.object_detection.protos.SquareBoxCoderH\x00\x12G\n\x12keypoint_box_coder\x18\x04 \x01(\x0b\x32).object_detection.protos.KeypointBoxCoderH\x00\x42\x11\n\x0f\x62ox_coder_oneof')
- ,
- dependencies=[object__detection_dot_protos_dot_faster__rcnn__box__coder__pb2.DESCRIPTOR,object__detection_dot_protos_dot_keypoint__box__coder__pb2.DESCRIPTOR,object__detection_dot_protos_dot_mean__stddev__box__coder__pb2.DESCRIPTOR,object__detection_dot_protos_dot_square__box__coder__pb2.DESCRIPTOR,])
-
-
-
-
-_BOXCODER = _descriptor.Descriptor(
- name='BoxCoder',
- full_name='object_detection.protos.BoxCoder',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='faster_rcnn_box_coder', full_name='object_detection.protos.BoxCoder.faster_rcnn_box_coder', index=0,
- number=1, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='mean_stddev_box_coder', full_name='object_detection.protos.BoxCoder.mean_stddev_box_coder', index=1,
- number=2, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='square_box_coder', full_name='object_detection.protos.BoxCoder.square_box_coder', index=2,
- number=3, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='keypoint_box_coder', full_name='object_detection.protos.BoxCoder.keypoint_box_coder', index=3,
- number=4, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- _descriptor.OneofDescriptor(
- name='box_coder_oneof', full_name='object_detection.protos.BoxCoder.box_coder_oneof',
- index=0, containing_type=None, fields=[]),
- ],
- serialized_start=273,
- serialized_end=600,
-)
-
-_BOXCODER.fields_by_name['faster_rcnn_box_coder'].message_type = object__detection_dot_protos_dot_faster__rcnn__box__coder__pb2._FASTERRCNNBOXCODER
-_BOXCODER.fields_by_name['mean_stddev_box_coder'].message_type = object__detection_dot_protos_dot_mean__stddev__box__coder__pb2._MEANSTDDEVBOXCODER
-_BOXCODER.fields_by_name['square_box_coder'].message_type = object__detection_dot_protos_dot_square__box__coder__pb2._SQUAREBOXCODER
-_BOXCODER.fields_by_name['keypoint_box_coder'].message_type = object__detection_dot_protos_dot_keypoint__box__coder__pb2._KEYPOINTBOXCODER
-_BOXCODER.oneofs_by_name['box_coder_oneof'].fields.append(
- _BOXCODER.fields_by_name['faster_rcnn_box_coder'])
-_BOXCODER.fields_by_name['faster_rcnn_box_coder'].containing_oneof = _BOXCODER.oneofs_by_name['box_coder_oneof']
-_BOXCODER.oneofs_by_name['box_coder_oneof'].fields.append(
- _BOXCODER.fields_by_name['mean_stddev_box_coder'])
-_BOXCODER.fields_by_name['mean_stddev_box_coder'].containing_oneof = _BOXCODER.oneofs_by_name['box_coder_oneof']
-_BOXCODER.oneofs_by_name['box_coder_oneof'].fields.append(
- _BOXCODER.fields_by_name['square_box_coder'])
-_BOXCODER.fields_by_name['square_box_coder'].containing_oneof = _BOXCODER.oneofs_by_name['box_coder_oneof']
-_BOXCODER.oneofs_by_name['box_coder_oneof'].fields.append(
- _BOXCODER.fields_by_name['keypoint_box_coder'])
-_BOXCODER.fields_by_name['keypoint_box_coder'].containing_oneof = _BOXCODER.oneofs_by_name['box_coder_oneof']
-DESCRIPTOR.message_types_by_name['BoxCoder'] = _BOXCODER
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-BoxCoder = _reflection.GeneratedProtocolMessageType('BoxCoder', (_message.Message,), dict(
- DESCRIPTOR = _BOXCODER,
- __module__ = 'object_detection.protos.box_coder_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.BoxCoder)
- ))
-_sym_db.RegisterMessage(BoxCoder)
-
-
-# @@protoc_insertion_point(module_scope)
diff --git a/object_detection/protos/box_predictor.proto b/object_detection/protos/box_predictor.proto
deleted file mode 100644
index 4aa445cc..00000000
--- a/object_detection/protos/box_predictor.proto
+++ /dev/null
@@ -1,101 +0,0 @@
-syntax = "proto2";
-
-package object_detection.protos;
-
-import "object_detection/protos/hyperparams.proto";
-
-
-// Configuration proto for box predictor. See core/box_predictor.py for details.
-message BoxPredictor {
- oneof box_predictor_oneof {
- ConvolutionalBoxPredictor convolutional_box_predictor = 1;
- MaskRCNNBoxPredictor mask_rcnn_box_predictor = 2;
- RfcnBoxPredictor rfcn_box_predictor = 3;
- }
-}
-
-// Configuration proto for Convolutional box predictor.
-message ConvolutionalBoxPredictor {
- // Hyperparameters for convolution ops used in the box predictor.
- optional Hyperparams conv_hyperparams = 1;
-
- // Minumum feature depth prior to predicting box encodings and class
- // predictions.
- optional int32 min_depth = 2 [default = 0];
-
- // Maximum feature depth prior to predicting box encodings and class
- // predictions. If max_depth is set to 0, no additional feature map will be
- // inserted before location and class predictions.
- optional int32 max_depth = 3 [default = 0];
-
- // Number of the additional conv layers before the predictor.
- optional int32 num_layers_before_predictor = 4 [default = 0];
-
- // Whether to use dropout for class prediction.
- optional bool use_dropout = 5 [default = true];
-
- // Keep probability for dropout
- optional float dropout_keep_probability = 6 [default = 0.8];
-
- // Size of final convolution kernel. If the spatial resolution of the feature
- // map is smaller than the kernel size, then the kernel size is set to
- // min(feature_width, feature_height).
- optional int32 kernel_size = 7 [default = 1];
-
- // Size of the encoding for boxes.
- optional int32 box_code_size = 8 [default = 4];
-
- // Whether to apply sigmoid to the output of class predictions.
- // TODO: Do we need this since we have a post processing module.?
- optional bool apply_sigmoid_to_scores = 9 [default = false];
-
- optional float class_prediction_bias_init = 10 [default = 0.0];
-}
-
-message MaskRCNNBoxPredictor {
- // Hyperparameters for fully connected ops used in the box predictor.
- optional Hyperparams fc_hyperparams = 1;
-
- // Whether to use dropout op prior to the both box and class predictions.
- optional bool use_dropout = 2 [default= false];
-
- // Keep probability for dropout. This is only used if use_dropout is true.
- optional float dropout_keep_probability = 3 [default = 0.5];
-
- // Size of the encoding for the boxes.
- optional int32 box_code_size = 4 [default = 4];
-
- // Hyperparameters for convolution ops used in the box predictor.
- optional Hyperparams conv_hyperparams = 5;
-
- // Whether to predict instance masks inside detection boxes.
- optional bool predict_instance_masks = 6 [default = false];
-
- // The depth for the first conv2d_transpose op applied to the
- // image_features in the mask prediciton branch
- optional int32 mask_prediction_conv_depth = 7 [default = 256];
-
- // Whether to predict keypoints inside detection boxes.
- optional bool predict_keypoints = 8 [default = false];
-}
-
-message RfcnBoxPredictor {
- // Hyperparameters for convolution ops used in the box predictor.
- optional Hyperparams conv_hyperparams = 1;
-
- // Bin sizes for RFCN crops.
- optional int32 num_spatial_bins_height = 2 [default = 3];
-
- optional int32 num_spatial_bins_width = 3 [default = 3];
-
- // Target depth to reduce the input image features to.
- optional int32 depth = 4 [default=1024];
-
- // Size of the encoding for the boxes.
- optional int32 box_code_size = 5 [default = 4];
-
- // Size to resize the rfcn crops to.
- optional int32 crop_height = 6 [default= 12];
-
- optional int32 crop_width = 7 [default=12];
-}
diff --git a/object_detection/protos/box_predictor_pb2.py b/object_detection/protos/box_predictor_pb2.py
deleted file mode 100644
index 79525e47..00000000
--- a/object_detection/protos/box_predictor_pb2.py
+++ /dev/null
@@ -1,375 +0,0 @@
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: object_detection/protos/box_predictor.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf import descriptor_pb2
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-from object_detection.protos import hyperparams_pb2 as object__detection_dot_protos_dot_hyperparams__pb2
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='object_detection/protos/box_predictor.proto',
- package='object_detection.protos',
- syntax='proto2',
- serialized_pb=_b('\n+object_detection/protos/box_predictor.proto\x12\x17object_detection.protos\x1a)object_detection/protos/hyperparams.proto\"\x9b\x02\n\x0c\x42oxPredictor\x12Y\n\x1b\x63onvolutional_box_predictor\x18\x01 \x01(\x0b\x32\x32.object_detection.protos.ConvolutionalBoxPredictorH\x00\x12P\n\x17mask_rcnn_box_predictor\x18\x02 \x01(\x0b\x32-.object_detection.protos.MaskRCNNBoxPredictorH\x00\x12G\n\x12rfcn_box_predictor\x18\x03 \x01(\x0b\x32).object_detection.protos.RfcnBoxPredictorH\x00\x42\x15\n\x13\x62ox_predictor_oneof\"\xf2\x02\n\x19\x43onvolutionalBoxPredictor\x12>\n\x10\x63onv_hyperparams\x18\x01 \x01(\x0b\x32$.object_detection.protos.Hyperparams\x12\x14\n\tmin_depth\x18\x02 \x01(\x05:\x01\x30\x12\x14\n\tmax_depth\x18\x03 \x01(\x05:\x01\x30\x12&\n\x1bnum_layers_before_predictor\x18\x04 \x01(\x05:\x01\x30\x12\x19\n\x0buse_dropout\x18\x05 \x01(\x08:\x04true\x12%\n\x18\x64ropout_keep_probability\x18\x06 \x01(\x02:\x03\x30.8\x12\x16\n\x0bkernel_size\x18\x07 \x01(\x05:\x01\x31\x12\x18\n\rbox_code_size\x18\x08 \x01(\x05:\x01\x34\x12&\n\x17\x61pply_sigmoid_to_scores\x18\t \x01(\x08:\x05\x66\x61lse\x12%\n\x1a\x63lass_prediction_bias_init\x18\n \x01(\x02:\x01\x30\"\xe3\x02\n\x14MaskRCNNBoxPredictor\x12<\n\x0e\x66\x63_hyperparams\x18\x01 \x01(\x0b\x32$.object_detection.protos.Hyperparams\x12\x1a\n\x0buse_dropout\x18\x02 \x01(\x08:\x05\x66\x61lse\x12%\n\x18\x64ropout_keep_probability\x18\x03 \x01(\x02:\x03\x30.5\x12\x18\n\rbox_code_size\x18\x04 \x01(\x05:\x01\x34\x12>\n\x10\x63onv_hyperparams\x18\x05 \x01(\x0b\x32$.object_detection.protos.Hyperparams\x12%\n\x16predict_instance_masks\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\'\n\x1amask_prediction_conv_depth\x18\x07 \x01(\x05:\x03\x32\x35\x36\x12 \n\x11predict_keypoints\x18\x08 \x01(\x08:\x05\x66\x61lse\"\xf9\x01\n\x10RfcnBoxPredictor\x12>\n\x10\x63onv_hyperparams\x18\x01 \x01(\x0b\x32$.object_detection.protos.Hyperparams\x12\"\n\x17num_spatial_bins_height\x18\x02 
\x01(\x05:\x01\x33\x12!\n\x16num_spatial_bins_width\x18\x03 \x01(\x05:\x01\x33\x12\x13\n\x05\x64\x65pth\x18\x04 \x01(\x05:\x04\x31\x30\x32\x34\x12\x18\n\rbox_code_size\x18\x05 \x01(\x05:\x01\x34\x12\x17\n\x0b\x63rop_height\x18\x06 \x01(\x05:\x02\x31\x32\x12\x16\n\ncrop_width\x18\x07 \x01(\x05:\x02\x31\x32')
- ,
- dependencies=[object__detection_dot_protos_dot_hyperparams__pb2.DESCRIPTOR,])
-
-
-
-
-_BOXPREDICTOR = _descriptor.Descriptor(
- name='BoxPredictor',
- full_name='object_detection.protos.BoxPredictor',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='convolutional_box_predictor', full_name='object_detection.protos.BoxPredictor.convolutional_box_predictor', index=0,
- number=1, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='mask_rcnn_box_predictor', full_name='object_detection.protos.BoxPredictor.mask_rcnn_box_predictor', index=1,
- number=2, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='rfcn_box_predictor', full_name='object_detection.protos.BoxPredictor.rfcn_box_predictor', index=2,
- number=3, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- _descriptor.OneofDescriptor(
- name='box_predictor_oneof', full_name='object_detection.protos.BoxPredictor.box_predictor_oneof',
- index=0, containing_type=None, fields=[]),
- ],
- serialized_start=116,
- serialized_end=399,
-)
-
-
-_CONVOLUTIONALBOXPREDICTOR = _descriptor.Descriptor(
- name='ConvolutionalBoxPredictor',
- full_name='object_detection.protos.ConvolutionalBoxPredictor',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='conv_hyperparams', full_name='object_detection.protos.ConvolutionalBoxPredictor.conv_hyperparams', index=0,
- number=1, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='min_depth', full_name='object_detection.protos.ConvolutionalBoxPredictor.min_depth', index=1,
- number=2, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='max_depth', full_name='object_detection.protos.ConvolutionalBoxPredictor.max_depth', index=2,
- number=3, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='num_layers_before_predictor', full_name='object_detection.protos.ConvolutionalBoxPredictor.num_layers_before_predictor', index=3,
- number=4, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='use_dropout', full_name='object_detection.protos.ConvolutionalBoxPredictor.use_dropout', index=4,
- number=5, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=True,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='dropout_keep_probability', full_name='object_detection.protos.ConvolutionalBoxPredictor.dropout_keep_probability', index=5,
- number=6, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.8),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='kernel_size', full_name='object_detection.protos.ConvolutionalBoxPredictor.kernel_size', index=6,
- number=7, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=1,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='box_code_size', full_name='object_detection.protos.ConvolutionalBoxPredictor.box_code_size', index=7,
- number=8, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=4,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='apply_sigmoid_to_scores', full_name='object_detection.protos.ConvolutionalBoxPredictor.apply_sigmoid_to_scores', index=8,
- number=9, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=False,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='class_prediction_bias_init', full_name='object_detection.protos.ConvolutionalBoxPredictor.class_prediction_bias_init', index=9,
- number=10, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=402,
- serialized_end=772,
-)
-
-
-_MASKRCNNBOXPREDICTOR = _descriptor.Descriptor(
- name='MaskRCNNBoxPredictor',
- full_name='object_detection.protos.MaskRCNNBoxPredictor',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='fc_hyperparams', full_name='object_detection.protos.MaskRCNNBoxPredictor.fc_hyperparams', index=0,
- number=1, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='use_dropout', full_name='object_detection.protos.MaskRCNNBoxPredictor.use_dropout', index=1,
- number=2, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=False,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='dropout_keep_probability', full_name='object_detection.protos.MaskRCNNBoxPredictor.dropout_keep_probability', index=2,
- number=3, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.5),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='box_code_size', full_name='object_detection.protos.MaskRCNNBoxPredictor.box_code_size', index=3,
- number=4, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=4,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='conv_hyperparams', full_name='object_detection.protos.MaskRCNNBoxPredictor.conv_hyperparams', index=4,
- number=5, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='predict_instance_masks', full_name='object_detection.protos.MaskRCNNBoxPredictor.predict_instance_masks', index=5,
- number=6, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=False,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='mask_prediction_conv_depth', full_name='object_detection.protos.MaskRCNNBoxPredictor.mask_prediction_conv_depth', index=6,
- number=7, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=256,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='predict_keypoints', full_name='object_detection.protos.MaskRCNNBoxPredictor.predict_keypoints', index=7,
- number=8, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=False,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=775,
- serialized_end=1130,
-)
-
-
-_RFCNBOXPREDICTOR = _descriptor.Descriptor(
- name='RfcnBoxPredictor',
- full_name='object_detection.protos.RfcnBoxPredictor',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='conv_hyperparams', full_name='object_detection.protos.RfcnBoxPredictor.conv_hyperparams', index=0,
- number=1, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='num_spatial_bins_height', full_name='object_detection.protos.RfcnBoxPredictor.num_spatial_bins_height', index=1,
- number=2, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=3,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='num_spatial_bins_width', full_name='object_detection.protos.RfcnBoxPredictor.num_spatial_bins_width', index=2,
- number=3, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=3,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='depth', full_name='object_detection.protos.RfcnBoxPredictor.depth', index=3,
- number=4, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=1024,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='box_code_size', full_name='object_detection.protos.RfcnBoxPredictor.box_code_size', index=4,
- number=5, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=4,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='crop_height', full_name='object_detection.protos.RfcnBoxPredictor.crop_height', index=5,
- number=6, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=12,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='crop_width', full_name='object_detection.protos.RfcnBoxPredictor.crop_width', index=6,
- number=7, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=12,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=1133,
- serialized_end=1382,
-)
-
-_BOXPREDICTOR.fields_by_name['convolutional_box_predictor'].message_type = _CONVOLUTIONALBOXPREDICTOR
-_BOXPREDICTOR.fields_by_name['mask_rcnn_box_predictor'].message_type = _MASKRCNNBOXPREDICTOR
-_BOXPREDICTOR.fields_by_name['rfcn_box_predictor'].message_type = _RFCNBOXPREDICTOR
-_BOXPREDICTOR.oneofs_by_name['box_predictor_oneof'].fields.append(
- _BOXPREDICTOR.fields_by_name['convolutional_box_predictor'])
-_BOXPREDICTOR.fields_by_name['convolutional_box_predictor'].containing_oneof = _BOXPREDICTOR.oneofs_by_name['box_predictor_oneof']
-_BOXPREDICTOR.oneofs_by_name['box_predictor_oneof'].fields.append(
- _BOXPREDICTOR.fields_by_name['mask_rcnn_box_predictor'])
-_BOXPREDICTOR.fields_by_name['mask_rcnn_box_predictor'].containing_oneof = _BOXPREDICTOR.oneofs_by_name['box_predictor_oneof']
-_BOXPREDICTOR.oneofs_by_name['box_predictor_oneof'].fields.append(
- _BOXPREDICTOR.fields_by_name['rfcn_box_predictor'])
-_BOXPREDICTOR.fields_by_name['rfcn_box_predictor'].containing_oneof = _BOXPREDICTOR.oneofs_by_name['box_predictor_oneof']
-_CONVOLUTIONALBOXPREDICTOR.fields_by_name['conv_hyperparams'].message_type = object__detection_dot_protos_dot_hyperparams__pb2._HYPERPARAMS
-_MASKRCNNBOXPREDICTOR.fields_by_name['fc_hyperparams'].message_type = object__detection_dot_protos_dot_hyperparams__pb2._HYPERPARAMS
-_MASKRCNNBOXPREDICTOR.fields_by_name['conv_hyperparams'].message_type = object__detection_dot_protos_dot_hyperparams__pb2._HYPERPARAMS
-_RFCNBOXPREDICTOR.fields_by_name['conv_hyperparams'].message_type = object__detection_dot_protos_dot_hyperparams__pb2._HYPERPARAMS
-DESCRIPTOR.message_types_by_name['BoxPredictor'] = _BOXPREDICTOR
-DESCRIPTOR.message_types_by_name['ConvolutionalBoxPredictor'] = _CONVOLUTIONALBOXPREDICTOR
-DESCRIPTOR.message_types_by_name['MaskRCNNBoxPredictor'] = _MASKRCNNBOXPREDICTOR
-DESCRIPTOR.message_types_by_name['RfcnBoxPredictor'] = _RFCNBOXPREDICTOR
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-BoxPredictor = _reflection.GeneratedProtocolMessageType('BoxPredictor', (_message.Message,), dict(
- DESCRIPTOR = _BOXPREDICTOR,
- __module__ = 'object_detection.protos.box_predictor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.BoxPredictor)
- ))
-_sym_db.RegisterMessage(BoxPredictor)
-
-ConvolutionalBoxPredictor = _reflection.GeneratedProtocolMessageType('ConvolutionalBoxPredictor', (_message.Message,), dict(
- DESCRIPTOR = _CONVOLUTIONALBOXPREDICTOR,
- __module__ = 'object_detection.protos.box_predictor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.ConvolutionalBoxPredictor)
- ))
-_sym_db.RegisterMessage(ConvolutionalBoxPredictor)
-
-MaskRCNNBoxPredictor = _reflection.GeneratedProtocolMessageType('MaskRCNNBoxPredictor', (_message.Message,), dict(
- DESCRIPTOR = _MASKRCNNBOXPREDICTOR,
- __module__ = 'object_detection.protos.box_predictor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.MaskRCNNBoxPredictor)
- ))
-_sym_db.RegisterMessage(MaskRCNNBoxPredictor)
-
-RfcnBoxPredictor = _reflection.GeneratedProtocolMessageType('RfcnBoxPredictor', (_message.Message,), dict(
- DESCRIPTOR = _RFCNBOXPREDICTOR,
- __module__ = 'object_detection.protos.box_predictor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.RfcnBoxPredictor)
- ))
-_sym_db.RegisterMessage(RfcnBoxPredictor)
-
-
-# @@protoc_insertion_point(module_scope)
diff --git a/object_detection/protos/eval.proto b/object_detection/protos/eval.proto
deleted file mode 100644
index c5a30ec6..00000000
--- a/object_detection/protos/eval.proto
+++ /dev/null
@@ -1,47 +0,0 @@
-syntax = "proto2";
-
-package object_detection.protos;
-
-// Message for configuring DetectionModel evaluation jobs (eval.py).
-message EvalConfig {
- // Number of visualization images to generate.
- optional uint32 num_visualizations = 1 [default=10];
-
- // Number of examples to process of evaluation.
- optional uint32 num_examples = 2 [default=5000];
-
- // How often to run evaluation.
- optional uint32 eval_interval_secs = 3 [default=300];
-
- // Maximum number of times to run evaluation. If set to 0, will run forever.
- optional uint32 max_evals = 4 [default=0];
-
- // Whether the TensorFlow graph used for evaluation should be saved to disk.
- optional bool save_graph = 5 [default=false];
-
- // Path to directory to store visualizations in. If empty, visualization
- // images are not exported (only shown on Tensorboard).
- optional string visualization_export_dir = 6 [default=""];
-
- // BNS name of the TensorFlow master.
- optional string eval_master = 7 [default=""];
-
- // Type of metrics to use for evaluation. Currently supports only Pascal VOC
- // detection metrics.
- optional string metrics_set = 8 [default="pascal_voc_metrics"];
-
- // Path to export detections to COCO compatible JSON format.
- optional string export_path = 9 [default=''];
-
- // Option to not read groundtruth labels and only export detections to
- // COCO-compatible JSON file.
- optional bool ignore_groundtruth = 10 [default=false];
-
- // Use exponential moving averages of variables for evaluation.
- optional bool use_moving_averages = 11 [default=false];
-
- // Whether to evaluate instance masks.
- // Note that since there is no evaluation code currently for instance
- // segmenation this option is unused.
- optional bool eval_instance_masks = 12 [default=false];
-}
diff --git a/object_detection/protos/eval_pb2.py b/object_detection/protos/eval_pb2.py
deleted file mode 100644
index 9c33244a..00000000
--- a/object_detection/protos/eval_pb2.py
+++ /dev/null
@@ -1,146 +0,0 @@
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: object_detection/protos/eval.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf import descriptor_pb2
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='object_detection/protos/eval.proto',
- package='object_detection.protos',
- syntax='proto2',
- serialized_pb=_b('\n\"object_detection/protos/eval.proto\x12\x17object_detection.protos\"\x80\x03\n\nEvalConfig\x12\x1e\n\x12num_visualizations\x18\x01 \x01(\r:\x02\x31\x30\x12\x1a\n\x0cnum_examples\x18\x02 \x01(\r:\x04\x35\x30\x30\x30\x12\x1f\n\x12\x65val_interval_secs\x18\x03 \x01(\r:\x03\x33\x30\x30\x12\x14\n\tmax_evals\x18\x04 \x01(\r:\x01\x30\x12\x19\n\nsave_graph\x18\x05 \x01(\x08:\x05\x66\x61lse\x12\"\n\x18visualization_export_dir\x18\x06 \x01(\t:\x00\x12\x15\n\x0b\x65val_master\x18\x07 \x01(\t:\x00\x12\'\n\x0bmetrics_set\x18\x08 \x01(\t:\x12pascal_voc_metrics\x12\x15\n\x0b\x65xport_path\x18\t \x01(\t:\x00\x12!\n\x12ignore_groundtruth\x18\n \x01(\x08:\x05\x66\x61lse\x12\"\n\x13use_moving_averages\x18\x0b \x01(\x08:\x05\x66\x61lse\x12\"\n\x13\x65val_instance_masks\x18\x0c \x01(\x08:\x05\x66\x61lse')
-)
-
-
-
-
-_EVALCONFIG = _descriptor.Descriptor(
- name='EvalConfig',
- full_name='object_detection.protos.EvalConfig',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='num_visualizations', full_name='object_detection.protos.EvalConfig.num_visualizations', index=0,
- number=1, type=13, cpp_type=3, label=1,
- has_default_value=True, default_value=10,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='num_examples', full_name='object_detection.protos.EvalConfig.num_examples', index=1,
- number=2, type=13, cpp_type=3, label=1,
- has_default_value=True, default_value=5000,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='eval_interval_secs', full_name='object_detection.protos.EvalConfig.eval_interval_secs', index=2,
- number=3, type=13, cpp_type=3, label=1,
- has_default_value=True, default_value=300,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='max_evals', full_name='object_detection.protos.EvalConfig.max_evals', index=3,
- number=4, type=13, cpp_type=3, label=1,
- has_default_value=True, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='save_graph', full_name='object_detection.protos.EvalConfig.save_graph', index=4,
- number=5, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=False,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='visualization_export_dir', full_name='object_detection.protos.EvalConfig.visualization_export_dir', index=5,
- number=6, type=9, cpp_type=9, label=1,
- has_default_value=True, default_value=_b("").decode('utf-8'),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='eval_master', full_name='object_detection.protos.EvalConfig.eval_master', index=6,
- number=7, type=9, cpp_type=9, label=1,
- has_default_value=True, default_value=_b("").decode('utf-8'),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='metrics_set', full_name='object_detection.protos.EvalConfig.metrics_set', index=7,
- number=8, type=9, cpp_type=9, label=1,
- has_default_value=True, default_value=_b("pascal_voc_metrics").decode('utf-8'),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='export_path', full_name='object_detection.protos.EvalConfig.export_path', index=8,
- number=9, type=9, cpp_type=9, label=1,
- has_default_value=True, default_value=_b("").decode('utf-8'),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='ignore_groundtruth', full_name='object_detection.protos.EvalConfig.ignore_groundtruth', index=9,
- number=10, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=False,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='use_moving_averages', full_name='object_detection.protos.EvalConfig.use_moving_averages', index=10,
- number=11, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=False,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='eval_instance_masks', full_name='object_detection.protos.EvalConfig.eval_instance_masks', index=11,
- number=12, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=False,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=64,
- serialized_end=448,
-)
-
-DESCRIPTOR.message_types_by_name['EvalConfig'] = _EVALCONFIG
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-EvalConfig = _reflection.GeneratedProtocolMessageType('EvalConfig', (_message.Message,), dict(
- DESCRIPTOR = _EVALCONFIG,
- __module__ = 'object_detection.protos.eval_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.EvalConfig)
- ))
-_sym_db.RegisterMessage(EvalConfig)
-
-
-# @@protoc_insertion_point(module_scope)
diff --git a/object_detection/protos/faster_rcnn.proto b/object_detection/protos/faster_rcnn.proto
deleted file mode 100644
index 20c859e2..00000000
--- a/object_detection/protos/faster_rcnn.proto
+++ /dev/null
@@ -1,149 +0,0 @@
-syntax = "proto2";
-
-package object_detection.protos;
-
-import "object_detection/protos/anchor_generator.proto";
-import "object_detection/protos/box_predictor.proto";
-import "object_detection/protos/hyperparams.proto";
-import "object_detection/protos/image_resizer.proto";
-import "object_detection/protos/losses.proto";
-import "object_detection/protos/post_processing.proto";
-
-// Configuration for Faster R-CNN models.
-// See meta_architectures/faster_rcnn_meta_arch.py and models/model_builder.py
-//
-// Naming conventions:
-// Faster R-CNN models have two stages: a first stage region proposal network
-// (or RPN) and a second stage box classifier. We thus use the prefixes
-// `first_stage_` and `second_stage_` to indicate the stage to which each
-// parameter pertains when relevant.
-message FasterRcnn {
-
- // Whether to construct only the Region Proposal Network (RPN).
- optional bool first_stage_only = 1 [default=false];
-
- // Number of classes to predict.
- optional int32 num_classes = 3;
-
- // Image resizer for preprocessing the input image.
- optional ImageResizer image_resizer = 4;
-
- // Feature extractor config.
- optional FasterRcnnFeatureExtractor feature_extractor = 5;
-
-
- // (First stage) region proposal network (RPN) parameters.
-
- // Anchor generator to compute RPN anchors.
- optional AnchorGenerator first_stage_anchor_generator = 6;
-
- // Atrous rate for the convolution op applied to the
- // `first_stage_features_to_crop` tensor to obtain box predictions.
- optional int32 first_stage_atrous_rate = 7 [default=1];
-
- // Hyperparameters for the convolutional RPN box predictor.
- optional Hyperparams first_stage_box_predictor_conv_hyperparams = 8;
-
- // Kernel size to use for the convolution op just prior to RPN box
- // predictions.
- optional int32 first_stage_box_predictor_kernel_size = 9 [default=3];
-
- // Output depth for the convolution op just prior to RPN box predictions.
- optional int32 first_stage_box_predictor_depth = 10 [default=512];
-
- // The batch size to use for computing the first stage objectness and
- // location losses.
- optional int32 first_stage_minibatch_size = 11 [default=256];
-
- // Fraction of positive examples per image for the RPN.
- optional float first_stage_positive_balance_fraction = 12 [default=0.5];
-
- // Non max suppression score threshold applied to first stage RPN proposals.
- optional float first_stage_nms_score_threshold = 13 [default=0.0];
-
- // Non max suppression IOU threshold applied to first stage RPN proposals.
- optional float first_stage_nms_iou_threshold = 14 [default=0.7];
-
- // Maximum number of RPN proposals retained after first stage postprocessing.
- optional int32 first_stage_max_proposals = 15 [default=300];
-
- // First stage RPN localization loss weight.
- optional float first_stage_localization_loss_weight = 16 [default=1.0];
-
- // First stage RPN objectness loss weight.
- optional float first_stage_objectness_loss_weight = 17 [default=1.0];
-
-
- // Per-region cropping parameters.
- // Note that if a R-FCN model is constructed the per region cropping
- // parameters below are ignored.
-
- // Output size (width and height are set to be the same) of the initial
- // bilinear interpolation based cropping during ROI pooling.
- optional int32 initial_crop_size = 18;
-
- // Kernel size of the max pool op on the cropped feature map during
- // ROI pooling.
- optional int32 maxpool_kernel_size = 19;
-
- // Stride of the max pool op on the cropped feature map during ROI pooling.
- optional int32 maxpool_stride = 20;
-
-
- // (Second stage) box classifier parameters
-
- // Hyperparameters for the second stage box predictor. If box predictor type
- // is set to rfcn_box_predictor, a R-FCN model is constructed, otherwise a
- // Faster R-CNN model is constructed.
- optional BoxPredictor second_stage_box_predictor = 21;
-
- // The batch size per image used for computing the classification and refined
- // location loss of the box classifier.
- // Note that this field is ignored if `hard_example_miner` is configured.
- optional int32 second_stage_batch_size = 22 [default=64];
-
- // Fraction of positive examples to use per image for the box classifier.
- optional float second_stage_balance_fraction = 23 [default=0.25];
-
- // Post processing to apply on the second stage box classifier predictions.
- // Note: the `score_converter` provided to the FasterRCNNMetaArch constructor
- // is taken from this `second_stage_post_processing` proto.
- optional PostProcessing second_stage_post_processing = 24;
-
- // Second stage refined localization loss weight.
- optional float second_stage_localization_loss_weight = 25 [default=1.0];
-
- // Second stage classification loss weight
- optional float second_stage_classification_loss_weight = 26 [default=1.0];
-
- // Second stage instance mask loss weight. Note that this is only applicable
- // when `MaskRCNNBoxPredictor` is selected for second stage and configured to
- // predict instance masks.
- optional float second_stage_mask_prediction_loss_weight = 27 [default=1.0];
-
- // If not left to default, applies hard example mining only to classification
- // and localization loss..
- optional HardExampleMiner hard_example_miner = 28;
-
- // Loss for second stage box classifers, supports Softmax and Sigmoid.
- // Note that score converter must be consistent with loss type.
- // When there are multiple labels assigned to the same boxes, recommend
- // to use sigmoid loss and enable merge_multiple_label_boxes.
- // If not specified, Softmax loss is used as default.
- optional ClassificationLoss second_stage_classification_loss = 29;
-}
-
-
-message FasterRcnnFeatureExtractor {
- // Type of Faster R-CNN model (e.g., 'faster_rcnn_resnet101';
- // See builders/model_builder.py for expected types).
- optional string type = 1;
-
- // Output stride of extracted RPN feature map.
- optional int32 first_stage_features_stride = 2 [default=16];
-
- // Whether to update batch norm parameters during training or not.
- // When training with a relative large batch size (e.g. 8), it could be
- // desirable to enable batch norm update.
- optional bool batch_norm_trainable = 3 [default=false];
-}
diff --git a/object_detection/protos/faster_rcnn_box_coder.proto b/object_detection/protos/faster_rcnn_box_coder.proto
deleted file mode 100644
index 512a20a1..00000000
--- a/object_detection/protos/faster_rcnn_box_coder.proto
+++ /dev/null
@@ -1,17 +0,0 @@
-syntax = "proto2";
-
-package object_detection.protos;
-
-// Configuration proto for FasterRCNNBoxCoder. See
-// box_coders/faster_rcnn_box_coder.py for details.
-message FasterRcnnBoxCoder {
- // Scale factor for anchor encoded box center.
- optional float y_scale = 1 [default = 10.0];
- optional float x_scale = 2 [default = 10.0];
-
- // Scale factor for anchor encoded box height.
- optional float height_scale = 3 [default = 5.0];
-
- // Scale factor for anchor encoded box width.
- optional float width_scale = 4 [default = 5.0];
-}
diff --git a/object_detection/protos/faster_rcnn_box_coder_pb2.py b/object_detection/protos/faster_rcnn_box_coder_pb2.py
deleted file mode 100644
index 6faee77d..00000000
--- a/object_detection/protos/faster_rcnn_box_coder_pb2.py
+++ /dev/null
@@ -1,90 +0,0 @@
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: object_detection/protos/faster_rcnn_box_coder.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf import descriptor_pb2
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='object_detection/protos/faster_rcnn_box_coder.proto',
- package='object_detection.protos',
- syntax='proto2',
- serialized_pb=_b('\n3object_detection/protos/faster_rcnn_box_coder.proto\x12\x17object_detection.protos\"o\n\x12\x46\x61sterRcnnBoxCoder\x12\x13\n\x07y_scale\x18\x01 \x01(\x02:\x02\x31\x30\x12\x13\n\x07x_scale\x18\x02 \x01(\x02:\x02\x31\x30\x12\x17\n\x0cheight_scale\x18\x03 \x01(\x02:\x01\x35\x12\x16\n\x0bwidth_scale\x18\x04 \x01(\x02:\x01\x35')
-)
-
-
-
-
-_FASTERRCNNBOXCODER = _descriptor.Descriptor(
- name='FasterRcnnBoxCoder',
- full_name='object_detection.protos.FasterRcnnBoxCoder',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='y_scale', full_name='object_detection.protos.FasterRcnnBoxCoder.y_scale', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(10),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='x_scale', full_name='object_detection.protos.FasterRcnnBoxCoder.x_scale', index=1,
- number=2, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(10),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='height_scale', full_name='object_detection.protos.FasterRcnnBoxCoder.height_scale', index=2,
- number=3, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(5),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='width_scale', full_name='object_detection.protos.FasterRcnnBoxCoder.width_scale', index=3,
- number=4, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(5),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=80,
- serialized_end=191,
-)
-
-DESCRIPTOR.message_types_by_name['FasterRcnnBoxCoder'] = _FASTERRCNNBOXCODER
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-FasterRcnnBoxCoder = _reflection.GeneratedProtocolMessageType('FasterRcnnBoxCoder', (_message.Message,), dict(
- DESCRIPTOR = _FASTERRCNNBOXCODER,
- __module__ = 'object_detection.protos.faster_rcnn_box_coder_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.FasterRcnnBoxCoder)
- ))
-_sym_db.RegisterMessage(FasterRcnnBoxCoder)
-
-
-# @@protoc_insertion_point(module_scope)
diff --git a/object_detection/protos/faster_rcnn_pb2.py b/object_detection/protos/faster_rcnn_pb2.py
deleted file mode 100644
index 2e98f9f9..00000000
--- a/object_detection/protos/faster_rcnn_pb2.py
+++ /dev/null
@@ -1,326 +0,0 @@
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: object_detection/protos/faster_rcnn.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf import descriptor_pb2
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-from object_detection.protos import anchor_generator_pb2 as object__detection_dot_protos_dot_anchor__generator__pb2
-from object_detection.protos import box_predictor_pb2 as object__detection_dot_protos_dot_box__predictor__pb2
-from object_detection.protos import hyperparams_pb2 as object__detection_dot_protos_dot_hyperparams__pb2
-from object_detection.protos import image_resizer_pb2 as object__detection_dot_protos_dot_image__resizer__pb2
-from object_detection.protos import losses_pb2 as object__detection_dot_protos_dot_losses__pb2
-from object_detection.protos import post_processing_pb2 as object__detection_dot_protos_dot_post__processing__pb2
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='object_detection/protos/faster_rcnn.proto',
- package='object_detection.protos',
- syntax='proto2',
- serialized_pb=_b('\n)object_detection/protos/faster_rcnn.proto\x12\x17object_detection.protos\x1a.object_detection/protos/anchor_generator.proto\x1a+object_detection/protos/box_predictor.proto\x1a)object_detection/protos/hyperparams.proto\x1a+object_detection/protos/image_resizer.proto\x1a$object_detection/protos/losses.proto\x1a-object_detection/protos/post_processing.proto\"\xb0\x0b\n\nFasterRcnn\x12\x1f\n\x10\x66irst_stage_only\x18\x01 \x01(\x08:\x05\x66\x61lse\x12\x13\n\x0bnum_classes\x18\x03 \x01(\x05\x12<\n\rimage_resizer\x18\x04 \x01(\x0b\x32%.object_detection.protos.ImageResizer\x12N\n\x11\x66\x65\x61ture_extractor\x18\x05 \x01(\x0b\x32\x33.object_detection.protos.FasterRcnnFeatureExtractor\x12N\n\x1c\x66irst_stage_anchor_generator\x18\x06 \x01(\x0b\x32(.object_detection.protos.AnchorGenerator\x12\"\n\x17\x66irst_stage_atrous_rate\x18\x07 \x01(\x05:\x01\x31\x12X\n*first_stage_box_predictor_conv_hyperparams\x18\x08 \x01(\x0b\x32$.object_detection.protos.Hyperparams\x12\x30\n%first_stage_box_predictor_kernel_size\x18\t \x01(\x05:\x01\x33\x12,\n\x1f\x66irst_stage_box_predictor_depth\x18\n \x01(\x05:\x03\x35\x31\x32\x12\'\n\x1a\x66irst_stage_minibatch_size\x18\x0b \x01(\x05:\x03\x32\x35\x36\x12\x32\n%first_stage_positive_balance_fraction\x18\x0c \x01(\x02:\x03\x30.5\x12*\n\x1f\x66irst_stage_nms_score_threshold\x18\r \x01(\x02:\x01\x30\x12*\n\x1d\x66irst_stage_nms_iou_threshold\x18\x0e \x01(\x02:\x03\x30.7\x12&\n\x19\x66irst_stage_max_proposals\x18\x0f \x01(\x05:\x03\x33\x30\x30\x12/\n$first_stage_localization_loss_weight\x18\x10 \x01(\x02:\x01\x31\x12-\n\"first_stage_objectness_loss_weight\x18\x11 \x01(\x02:\x01\x31\x12\x19\n\x11initial_crop_size\x18\x12 \x01(\x05\x12\x1b\n\x13maxpool_kernel_size\x18\x13 \x01(\x05\x12\x16\n\x0emaxpool_stride\x18\x14 \x01(\x05\x12I\n\x1asecond_stage_box_predictor\x18\x15 \x01(\x0b\x32%.object_detection.protos.BoxPredictor\x12#\n\x17second_stage_batch_size\x18\x16 
\x01(\x05:\x02\x36\x34\x12+\n\x1dsecond_stage_balance_fraction\x18\x17 \x01(\x02:\x04\x30.25\x12M\n\x1csecond_stage_post_processing\x18\x18 \x01(\x0b\x32\'.object_detection.protos.PostProcessing\x12\x30\n%second_stage_localization_loss_weight\x18\x19 \x01(\x02:\x01\x31\x12\x32\n\'second_stage_classification_loss_weight\x18\x1a \x01(\x02:\x01\x31\x12\x33\n(second_stage_mask_prediction_loss_weight\x18\x1b \x01(\x02:\x01\x31\x12\x45\n\x12hard_example_miner\x18\x1c \x01(\x0b\x32).object_detection.protos.HardExampleMiner\x12U\n second_stage_classification_loss\x18\x1d \x01(\x0b\x32+.object_detection.protos.ClassificationLoss\"x\n\x1a\x46\x61sterRcnnFeatureExtractor\x12\x0c\n\x04type\x18\x01 \x01(\t\x12\'\n\x1b\x66irst_stage_features_stride\x18\x02 \x01(\x05:\x02\x31\x36\x12#\n\x14\x62\x61tch_norm_trainable\x18\x03 \x01(\x08:\x05\x66\x61lse')
- ,
- dependencies=[object__detection_dot_protos_dot_anchor__generator__pb2.DESCRIPTOR,object__detection_dot_protos_dot_box__predictor__pb2.DESCRIPTOR,object__detection_dot_protos_dot_hyperparams__pb2.DESCRIPTOR,object__detection_dot_protos_dot_image__resizer__pb2.DESCRIPTOR,object__detection_dot_protos_dot_losses__pb2.DESCRIPTOR,object__detection_dot_protos_dot_post__processing__pb2.DESCRIPTOR,])
-
-
-
-
-_FASTERRCNN = _descriptor.Descriptor(
- name='FasterRcnn',
- full_name='object_detection.protos.FasterRcnn',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='first_stage_only', full_name='object_detection.protos.FasterRcnn.first_stage_only', index=0,
- number=1, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=False,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='num_classes', full_name='object_detection.protos.FasterRcnn.num_classes', index=1,
- number=3, type=5, cpp_type=1, label=1,
- has_default_value=False, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='image_resizer', full_name='object_detection.protos.FasterRcnn.image_resizer', index=2,
- number=4, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='feature_extractor', full_name='object_detection.protos.FasterRcnn.feature_extractor', index=3,
- number=5, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='first_stage_anchor_generator', full_name='object_detection.protos.FasterRcnn.first_stage_anchor_generator', index=4,
- number=6, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='first_stage_atrous_rate', full_name='object_detection.protos.FasterRcnn.first_stage_atrous_rate', index=5,
- number=7, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=1,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='first_stage_box_predictor_conv_hyperparams', full_name='object_detection.protos.FasterRcnn.first_stage_box_predictor_conv_hyperparams', index=6,
- number=8, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='first_stage_box_predictor_kernel_size', full_name='object_detection.protos.FasterRcnn.first_stage_box_predictor_kernel_size', index=7,
- number=9, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=3,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='first_stage_box_predictor_depth', full_name='object_detection.protos.FasterRcnn.first_stage_box_predictor_depth', index=8,
- number=10, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=512,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='first_stage_minibatch_size', full_name='object_detection.protos.FasterRcnn.first_stage_minibatch_size', index=9,
- number=11, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=256,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='first_stage_positive_balance_fraction', full_name='object_detection.protos.FasterRcnn.first_stage_positive_balance_fraction', index=10,
- number=12, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.5),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='first_stage_nms_score_threshold', full_name='object_detection.protos.FasterRcnn.first_stage_nms_score_threshold', index=11,
- number=13, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='first_stage_nms_iou_threshold', full_name='object_detection.protos.FasterRcnn.first_stage_nms_iou_threshold', index=12,
- number=14, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.7),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='first_stage_max_proposals', full_name='object_detection.protos.FasterRcnn.first_stage_max_proposals', index=13,
- number=15, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=300,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='first_stage_localization_loss_weight', full_name='object_detection.protos.FasterRcnn.first_stage_localization_loss_weight', index=14,
- number=16, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(1),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='first_stage_objectness_loss_weight', full_name='object_detection.protos.FasterRcnn.first_stage_objectness_loss_weight', index=15,
- number=17, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(1),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='initial_crop_size', full_name='object_detection.protos.FasterRcnn.initial_crop_size', index=16,
- number=18, type=5, cpp_type=1, label=1,
- has_default_value=False, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='maxpool_kernel_size', full_name='object_detection.protos.FasterRcnn.maxpool_kernel_size', index=17,
- number=19, type=5, cpp_type=1, label=1,
- has_default_value=False, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='maxpool_stride', full_name='object_detection.protos.FasterRcnn.maxpool_stride', index=18,
- number=20, type=5, cpp_type=1, label=1,
- has_default_value=False, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='second_stage_box_predictor', full_name='object_detection.protos.FasterRcnn.second_stage_box_predictor', index=19,
- number=21, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='second_stage_batch_size', full_name='object_detection.protos.FasterRcnn.second_stage_batch_size', index=20,
- number=22, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=64,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='second_stage_balance_fraction', full_name='object_detection.protos.FasterRcnn.second_stage_balance_fraction', index=21,
- number=23, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.25),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='second_stage_post_processing', full_name='object_detection.protos.FasterRcnn.second_stage_post_processing', index=22,
- number=24, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='second_stage_localization_loss_weight', full_name='object_detection.protos.FasterRcnn.second_stage_localization_loss_weight', index=23,
- number=25, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(1),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='second_stage_classification_loss_weight', full_name='object_detection.protos.FasterRcnn.second_stage_classification_loss_weight', index=24,
- number=26, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(1),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='second_stage_mask_prediction_loss_weight', full_name='object_detection.protos.FasterRcnn.second_stage_mask_prediction_loss_weight', index=25,
- number=27, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(1),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='hard_example_miner', full_name='object_detection.protos.FasterRcnn.hard_example_miner', index=26,
- number=28, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='second_stage_classification_loss', full_name='object_detection.protos.FasterRcnn.second_stage_classification_loss', index=27,
- number=29, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=337,
- serialized_end=1793,
-)
-
-
-_FASTERRCNNFEATUREEXTRACTOR = _descriptor.Descriptor(
- name='FasterRcnnFeatureExtractor',
- full_name='object_detection.protos.FasterRcnnFeatureExtractor',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='type', full_name='object_detection.protos.FasterRcnnFeatureExtractor.type', index=0,
- number=1, type=9, cpp_type=9, label=1,
- has_default_value=False, default_value=_b("").decode('utf-8'),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='first_stage_features_stride', full_name='object_detection.protos.FasterRcnnFeatureExtractor.first_stage_features_stride', index=1,
- number=2, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=16,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='batch_norm_trainable', full_name='object_detection.protos.FasterRcnnFeatureExtractor.batch_norm_trainable', index=2,
- number=3, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=False,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=1795,
- serialized_end=1915,
-)
-
-_FASTERRCNN.fields_by_name['image_resizer'].message_type = object__detection_dot_protos_dot_image__resizer__pb2._IMAGERESIZER
-_FASTERRCNN.fields_by_name['feature_extractor'].message_type = _FASTERRCNNFEATUREEXTRACTOR
-_FASTERRCNN.fields_by_name['first_stage_anchor_generator'].message_type = object__detection_dot_protos_dot_anchor__generator__pb2._ANCHORGENERATOR
-_FASTERRCNN.fields_by_name['first_stage_box_predictor_conv_hyperparams'].message_type = object__detection_dot_protos_dot_hyperparams__pb2._HYPERPARAMS
-_FASTERRCNN.fields_by_name['second_stage_box_predictor'].message_type = object__detection_dot_protos_dot_box__predictor__pb2._BOXPREDICTOR
-_FASTERRCNN.fields_by_name['second_stage_post_processing'].message_type = object__detection_dot_protos_dot_post__processing__pb2._POSTPROCESSING
-_FASTERRCNN.fields_by_name['hard_example_miner'].message_type = object__detection_dot_protos_dot_losses__pb2._HARDEXAMPLEMINER
-_FASTERRCNN.fields_by_name['second_stage_classification_loss'].message_type = object__detection_dot_protos_dot_losses__pb2._CLASSIFICATIONLOSS
-DESCRIPTOR.message_types_by_name['FasterRcnn'] = _FASTERRCNN
-DESCRIPTOR.message_types_by_name['FasterRcnnFeatureExtractor'] = _FASTERRCNNFEATUREEXTRACTOR
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-FasterRcnn = _reflection.GeneratedProtocolMessageType('FasterRcnn', (_message.Message,), dict(
- DESCRIPTOR = _FASTERRCNN,
- __module__ = 'object_detection.protos.faster_rcnn_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.FasterRcnn)
- ))
-_sym_db.RegisterMessage(FasterRcnn)
-
-FasterRcnnFeatureExtractor = _reflection.GeneratedProtocolMessageType('FasterRcnnFeatureExtractor', (_message.Message,), dict(
- DESCRIPTOR = _FASTERRCNNFEATUREEXTRACTOR,
- __module__ = 'object_detection.protos.faster_rcnn_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.FasterRcnnFeatureExtractor)
- ))
-_sym_db.RegisterMessage(FasterRcnnFeatureExtractor)
-
-
-# @@protoc_insertion_point(module_scope)
diff --git a/object_detection/protos/grid_anchor_generator.proto b/object_detection/protos/grid_anchor_generator.proto
deleted file mode 100644
index 85168f8f..00000000
--- a/object_detection/protos/grid_anchor_generator.proto
+++ /dev/null
@@ -1,34 +0,0 @@
-syntax = "proto2";
-
-package object_detection.protos;
-
-// Configuration proto for GridAnchorGenerator. See
-// anchor_generators/grid_anchor_generator.py for details.
-message GridAnchorGenerator {
- // Anchor height in pixels.
- optional int32 height = 1 [default = 256];
-
- // Anchor width in pixels.
- optional int32 width = 2 [default = 256];
-
- // Anchor stride in height dimension in pixels.
- optional int32 height_stride = 3 [default = 16];
-
- // Anchor stride in width dimension in pixels.
- optional int32 width_stride = 4 [default = 16];
-
- // Anchor height offset in pixels.
- optional int32 height_offset = 5 [default = 0];
-
- // Anchor width offset in pixels.
- optional int32 width_offset = 6 [default = 0];
-
- // At any given location, len(scales) * len(aspect_ratios) anchors are
- // generated with all possible combinations of scales and aspect ratios.
-
- // List of scales for the anchors.
- repeated float scales = 7;
-
- // List of aspect ratios for the anchors.
- repeated float aspect_ratios = 8;
-}
diff --git a/object_detection/protos/grid_anchor_generator_pb2.py b/object_detection/protos/grid_anchor_generator_pb2.py
deleted file mode 100644
index 83b6011c..00000000
--- a/object_detection/protos/grid_anchor_generator_pb2.py
+++ /dev/null
@@ -1,118 +0,0 @@
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: object_detection/protos/grid_anchor_generator.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf import descriptor_pb2
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='object_detection/protos/grid_anchor_generator.proto',
- package='object_detection.protos',
- syntax='proto2',
- serialized_pb=_b('\n3object_detection/protos/grid_anchor_generator.proto\x12\x17object_detection.protos\"\xcd\x01\n\x13GridAnchorGenerator\x12\x13\n\x06height\x18\x01 \x01(\x05:\x03\x32\x35\x36\x12\x12\n\x05width\x18\x02 \x01(\x05:\x03\x32\x35\x36\x12\x19\n\rheight_stride\x18\x03 \x01(\x05:\x02\x31\x36\x12\x18\n\x0cwidth_stride\x18\x04 \x01(\x05:\x02\x31\x36\x12\x18\n\rheight_offset\x18\x05 \x01(\x05:\x01\x30\x12\x17\n\x0cwidth_offset\x18\x06 \x01(\x05:\x01\x30\x12\x0e\n\x06scales\x18\x07 \x03(\x02\x12\x15\n\raspect_ratios\x18\x08 \x03(\x02')
-)
-
-
-
-
-_GRIDANCHORGENERATOR = _descriptor.Descriptor(
- name='GridAnchorGenerator',
- full_name='object_detection.protos.GridAnchorGenerator',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='height', full_name='object_detection.protos.GridAnchorGenerator.height', index=0,
- number=1, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=256,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='width', full_name='object_detection.protos.GridAnchorGenerator.width', index=1,
- number=2, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=256,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='height_stride', full_name='object_detection.protos.GridAnchorGenerator.height_stride', index=2,
- number=3, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=16,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='width_stride', full_name='object_detection.protos.GridAnchorGenerator.width_stride', index=3,
- number=4, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=16,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='height_offset', full_name='object_detection.protos.GridAnchorGenerator.height_offset', index=4,
- number=5, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='width_offset', full_name='object_detection.protos.GridAnchorGenerator.width_offset', index=5,
- number=6, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='scales', full_name='object_detection.protos.GridAnchorGenerator.scales', index=6,
- number=7, type=2, cpp_type=6, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='aspect_ratios', full_name='object_detection.protos.GridAnchorGenerator.aspect_ratios', index=7,
- number=8, type=2, cpp_type=6, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=81,
- serialized_end=286,
-)
-
-DESCRIPTOR.message_types_by_name['GridAnchorGenerator'] = _GRIDANCHORGENERATOR
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-GridAnchorGenerator = _reflection.GeneratedProtocolMessageType('GridAnchorGenerator', (_message.Message,), dict(
- DESCRIPTOR = _GRIDANCHORGENERATOR,
- __module__ = 'object_detection.protos.grid_anchor_generator_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.GridAnchorGenerator)
- ))
-_sym_db.RegisterMessage(GridAnchorGenerator)
-
-
-# @@protoc_insertion_point(module_scope)
diff --git a/object_detection/protos/hyperparams.proto b/object_detection/protos/hyperparams.proto
deleted file mode 100644
index b8b9972e..00000000
--- a/object_detection/protos/hyperparams.proto
+++ /dev/null
@@ -1,103 +0,0 @@
-syntax = "proto2";
-
-package object_detection.protos;
-
-// Configuration proto for the convolution op hyperparameters to use in the
-// object detection pipeline.
-message Hyperparams {
-
- // Operations affected by hyperparameters.
- enum Op {
- // Convolution, Separable Convolution, Convolution transpose.
- CONV = 1;
-
- // Fully connected
- FC = 2;
- }
- optional Op op = 1 [default = CONV];
-
- // Regularizer for the weights of the convolution op.
- optional Regularizer regularizer = 2;
-
- // Initializer for the weights of the convolution op.
- optional Initializer initializer = 3;
-
- // Type of activation to apply after convolution.
- enum Activation {
- // Use None (no activation)
- NONE = 0;
-
- // Use tf.nn.relu
- RELU = 1;
-
- // Use tf.nn.relu6
- RELU_6 = 2;
- }
- optional Activation activation = 4 [default = RELU];
-
- // BatchNorm hyperparameters. If this parameter is NOT set then BatchNorm is
- // not applied!
- optional BatchNorm batch_norm = 5;
-}
-
-// Proto with one-of field for regularizers.
-message Regularizer {
- oneof regularizer_oneof {
- L1Regularizer l1_regularizer = 1;
- L2Regularizer l2_regularizer = 2;
- }
-}
-
-// Configuration proto for L1 Regularizer.
-// See https://www.tensorflow.org/api_docs/python/tf/contrib/layers/l1_regularizer
-message L1Regularizer {
- optional float weight = 1 [default = 1.0];
-}
-
-// Configuration proto for L2 Regularizer.
-// See https://www.tensorflow.org/api_docs/python/tf/contrib/layers/l2_regularizer
-message L2Regularizer {
- optional float weight = 1 [default = 1.0];
-}
-
-// Proto with one-of field for initializers.
-message Initializer {
- oneof initializer_oneof {
- TruncatedNormalInitializer truncated_normal_initializer = 1;
- VarianceScalingInitializer variance_scaling_initializer = 2;
- }
-}
-
-// Configuration proto for truncated normal initializer. See
-// https://www.tensorflow.org/api_docs/python/tf/truncated_normal_initializer
-message TruncatedNormalInitializer {
- optional float mean = 1 [default = 0.0];
- optional float stddev = 2 [default = 1.0];
-}
-
-// Configuration proto for variance scaling initializer. See
-// https://www.tensorflow.org/api_docs/python/tf/contrib/layers/
-// variance_scaling_initializer
-message VarianceScalingInitializer {
- optional float factor = 1 [default = 2.0];
- optional bool uniform = 2 [default = false];
- enum Mode {
- FAN_IN = 0;
- FAN_OUT = 1;
- FAN_AVG = 2;
- }
- optional Mode mode = 3 [default = FAN_IN];
-}
-
-// Configuration proto for batch norm to apply after convolution op. See
-// https://www.tensorflow.org/api_docs/python/tf/contrib/layers/batch_norm
-message BatchNorm {
- optional float decay = 1 [default = 0.999];
- optional bool center = 2 [default = true];
- optional bool scale = 3 [default = false];
- optional float epsilon = 4 [default = 0.001];
- // Whether to train the batch norm variables. If this is set to false during
- // training, the current value of the batch_norm variables are used for
- // forward pass but they are never updated.
- optional bool train = 5 [default = true];
-}
diff --git a/object_detection/protos/hyperparams_pb2.py b/object_detection/protos/hyperparams_pb2.py
deleted file mode 100644
index 9aaaa032..00000000
--- a/object_detection/protos/hyperparams_pb2.py
+++ /dev/null
@@ -1,541 +0,0 @@
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: object_detection/protos/hyperparams.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf import descriptor_pb2
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='object_detection/protos/hyperparams.proto',
- package='object_detection.protos',
- syntax='proto2',
- serialized_pb=_b('\n)object_detection/protos/hyperparams.proto\x12\x17object_detection.protos\"\x87\x03\n\x0bHyperparams\x12\x39\n\x02op\x18\x01 \x01(\x0e\x32\'.object_detection.protos.Hyperparams.Op:\x04\x43ONV\x12\x39\n\x0bregularizer\x18\x02 \x01(\x0b\x32$.object_detection.protos.Regularizer\x12\x39\n\x0binitializer\x18\x03 \x01(\x0b\x32$.object_detection.protos.Initializer\x12I\n\nactivation\x18\x04 \x01(\x0e\x32/.object_detection.protos.Hyperparams.Activation:\x04RELU\x12\x36\n\nbatch_norm\x18\x05 \x01(\x0b\x32\".object_detection.protos.BatchNorm\"\x16\n\x02Op\x12\x08\n\x04\x43ONV\x10\x01\x12\x06\n\x02\x46\x43\x10\x02\",\n\nActivation\x12\x08\n\x04NONE\x10\x00\x12\x08\n\x04RELU\x10\x01\x12\n\n\x06RELU_6\x10\x02\"\xa6\x01\n\x0bRegularizer\x12@\n\x0el1_regularizer\x18\x01 \x01(\x0b\x32&.object_detection.protos.L1RegularizerH\x00\x12@\n\x0el2_regularizer\x18\x02 \x01(\x0b\x32&.object_detection.protos.L2RegularizerH\x00\x42\x13\n\x11regularizer_oneof\"\"\n\rL1Regularizer\x12\x11\n\x06weight\x18\x01 \x01(\x02:\x01\x31\"\"\n\rL2Regularizer\x12\x11\n\x06weight\x18\x01 \x01(\x02:\x01\x31\"\xdc\x01\n\x0bInitializer\x12[\n\x1ctruncated_normal_initializer\x18\x01 \x01(\x0b\x32\x33.object_detection.protos.TruncatedNormalInitializerH\x00\x12[\n\x1cvariance_scaling_initializer\x18\x02 \x01(\x0b\x32\x33.object_detection.protos.VarianceScalingInitializerH\x00\x42\x13\n\x11initializer_oneof\"@\n\x1aTruncatedNormalInitializer\x12\x0f\n\x04mean\x18\x01 \x01(\x02:\x01\x30\x12\x11\n\x06stddev\x18\x02 \x01(\x02:\x01\x31\"\xc5\x01\n\x1aVarianceScalingInitializer\x12\x11\n\x06\x66\x61\x63tor\x18\x01 \x01(\x02:\x01\x32\x12\x16\n\x07uniform\x18\x02 \x01(\x08:\x05\x66\x61lse\x12N\n\x04mode\x18\x03 \x01(\x0e\x32\x38.object_detection.protos.VarianceScalingInitializer.Mode:\x06\x46\x41N_IN\",\n\x04Mode\x12\n\n\x06\x46\x41N_IN\x10\x00\x12\x0b\n\x07\x46\x41N_OUT\x10\x01\x12\x0b\n\x07\x46\x41N_AVG\x10\x02\"z\n\tBatchNorm\x12\x14\n\x05\x64\x65\x63\x61y\x18\x01 
\x01(\x02:\x05\x30.999\x12\x14\n\x06\x63\x65nter\x18\x02 \x01(\x08:\x04true\x12\x14\n\x05scale\x18\x03 \x01(\x08:\x05\x66\x61lse\x12\x16\n\x07\x65psilon\x18\x04 \x01(\x02:\x05\x30.001\x12\x13\n\x05train\x18\x05 \x01(\x08:\x04true')
-)
-
-
-
-_HYPERPARAMS_OP = _descriptor.EnumDescriptor(
- name='Op',
- full_name='object_detection.protos.Hyperparams.Op',
- filename=None,
- file=DESCRIPTOR,
- values=[
- _descriptor.EnumValueDescriptor(
- name='CONV', index=0, number=1,
- options=None,
- type=None),
- _descriptor.EnumValueDescriptor(
- name='FC', index=1, number=2,
- options=None,
- type=None),
- ],
- containing_type=None,
- options=None,
- serialized_start=394,
- serialized_end=416,
-)
-_sym_db.RegisterEnumDescriptor(_HYPERPARAMS_OP)
-
-_HYPERPARAMS_ACTIVATION = _descriptor.EnumDescriptor(
- name='Activation',
- full_name='object_detection.protos.Hyperparams.Activation',
- filename=None,
- file=DESCRIPTOR,
- values=[
- _descriptor.EnumValueDescriptor(
- name='NONE', index=0, number=0,
- options=None,
- type=None),
- _descriptor.EnumValueDescriptor(
- name='RELU', index=1, number=1,
- options=None,
- type=None),
- _descriptor.EnumValueDescriptor(
- name='RELU_6', index=2, number=2,
- options=None,
- type=None),
- ],
- containing_type=None,
- options=None,
- serialized_start=418,
- serialized_end=462,
-)
-_sym_db.RegisterEnumDescriptor(_HYPERPARAMS_ACTIVATION)
-
-_VARIANCESCALINGINITIALIZER_MODE = _descriptor.EnumDescriptor(
- name='Mode',
- full_name='object_detection.protos.VarianceScalingInitializer.Mode',
- filename=None,
- file=DESCRIPTOR,
- values=[
- _descriptor.EnumValueDescriptor(
- name='FAN_IN', index=0, number=0,
- options=None,
- type=None),
- _descriptor.EnumValueDescriptor(
- name='FAN_OUT', index=1, number=1,
- options=None,
- type=None),
- _descriptor.EnumValueDescriptor(
- name='FAN_AVG', index=2, number=2,
- options=None,
- type=None),
- ],
- containing_type=None,
- options=None,
- serialized_start=1148,
- serialized_end=1192,
-)
-_sym_db.RegisterEnumDescriptor(_VARIANCESCALINGINITIALIZER_MODE)
-
-
-_HYPERPARAMS = _descriptor.Descriptor(
- name='Hyperparams',
- full_name='object_detection.protos.Hyperparams',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='op', full_name='object_detection.protos.Hyperparams.op', index=0,
- number=1, type=14, cpp_type=8, label=1,
- has_default_value=True, default_value=1,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='regularizer', full_name='object_detection.protos.Hyperparams.regularizer', index=1,
- number=2, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='initializer', full_name='object_detection.protos.Hyperparams.initializer', index=2,
- number=3, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='activation', full_name='object_detection.protos.Hyperparams.activation', index=3,
- number=4, type=14, cpp_type=8, label=1,
- has_default_value=True, default_value=1,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='batch_norm', full_name='object_detection.protos.Hyperparams.batch_norm', index=4,
- number=5, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- _HYPERPARAMS_OP,
- _HYPERPARAMS_ACTIVATION,
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=71,
- serialized_end=462,
-)
-
-
-_REGULARIZER = _descriptor.Descriptor(
- name='Regularizer',
- full_name='object_detection.protos.Regularizer',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='l1_regularizer', full_name='object_detection.protos.Regularizer.l1_regularizer', index=0,
- number=1, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='l2_regularizer', full_name='object_detection.protos.Regularizer.l2_regularizer', index=1,
- number=2, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- _descriptor.OneofDescriptor(
- name='regularizer_oneof', full_name='object_detection.protos.Regularizer.regularizer_oneof',
- index=0, containing_type=None, fields=[]),
- ],
- serialized_start=465,
- serialized_end=631,
-)
-
-
-_L1REGULARIZER = _descriptor.Descriptor(
- name='L1Regularizer',
- full_name='object_detection.protos.L1Regularizer',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='weight', full_name='object_detection.protos.L1Regularizer.weight', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(1),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=633,
- serialized_end=667,
-)
-
-
-_L2REGULARIZER = _descriptor.Descriptor(
- name='L2Regularizer',
- full_name='object_detection.protos.L2Regularizer',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='weight', full_name='object_detection.protos.L2Regularizer.weight', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(1),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=669,
- serialized_end=703,
-)
-
-
-_INITIALIZER = _descriptor.Descriptor(
- name='Initializer',
- full_name='object_detection.protos.Initializer',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='truncated_normal_initializer', full_name='object_detection.protos.Initializer.truncated_normal_initializer', index=0,
- number=1, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='variance_scaling_initializer', full_name='object_detection.protos.Initializer.variance_scaling_initializer', index=1,
- number=2, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- _descriptor.OneofDescriptor(
- name='initializer_oneof', full_name='object_detection.protos.Initializer.initializer_oneof',
- index=0, containing_type=None, fields=[]),
- ],
- serialized_start=706,
- serialized_end=926,
-)
-
-
-_TRUNCATEDNORMALINITIALIZER = _descriptor.Descriptor(
- name='TruncatedNormalInitializer',
- full_name='object_detection.protos.TruncatedNormalInitializer',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='mean', full_name='object_detection.protos.TruncatedNormalInitializer.mean', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='stddev', full_name='object_detection.protos.TruncatedNormalInitializer.stddev', index=1,
- number=2, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(1),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=928,
- serialized_end=992,
-)
-
-
-_VARIANCESCALINGINITIALIZER = _descriptor.Descriptor(
- name='VarianceScalingInitializer',
- full_name='object_detection.protos.VarianceScalingInitializer',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='factor', full_name='object_detection.protos.VarianceScalingInitializer.factor', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(2),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='uniform', full_name='object_detection.protos.VarianceScalingInitializer.uniform', index=1,
- number=2, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=False,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='mode', full_name='object_detection.protos.VarianceScalingInitializer.mode', index=2,
- number=3, type=14, cpp_type=8, label=1,
- has_default_value=True, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- _VARIANCESCALINGINITIALIZER_MODE,
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=995,
- serialized_end=1192,
-)
-
-
-_BATCHNORM = _descriptor.Descriptor(
- name='BatchNorm',
- full_name='object_detection.protos.BatchNorm',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='decay', full_name='object_detection.protos.BatchNorm.decay', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.999),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='center', full_name='object_detection.protos.BatchNorm.center', index=1,
- number=2, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=True,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='scale', full_name='object_detection.protos.BatchNorm.scale', index=2,
- number=3, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=False,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='epsilon', full_name='object_detection.protos.BatchNorm.epsilon', index=3,
- number=4, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.001),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='train', full_name='object_detection.protos.BatchNorm.train', index=4,
- number=5, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=True,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=1194,
- serialized_end=1316,
-)
-
-_HYPERPARAMS.fields_by_name['op'].enum_type = _HYPERPARAMS_OP
-_HYPERPARAMS.fields_by_name['regularizer'].message_type = _REGULARIZER
-_HYPERPARAMS.fields_by_name['initializer'].message_type = _INITIALIZER
-_HYPERPARAMS.fields_by_name['activation'].enum_type = _HYPERPARAMS_ACTIVATION
-_HYPERPARAMS.fields_by_name['batch_norm'].message_type = _BATCHNORM
-_HYPERPARAMS_OP.containing_type = _HYPERPARAMS
-_HYPERPARAMS_ACTIVATION.containing_type = _HYPERPARAMS
-_REGULARIZER.fields_by_name['l1_regularizer'].message_type = _L1REGULARIZER
-_REGULARIZER.fields_by_name['l2_regularizer'].message_type = _L2REGULARIZER
-_REGULARIZER.oneofs_by_name['regularizer_oneof'].fields.append(
- _REGULARIZER.fields_by_name['l1_regularizer'])
-_REGULARIZER.fields_by_name['l1_regularizer'].containing_oneof = _REGULARIZER.oneofs_by_name['regularizer_oneof']
-_REGULARIZER.oneofs_by_name['regularizer_oneof'].fields.append(
- _REGULARIZER.fields_by_name['l2_regularizer'])
-_REGULARIZER.fields_by_name['l2_regularizer'].containing_oneof = _REGULARIZER.oneofs_by_name['regularizer_oneof']
-_INITIALIZER.fields_by_name['truncated_normal_initializer'].message_type = _TRUNCATEDNORMALINITIALIZER
-_INITIALIZER.fields_by_name['variance_scaling_initializer'].message_type = _VARIANCESCALINGINITIALIZER
-_INITIALIZER.oneofs_by_name['initializer_oneof'].fields.append(
- _INITIALIZER.fields_by_name['truncated_normal_initializer'])
-_INITIALIZER.fields_by_name['truncated_normal_initializer'].containing_oneof = _INITIALIZER.oneofs_by_name['initializer_oneof']
-_INITIALIZER.oneofs_by_name['initializer_oneof'].fields.append(
- _INITIALIZER.fields_by_name['variance_scaling_initializer'])
-_INITIALIZER.fields_by_name['variance_scaling_initializer'].containing_oneof = _INITIALIZER.oneofs_by_name['initializer_oneof']
-_VARIANCESCALINGINITIALIZER.fields_by_name['mode'].enum_type = _VARIANCESCALINGINITIALIZER_MODE
-_VARIANCESCALINGINITIALIZER_MODE.containing_type = _VARIANCESCALINGINITIALIZER
-DESCRIPTOR.message_types_by_name['Hyperparams'] = _HYPERPARAMS
-DESCRIPTOR.message_types_by_name['Regularizer'] = _REGULARIZER
-DESCRIPTOR.message_types_by_name['L1Regularizer'] = _L1REGULARIZER
-DESCRIPTOR.message_types_by_name['L2Regularizer'] = _L2REGULARIZER
-DESCRIPTOR.message_types_by_name['Initializer'] = _INITIALIZER
-DESCRIPTOR.message_types_by_name['TruncatedNormalInitializer'] = _TRUNCATEDNORMALINITIALIZER
-DESCRIPTOR.message_types_by_name['VarianceScalingInitializer'] = _VARIANCESCALINGINITIALIZER
-DESCRIPTOR.message_types_by_name['BatchNorm'] = _BATCHNORM
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-Hyperparams = _reflection.GeneratedProtocolMessageType('Hyperparams', (_message.Message,), dict(
- DESCRIPTOR = _HYPERPARAMS,
- __module__ = 'object_detection.protos.hyperparams_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.Hyperparams)
- ))
-_sym_db.RegisterMessage(Hyperparams)
-
-Regularizer = _reflection.GeneratedProtocolMessageType('Regularizer', (_message.Message,), dict(
- DESCRIPTOR = _REGULARIZER,
- __module__ = 'object_detection.protos.hyperparams_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.Regularizer)
- ))
-_sym_db.RegisterMessage(Regularizer)
-
-L1Regularizer = _reflection.GeneratedProtocolMessageType('L1Regularizer', (_message.Message,), dict(
- DESCRIPTOR = _L1REGULARIZER,
- __module__ = 'object_detection.protos.hyperparams_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.L1Regularizer)
- ))
-_sym_db.RegisterMessage(L1Regularizer)
-
-L2Regularizer = _reflection.GeneratedProtocolMessageType('L2Regularizer', (_message.Message,), dict(
- DESCRIPTOR = _L2REGULARIZER,
- __module__ = 'object_detection.protos.hyperparams_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.L2Regularizer)
- ))
-_sym_db.RegisterMessage(L2Regularizer)
-
-Initializer = _reflection.GeneratedProtocolMessageType('Initializer', (_message.Message,), dict(
- DESCRIPTOR = _INITIALIZER,
- __module__ = 'object_detection.protos.hyperparams_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.Initializer)
- ))
-_sym_db.RegisterMessage(Initializer)
-
-TruncatedNormalInitializer = _reflection.GeneratedProtocolMessageType('TruncatedNormalInitializer', (_message.Message,), dict(
- DESCRIPTOR = _TRUNCATEDNORMALINITIALIZER,
- __module__ = 'object_detection.protos.hyperparams_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.TruncatedNormalInitializer)
- ))
-_sym_db.RegisterMessage(TruncatedNormalInitializer)
-
-VarianceScalingInitializer = _reflection.GeneratedProtocolMessageType('VarianceScalingInitializer', (_message.Message,), dict(
- DESCRIPTOR = _VARIANCESCALINGINITIALIZER,
- __module__ = 'object_detection.protos.hyperparams_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.VarianceScalingInitializer)
- ))
-_sym_db.RegisterMessage(VarianceScalingInitializer)
-
-BatchNorm = _reflection.GeneratedProtocolMessageType('BatchNorm', (_message.Message,), dict(
- DESCRIPTOR = _BATCHNORM,
- __module__ = 'object_detection.protos.hyperparams_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.BatchNorm)
- ))
-_sym_db.RegisterMessage(BatchNorm)
-
-
-# @@protoc_insertion_point(module_scope)
diff --git a/object_detection/protos/image_resizer.proto b/object_detection/protos/image_resizer.proto
deleted file mode 100644
index 67f6cacd..00000000
--- a/object_detection/protos/image_resizer.proto
+++ /dev/null
@@ -1,44 +0,0 @@
-syntax = "proto2";
-
-package object_detection.protos;
-
-// Configuration proto for image resizing operations.
-// See builders/image_resizer_builder.py for details.
-message ImageResizer {
- oneof image_resizer_oneof {
- KeepAspectRatioResizer keep_aspect_ratio_resizer = 1;
- FixedShapeResizer fixed_shape_resizer = 2;
- }
-}
-
-// Enumeration type for image resizing methods provided in TensorFlow.
-enum ResizeType {
- BILINEAR = 0; // Corresponds to tf.image.ResizeMethod.BILINEAR
- NEAREST_NEIGHBOR = 1; // Corresponds to tf.image.ResizeMethod.NEAREST_NEIGHBOR
- BICUBIC = 2; // Corresponds to tf.image.ResizeMethod.BICUBIC
- AREA = 3; // Corresponds to tf.image.ResizeMethod.AREA
-}
-
-// Configuration proto for image resizer that keeps aspect ratio.
-message KeepAspectRatioResizer {
- // Desired size of the smaller image dimension in pixels.
- optional int32 min_dimension = 1 [default = 600];
-
- // Desired size of the larger image dimension in pixels.
- optional int32 max_dimension = 2 [default = 1024];
-
- // Desired method when resizing image.
- optional ResizeType resize_method = 3 [default = BILINEAR];
-}
-
-// Configuration proto for image resizer that resizes to a fixed shape.
-message FixedShapeResizer {
- // Desired height of image in pixels.
- optional int32 height = 1 [default = 300];
-
- // Desired width of image in pixels.
- optional int32 width = 2 [default = 300];
-
- // Desired method when resizing image.
- optional ResizeType resize_method = 3 [default = BILINEAR];
-}
diff --git a/object_detection/protos/image_resizer_pb2.py b/object_detection/protos/image_resizer_pb2.py
deleted file mode 100644
index 2c953b77..00000000
--- a/object_detection/protos/image_resizer_pb2.py
+++ /dev/null
@@ -1,232 +0,0 @@
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: object_detection/protos/image_resizer.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf.internal import enum_type_wrapper
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf import descriptor_pb2
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='object_detection/protos/image_resizer.proto',
- package='object_detection.protos',
- syntax='proto2',
- serialized_pb=_b('\n+object_detection/protos/image_resizer.proto\x12\x17object_detection.protos\"\xc6\x01\n\x0cImageResizer\x12T\n\x19keep_aspect_ratio_resizer\x18\x01 \x01(\x0b\x32/.object_detection.protos.KeepAspectRatioResizerH\x00\x12I\n\x13\x66ixed_shape_resizer\x18\x02 \x01(\x0b\x32*.object_detection.protos.FixedShapeResizerH\x00\x42\x15\n\x13image_resizer_oneof\"\x97\x01\n\x16KeepAspectRatioResizer\x12\x1a\n\rmin_dimension\x18\x01 \x01(\x05:\x03\x36\x30\x30\x12\x1b\n\rmax_dimension\x18\x02 \x01(\x05:\x04\x31\x30\x32\x34\x12\x44\n\rresize_method\x18\x03 \x01(\x0e\x32#.object_detection.protos.ResizeType:\x08\x42ILINEAR\"\x82\x01\n\x11\x46ixedShapeResizer\x12\x13\n\x06height\x18\x01 \x01(\x05:\x03\x33\x30\x30\x12\x12\n\x05width\x18\x02 \x01(\x05:\x03\x33\x30\x30\x12\x44\n\rresize_method\x18\x03 \x01(\x0e\x32#.object_detection.protos.ResizeType:\x08\x42ILINEAR*G\n\nResizeType\x12\x0c\n\x08\x42ILINEAR\x10\x00\x12\x14\n\x10NEAREST_NEIGHBOR\x10\x01\x12\x0b\n\x07\x42ICUBIC\x10\x02\x12\x08\n\x04\x41REA\x10\x03')
-)
-
-_RESIZETYPE = _descriptor.EnumDescriptor(
- name='ResizeType',
- full_name='object_detection.protos.ResizeType',
- filename=None,
- file=DESCRIPTOR,
- values=[
- _descriptor.EnumValueDescriptor(
- name='BILINEAR', index=0, number=0,
- options=None,
- type=None),
- _descriptor.EnumValueDescriptor(
- name='NEAREST_NEIGHBOR', index=1, number=1,
- options=None,
- type=None),
- _descriptor.EnumValueDescriptor(
- name='BICUBIC', index=2, number=2,
- options=None,
- type=None),
- _descriptor.EnumValueDescriptor(
- name='AREA', index=3, number=3,
- options=None,
- type=None),
- ],
- containing_type=None,
- options=None,
- serialized_start=560,
- serialized_end=631,
-)
-_sym_db.RegisterEnumDescriptor(_RESIZETYPE)
-
-ResizeType = enum_type_wrapper.EnumTypeWrapper(_RESIZETYPE)
-BILINEAR = 0
-NEAREST_NEIGHBOR = 1
-BICUBIC = 2
-AREA = 3
-
-
-
-_IMAGERESIZER = _descriptor.Descriptor(
- name='ImageResizer',
- full_name='object_detection.protos.ImageResizer',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='keep_aspect_ratio_resizer', full_name='object_detection.protos.ImageResizer.keep_aspect_ratio_resizer', index=0,
- number=1, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='fixed_shape_resizer', full_name='object_detection.protos.ImageResizer.fixed_shape_resizer', index=1,
- number=2, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- _descriptor.OneofDescriptor(
- name='image_resizer_oneof', full_name='object_detection.protos.ImageResizer.image_resizer_oneof',
- index=0, containing_type=None, fields=[]),
- ],
- serialized_start=73,
- serialized_end=271,
-)
-
-
-_KEEPASPECTRATIORESIZER = _descriptor.Descriptor(
- name='KeepAspectRatioResizer',
- full_name='object_detection.protos.KeepAspectRatioResizer',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='min_dimension', full_name='object_detection.protos.KeepAspectRatioResizer.min_dimension', index=0,
- number=1, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=600,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='max_dimension', full_name='object_detection.protos.KeepAspectRatioResizer.max_dimension', index=1,
- number=2, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=1024,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='resize_method', full_name='object_detection.protos.KeepAspectRatioResizer.resize_method', index=2,
- number=3, type=14, cpp_type=8, label=1,
- has_default_value=True, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=274,
- serialized_end=425,
-)
-
-
-_FIXEDSHAPERESIZER = _descriptor.Descriptor(
- name='FixedShapeResizer',
- full_name='object_detection.protos.FixedShapeResizer',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='height', full_name='object_detection.protos.FixedShapeResizer.height', index=0,
- number=1, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=300,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='width', full_name='object_detection.protos.FixedShapeResizer.width', index=1,
- number=2, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=300,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='resize_method', full_name='object_detection.protos.FixedShapeResizer.resize_method', index=2,
- number=3, type=14, cpp_type=8, label=1,
- has_default_value=True, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=428,
- serialized_end=558,
-)
-
-_IMAGERESIZER.fields_by_name['keep_aspect_ratio_resizer'].message_type = _KEEPASPECTRATIORESIZER
-_IMAGERESIZER.fields_by_name['fixed_shape_resizer'].message_type = _FIXEDSHAPERESIZER
-_IMAGERESIZER.oneofs_by_name['image_resizer_oneof'].fields.append(
- _IMAGERESIZER.fields_by_name['keep_aspect_ratio_resizer'])
-_IMAGERESIZER.fields_by_name['keep_aspect_ratio_resizer'].containing_oneof = _IMAGERESIZER.oneofs_by_name['image_resizer_oneof']
-_IMAGERESIZER.oneofs_by_name['image_resizer_oneof'].fields.append(
- _IMAGERESIZER.fields_by_name['fixed_shape_resizer'])
-_IMAGERESIZER.fields_by_name['fixed_shape_resizer'].containing_oneof = _IMAGERESIZER.oneofs_by_name['image_resizer_oneof']
-_KEEPASPECTRATIORESIZER.fields_by_name['resize_method'].enum_type = _RESIZETYPE
-_FIXEDSHAPERESIZER.fields_by_name['resize_method'].enum_type = _RESIZETYPE
-DESCRIPTOR.message_types_by_name['ImageResizer'] = _IMAGERESIZER
-DESCRIPTOR.message_types_by_name['KeepAspectRatioResizer'] = _KEEPASPECTRATIORESIZER
-DESCRIPTOR.message_types_by_name['FixedShapeResizer'] = _FIXEDSHAPERESIZER
-DESCRIPTOR.enum_types_by_name['ResizeType'] = _RESIZETYPE
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-ImageResizer = _reflection.GeneratedProtocolMessageType('ImageResizer', (_message.Message,), dict(
- DESCRIPTOR = _IMAGERESIZER,
- __module__ = 'object_detection.protos.image_resizer_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.ImageResizer)
- ))
-_sym_db.RegisterMessage(ImageResizer)
-
-KeepAspectRatioResizer = _reflection.GeneratedProtocolMessageType('KeepAspectRatioResizer', (_message.Message,), dict(
- DESCRIPTOR = _KEEPASPECTRATIORESIZER,
- __module__ = 'object_detection.protos.image_resizer_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.KeepAspectRatioResizer)
- ))
-_sym_db.RegisterMessage(KeepAspectRatioResizer)
-
-FixedShapeResizer = _reflection.GeneratedProtocolMessageType('FixedShapeResizer', (_message.Message,), dict(
- DESCRIPTOR = _FIXEDSHAPERESIZER,
- __module__ = 'object_detection.protos.image_resizer_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.FixedShapeResizer)
- ))
-_sym_db.RegisterMessage(FixedShapeResizer)
-
-
-# @@protoc_insertion_point(module_scope)
diff --git a/object_detection/protos/input_reader.proto b/object_detection/protos/input_reader.proto
deleted file mode 100644
index ed460dad..00000000
--- a/object_detection/protos/input_reader.proto
+++ /dev/null
@@ -1,60 +0,0 @@
-syntax = "proto2";
-
-package object_detection.protos;
-
-// Configuration proto for defining input readers that generate Object Detection
-// Examples from input sources. Input readers are expected to generate a
-// dictionary of tensors, with the following fields populated:
-//
-// 'image': an [image_height, image_width, channels] image tensor that detection
-// will be run on.
-// 'groundtruth_classes': a [num_boxes] int32 tensor storing the class
-// labels of detected boxes in the image.
-// 'groundtruth_boxes': a [num_boxes, 4] float tensor storing the coordinates of
-// detected boxes in the image.
-// 'groundtruth_instance_masks': (Optional), a [num_boxes, image_height,
-// image_width] float tensor storing binary mask of the objects in boxes.
-
-message InputReader {
- // Path to StringIntLabelMap pbtxt file specifying the mapping from string
- // labels to integer ids.
- optional string label_map_path = 1 [default=""];
-
- // Whether data should be processed in the order they are read in, or
- // shuffled randomly.
- optional bool shuffle = 2 [default=true];
-
- // Maximum number of records to keep in reader queue.
- optional uint32 queue_capacity = 3 [default=2000];
-
- // Minimum number of records to keep in reader queue. A large value is needed
- // to generate a good random shuffle.
- optional uint32 min_after_dequeue = 4 [default=1000];
-
- // The number of times a data source is read. If set to zero, the data source
- // will be reused indefinitely.
- optional uint32 num_epochs = 5 [default=0];
-
- // Number of reader instances to create.
- optional uint32 num_readers = 6 [default=8];
-
- // Whether to load groundtruth instance masks.
- optional bool load_instance_masks = 7 [default = false];
-
- oneof input_reader {
- TFRecordInputReader tf_record_input_reader = 8;
- ExternalInputReader external_input_reader = 9;
- }
-}
-
-// An input reader that reads TF Example protos from local TFRecord files.
-message TFRecordInputReader {
- // Path(s) to `TFRecordFile`s.
- repeated string input_path = 1;
-}
-
-// An externally defined input reader. Users may define an extension to this
-// proto to interface their own input readers.
-message ExternalInputReader {
- extensions 1 to 999;
-}
diff --git a/object_detection/protos/input_reader_pb2.py b/object_detection/protos/input_reader_pb2.py
deleted file mode 100644
index 1e8022b9..00000000
--- a/object_detection/protos/input_reader_pb2.py
+++ /dev/null
@@ -1,207 +0,0 @@
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: object_detection/protos/input_reader.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf import descriptor_pb2
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='object_detection/protos/input_reader.proto',
- package='object_detection.protos',
- syntax='proto2',
- serialized_pb=_b('\n*object_detection/protos/input_reader.proto\x12\x17object_detection.protos\"\xff\x02\n\x0bInputReader\x12\x18\n\x0elabel_map_path\x18\x01 \x01(\t:\x00\x12\x15\n\x07shuffle\x18\x02 \x01(\x08:\x04true\x12\x1c\n\x0equeue_capacity\x18\x03 \x01(\r:\x04\x32\x30\x30\x30\x12\x1f\n\x11min_after_dequeue\x18\x04 \x01(\r:\x04\x31\x30\x30\x30\x12\x15\n\nnum_epochs\x18\x05 \x01(\r:\x01\x30\x12\x16\n\x0bnum_readers\x18\x06 \x01(\r:\x01\x38\x12\"\n\x13load_instance_masks\x18\x07 \x01(\x08:\x05\x66\x61lse\x12N\n\x16tf_record_input_reader\x18\x08 \x01(\x0b\x32,.object_detection.protos.TFRecordInputReaderH\x00\x12M\n\x15\x65xternal_input_reader\x18\t \x01(\x0b\x32,.object_detection.protos.ExternalInputReaderH\x00\x42\x0e\n\x0cinput_reader\")\n\x13TFRecordInputReader\x12\x12\n\ninput_path\x18\x01 \x03(\t\"\x1c\n\x13\x45xternalInputReader*\x05\x08\x01\x10\xe8\x07')
-)
-
-
-
-
-_INPUTREADER = _descriptor.Descriptor(
- name='InputReader',
- full_name='object_detection.protos.InputReader',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='label_map_path', full_name='object_detection.protos.InputReader.label_map_path', index=0,
- number=1, type=9, cpp_type=9, label=1,
- has_default_value=True, default_value=_b("").decode('utf-8'),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='shuffle', full_name='object_detection.protos.InputReader.shuffle', index=1,
- number=2, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=True,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='queue_capacity', full_name='object_detection.protos.InputReader.queue_capacity', index=2,
- number=3, type=13, cpp_type=3, label=1,
- has_default_value=True, default_value=2000,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='min_after_dequeue', full_name='object_detection.protos.InputReader.min_after_dequeue', index=3,
- number=4, type=13, cpp_type=3, label=1,
- has_default_value=True, default_value=1000,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='num_epochs', full_name='object_detection.protos.InputReader.num_epochs', index=4,
- number=5, type=13, cpp_type=3, label=1,
- has_default_value=True, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='num_readers', full_name='object_detection.protos.InputReader.num_readers', index=5,
- number=6, type=13, cpp_type=3, label=1,
- has_default_value=True, default_value=8,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='load_instance_masks', full_name='object_detection.protos.InputReader.load_instance_masks', index=6,
- number=7, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=False,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='tf_record_input_reader', full_name='object_detection.protos.InputReader.tf_record_input_reader', index=7,
- number=8, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='external_input_reader', full_name='object_detection.protos.InputReader.external_input_reader', index=8,
- number=9, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- _descriptor.OneofDescriptor(
- name='input_reader', full_name='object_detection.protos.InputReader.input_reader',
- index=0, containing_type=None, fields=[]),
- ],
- serialized_start=72,
- serialized_end=455,
-)
-
-
-_TFRECORDINPUTREADER = _descriptor.Descriptor(
- name='TFRecordInputReader',
- full_name='object_detection.protos.TFRecordInputReader',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='input_path', full_name='object_detection.protos.TFRecordInputReader.input_path', index=0,
- number=1, type=9, cpp_type=9, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=457,
- serialized_end=498,
-)
-
-
-_EXTERNALINPUTREADER = _descriptor.Descriptor(
- name='ExternalInputReader',
- full_name='object_detection.protos.ExternalInputReader',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=True,
- syntax='proto2',
- extension_ranges=[(1, 1000), ],
- oneofs=[
- ],
- serialized_start=500,
- serialized_end=528,
-)
-
-_INPUTREADER.fields_by_name['tf_record_input_reader'].message_type = _TFRECORDINPUTREADER
-_INPUTREADER.fields_by_name['external_input_reader'].message_type = _EXTERNALINPUTREADER
-_INPUTREADER.oneofs_by_name['input_reader'].fields.append(
- _INPUTREADER.fields_by_name['tf_record_input_reader'])
-_INPUTREADER.fields_by_name['tf_record_input_reader'].containing_oneof = _INPUTREADER.oneofs_by_name['input_reader']
-_INPUTREADER.oneofs_by_name['input_reader'].fields.append(
- _INPUTREADER.fields_by_name['external_input_reader'])
-_INPUTREADER.fields_by_name['external_input_reader'].containing_oneof = _INPUTREADER.oneofs_by_name['input_reader']
-DESCRIPTOR.message_types_by_name['InputReader'] = _INPUTREADER
-DESCRIPTOR.message_types_by_name['TFRecordInputReader'] = _TFRECORDINPUTREADER
-DESCRIPTOR.message_types_by_name['ExternalInputReader'] = _EXTERNALINPUTREADER
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-InputReader = _reflection.GeneratedProtocolMessageType('InputReader', (_message.Message,), dict(
- DESCRIPTOR = _INPUTREADER,
- __module__ = 'object_detection.protos.input_reader_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.InputReader)
- ))
-_sym_db.RegisterMessage(InputReader)
-
-TFRecordInputReader = _reflection.GeneratedProtocolMessageType('TFRecordInputReader', (_message.Message,), dict(
- DESCRIPTOR = _TFRECORDINPUTREADER,
- __module__ = 'object_detection.protos.input_reader_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.TFRecordInputReader)
- ))
-_sym_db.RegisterMessage(TFRecordInputReader)
-
-ExternalInputReader = _reflection.GeneratedProtocolMessageType('ExternalInputReader', (_message.Message,), dict(
- DESCRIPTOR = _EXTERNALINPUTREADER,
- __module__ = 'object_detection.protos.input_reader_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.ExternalInputReader)
- ))
-_sym_db.RegisterMessage(ExternalInputReader)
-
-
-# @@protoc_insertion_point(module_scope)
diff --git a/object_detection/protos/keypoint_box_coder.proto b/object_detection/protos/keypoint_box_coder.proto
deleted file mode 100644
index 542ebbfb..00000000
--- a/object_detection/protos/keypoint_box_coder.proto
+++ /dev/null
@@ -1,19 +0,0 @@
-syntax = "proto2";
-
-package object_detection.protos;
-
-// Configuration proto for KeypointBoxCoder. See
-// box_coders/keypoint_box_coder.py for details.
-message KeypointBoxCoder {
- optional int32 num_keypoints = 1;
-
- // Scale factor for anchor encoded box center and keypoints.
- optional float y_scale = 2 [default = 10.0];
- optional float x_scale = 3 [default = 10.0];
-
- // Scale factor for anchor encoded box height.
- optional float height_scale = 4 [default = 5.0];
-
- // Scale factor for anchor encoded box width.
- optional float width_scale = 5 [default = 5.0];
-}
diff --git a/object_detection/protos/keypoint_box_coder_pb2.py b/object_detection/protos/keypoint_box_coder_pb2.py
deleted file mode 100644
index d473bc9d..00000000
--- a/object_detection/protos/keypoint_box_coder_pb2.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: object_detection/protos/keypoint_box_coder.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf import descriptor_pb2
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='object_detection/protos/keypoint_box_coder.proto',
- package='object_detection.protos',
- syntax='proto2',
- serialized_pb=_b('\n0object_detection/protos/keypoint_box_coder.proto\x12\x17object_detection.protos\"\x84\x01\n\x10KeypointBoxCoder\x12\x15\n\rnum_keypoints\x18\x01 \x01(\x05\x12\x13\n\x07y_scale\x18\x02 \x01(\x02:\x02\x31\x30\x12\x13\n\x07x_scale\x18\x03 \x01(\x02:\x02\x31\x30\x12\x17\n\x0cheight_scale\x18\x04 \x01(\x02:\x01\x35\x12\x16\n\x0bwidth_scale\x18\x05 \x01(\x02:\x01\x35')
-)
-
-
-
-
-_KEYPOINTBOXCODER = _descriptor.Descriptor(
- name='KeypointBoxCoder',
- full_name='object_detection.protos.KeypointBoxCoder',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='num_keypoints', full_name='object_detection.protos.KeypointBoxCoder.num_keypoints', index=0,
- number=1, type=5, cpp_type=1, label=1,
- has_default_value=False, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='y_scale', full_name='object_detection.protos.KeypointBoxCoder.y_scale', index=1,
- number=2, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(10),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='x_scale', full_name='object_detection.protos.KeypointBoxCoder.x_scale', index=2,
- number=3, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(10),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='height_scale', full_name='object_detection.protos.KeypointBoxCoder.height_scale', index=3,
- number=4, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(5),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='width_scale', full_name='object_detection.protos.KeypointBoxCoder.width_scale', index=4,
- number=5, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(5),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=78,
- serialized_end=210,
-)
-
-DESCRIPTOR.message_types_by_name['KeypointBoxCoder'] = _KEYPOINTBOXCODER
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-KeypointBoxCoder = _reflection.GeneratedProtocolMessageType('KeypointBoxCoder', (_message.Message,), dict(
- DESCRIPTOR = _KEYPOINTBOXCODER,
- __module__ = 'object_detection.protos.keypoint_box_coder_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.KeypointBoxCoder)
- ))
-_sym_db.RegisterMessage(KeypointBoxCoder)
-
-
-# @@protoc_insertion_point(module_scope)
diff --git a/object_detection/protos/losses.proto b/object_detection/protos/losses.proto
deleted file mode 100644
index e2d189b5..00000000
--- a/object_detection/protos/losses.proto
+++ /dev/null
@@ -1,130 +0,0 @@
-syntax = "proto2";
-
-package object_detection.protos;
-
-// Message for configuring the localization loss, classification loss and hard
-// example miner used for training object detection models. See core/losses.py
-// for details
-message Loss {
- // Localization loss to use.
- optional LocalizationLoss localization_loss = 1;
-
- // Classification loss to use.
- optional ClassificationLoss classification_loss = 2;
-
- // If not left to default, applies hard example mining.
- optional HardExampleMiner hard_example_miner = 3;
-
- // Classification loss weight.
- optional float classification_weight = 4 [default=1.0];
-
- // Localization loss weight.
- optional float localization_weight = 5 [default=1.0];
-}
-
-// Configuration for bounding box localization loss function.
-message LocalizationLoss {
- oneof localization_loss {
- WeightedL2LocalizationLoss weighted_l2 = 1;
- WeightedSmoothL1LocalizationLoss weighted_smooth_l1 = 2;
- WeightedIOULocalizationLoss weighted_iou = 3;
- }
-}
-
-// L2 location loss: 0.5 * ||weight * (a - b)|| ^ 2
-message WeightedL2LocalizationLoss {
- // Output loss per anchor.
- optional bool anchorwise_output = 1 [default=false];
-}
-
-// SmoothL1 (Huber) location loss: .5 * x ^ 2 if |x| < 1 else |x| - .5
-message WeightedSmoothL1LocalizationLoss {
- // Output loss per anchor.
- optional bool anchorwise_output = 1 [default=false];
-}
-
-// Intersection over union location loss: 1 - IOU
-message WeightedIOULocalizationLoss {
-}
-
-// Configuration for class prediction loss function.
-message ClassificationLoss {
- oneof classification_loss {
- WeightedSigmoidClassificationLoss weighted_sigmoid = 1;
- WeightedSoftmaxClassificationLoss weighted_softmax = 2;
- BootstrappedSigmoidClassificationLoss bootstrapped_sigmoid = 3;
- SigmoidFocalClassificationLoss weighted_sigmoid_focal = 4;
- }
-}
-
-// Classification loss using a sigmoid function over class predictions.
-message WeightedSigmoidClassificationLoss {
- // Output loss per anchor.
- optional bool anchorwise_output = 1 [default=false];
-}
-
-// Sigmoid Focal cross entropy loss as described in
-// https://arxiv.org/abs/1708.02002
-message SigmoidFocalClassificationLoss {
- optional bool anchorwise_output = 1 [default = false];
- // modulating factor for the loss.
- optional float gamma = 2 [default = 2.0];
- // alpha weighting factor for the loss.
- optional float alpha = 3;
-}
-
-// Classification loss using a softmax function over class predictions.
-message WeightedSoftmaxClassificationLoss {
- // Output loss per anchor.
- optional bool anchorwise_output = 1 [default=false];
- // Scale logit (input) value before calculating softmax classification loss.
- // Typically used for softmax distillation.
- optional float logit_scale = 2 [default = 1.0];
-}
-
-// Classification loss using a sigmoid function over the class prediction with
-// the highest prediction score.
-message BootstrappedSigmoidClassificationLoss {
- // Interpolation weight between 0 and 1.
- optional float alpha = 1;
-
- // Whether hard boot strapping should be used or not. If true, will only use
- // one class favored by model. Othewise, will use all predicted class
- // probabilities.
- optional bool hard_bootstrap = 2 [default=false];
-
- // Output loss per anchor.
- optional bool anchorwise_output = 3 [default=false];
-}
-
-// Configuation for hard example miner.
-message HardExampleMiner {
- // Maximum number of hard examples to be selected per image (prior to
- // enforcing max negative to positive ratio constraint). If set to 0,
- // all examples obtained after NMS are considered.
- optional int32 num_hard_examples = 1 [default=64];
-
- // Minimum intersection over union for an example to be discarded during NMS.
- optional float iou_threshold = 2 [default=0.7];
-
- // Whether to use classification losses ('cls', default), localization losses
- // ('loc') or both losses ('both'). In the case of 'both', cls_loss_weight and
- // loc_loss_weight are used to compute weighted sum of the two losses.
- enum LossType {
- BOTH = 0;
- CLASSIFICATION = 1;
- LOCALIZATION = 2;
- }
- optional LossType loss_type = 3 [default=BOTH];
-
- // Maximum number of negatives to retain for each positive anchor. If
- // num_negatives_per_positive is 0 no prespecified negative:positive ratio is
- // enforced.
- optional int32 max_negatives_per_positive = 4 [default=0];
-
- // Minimum number of negative anchors to sample for a given image. Setting
- // this to a positive number samples negatives in an image without any
- // positive anchors and thus not bias the model towards having at least one
- // detection per image.
- optional int32 min_negatives_per_image = 5 [default=0];
-}
diff --git a/object_detection/protos/losses_pb2.py b/object_detection/protos/losses_pb2.py
deleted file mode 100644
index 51077a3c..00000000
--- a/object_detection/protos/losses_pb2.py
+++ /dev/null
@@ -1,644 +0,0 @@
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: object_detection/protos/losses.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf import descriptor_pb2
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='object_detection/protos/losses.proto',
- package='object_detection.protos',
- syntax='proto2',
- serialized_pb=_b('\n$object_detection/protos/losses.proto\x12\x17object_detection.protos\"\x9f\x02\n\x04Loss\x12\x44\n\x11localization_loss\x18\x01 \x01(\x0b\x32).object_detection.protos.LocalizationLoss\x12H\n\x13\x63lassification_loss\x18\x02 \x01(\x0b\x32+.object_detection.protos.ClassificationLoss\x12\x45\n\x12hard_example_miner\x18\x03 \x01(\x0b\x32).object_detection.protos.HardExampleMiner\x12 \n\x15\x63lassification_weight\x18\x04 \x01(\x02:\x01\x31\x12\x1e\n\x13localization_weight\x18\x05 \x01(\x02:\x01\x31\"\x9a\x02\n\x10LocalizationLoss\x12J\n\x0bweighted_l2\x18\x01 \x01(\x0b\x32\x33.object_detection.protos.WeightedL2LocalizationLossH\x00\x12W\n\x12weighted_smooth_l1\x18\x02 \x01(\x0b\x32\x39.object_detection.protos.WeightedSmoothL1LocalizationLossH\x00\x12L\n\x0cweighted_iou\x18\x03 \x01(\x0b\x32\x34.object_detection.protos.WeightedIOULocalizationLossH\x00\x42\x13\n\x11localization_loss\">\n\x1aWeightedL2LocalizationLoss\x12 \n\x11\x61nchorwise_output\x18\x01 \x01(\x08:\x05\x66\x61lse\"D\n WeightedSmoothL1LocalizationLoss\x12 \n\x11\x61nchorwise_output\x18\x01 \x01(\x08:\x05\x66\x61lse\"\x1d\n\x1bWeightedIOULocalizationLoss\"\x96\x03\n\x12\x43lassificationLoss\x12V\n\x10weighted_sigmoid\x18\x01 \x01(\x0b\x32:.object_detection.protos.WeightedSigmoidClassificationLossH\x00\x12V\n\x10weighted_softmax\x18\x02 \x01(\x0b\x32:.object_detection.protos.WeightedSoftmaxClassificationLossH\x00\x12^\n\x14\x62ootstrapped_sigmoid\x18\x03 \x01(\x0b\x32>.object_detection.protos.BootstrappedSigmoidClassificationLossH\x00\x12Y\n\x16weighted_sigmoid_focal\x18\x04 \x01(\x0b\x32\x37.object_detection.protos.SigmoidFocalClassificationLossH\x00\x42\x15\n\x13\x63lassification_loss\"E\n!WeightedSigmoidClassificationLoss\x12 \n\x11\x61nchorwise_output\x18\x01 \x01(\x08:\x05\x66\x61lse\"c\n\x1eSigmoidFocalClassificationLoss\x12 \n\x11\x61nchorwise_output\x18\x01 \x01(\x08:\x05\x66\x61lse\x12\x10\n\x05gamma\x18\x02 \x01(\x02:\x01\x32\x12\r\n\x05\x61lpha\x18\x03 
\x01(\x02\"]\n!WeightedSoftmaxClassificationLoss\x12 \n\x11\x61nchorwise_output\x18\x01 \x01(\x08:\x05\x66\x61lse\x12\x16\n\x0blogit_scale\x18\x02 \x01(\x02:\x01\x31\"w\n%BootstrappedSigmoidClassificationLoss\x12\r\n\x05\x61lpha\x18\x01 \x01(\x02\x12\x1d\n\x0ehard_bootstrap\x18\x02 \x01(\x08:\x05\x66\x61lse\x12 \n\x11\x61nchorwise_output\x18\x03 \x01(\x08:\x05\x66\x61lse\"\xa1\x02\n\x10HardExampleMiner\x12\x1d\n\x11num_hard_examples\x18\x01 \x01(\x05:\x02\x36\x34\x12\x1a\n\riou_threshold\x18\x02 \x01(\x02:\x03\x30.7\x12K\n\tloss_type\x18\x03 \x01(\x0e\x32\x32.object_detection.protos.HardExampleMiner.LossType:\x04\x42OTH\x12%\n\x1amax_negatives_per_positive\x18\x04 \x01(\x05:\x01\x30\x12\"\n\x17min_negatives_per_image\x18\x05 \x01(\x05:\x01\x30\":\n\x08LossType\x12\x08\n\x04\x42OTH\x10\x00\x12\x12\n\x0e\x43LASSIFICATION\x10\x01\x12\x10\n\x0cLOCALIZATION\x10\x02')
-)
-
-
-
-_HARDEXAMPLEMINER_LOSSTYPE = _descriptor.EnumDescriptor(
- name='LossType',
- full_name='object_detection.protos.HardExampleMiner.LossType',
- filename=None,
- file=DESCRIPTOR,
- values=[
- _descriptor.EnumValueDescriptor(
- name='BOTH', index=0, number=0,
- options=None,
- type=None),
- _descriptor.EnumValueDescriptor(
- name='CLASSIFICATION', index=1, number=1,
- options=None,
- type=None),
- _descriptor.EnumValueDescriptor(
- name='LOCALIZATION', index=2, number=2,
- options=None,
- type=None),
- ],
- containing_type=None,
- options=None,
- serialized_start=1834,
- serialized_end=1892,
-)
-_sym_db.RegisterEnumDescriptor(_HARDEXAMPLEMINER_LOSSTYPE)
-
-
-_LOSS = _descriptor.Descriptor(
- name='Loss',
- full_name='object_detection.protos.Loss',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='localization_loss', full_name='object_detection.protos.Loss.localization_loss', index=0,
- number=1, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='classification_loss', full_name='object_detection.protos.Loss.classification_loss', index=1,
- number=2, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='hard_example_miner', full_name='object_detection.protos.Loss.hard_example_miner', index=2,
- number=3, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='classification_weight', full_name='object_detection.protos.Loss.classification_weight', index=3,
- number=4, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(1),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='localization_weight', full_name='object_detection.protos.Loss.localization_weight', index=4,
- number=5, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(1),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=66,
- serialized_end=353,
-)
-
-
-_LOCALIZATIONLOSS = _descriptor.Descriptor(
- name='LocalizationLoss',
- full_name='object_detection.protos.LocalizationLoss',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='weighted_l2', full_name='object_detection.protos.LocalizationLoss.weighted_l2', index=0,
- number=1, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='weighted_smooth_l1', full_name='object_detection.protos.LocalizationLoss.weighted_smooth_l1', index=1,
- number=2, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='weighted_iou', full_name='object_detection.protos.LocalizationLoss.weighted_iou', index=2,
- number=3, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- _descriptor.OneofDescriptor(
- name='localization_loss', full_name='object_detection.protos.LocalizationLoss.localization_loss',
- index=0, containing_type=None, fields=[]),
- ],
- serialized_start=356,
- serialized_end=638,
-)
-
-
-_WEIGHTEDL2LOCALIZATIONLOSS = _descriptor.Descriptor(
- name='WeightedL2LocalizationLoss',
- full_name='object_detection.protos.WeightedL2LocalizationLoss',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='anchorwise_output', full_name='object_detection.protos.WeightedL2LocalizationLoss.anchorwise_output', index=0,
- number=1, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=False,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=640,
- serialized_end=702,
-)
-
-
-_WEIGHTEDSMOOTHL1LOCALIZATIONLOSS = _descriptor.Descriptor(
- name='WeightedSmoothL1LocalizationLoss',
- full_name='object_detection.protos.WeightedSmoothL1LocalizationLoss',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='anchorwise_output', full_name='object_detection.protos.WeightedSmoothL1LocalizationLoss.anchorwise_output', index=0,
- number=1, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=False,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=704,
- serialized_end=772,
-)
-
-
-_WEIGHTEDIOULOCALIZATIONLOSS = _descriptor.Descriptor(
- name='WeightedIOULocalizationLoss',
- full_name='object_detection.protos.WeightedIOULocalizationLoss',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=774,
- serialized_end=803,
-)
-
-
-_CLASSIFICATIONLOSS = _descriptor.Descriptor(
- name='ClassificationLoss',
- full_name='object_detection.protos.ClassificationLoss',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='weighted_sigmoid', full_name='object_detection.protos.ClassificationLoss.weighted_sigmoid', index=0,
- number=1, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='weighted_softmax', full_name='object_detection.protos.ClassificationLoss.weighted_softmax', index=1,
- number=2, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='bootstrapped_sigmoid', full_name='object_detection.protos.ClassificationLoss.bootstrapped_sigmoid', index=2,
- number=3, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='weighted_sigmoid_focal', full_name='object_detection.protos.ClassificationLoss.weighted_sigmoid_focal', index=3,
- number=4, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- _descriptor.OneofDescriptor(
- name='classification_loss', full_name='object_detection.protos.ClassificationLoss.classification_loss',
- index=0, containing_type=None, fields=[]),
- ],
- serialized_start=806,
- serialized_end=1212,
-)
-
-
-_WEIGHTEDSIGMOIDCLASSIFICATIONLOSS = _descriptor.Descriptor(
- name='WeightedSigmoidClassificationLoss',
- full_name='object_detection.protos.WeightedSigmoidClassificationLoss',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='anchorwise_output', full_name='object_detection.protos.WeightedSigmoidClassificationLoss.anchorwise_output', index=0,
- number=1, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=False,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=1214,
- serialized_end=1283,
-)
-
-
-_SIGMOIDFOCALCLASSIFICATIONLOSS = _descriptor.Descriptor(
- name='SigmoidFocalClassificationLoss',
- full_name='object_detection.protos.SigmoidFocalClassificationLoss',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='anchorwise_output', full_name='object_detection.protos.SigmoidFocalClassificationLoss.anchorwise_output', index=0,
- number=1, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=False,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='gamma', full_name='object_detection.protos.SigmoidFocalClassificationLoss.gamma', index=1,
- number=2, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(2),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='alpha', full_name='object_detection.protos.SigmoidFocalClassificationLoss.alpha', index=2,
- number=3, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=1285,
- serialized_end=1384,
-)
-
-
-_WEIGHTEDSOFTMAXCLASSIFICATIONLOSS = _descriptor.Descriptor(
- name='WeightedSoftmaxClassificationLoss',
- full_name='object_detection.protos.WeightedSoftmaxClassificationLoss',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='anchorwise_output', full_name='object_detection.protos.WeightedSoftmaxClassificationLoss.anchorwise_output', index=0,
- number=1, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=False,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='logit_scale', full_name='object_detection.protos.WeightedSoftmaxClassificationLoss.logit_scale', index=1,
- number=2, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(1),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=1386,
- serialized_end=1479,
-)
-
-
-_BOOTSTRAPPEDSIGMOIDCLASSIFICATIONLOSS = _descriptor.Descriptor(
- name='BootstrappedSigmoidClassificationLoss',
- full_name='object_detection.protos.BootstrappedSigmoidClassificationLoss',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='alpha', full_name='object_detection.protos.BootstrappedSigmoidClassificationLoss.alpha', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='hard_bootstrap', full_name='object_detection.protos.BootstrappedSigmoidClassificationLoss.hard_bootstrap', index=1,
- number=2, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=False,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='anchorwise_output', full_name='object_detection.protos.BootstrappedSigmoidClassificationLoss.anchorwise_output', index=2,
- number=3, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=False,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=1481,
- serialized_end=1600,
-)
-
-
-_HARDEXAMPLEMINER = _descriptor.Descriptor(
- name='HardExampleMiner',
- full_name='object_detection.protos.HardExampleMiner',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='num_hard_examples', full_name='object_detection.protos.HardExampleMiner.num_hard_examples', index=0,
- number=1, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=64,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='iou_threshold', full_name='object_detection.protos.HardExampleMiner.iou_threshold', index=1,
- number=2, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.7),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='loss_type', full_name='object_detection.protos.HardExampleMiner.loss_type', index=2,
- number=3, type=14, cpp_type=8, label=1,
- has_default_value=True, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='max_negatives_per_positive', full_name='object_detection.protos.HardExampleMiner.max_negatives_per_positive', index=3,
- number=4, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='min_negatives_per_image', full_name='object_detection.protos.HardExampleMiner.min_negatives_per_image', index=4,
- number=5, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- _HARDEXAMPLEMINER_LOSSTYPE,
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=1603,
- serialized_end=1892,
-)
-
-_LOSS.fields_by_name['localization_loss'].message_type = _LOCALIZATIONLOSS
-_LOSS.fields_by_name['classification_loss'].message_type = _CLASSIFICATIONLOSS
-_LOSS.fields_by_name['hard_example_miner'].message_type = _HARDEXAMPLEMINER
-_LOCALIZATIONLOSS.fields_by_name['weighted_l2'].message_type = _WEIGHTEDL2LOCALIZATIONLOSS
-_LOCALIZATIONLOSS.fields_by_name['weighted_smooth_l1'].message_type = _WEIGHTEDSMOOTHL1LOCALIZATIONLOSS
-_LOCALIZATIONLOSS.fields_by_name['weighted_iou'].message_type = _WEIGHTEDIOULOCALIZATIONLOSS
-_LOCALIZATIONLOSS.oneofs_by_name['localization_loss'].fields.append(
- _LOCALIZATIONLOSS.fields_by_name['weighted_l2'])
-_LOCALIZATIONLOSS.fields_by_name['weighted_l2'].containing_oneof = _LOCALIZATIONLOSS.oneofs_by_name['localization_loss']
-_LOCALIZATIONLOSS.oneofs_by_name['localization_loss'].fields.append(
- _LOCALIZATIONLOSS.fields_by_name['weighted_smooth_l1'])
-_LOCALIZATIONLOSS.fields_by_name['weighted_smooth_l1'].containing_oneof = _LOCALIZATIONLOSS.oneofs_by_name['localization_loss']
-_LOCALIZATIONLOSS.oneofs_by_name['localization_loss'].fields.append(
- _LOCALIZATIONLOSS.fields_by_name['weighted_iou'])
-_LOCALIZATIONLOSS.fields_by_name['weighted_iou'].containing_oneof = _LOCALIZATIONLOSS.oneofs_by_name['localization_loss']
-_CLASSIFICATIONLOSS.fields_by_name['weighted_sigmoid'].message_type = _WEIGHTEDSIGMOIDCLASSIFICATIONLOSS
-_CLASSIFICATIONLOSS.fields_by_name['weighted_softmax'].message_type = _WEIGHTEDSOFTMAXCLASSIFICATIONLOSS
-_CLASSIFICATIONLOSS.fields_by_name['bootstrapped_sigmoid'].message_type = _BOOTSTRAPPEDSIGMOIDCLASSIFICATIONLOSS
-_CLASSIFICATIONLOSS.fields_by_name['weighted_sigmoid_focal'].message_type = _SIGMOIDFOCALCLASSIFICATIONLOSS
-_CLASSIFICATIONLOSS.oneofs_by_name['classification_loss'].fields.append(
- _CLASSIFICATIONLOSS.fields_by_name['weighted_sigmoid'])
-_CLASSIFICATIONLOSS.fields_by_name['weighted_sigmoid'].containing_oneof = _CLASSIFICATIONLOSS.oneofs_by_name['classification_loss']
-_CLASSIFICATIONLOSS.oneofs_by_name['classification_loss'].fields.append(
- _CLASSIFICATIONLOSS.fields_by_name['weighted_softmax'])
-_CLASSIFICATIONLOSS.fields_by_name['weighted_softmax'].containing_oneof = _CLASSIFICATIONLOSS.oneofs_by_name['classification_loss']
-_CLASSIFICATIONLOSS.oneofs_by_name['classification_loss'].fields.append(
- _CLASSIFICATIONLOSS.fields_by_name['bootstrapped_sigmoid'])
-_CLASSIFICATIONLOSS.fields_by_name['bootstrapped_sigmoid'].containing_oneof = _CLASSIFICATIONLOSS.oneofs_by_name['classification_loss']
-_CLASSIFICATIONLOSS.oneofs_by_name['classification_loss'].fields.append(
- _CLASSIFICATIONLOSS.fields_by_name['weighted_sigmoid_focal'])
-_CLASSIFICATIONLOSS.fields_by_name['weighted_sigmoid_focal'].containing_oneof = _CLASSIFICATIONLOSS.oneofs_by_name['classification_loss']
-_HARDEXAMPLEMINER.fields_by_name['loss_type'].enum_type = _HARDEXAMPLEMINER_LOSSTYPE
-_HARDEXAMPLEMINER_LOSSTYPE.containing_type = _HARDEXAMPLEMINER
-DESCRIPTOR.message_types_by_name['Loss'] = _LOSS
-DESCRIPTOR.message_types_by_name['LocalizationLoss'] = _LOCALIZATIONLOSS
-DESCRIPTOR.message_types_by_name['WeightedL2LocalizationLoss'] = _WEIGHTEDL2LOCALIZATIONLOSS
-DESCRIPTOR.message_types_by_name['WeightedSmoothL1LocalizationLoss'] = _WEIGHTEDSMOOTHL1LOCALIZATIONLOSS
-DESCRIPTOR.message_types_by_name['WeightedIOULocalizationLoss'] = _WEIGHTEDIOULOCALIZATIONLOSS
-DESCRIPTOR.message_types_by_name['ClassificationLoss'] = _CLASSIFICATIONLOSS
-DESCRIPTOR.message_types_by_name['WeightedSigmoidClassificationLoss'] = _WEIGHTEDSIGMOIDCLASSIFICATIONLOSS
-DESCRIPTOR.message_types_by_name['SigmoidFocalClassificationLoss'] = _SIGMOIDFOCALCLASSIFICATIONLOSS
-DESCRIPTOR.message_types_by_name['WeightedSoftmaxClassificationLoss'] = _WEIGHTEDSOFTMAXCLASSIFICATIONLOSS
-DESCRIPTOR.message_types_by_name['BootstrappedSigmoidClassificationLoss'] = _BOOTSTRAPPEDSIGMOIDCLASSIFICATIONLOSS
-DESCRIPTOR.message_types_by_name['HardExampleMiner'] = _HARDEXAMPLEMINER
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-Loss = _reflection.GeneratedProtocolMessageType('Loss', (_message.Message,), dict(
- DESCRIPTOR = _LOSS,
- __module__ = 'object_detection.protos.losses_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.Loss)
- ))
-_sym_db.RegisterMessage(Loss)
-
-LocalizationLoss = _reflection.GeneratedProtocolMessageType('LocalizationLoss', (_message.Message,), dict(
- DESCRIPTOR = _LOCALIZATIONLOSS,
- __module__ = 'object_detection.protos.losses_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.LocalizationLoss)
- ))
-_sym_db.RegisterMessage(LocalizationLoss)
-
-WeightedL2LocalizationLoss = _reflection.GeneratedProtocolMessageType('WeightedL2LocalizationLoss', (_message.Message,), dict(
- DESCRIPTOR = _WEIGHTEDL2LOCALIZATIONLOSS,
- __module__ = 'object_detection.protos.losses_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.WeightedL2LocalizationLoss)
- ))
-_sym_db.RegisterMessage(WeightedL2LocalizationLoss)
-
-WeightedSmoothL1LocalizationLoss = _reflection.GeneratedProtocolMessageType('WeightedSmoothL1LocalizationLoss', (_message.Message,), dict(
- DESCRIPTOR = _WEIGHTEDSMOOTHL1LOCALIZATIONLOSS,
- __module__ = 'object_detection.protos.losses_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.WeightedSmoothL1LocalizationLoss)
- ))
-_sym_db.RegisterMessage(WeightedSmoothL1LocalizationLoss)
-
-WeightedIOULocalizationLoss = _reflection.GeneratedProtocolMessageType('WeightedIOULocalizationLoss', (_message.Message,), dict(
- DESCRIPTOR = _WEIGHTEDIOULOCALIZATIONLOSS,
- __module__ = 'object_detection.protos.losses_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.WeightedIOULocalizationLoss)
- ))
-_sym_db.RegisterMessage(WeightedIOULocalizationLoss)
-
-ClassificationLoss = _reflection.GeneratedProtocolMessageType('ClassificationLoss', (_message.Message,), dict(
- DESCRIPTOR = _CLASSIFICATIONLOSS,
- __module__ = 'object_detection.protos.losses_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.ClassificationLoss)
- ))
-_sym_db.RegisterMessage(ClassificationLoss)
-
-WeightedSigmoidClassificationLoss = _reflection.GeneratedProtocolMessageType('WeightedSigmoidClassificationLoss', (_message.Message,), dict(
- DESCRIPTOR = _WEIGHTEDSIGMOIDCLASSIFICATIONLOSS,
- __module__ = 'object_detection.protos.losses_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.WeightedSigmoidClassificationLoss)
- ))
-_sym_db.RegisterMessage(WeightedSigmoidClassificationLoss)
-
-SigmoidFocalClassificationLoss = _reflection.GeneratedProtocolMessageType('SigmoidFocalClassificationLoss', (_message.Message,), dict(
- DESCRIPTOR = _SIGMOIDFOCALCLASSIFICATIONLOSS,
- __module__ = 'object_detection.protos.losses_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.SigmoidFocalClassificationLoss)
- ))
-_sym_db.RegisterMessage(SigmoidFocalClassificationLoss)
-
-WeightedSoftmaxClassificationLoss = _reflection.GeneratedProtocolMessageType('WeightedSoftmaxClassificationLoss', (_message.Message,), dict(
- DESCRIPTOR = _WEIGHTEDSOFTMAXCLASSIFICATIONLOSS,
- __module__ = 'object_detection.protos.losses_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.WeightedSoftmaxClassificationLoss)
- ))
-_sym_db.RegisterMessage(WeightedSoftmaxClassificationLoss)
-
-BootstrappedSigmoidClassificationLoss = _reflection.GeneratedProtocolMessageType('BootstrappedSigmoidClassificationLoss', (_message.Message,), dict(
- DESCRIPTOR = _BOOTSTRAPPEDSIGMOIDCLASSIFICATIONLOSS,
- __module__ = 'object_detection.protos.losses_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.BootstrappedSigmoidClassificationLoss)
- ))
-_sym_db.RegisterMessage(BootstrappedSigmoidClassificationLoss)
-
-HardExampleMiner = _reflection.GeneratedProtocolMessageType('HardExampleMiner', (_message.Message,), dict(
- DESCRIPTOR = _HARDEXAMPLEMINER,
- __module__ = 'object_detection.protos.losses_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.HardExampleMiner)
- ))
-_sym_db.RegisterMessage(HardExampleMiner)
-
-
-# @@protoc_insertion_point(module_scope)
diff --git a/object_detection/protos/matcher.proto b/object_detection/protos/matcher.proto
deleted file mode 100644
index b47de56c..00000000
--- a/object_detection/protos/matcher.proto
+++ /dev/null
@@ -1,15 +0,0 @@
-syntax = "proto2";
-
-package object_detection.protos;
-
-import "object_detection/protos/argmax_matcher.proto";
-import "object_detection/protos/bipartite_matcher.proto";
-
-// Configuration proto for the matcher to be used in the object detection
-// pipeline. See core/matcher.py for details.
-message Matcher {
- oneof matcher_oneof {
- ArgMaxMatcher argmax_matcher = 1;
- BipartiteMatcher bipartite_matcher = 2;
- }
-}
diff --git a/object_detection/protos/matcher_pb2.py b/object_detection/protos/matcher_pb2.py
deleted file mode 100644
index c422bb81..00000000
--- a/object_detection/protos/matcher_pb2.py
+++ /dev/null
@@ -1,90 +0,0 @@
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: object_detection/protos/matcher.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf import descriptor_pb2
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-from object_detection.protos import argmax_matcher_pb2 as object__detection_dot_protos_dot_argmax__matcher__pb2
-from object_detection.protos import bipartite_matcher_pb2 as object__detection_dot_protos_dot_bipartite__matcher__pb2
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='object_detection/protos/matcher.proto',
- package='object_detection.protos',
- syntax='proto2',
- serialized_pb=_b('\n%object_detection/protos/matcher.proto\x12\x17object_detection.protos\x1a,object_detection/protos/argmax_matcher.proto\x1a/object_detection/protos/bipartite_matcher.proto\"\xa4\x01\n\x07Matcher\x12@\n\x0e\x61rgmax_matcher\x18\x01 \x01(\x0b\x32&.object_detection.protos.ArgMaxMatcherH\x00\x12\x46\n\x11\x62ipartite_matcher\x18\x02 \x01(\x0b\x32).object_detection.protos.BipartiteMatcherH\x00\x42\x0f\n\rmatcher_oneof')
- ,
- dependencies=[object__detection_dot_protos_dot_argmax__matcher__pb2.DESCRIPTOR,object__detection_dot_protos_dot_bipartite__matcher__pb2.DESCRIPTOR,])
-
-
-
-
-_MATCHER = _descriptor.Descriptor(
- name='Matcher',
- full_name='object_detection.protos.Matcher',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='argmax_matcher', full_name='object_detection.protos.Matcher.argmax_matcher', index=0,
- number=1, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='bipartite_matcher', full_name='object_detection.protos.Matcher.bipartite_matcher', index=1,
- number=2, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- _descriptor.OneofDescriptor(
- name='matcher_oneof', full_name='object_detection.protos.Matcher.matcher_oneof',
- index=0, containing_type=None, fields=[]),
- ],
- serialized_start=162,
- serialized_end=326,
-)
-
-_MATCHER.fields_by_name['argmax_matcher'].message_type = object__detection_dot_protos_dot_argmax__matcher__pb2._ARGMAXMATCHER
-_MATCHER.fields_by_name['bipartite_matcher'].message_type = object__detection_dot_protos_dot_bipartite__matcher__pb2._BIPARTITEMATCHER
-_MATCHER.oneofs_by_name['matcher_oneof'].fields.append(
- _MATCHER.fields_by_name['argmax_matcher'])
-_MATCHER.fields_by_name['argmax_matcher'].containing_oneof = _MATCHER.oneofs_by_name['matcher_oneof']
-_MATCHER.oneofs_by_name['matcher_oneof'].fields.append(
- _MATCHER.fields_by_name['bipartite_matcher'])
-_MATCHER.fields_by_name['bipartite_matcher'].containing_oneof = _MATCHER.oneofs_by_name['matcher_oneof']
-DESCRIPTOR.message_types_by_name['Matcher'] = _MATCHER
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-Matcher = _reflection.GeneratedProtocolMessageType('Matcher', (_message.Message,), dict(
- DESCRIPTOR = _MATCHER,
- __module__ = 'object_detection.protos.matcher_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.Matcher)
- ))
-_sym_db.RegisterMessage(Matcher)
-
-
-# @@protoc_insertion_point(module_scope)
diff --git a/object_detection/protos/mean_stddev_box_coder.proto b/object_detection/protos/mean_stddev_box_coder.proto
deleted file mode 100644
index 597c70cd..00000000
--- a/object_detection/protos/mean_stddev_box_coder.proto
+++ /dev/null
@@ -1,8 +0,0 @@
-syntax = "proto2";
-
-package object_detection.protos;
-
-// Configuration proto for MeanStddevBoxCoder. See
-// box_coders/mean_stddev_box_coder.py for details.
-message MeanStddevBoxCoder {
-}
diff --git a/object_detection/protos/mean_stddev_box_coder_pb2.py b/object_detection/protos/mean_stddev_box_coder_pb2.py
deleted file mode 100644
index 184565dd..00000000
--- a/object_detection/protos/mean_stddev_box_coder_pb2.py
+++ /dev/null
@@ -1,62 +0,0 @@
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: object_detection/protos/mean_stddev_box_coder.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf import descriptor_pb2
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='object_detection/protos/mean_stddev_box_coder.proto',
- package='object_detection.protos',
- syntax='proto2',
- serialized_pb=_b('\n3object_detection/protos/mean_stddev_box_coder.proto\x12\x17object_detection.protos\"\x14\n\x12MeanStddevBoxCoder')
-)
-
-
-
-
-_MEANSTDDEVBOXCODER = _descriptor.Descriptor(
- name='MeanStddevBoxCoder',
- full_name='object_detection.protos.MeanStddevBoxCoder',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=80,
- serialized_end=100,
-)
-
-DESCRIPTOR.message_types_by_name['MeanStddevBoxCoder'] = _MEANSTDDEVBOXCODER
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-MeanStddevBoxCoder = _reflection.GeneratedProtocolMessageType('MeanStddevBoxCoder', (_message.Message,), dict(
- DESCRIPTOR = _MEANSTDDEVBOXCODER,
- __module__ = 'object_detection.protos.mean_stddev_box_coder_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.MeanStddevBoxCoder)
- ))
-_sym_db.RegisterMessage(MeanStddevBoxCoder)
-
-
-# @@protoc_insertion_point(module_scope)
diff --git a/object_detection/protos/model.proto b/object_detection/protos/model.proto
deleted file mode 100644
index b699c17b..00000000
--- a/object_detection/protos/model.proto
+++ /dev/null
@@ -1,14 +0,0 @@
-syntax = "proto2";
-
-package object_detection.protos;
-
-import "object_detection/protos/faster_rcnn.proto";
-import "object_detection/protos/ssd.proto";
-
-// Top level configuration for DetectionModels.
-message DetectionModel {
- oneof model {
- FasterRcnn faster_rcnn = 1;
- Ssd ssd = 2;
- }
-}
diff --git a/object_detection/protos/model_pb2.py b/object_detection/protos/model_pb2.py
deleted file mode 100644
index 54ec4673..00000000
--- a/object_detection/protos/model_pb2.py
+++ /dev/null
@@ -1,90 +0,0 @@
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: object_detection/protos/model.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf import descriptor_pb2
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-from object_detection.protos import faster_rcnn_pb2 as object__detection_dot_protos_dot_faster__rcnn__pb2
-from object_detection.protos import ssd_pb2 as object__detection_dot_protos_dot_ssd__pb2
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='object_detection/protos/model.proto',
- package='object_detection.protos',
- syntax='proto2',
- serialized_pb=_b('\n#object_detection/protos/model.proto\x12\x17object_detection.protos\x1a)object_detection/protos/faster_rcnn.proto\x1a!object_detection/protos/ssd.proto\"\x82\x01\n\x0e\x44\x65tectionModel\x12:\n\x0b\x66\x61ster_rcnn\x18\x01 \x01(\x0b\x32#.object_detection.protos.FasterRcnnH\x00\x12+\n\x03ssd\x18\x02 \x01(\x0b\x32\x1c.object_detection.protos.SsdH\x00\x42\x07\n\x05model')
- ,
- dependencies=[object__detection_dot_protos_dot_faster__rcnn__pb2.DESCRIPTOR,object__detection_dot_protos_dot_ssd__pb2.DESCRIPTOR,])
-
-
-
-
-_DETECTIONMODEL = _descriptor.Descriptor(
- name='DetectionModel',
- full_name='object_detection.protos.DetectionModel',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='faster_rcnn', full_name='object_detection.protos.DetectionModel.faster_rcnn', index=0,
- number=1, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='ssd', full_name='object_detection.protos.DetectionModel.ssd', index=1,
- number=2, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- _descriptor.OneofDescriptor(
- name='model', full_name='object_detection.protos.DetectionModel.model',
- index=0, containing_type=None, fields=[]),
- ],
- serialized_start=143,
- serialized_end=273,
-)
-
-_DETECTIONMODEL.fields_by_name['faster_rcnn'].message_type = object__detection_dot_protos_dot_faster__rcnn__pb2._FASTERRCNN
-_DETECTIONMODEL.fields_by_name['ssd'].message_type = object__detection_dot_protos_dot_ssd__pb2._SSD
-_DETECTIONMODEL.oneofs_by_name['model'].fields.append(
- _DETECTIONMODEL.fields_by_name['faster_rcnn'])
-_DETECTIONMODEL.fields_by_name['faster_rcnn'].containing_oneof = _DETECTIONMODEL.oneofs_by_name['model']
-_DETECTIONMODEL.oneofs_by_name['model'].fields.append(
- _DETECTIONMODEL.fields_by_name['ssd'])
-_DETECTIONMODEL.fields_by_name['ssd'].containing_oneof = _DETECTIONMODEL.oneofs_by_name['model']
-DESCRIPTOR.message_types_by_name['DetectionModel'] = _DETECTIONMODEL
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-DetectionModel = _reflection.GeneratedProtocolMessageType('DetectionModel', (_message.Message,), dict(
- DESCRIPTOR = _DETECTIONMODEL,
- __module__ = 'object_detection.protos.model_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.DetectionModel)
- ))
-_sym_db.RegisterMessage(DetectionModel)
-
-
-# @@protoc_insertion_point(module_scope)
diff --git a/object_detection/protos/optimizer.proto b/object_detection/protos/optimizer.proto
deleted file mode 100644
index d3cf45ce..00000000
--- a/object_detection/protos/optimizer.proto
+++ /dev/null
@@ -1,83 +0,0 @@
-syntax = "proto2";
-
-package object_detection.protos;
-
-// Messages for configuring the optimizing strategy for training object
-// detection models.
-
-// Top level optimizer message.
-message Optimizer {
- oneof optimizer {
- RMSPropOptimizer rms_prop_optimizer = 1;
- MomentumOptimizer momentum_optimizer = 2;
- AdamOptimizer adam_optimizer = 3;
- }
- optional bool use_moving_average = 4 [default = true];
- optional float moving_average_decay = 5 [default = 0.9999];
-}
-
-// Configuration message for the RMSPropOptimizer
-// See: https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer
-message RMSPropOptimizer {
- optional LearningRate learning_rate = 1;
- optional float momentum_optimizer_value = 2 [default = 0.9];
- optional float decay = 3 [default = 0.9];
- optional float epsilon = 4 [default = 1.0];
-}
-
-// Configuration message for the MomentumOptimizer
-// See: https://www.tensorflow.org/api_docs/python/tf/train/MomentumOptimizer
-message MomentumOptimizer {
- optional LearningRate learning_rate = 1;
- optional float momentum_optimizer_value = 2 [default = 0.9];
-}
-
-// Configuration message for the AdamOptimizer
-// See: https://www.tensorflow.org/api_docs/python/tf/train/AdamOptimizer
-message AdamOptimizer {
- optional LearningRate learning_rate = 1;
-}
-
-// Configuration message for optimizer learning rate.
-message LearningRate {
- oneof learning_rate {
- ConstantLearningRate constant_learning_rate = 1;
- ExponentialDecayLearningRate exponential_decay_learning_rate = 2;
- ManualStepLearningRate manual_step_learning_rate = 3;
- CosineDecayLearningRate cosine_decay_learning_rate = 4;
- }
-}
-
-// Configuration message for a constant learning rate.
-message ConstantLearningRate {
- optional float learning_rate = 1 [default = 0.002];
-}
-
-// Configuration message for an exponentially decaying learning rate.
-// See https://www.tensorflow.org/versions/master/api_docs/python/train/ \
-// decaying_the_learning_rate#exponential_decay
-message ExponentialDecayLearningRate {
- optional float initial_learning_rate = 1 [default = 0.002];
- optional uint32 decay_steps = 2 [default = 4000000];
- optional float decay_factor = 3 [default = 0.95];
- optional bool staircase = 4 [default = true];
-}
-
-// Configuration message for a manually defined learning rate schedule.
-message ManualStepLearningRate {
- optional float initial_learning_rate = 1 [default = 0.002];
- message LearningRateSchedule {
- optional uint32 step = 1;
- optional float learning_rate = 2 [default = 0.002];
- }
- repeated LearningRateSchedule schedule = 2;
-}
-
-// Configuration message for a cosine decaying learning rate as defined in
-// object_detection/utils/learning_schedules.py
-message CosineDecayLearningRate {
- optional float learning_rate_base = 1 [default = 0.002];
- optional uint32 total_steps = 2 [default = 4000000];
- optional float warmup_learning_rate = 3 [default = 0.0002];
- optional uint32 warmup_steps = 4 [default = 10000];
-}
diff --git a/object_detection/protos/optimizer_pb2.py b/object_detection/protos/optimizer_pb2.py
deleted file mode 100644
index 9063efa6..00000000
--- a/object_detection/protos/optimizer_pb2.py
+++ /dev/null
@@ -1,591 +0,0 @@
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: object_detection/protos/optimizer.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf import descriptor_pb2
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='object_detection/protos/optimizer.proto',
- package='object_detection.protos',
- syntax='proto2',
- serialized_pb=_b('\n\'object_detection/protos/optimizer.proto\x12\x17object_detection.protos\"\xb5\x02\n\tOptimizer\x12G\n\x12rms_prop_optimizer\x18\x01 \x01(\x0b\x32).object_detection.protos.RMSPropOptimizerH\x00\x12H\n\x12momentum_optimizer\x18\x02 \x01(\x0b\x32*.object_detection.protos.MomentumOptimizerH\x00\x12@\n\x0e\x61\x64\x61m_optimizer\x18\x03 \x01(\x0b\x32&.object_detection.protos.AdamOptimizerH\x00\x12 \n\x12use_moving_average\x18\x04 \x01(\x08:\x04true\x12$\n\x14moving_average_decay\x18\x05 \x01(\x02:\x06\x30.9999B\x0b\n\toptimizer\"\x9f\x01\n\x10RMSPropOptimizer\x12<\n\rlearning_rate\x18\x01 \x01(\x0b\x32%.object_detection.protos.LearningRate\x12%\n\x18momentum_optimizer_value\x18\x02 \x01(\x02:\x03\x30.9\x12\x12\n\x05\x64\x65\x63\x61y\x18\x03 \x01(\x02:\x03\x30.9\x12\x12\n\x07\x65psilon\x18\x04 \x01(\x02:\x01\x31\"x\n\x11MomentumOptimizer\x12<\n\rlearning_rate\x18\x01 \x01(\x0b\x32%.object_detection.protos.LearningRate\x12%\n\x18momentum_optimizer_value\x18\x02 \x01(\x02:\x03\x30.9\"M\n\rAdamOptimizer\x12<\n\rlearning_rate\x18\x01 \x01(\x0b\x32%.object_detection.protos.LearningRate\"\x80\x03\n\x0cLearningRate\x12O\n\x16\x63onstant_learning_rate\x18\x01 \x01(\x0b\x32-.object_detection.protos.ConstantLearningRateH\x00\x12`\n\x1f\x65xponential_decay_learning_rate\x18\x02 \x01(\x0b\x32\x35.object_detection.protos.ExponentialDecayLearningRateH\x00\x12T\n\x19manual_step_learning_rate\x18\x03 \x01(\x0b\x32/.object_detection.protos.ManualStepLearningRateH\x00\x12V\n\x1a\x63osine_decay_learning_rate\x18\x04 \x01(\x0b\x32\x30.object_detection.protos.CosineDecayLearningRateH\x00\x42\x0f\n\rlearning_rate\"4\n\x14\x43onstantLearningRate\x12\x1c\n\rlearning_rate\x18\x01 \x01(\x02:\x05\x30.002\"\x97\x01\n\x1c\x45xponentialDecayLearningRate\x12$\n\x15initial_learning_rate\x18\x01 \x01(\x02:\x05\x30.002\x12\x1c\n\x0b\x64\x65\x63\x61y_steps\x18\x02 \x01(\r:\x07\x34\x30\x30\x30\x30\x30\x30\x12\x1a\n\x0c\x64\x65\x63\x61y_factor\x18\x03 
\x01(\x02:\x04\x30.95\x12\x17\n\tstaircase\x18\x04 \x01(\x08:\x04true\"\xda\x01\n\x16ManualStepLearningRate\x12$\n\x15initial_learning_rate\x18\x01 \x01(\x02:\x05\x30.002\x12V\n\x08schedule\x18\x02 \x03(\x0b\x32\x44.object_detection.protos.ManualStepLearningRate.LearningRateSchedule\x1a\x42\n\x14LearningRateSchedule\x12\x0c\n\x04step\x18\x01 \x01(\r\x12\x1c\n\rlearning_rate\x18\x02 \x01(\x02:\x05\x30.002\"\x9d\x01\n\x17\x43osineDecayLearningRate\x12!\n\x12learning_rate_base\x18\x01 \x01(\x02:\x05\x30.002\x12\x1c\n\x0btotal_steps\x18\x02 \x01(\r:\x07\x34\x30\x30\x30\x30\x30\x30\x12$\n\x14warmup_learning_rate\x18\x03 \x01(\x02:\x06\x30.0002\x12\x1b\n\x0cwarmup_steps\x18\x04 \x01(\r:\x05\x31\x30\x30\x30\x30')
-)
-
-
-
-
-_OPTIMIZER = _descriptor.Descriptor(
- name='Optimizer',
- full_name='object_detection.protos.Optimizer',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='rms_prop_optimizer', full_name='object_detection.protos.Optimizer.rms_prop_optimizer', index=0,
- number=1, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='momentum_optimizer', full_name='object_detection.protos.Optimizer.momentum_optimizer', index=1,
- number=2, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='adam_optimizer', full_name='object_detection.protos.Optimizer.adam_optimizer', index=2,
- number=3, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='use_moving_average', full_name='object_detection.protos.Optimizer.use_moving_average', index=3,
- number=4, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=True,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='moving_average_decay', full_name='object_detection.protos.Optimizer.moving_average_decay', index=4,
- number=5, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.9999),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- _descriptor.OneofDescriptor(
- name='optimizer', full_name='object_detection.protos.Optimizer.optimizer',
- index=0, containing_type=None, fields=[]),
- ],
- serialized_start=69,
- serialized_end=378,
-)
-
-
-_RMSPROPOPTIMIZER = _descriptor.Descriptor(
- name='RMSPropOptimizer',
- full_name='object_detection.protos.RMSPropOptimizer',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='learning_rate', full_name='object_detection.protos.RMSPropOptimizer.learning_rate', index=0,
- number=1, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='momentum_optimizer_value', full_name='object_detection.protos.RMSPropOptimizer.momentum_optimizer_value', index=1,
- number=2, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.9),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='decay', full_name='object_detection.protos.RMSPropOptimizer.decay', index=2,
- number=3, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.9),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='epsilon', full_name='object_detection.protos.RMSPropOptimizer.epsilon', index=3,
- number=4, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(1),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=381,
- serialized_end=540,
-)
-
-
-_MOMENTUMOPTIMIZER = _descriptor.Descriptor(
- name='MomentumOptimizer',
- full_name='object_detection.protos.MomentumOptimizer',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='learning_rate', full_name='object_detection.protos.MomentumOptimizer.learning_rate', index=0,
- number=1, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='momentum_optimizer_value', full_name='object_detection.protos.MomentumOptimizer.momentum_optimizer_value', index=1,
- number=2, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.9),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=542,
- serialized_end=662,
-)
-
-
-_ADAMOPTIMIZER = _descriptor.Descriptor(
- name='AdamOptimizer',
- full_name='object_detection.protos.AdamOptimizer',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='learning_rate', full_name='object_detection.protos.AdamOptimizer.learning_rate', index=0,
- number=1, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=664,
- serialized_end=741,
-)
-
-
-_LEARNINGRATE = _descriptor.Descriptor(
- name='LearningRate',
- full_name='object_detection.protos.LearningRate',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='constant_learning_rate', full_name='object_detection.protos.LearningRate.constant_learning_rate', index=0,
- number=1, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='exponential_decay_learning_rate', full_name='object_detection.protos.LearningRate.exponential_decay_learning_rate', index=1,
- number=2, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='manual_step_learning_rate', full_name='object_detection.protos.LearningRate.manual_step_learning_rate', index=2,
- number=3, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='cosine_decay_learning_rate', full_name='object_detection.protos.LearningRate.cosine_decay_learning_rate', index=3,
- number=4, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- _descriptor.OneofDescriptor(
- name='learning_rate', full_name='object_detection.protos.LearningRate.learning_rate',
- index=0, containing_type=None, fields=[]),
- ],
- serialized_start=744,
- serialized_end=1128,
-)
-
-
-_CONSTANTLEARNINGRATE = _descriptor.Descriptor(
- name='ConstantLearningRate',
- full_name='object_detection.protos.ConstantLearningRate',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='learning_rate', full_name='object_detection.protos.ConstantLearningRate.learning_rate', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.002),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=1130,
- serialized_end=1182,
-)
-
-
-_EXPONENTIALDECAYLEARNINGRATE = _descriptor.Descriptor(
- name='ExponentialDecayLearningRate',
- full_name='object_detection.protos.ExponentialDecayLearningRate',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='initial_learning_rate', full_name='object_detection.protos.ExponentialDecayLearningRate.initial_learning_rate', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.002),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='decay_steps', full_name='object_detection.protos.ExponentialDecayLearningRate.decay_steps', index=1,
- number=2, type=13, cpp_type=3, label=1,
- has_default_value=True, default_value=4000000,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='decay_factor', full_name='object_detection.protos.ExponentialDecayLearningRate.decay_factor', index=2,
- number=3, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.95),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='staircase', full_name='object_detection.protos.ExponentialDecayLearningRate.staircase', index=3,
- number=4, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=True,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=1185,
- serialized_end=1336,
-)
-
-
-_MANUALSTEPLEARNINGRATE_LEARNINGRATESCHEDULE = _descriptor.Descriptor(
- name='LearningRateSchedule',
- full_name='object_detection.protos.ManualStepLearningRate.LearningRateSchedule',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='step', full_name='object_detection.protos.ManualStepLearningRate.LearningRateSchedule.step', index=0,
- number=1, type=13, cpp_type=3, label=1,
- has_default_value=False, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='learning_rate', full_name='object_detection.protos.ManualStepLearningRate.LearningRateSchedule.learning_rate', index=1,
- number=2, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.002),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=1491,
- serialized_end=1557,
-)
-
-_MANUALSTEPLEARNINGRATE = _descriptor.Descriptor(
- name='ManualStepLearningRate',
- full_name='object_detection.protos.ManualStepLearningRate',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='initial_learning_rate', full_name='object_detection.protos.ManualStepLearningRate.initial_learning_rate', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.002),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='schedule', full_name='object_detection.protos.ManualStepLearningRate.schedule', index=1,
- number=2, type=11, cpp_type=10, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[_MANUALSTEPLEARNINGRATE_LEARNINGRATESCHEDULE, ],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=1339,
- serialized_end=1557,
-)
-
-
-_COSINEDECAYLEARNINGRATE = _descriptor.Descriptor(
- name='CosineDecayLearningRate',
- full_name='object_detection.protos.CosineDecayLearningRate',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='learning_rate_base', full_name='object_detection.protos.CosineDecayLearningRate.learning_rate_base', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.002),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='total_steps', full_name='object_detection.protos.CosineDecayLearningRate.total_steps', index=1,
- number=2, type=13, cpp_type=3, label=1,
- has_default_value=True, default_value=4000000,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='warmup_learning_rate', full_name='object_detection.protos.CosineDecayLearningRate.warmup_learning_rate', index=2,
- number=3, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.0002),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='warmup_steps', full_name='object_detection.protos.CosineDecayLearningRate.warmup_steps', index=3,
- number=4, type=13, cpp_type=3, label=1,
- has_default_value=True, default_value=10000,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=1560,
- serialized_end=1717,
-)
-
-_OPTIMIZER.fields_by_name['rms_prop_optimizer'].message_type = _RMSPROPOPTIMIZER
-_OPTIMIZER.fields_by_name['momentum_optimizer'].message_type = _MOMENTUMOPTIMIZER
-_OPTIMIZER.fields_by_name['adam_optimizer'].message_type = _ADAMOPTIMIZER
-_OPTIMIZER.oneofs_by_name['optimizer'].fields.append(
- _OPTIMIZER.fields_by_name['rms_prop_optimizer'])
-_OPTIMIZER.fields_by_name['rms_prop_optimizer'].containing_oneof = _OPTIMIZER.oneofs_by_name['optimizer']
-_OPTIMIZER.oneofs_by_name['optimizer'].fields.append(
- _OPTIMIZER.fields_by_name['momentum_optimizer'])
-_OPTIMIZER.fields_by_name['momentum_optimizer'].containing_oneof = _OPTIMIZER.oneofs_by_name['optimizer']
-_OPTIMIZER.oneofs_by_name['optimizer'].fields.append(
- _OPTIMIZER.fields_by_name['adam_optimizer'])
-_OPTIMIZER.fields_by_name['adam_optimizer'].containing_oneof = _OPTIMIZER.oneofs_by_name['optimizer']
-_RMSPROPOPTIMIZER.fields_by_name['learning_rate'].message_type = _LEARNINGRATE
-_MOMENTUMOPTIMIZER.fields_by_name['learning_rate'].message_type = _LEARNINGRATE
-_ADAMOPTIMIZER.fields_by_name['learning_rate'].message_type = _LEARNINGRATE
-_LEARNINGRATE.fields_by_name['constant_learning_rate'].message_type = _CONSTANTLEARNINGRATE
-_LEARNINGRATE.fields_by_name['exponential_decay_learning_rate'].message_type = _EXPONENTIALDECAYLEARNINGRATE
-_LEARNINGRATE.fields_by_name['manual_step_learning_rate'].message_type = _MANUALSTEPLEARNINGRATE
-_LEARNINGRATE.fields_by_name['cosine_decay_learning_rate'].message_type = _COSINEDECAYLEARNINGRATE
-_LEARNINGRATE.oneofs_by_name['learning_rate'].fields.append(
- _LEARNINGRATE.fields_by_name['constant_learning_rate'])
-_LEARNINGRATE.fields_by_name['constant_learning_rate'].containing_oneof = _LEARNINGRATE.oneofs_by_name['learning_rate']
-_LEARNINGRATE.oneofs_by_name['learning_rate'].fields.append(
- _LEARNINGRATE.fields_by_name['exponential_decay_learning_rate'])
-_LEARNINGRATE.fields_by_name['exponential_decay_learning_rate'].containing_oneof = _LEARNINGRATE.oneofs_by_name['learning_rate']
-_LEARNINGRATE.oneofs_by_name['learning_rate'].fields.append(
- _LEARNINGRATE.fields_by_name['manual_step_learning_rate'])
-_LEARNINGRATE.fields_by_name['manual_step_learning_rate'].containing_oneof = _LEARNINGRATE.oneofs_by_name['learning_rate']
-_LEARNINGRATE.oneofs_by_name['learning_rate'].fields.append(
- _LEARNINGRATE.fields_by_name['cosine_decay_learning_rate'])
-_LEARNINGRATE.fields_by_name['cosine_decay_learning_rate'].containing_oneof = _LEARNINGRATE.oneofs_by_name['learning_rate']
-_MANUALSTEPLEARNINGRATE_LEARNINGRATESCHEDULE.containing_type = _MANUALSTEPLEARNINGRATE
-_MANUALSTEPLEARNINGRATE.fields_by_name['schedule'].message_type = _MANUALSTEPLEARNINGRATE_LEARNINGRATESCHEDULE
-DESCRIPTOR.message_types_by_name['Optimizer'] = _OPTIMIZER
-DESCRIPTOR.message_types_by_name['RMSPropOptimizer'] = _RMSPROPOPTIMIZER
-DESCRIPTOR.message_types_by_name['MomentumOptimizer'] = _MOMENTUMOPTIMIZER
-DESCRIPTOR.message_types_by_name['AdamOptimizer'] = _ADAMOPTIMIZER
-DESCRIPTOR.message_types_by_name['LearningRate'] = _LEARNINGRATE
-DESCRIPTOR.message_types_by_name['ConstantLearningRate'] = _CONSTANTLEARNINGRATE
-DESCRIPTOR.message_types_by_name['ExponentialDecayLearningRate'] = _EXPONENTIALDECAYLEARNINGRATE
-DESCRIPTOR.message_types_by_name['ManualStepLearningRate'] = _MANUALSTEPLEARNINGRATE
-DESCRIPTOR.message_types_by_name['CosineDecayLearningRate'] = _COSINEDECAYLEARNINGRATE
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-Optimizer = _reflection.GeneratedProtocolMessageType('Optimizer', (_message.Message,), dict(
- DESCRIPTOR = _OPTIMIZER,
- __module__ = 'object_detection.protos.optimizer_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.Optimizer)
- ))
-_sym_db.RegisterMessage(Optimizer)
-
-RMSPropOptimizer = _reflection.GeneratedProtocolMessageType('RMSPropOptimizer', (_message.Message,), dict(
- DESCRIPTOR = _RMSPROPOPTIMIZER,
- __module__ = 'object_detection.protos.optimizer_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.RMSPropOptimizer)
- ))
-_sym_db.RegisterMessage(RMSPropOptimizer)
-
-MomentumOptimizer = _reflection.GeneratedProtocolMessageType('MomentumOptimizer', (_message.Message,), dict(
- DESCRIPTOR = _MOMENTUMOPTIMIZER,
- __module__ = 'object_detection.protos.optimizer_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.MomentumOptimizer)
- ))
-_sym_db.RegisterMessage(MomentumOptimizer)
-
-AdamOptimizer = _reflection.GeneratedProtocolMessageType('AdamOptimizer', (_message.Message,), dict(
- DESCRIPTOR = _ADAMOPTIMIZER,
- __module__ = 'object_detection.protos.optimizer_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.AdamOptimizer)
- ))
-_sym_db.RegisterMessage(AdamOptimizer)
-
-LearningRate = _reflection.GeneratedProtocolMessageType('LearningRate', (_message.Message,), dict(
- DESCRIPTOR = _LEARNINGRATE,
- __module__ = 'object_detection.protos.optimizer_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.LearningRate)
- ))
-_sym_db.RegisterMessage(LearningRate)
-
-ConstantLearningRate = _reflection.GeneratedProtocolMessageType('ConstantLearningRate', (_message.Message,), dict(
- DESCRIPTOR = _CONSTANTLEARNINGRATE,
- __module__ = 'object_detection.protos.optimizer_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.ConstantLearningRate)
- ))
-_sym_db.RegisterMessage(ConstantLearningRate)
-
-ExponentialDecayLearningRate = _reflection.GeneratedProtocolMessageType('ExponentialDecayLearningRate', (_message.Message,), dict(
- DESCRIPTOR = _EXPONENTIALDECAYLEARNINGRATE,
- __module__ = 'object_detection.protos.optimizer_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.ExponentialDecayLearningRate)
- ))
-_sym_db.RegisterMessage(ExponentialDecayLearningRate)
-
-ManualStepLearningRate = _reflection.GeneratedProtocolMessageType('ManualStepLearningRate', (_message.Message,), dict(
-
- LearningRateSchedule = _reflection.GeneratedProtocolMessageType('LearningRateSchedule', (_message.Message,), dict(
- DESCRIPTOR = _MANUALSTEPLEARNINGRATE_LEARNINGRATESCHEDULE,
- __module__ = 'object_detection.protos.optimizer_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.ManualStepLearningRate.LearningRateSchedule)
- ))
- ,
- DESCRIPTOR = _MANUALSTEPLEARNINGRATE,
- __module__ = 'object_detection.protos.optimizer_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.ManualStepLearningRate)
- ))
-_sym_db.RegisterMessage(ManualStepLearningRate)
-_sym_db.RegisterMessage(ManualStepLearningRate.LearningRateSchedule)
-
-CosineDecayLearningRate = _reflection.GeneratedProtocolMessageType('CosineDecayLearningRate', (_message.Message,), dict(
- DESCRIPTOR = _COSINEDECAYLEARNINGRATE,
- __module__ = 'object_detection.protos.optimizer_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.CosineDecayLearningRate)
- ))
-_sym_db.RegisterMessage(CosineDecayLearningRate)
-
-
-# @@protoc_insertion_point(module_scope)
diff --git a/object_detection/protos/pipeline.proto b/object_detection/protos/pipeline.proto
deleted file mode 100644
index 67f4e544..00000000
--- a/object_detection/protos/pipeline.proto
+++ /dev/null
@@ -1,18 +0,0 @@
-syntax = "proto2";
-
-package object_detection.protos;
-
-import "object_detection/protos/eval.proto";
-import "object_detection/protos/input_reader.proto";
-import "object_detection/protos/model.proto";
-import "object_detection/protos/train.proto";
-
-// Convenience message for configuring a training and eval pipeline. Allows all
-// of the pipeline parameters to be configured from one file.
-message TrainEvalPipelineConfig {
- optional DetectionModel model = 1;
- optional TrainConfig train_config = 2;
- optional InputReader train_input_reader = 3;
- optional EvalConfig eval_config = 4;
- optional InputReader eval_input_reader = 5;
-}
diff --git a/object_detection/protos/pipeline_pb2.py b/object_detection/protos/pipeline_pb2.py
deleted file mode 100644
index d9a7073f..00000000
--- a/object_detection/protos/pipeline_pb2.py
+++ /dev/null
@@ -1,107 +0,0 @@
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: object_detection/protos/pipeline.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf import descriptor_pb2
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-from object_detection.protos import eval_pb2 as object__detection_dot_protos_dot_eval__pb2
-from object_detection.protos import input_reader_pb2 as object__detection_dot_protos_dot_input__reader__pb2
-from object_detection.protos import model_pb2 as object__detection_dot_protos_dot_model__pb2
-from object_detection.protos import train_pb2 as object__detection_dot_protos_dot_train__pb2
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='object_detection/protos/pipeline.proto',
- package='object_detection.protos',
- syntax='proto2',
- serialized_pb=_b('\n&object_detection/protos/pipeline.proto\x12\x17object_detection.protos\x1a\"object_detection/protos/eval.proto\x1a*object_detection/protos/input_reader.proto\x1a#object_detection/protos/model.proto\x1a#object_detection/protos/train.proto\"\xca\x02\n\x17TrainEvalPipelineConfig\x12\x36\n\x05model\x18\x01 \x01(\x0b\x32\'.object_detection.protos.DetectionModel\x12:\n\x0ctrain_config\x18\x02 \x01(\x0b\x32$.object_detection.protos.TrainConfig\x12@\n\x12train_input_reader\x18\x03 \x01(\x0b\x32$.object_detection.protos.InputReader\x12\x38\n\x0b\x65val_config\x18\x04 \x01(\x0b\x32#.object_detection.protos.EvalConfig\x12?\n\x11\x65val_input_reader\x18\x05 \x01(\x0b\x32$.object_detection.protos.InputReader')
- ,
- dependencies=[object__detection_dot_protos_dot_eval__pb2.DESCRIPTOR,object__detection_dot_protos_dot_input__reader__pb2.DESCRIPTOR,object__detection_dot_protos_dot_model__pb2.DESCRIPTOR,object__detection_dot_protos_dot_train__pb2.DESCRIPTOR,])
-
-
-
-
-_TRAINEVALPIPELINECONFIG = _descriptor.Descriptor(
- name='TrainEvalPipelineConfig',
- full_name='object_detection.protos.TrainEvalPipelineConfig',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='model', full_name='object_detection.protos.TrainEvalPipelineConfig.model', index=0,
- number=1, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='train_config', full_name='object_detection.protos.TrainEvalPipelineConfig.train_config', index=1,
- number=2, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='train_input_reader', full_name='object_detection.protos.TrainEvalPipelineConfig.train_input_reader', index=2,
- number=3, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='eval_config', full_name='object_detection.protos.TrainEvalPipelineConfig.eval_config', index=3,
- number=4, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='eval_input_reader', full_name='object_detection.protos.TrainEvalPipelineConfig.eval_input_reader', index=4,
- number=5, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=222,
- serialized_end=552,
-)
-
-_TRAINEVALPIPELINECONFIG.fields_by_name['model'].message_type = object__detection_dot_protos_dot_model__pb2._DETECTIONMODEL
-_TRAINEVALPIPELINECONFIG.fields_by_name['train_config'].message_type = object__detection_dot_protos_dot_train__pb2._TRAINCONFIG
-_TRAINEVALPIPELINECONFIG.fields_by_name['train_input_reader'].message_type = object__detection_dot_protos_dot_input__reader__pb2._INPUTREADER
-_TRAINEVALPIPELINECONFIG.fields_by_name['eval_config'].message_type = object__detection_dot_protos_dot_eval__pb2._EVALCONFIG
-_TRAINEVALPIPELINECONFIG.fields_by_name['eval_input_reader'].message_type = object__detection_dot_protos_dot_input__reader__pb2._INPUTREADER
-DESCRIPTOR.message_types_by_name['TrainEvalPipelineConfig'] = _TRAINEVALPIPELINECONFIG
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-TrainEvalPipelineConfig = _reflection.GeneratedProtocolMessageType('TrainEvalPipelineConfig', (_message.Message,), dict(
- DESCRIPTOR = _TRAINEVALPIPELINECONFIG,
- __module__ = 'object_detection.protos.pipeline_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.TrainEvalPipelineConfig)
- ))
-_sym_db.RegisterMessage(TrainEvalPipelineConfig)
-
-
-# @@protoc_insertion_point(module_scope)
diff --git a/object_detection/protos/post_processing.proto b/object_detection/protos/post_processing.proto
deleted file mode 100644
index bbd69cde..00000000
--- a/object_detection/protos/post_processing.proto
+++ /dev/null
@@ -1,46 +0,0 @@
-syntax = "proto2";
-
-package object_detection.protos;
-
-// Configuration proto for non-max-suppression operation on a batch of
-// detections.
-message BatchNonMaxSuppression {
- // Scalar threshold for score (low scoring boxes are removed).
- optional float score_threshold = 1 [default = 0.0];
-
- // Scalar threshold for IOU (boxes that have high IOU overlap
- // with previously selected boxes are removed).
- optional float iou_threshold = 2 [default = 0.6];
-
- // Maximum number of detections to retain per class.
- optional int32 max_detections_per_class = 3 [default = 100];
-
- // Maximum number of detections to retain across all classes.
- optional int32 max_total_detections = 5 [default = 100];
-}
-
-// Configuration proto for post-processing predicted boxes and
-// scores.
-message PostProcessing {
- // Non max suppression parameters.
- optional BatchNonMaxSuppression batch_non_max_suppression = 1;
-
- // Enum to specify how to convert the detection scores.
- enum ScoreConverter {
- // Input scores equals output scores.
- IDENTITY = 0;
-
- // Applies a sigmoid on input scores.
- SIGMOID = 1;
-
- // Applies a softmax on input scores
- SOFTMAX = 2;
- }
-
- // Score converter to use.
- optional ScoreConverter score_converter = 2 [default = IDENTITY];
- // Scale logit (input) value before conversion in post-processing step.
- // Typically used for softmax distillation, though can be used to scale for
- // other reasons.
- optional float logit_scale = 3 [default = 1.0];
-}
diff --git a/object_detection/protos/post_processing_pb2.py b/object_detection/protos/post_processing_pb2.py
deleted file mode 100644
index 4001956a..00000000
--- a/object_detection/protos/post_processing_pb2.py
+++ /dev/null
@@ -1,173 +0,0 @@
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: object_detection/protos/post_processing.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf import descriptor_pb2
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='object_detection/protos/post_processing.proto',
- package='object_detection.protos',
- syntax='proto2',
- serialized_pb=_b('\n-object_detection/protos/post_processing.proto\x12\x17object_detection.protos\"\x9a\x01\n\x16\x42\x61tchNonMaxSuppression\x12\x1a\n\x0fscore_threshold\x18\x01 \x01(\x02:\x01\x30\x12\x1a\n\riou_threshold\x18\x02 \x01(\x02:\x03\x30.6\x12%\n\x18max_detections_per_class\x18\x03 \x01(\x05:\x03\x31\x30\x30\x12!\n\x14max_total_detections\x18\x05 \x01(\x05:\x03\x31\x30\x30\"\x91\x02\n\x0ePostProcessing\x12R\n\x19\x62\x61tch_non_max_suppression\x18\x01 \x01(\x0b\x32/.object_detection.protos.BatchNonMaxSuppression\x12Y\n\x0fscore_converter\x18\x02 \x01(\x0e\x32\x36.object_detection.protos.PostProcessing.ScoreConverter:\x08IDENTITY\x12\x16\n\x0blogit_scale\x18\x03 \x01(\x02:\x01\x31\"8\n\x0eScoreConverter\x12\x0c\n\x08IDENTITY\x10\x00\x12\x0b\n\x07SIGMOID\x10\x01\x12\x0b\n\x07SOFTMAX\x10\x02')
-)
-
-
-
-_POSTPROCESSING_SCORECONVERTER = _descriptor.EnumDescriptor(
- name='ScoreConverter',
- full_name='object_detection.protos.PostProcessing.ScoreConverter',
- filename=None,
- file=DESCRIPTOR,
- values=[
- _descriptor.EnumValueDescriptor(
- name='IDENTITY', index=0, number=0,
- options=None,
- type=None),
- _descriptor.EnumValueDescriptor(
- name='SIGMOID', index=1, number=1,
- options=None,
- type=None),
- _descriptor.EnumValueDescriptor(
- name='SOFTMAX', index=2, number=2,
- options=None,
- type=None),
- ],
- containing_type=None,
- options=None,
- serialized_start=449,
- serialized_end=505,
-)
-_sym_db.RegisterEnumDescriptor(_POSTPROCESSING_SCORECONVERTER)
-
-
-_BATCHNONMAXSUPPRESSION = _descriptor.Descriptor(
- name='BatchNonMaxSuppression',
- full_name='object_detection.protos.BatchNonMaxSuppression',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='score_threshold', full_name='object_detection.protos.BatchNonMaxSuppression.score_threshold', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='iou_threshold', full_name='object_detection.protos.BatchNonMaxSuppression.iou_threshold', index=1,
- number=2, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.6),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='max_detections_per_class', full_name='object_detection.protos.BatchNonMaxSuppression.max_detections_per_class', index=2,
- number=3, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=100,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='max_total_detections', full_name='object_detection.protos.BatchNonMaxSuppression.max_total_detections', index=3,
- number=5, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=100,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=75,
- serialized_end=229,
-)
-
-
-_POSTPROCESSING = _descriptor.Descriptor(
- name='PostProcessing',
- full_name='object_detection.protos.PostProcessing',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='batch_non_max_suppression', full_name='object_detection.protos.PostProcessing.batch_non_max_suppression', index=0,
- number=1, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='score_converter', full_name='object_detection.protos.PostProcessing.score_converter', index=1,
- number=2, type=14, cpp_type=8, label=1,
- has_default_value=True, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='logit_scale', full_name='object_detection.protos.PostProcessing.logit_scale', index=2,
- number=3, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(1),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- _POSTPROCESSING_SCORECONVERTER,
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=232,
- serialized_end=505,
-)
-
-_POSTPROCESSING.fields_by_name['batch_non_max_suppression'].message_type = _BATCHNONMAXSUPPRESSION
-_POSTPROCESSING.fields_by_name['score_converter'].enum_type = _POSTPROCESSING_SCORECONVERTER
-_POSTPROCESSING_SCORECONVERTER.containing_type = _POSTPROCESSING
-DESCRIPTOR.message_types_by_name['BatchNonMaxSuppression'] = _BATCHNONMAXSUPPRESSION
-DESCRIPTOR.message_types_by_name['PostProcessing'] = _POSTPROCESSING
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-BatchNonMaxSuppression = _reflection.GeneratedProtocolMessageType('BatchNonMaxSuppression', (_message.Message,), dict(
- DESCRIPTOR = _BATCHNONMAXSUPPRESSION,
- __module__ = 'object_detection.protos.post_processing_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.BatchNonMaxSuppression)
- ))
-_sym_db.RegisterMessage(BatchNonMaxSuppression)
-
-PostProcessing = _reflection.GeneratedProtocolMessageType('PostProcessing', (_message.Message,), dict(
- DESCRIPTOR = _POSTPROCESSING,
- __module__ = 'object_detection.protos.post_processing_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.PostProcessing)
- ))
-_sym_db.RegisterMessage(PostProcessing)
-
-
-# @@protoc_insertion_point(module_scope)
diff --git a/object_detection/protos/preprocessor.proto b/object_detection/protos/preprocessor.proto
deleted file mode 100644
index fcfb450a..00000000
--- a/object_detection/protos/preprocessor.proto
+++ /dev/null
@@ -1,405 +0,0 @@
-syntax = "proto2";
-
-package object_detection.protos;
-
-// Message for defining a preprocessing operation on input data.
-// See: //object_detection/core/preprocessor.py
-message PreprocessingStep {
- oneof preprocessing_step {
- NormalizeImage normalize_image = 1;
- RandomHorizontalFlip random_horizontal_flip = 2;
- RandomPixelValueScale random_pixel_value_scale = 3;
- RandomImageScale random_image_scale = 4;
- RandomRGBtoGray random_rgb_to_gray = 5;
- RandomAdjustBrightness random_adjust_brightness = 6;
- RandomAdjustContrast random_adjust_contrast = 7;
- RandomAdjustHue random_adjust_hue = 8;
- RandomAdjustSaturation random_adjust_saturation = 9;
- RandomDistortColor random_distort_color = 10;
- RandomJitterBoxes random_jitter_boxes = 11;
- RandomCropImage random_crop_image = 12;
- RandomPadImage random_pad_image = 13;
- RandomCropPadImage random_crop_pad_image = 14;
- RandomCropToAspectRatio random_crop_to_aspect_ratio = 15;
- RandomBlackPatches random_black_patches = 16;
- RandomResizeMethod random_resize_method = 17;
- ScaleBoxesToPixelCoordinates scale_boxes_to_pixel_coordinates = 18;
- ResizeImage resize_image = 19;
- SubtractChannelMean subtract_channel_mean = 20;
- SSDRandomCrop ssd_random_crop = 21;
- SSDRandomCropPad ssd_random_crop_pad = 22;
- SSDRandomCropFixedAspectRatio ssd_random_crop_fixed_aspect_ratio = 23;
- SSDRandomCropPadFixedAspectRatio ssd_random_crop_pad_fixed_aspect_ratio = 24;
- RandomVerticalFlip random_vertical_flip = 25;
- RandomRotation90 random_rotation90 = 26;
- }
-}
-
-// Normalizes pixel values in an image.
-// For every channel in the image, moves the pixel values from the range
-// [original_minval, original_maxval] to [target_minval, target_maxval].
-message NormalizeImage {
- optional float original_minval = 1;
- optional float original_maxval = 2;
- optional float target_minval = 3 [default=0];
- optional float target_maxval = 4 [default=1];
-}
-
-// Randomly horizontally flips the image and detections 50% of the time.
-message RandomHorizontalFlip {
- // Specifies a mapping from the original keypoint indices to horizontally
- // flipped indices. This is used in the event that keypoints are specified,
- // in which case when the image is horizontally flipped the keypoints will
- // need to be permuted. E.g. for keypoints representing left_eye, right_eye,
- // nose_tip, mouth, left_ear, right_ear (in that order), one might specify
- // the keypoint_flip_permutation below:
- // keypoint_flip_permutation: 1
- // keypoint_flip_permutation: 0
- // keypoint_flip_permutation: 2
- // keypoint_flip_permutation: 3
- // keypoint_flip_permutation: 5
- // keypoint_flip_permutation: 4
- repeated int32 keypoint_flip_permutation = 1;
-}
-
-// Randomly vertically flips the image and detections 50% of the time.
-message RandomVerticalFlip {
- // Specifies a mapping from the original keypoint indices to vertically
- // flipped indices. This is used in the event that keypoints are specified,
- // in which case when the image is vertically flipped the keypoints will
- // need to be permuted. E.g. for keypoints representing left_eye, right_eye,
- // nose_tip, mouth, left_ear, right_ear (in that order), one might specify
- // the keypoint_flip_permutation below:
- // keypoint_flip_permutation: 1
- // keypoint_flip_permutation: 0
- // keypoint_flip_permutation: 2
- // keypoint_flip_permutation: 3
- // keypoint_flip_permutation: 5
- // keypoint_flip_permutation: 4
- repeated int32 keypoint_flip_permutation = 1;
-}
-
-// Randomly rotates the image and detections by 90 degrees counter-clockwise
-// 50% of the time.
-message RandomRotation90 {}
-
-// Randomly scales the values of all pixels in the image by some constant value
-// between [minval, maxval], then clip the value to a range between [0, 1.0].
-message RandomPixelValueScale {
- optional float minval = 1 [default=0.9];
- optional float maxval = 2 [default=1.1];
-}
-
-// Randomly enlarges or shrinks image (keeping aspect ratio).
-message RandomImageScale {
- optional float min_scale_ratio = 1 [default=0.5];
- optional float max_scale_ratio = 2 [default=2.0];
-}
-
-// Randomly convert entire image to grey scale.
-message RandomRGBtoGray {
- optional float probability = 1 [default=0.1];
-}
-
-// Randomly changes image brightness by up to max_delta. Image outputs will be
-// saturated between 0 and 1.
-message RandomAdjustBrightness {
- optional float max_delta=1 [default=0.2];
-}
-
-// Randomly scales contract by a value between [min_delta, max_delta].
-message RandomAdjustContrast {
- optional float min_delta = 1 [default=0.8];
- optional float max_delta = 2 [default=1.25];
-}
-
-// Randomly alters hue by a value of up to max_delta.
-message RandomAdjustHue {
- optional float max_delta = 1 [default=0.02];
-}
-
-// Randomly changes saturation by a value between [min_delta, max_delta].
-message RandomAdjustSaturation {
- optional float min_delta = 1 [default=0.8];
- optional float max_delta = 2 [default=1.25];
-}
-
-// Performs a random color distortion. color_orderings should either be 0 or 1.
-message RandomDistortColor {
- optional int32 color_ordering = 1;
-}
-
-// Randomly jitters corners of boxes in the image determined by ratio.
-// ie. If a box is [100, 200] and ratio is 0.02, the corners can move by [1, 4].
-message RandomJitterBoxes {
- optional float ratio = 1 [default=0.05];
-}
-
-// Randomly crops the image and bounding boxes.
-message RandomCropImage {
- // Cropped image must cover at least one box by this fraction.
- optional float min_object_covered = 1 [default=1.0];
-
- // Aspect ratio bounds of cropped image.
- optional float min_aspect_ratio = 2 [default=0.75];
- optional float max_aspect_ratio = 3 [default=1.33];
-
- // Allowed area ratio of cropped image to original image.
- optional float min_area = 4 [default=0.1];
- optional float max_area = 5 [default=1.0];
-
- // Minimum overlap threshold of cropped boxes to keep in new image. If the
- // ratio between a cropped bounding box and the original is less than this
- // value, it is removed from the new image.
- optional float overlap_thresh = 6 [default=0.3];
-
- // Probability of keeping the original image.
- optional float random_coef = 7 [default=0.0];
-}
-
-// Randomly adds padding to the image.
-message RandomPadImage {
- // Minimum dimensions for padded image. If unset, will use original image
- // dimension as a lower bound.
- optional float min_image_height = 1;
- optional float min_image_width = 2;
-
- // Maximum dimensions for padded image. If unset, will use double the original
- // image dimension as a lower bound.
- optional float max_image_height = 3;
- optional float max_image_width = 4;
-
- // Color of the padding. If unset, will pad using average color of the input
- // image.
- repeated float pad_color = 5;
-}
-
-// Randomly crops an image followed by a random pad.
-message RandomCropPadImage {
- // Cropping operation must cover at least one box by this fraction.
- optional float min_object_covered = 1 [default=1.0];
-
- // Aspect ratio bounds of image after cropping operation.
- optional float min_aspect_ratio = 2 [default=0.75];
- optional float max_aspect_ratio = 3 [default=1.33];
-
- // Allowed area ratio of image after cropping operation.
- optional float min_area = 4 [default=0.1];
- optional float max_area = 5 [default=1.0];
-
- // Minimum overlap threshold of cropped boxes to keep in new image. If the
- // ratio between a cropped bounding box and the original is less than this
- // value, it is removed from the new image.
- optional float overlap_thresh = 6 [default=0.3];
-
- // Probability of keeping the original image during the crop operation.
- optional float random_coef = 7 [default=0.0];
-
- // Maximum dimensions for padded image. If unset, will use double the original
- // image dimension as a lower bound. Both of the following fields should be
- // length 2.
- repeated float min_padded_size_ratio = 8;
- repeated float max_padded_size_ratio = 9;
-
- // Color of the padding. If unset, will pad using average color of the input
- // image.
- repeated float pad_color = 10;
-}
-
-// Randomly crops an iamge to a given aspect ratio.
-message RandomCropToAspectRatio {
- // Aspect ratio.
- optional float aspect_ratio = 1 [default=1.0];
-
- // Minimum overlap threshold of cropped boxes to keep in new image. If the
- // ratio between a cropped bounding box and the original is less than this
- // value, it is removed from the new image.
- optional float overlap_thresh = 2 [default=0.3];
-}
-
-// Randomly adds black square patches to an image.
-message RandomBlackPatches {
- // The maximum number of black patches to add.
- optional int32 max_black_patches = 1 [default=10];
-
- // The probability of a black patch being added to an image.
- optional float probability = 2 [default=0.5];
-
- // Ratio between the dimension of the black patch to the minimum dimension of
- // the image (patch_width = patch_height = min(image_height, image_width)).
- optional float size_to_image_ratio = 3 [default=0.1];
-}
-
-// Randomly resizes the image up to [target_height, target_width].
-message RandomResizeMethod {
- optional float target_height = 1;
- optional float target_width = 2;
-}
-
-// Scales boxes from normalized coordinates to pixel coordinates.
-message ScaleBoxesToPixelCoordinates {
-}
-
-// Resizes images to [new_height, new_width].
-message ResizeImage {
- optional int32 new_height = 1;
- optional int32 new_width = 2;
- enum Method {
- AREA=1;
- BICUBIC=2;
- BILINEAR=3;
- NEAREST_NEIGHBOR=4;
- }
- optional Method method = 3 [default=BILINEAR];
-}
-
-// Normalizes an image by subtracting a mean from each channel.
-message SubtractChannelMean {
- // The mean to subtract from each channel. Should be of same dimension of
- // channels in the input image.
- repeated float means = 1;
-}
-
-message SSDRandomCropOperation {
- // Cropped image must cover at least this fraction of one original bounding
- // box.
- optional float min_object_covered = 1;
-
- // The aspect ratio of the cropped image must be within the range of
- // [min_aspect_ratio, max_aspect_ratio].
- optional float min_aspect_ratio = 2;
- optional float max_aspect_ratio = 3;
-
- // The area of the cropped image must be within the range of
- // [min_area, max_area].
- optional float min_area = 4;
- optional float max_area = 5;
-
- // Cropped box area ratio must be above this threhold to be kept.
- optional float overlap_thresh = 6;
-
- // Probability a crop operation is skipped.
- optional float random_coef = 7;
-}
-
-// Randomly crops a image according to:
-// Liu et al., SSD: Single shot multibox detector.
-// This preprocessing step defines multiple SSDRandomCropOperations. Only one
-// operation (chosen at random) is actually performed on an image.
-message SSDRandomCrop {
- repeated SSDRandomCropOperation operations = 1;
-}
-
-message SSDRandomCropPadOperation {
- // Cropped image must cover at least this fraction of one original bounding
- // box.
- optional float min_object_covered = 1;
-
- // The aspect ratio of the cropped image must be within the range of
- // [min_aspect_ratio, max_aspect_ratio].
- optional float min_aspect_ratio = 2;
- optional float max_aspect_ratio = 3;
-
- // The area of the cropped image must be within the range of
- // [min_area, max_area].
- optional float min_area = 4;
- optional float max_area = 5;
-
- // Cropped box area ratio must be above this threhold to be kept.
- optional float overlap_thresh = 6;
-
- // Probability a crop operation is skipped.
- optional float random_coef = 7;
-
- // Min ratio of padded image height and width to the input image's height and
- // width. Two entries per operation.
- repeated float min_padded_size_ratio = 8;
-
- // Max ratio of padded image height and width to the input image's height and
- // width. Two entries per operation.
- repeated float max_padded_size_ratio = 9;
-
- // Padding color.
- optional float pad_color_r = 10;
- optional float pad_color_g = 11;
- optional float pad_color_b = 12;
-}
-
-// Randomly crops and pads an image according to:
-// Liu et al., SSD: Single shot multibox detector.
-// This preprocessing step defines multiple SSDRandomCropPadOperations. Only one
-// operation (chosen at random) is actually performed on an image.
-message SSDRandomCropPad {
- repeated SSDRandomCropPadOperation operations = 1;
-}
-
-message SSDRandomCropFixedAspectRatioOperation {
- // Cropped image must cover at least this fraction of one original bounding
- // box.
- optional float min_object_covered = 1;
-
- // The area of the cropped image must be within the range of
- // [min_area, max_area].
- optional float min_area = 4;
- optional float max_area = 5;
-
- // Cropped box area ratio must be above this threhold to be kept.
- optional float overlap_thresh = 6;
-
- // Probability a crop operation is skipped.
- optional float random_coef = 7;
-}
-
-// Randomly crops a image to a fixed aspect ratio according to:
-// Liu et al., SSD: Single shot multibox detector.
-// Multiple SSDRandomCropFixedAspectRatioOperations are defined by this
-// preprocessing step. Only one operation (chosen at random) is actually
-// performed on an image.
-message SSDRandomCropFixedAspectRatio {
- repeated SSDRandomCropFixedAspectRatioOperation operations = 1;
-
- // Aspect ratio to crop to. This value is used for all crop operations.
- optional float aspect_ratio = 2 [default=1.0];
-}
-
-message SSDRandomCropPadFixedAspectRatioOperation {
- // Cropped image must cover at least this fraction of one original bounding
- // box.
- optional float min_object_covered = 1;
-
- // The aspect ratio of the cropped image must be within the range of
- // [min_aspect_ratio, max_aspect_ratio].
- optional float min_aspect_ratio = 2;
- optional float max_aspect_ratio = 3;
-
- // The area of the cropped image must be within the range of
- // [min_area, max_area].
- optional float min_area = 4;
- optional float max_area = 5;
-
- // Cropped box area ratio must be above this threhold to be kept.
- optional float overlap_thresh = 6;
-
- // Probability a crop operation is skipped.
- optional float random_coef = 7;
-
- // Min ratio of padded image height and width to the input image's height and
- // width. Two entries per operation.
- repeated float min_padded_size_ratio = 8;
-
- // Max ratio of padded image height and width to the input image's height and
- // width. Two entries per operation.
- repeated float max_padded_size_ratio = 9;
-}
-
-// Randomly crops and pads an image to a fixed aspect ratio according to:
-// Liu et al., SSD: Single shot multibox detector.
-// Multiple SSDRandomCropPadFixedAspectRatioOperations are defined by this
-// preprocessing step. Only one operation (chosen at random) is actually
-// performed on an image.
-message SSDRandomCropPadFixedAspectRatio {
- repeated SSDRandomCropPadFixedAspectRatioOperation operations = 1;
-
- // Aspect ratio to pad to. This value is used for all crop and pad operations.
- optional float aspect_ratio = 2 [default=1.0];
-}
diff --git a/object_detection/protos/preprocessor_pb2.py b/object_detection/protos/preprocessor_pb2.py
deleted file mode 100644
index 40a0d576..00000000
--- a/object_detection/protos/preprocessor_pb2.py
+++ /dev/null
@@ -1,1985 +0,0 @@
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: object_detection/protos/preprocessor.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf import descriptor_pb2
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='object_detection/protos/preprocessor.proto',
- package='object_detection.protos',
- syntax='proto2',
- serialized_pb=_b('\n*object_detection/protos/preprocessor.proto\x12\x17object_detection.protos\"\xaf\x10\n\x11PreprocessingStep\x12\x42\n\x0fnormalize_image\x18\x01 \x01(\x0b\x32\'.object_detection.protos.NormalizeImageH\x00\x12O\n\x16random_horizontal_flip\x18\x02 \x01(\x0b\x32-.object_detection.protos.RandomHorizontalFlipH\x00\x12R\n\x18random_pixel_value_scale\x18\x03 \x01(\x0b\x32..object_detection.protos.RandomPixelValueScaleH\x00\x12G\n\x12random_image_scale\x18\x04 \x01(\x0b\x32).object_detection.protos.RandomImageScaleH\x00\x12\x46\n\x12random_rgb_to_gray\x18\x05 \x01(\x0b\x32(.object_detection.protos.RandomRGBtoGrayH\x00\x12S\n\x18random_adjust_brightness\x18\x06 \x01(\x0b\x32/.object_detection.protos.RandomAdjustBrightnessH\x00\x12O\n\x16random_adjust_contrast\x18\x07 \x01(\x0b\x32-.object_detection.protos.RandomAdjustContrastH\x00\x12\x45\n\x11random_adjust_hue\x18\x08 \x01(\x0b\x32(.object_detection.protos.RandomAdjustHueH\x00\x12S\n\x18random_adjust_saturation\x18\t \x01(\x0b\x32/.object_detection.protos.RandomAdjustSaturationH\x00\x12K\n\x14random_distort_color\x18\n \x01(\x0b\x32+.object_detection.protos.RandomDistortColorH\x00\x12I\n\x13random_jitter_boxes\x18\x0b \x01(\x0b\x32*.object_detection.protos.RandomJitterBoxesH\x00\x12\x45\n\x11random_crop_image\x18\x0c \x01(\x0b\x32(.object_detection.protos.RandomCropImageH\x00\x12\x43\n\x10random_pad_image\x18\r \x01(\x0b\x32\'.object_detection.protos.RandomPadImageH\x00\x12L\n\x15random_crop_pad_image\x18\x0e \x01(\x0b\x32+.object_detection.protos.RandomCropPadImageH\x00\x12W\n\x1brandom_crop_to_aspect_ratio\x18\x0f \x01(\x0b\x32\x30.object_detection.protos.RandomCropToAspectRatioH\x00\x12K\n\x14random_black_patches\x18\x10 \x01(\x0b\x32+.object_detection.protos.RandomBlackPatchesH\x00\x12K\n\x14random_resize_method\x18\x11 \x01(\x0b\x32+.object_detection.protos.RandomResizeMethodH\x00\x12\x61\n scale_boxes_to_pixel_coordinates\x18\x12 
\x01(\x0b\x32\x35.object_detection.protos.ScaleBoxesToPixelCoordinatesH\x00\x12<\n\x0cresize_image\x18\x13 \x01(\x0b\x32$.object_detection.protos.ResizeImageH\x00\x12M\n\x15subtract_channel_mean\x18\x14 \x01(\x0b\x32,.object_detection.protos.SubtractChannelMeanH\x00\x12\x41\n\x0fssd_random_crop\x18\x15 \x01(\x0b\x32&.object_detection.protos.SSDRandomCropH\x00\x12H\n\x13ssd_random_crop_pad\x18\x16 \x01(\x0b\x32).object_detection.protos.SSDRandomCropPadH\x00\x12\x64\n\"ssd_random_crop_fixed_aspect_ratio\x18\x17 \x01(\x0b\x32\x36.object_detection.protos.SSDRandomCropFixedAspectRatioH\x00\x12k\n&ssd_random_crop_pad_fixed_aspect_ratio\x18\x18 \x01(\x0b\x32\x39.object_detection.protos.SSDRandomCropPadFixedAspectRatioH\x00\x12K\n\x14random_vertical_flip\x18\x19 \x01(\x0b\x32+.object_detection.protos.RandomVerticalFlipH\x00\x12\x46\n\x11random_rotation90\x18\x1a \x01(\x0b\x32).object_detection.protos.RandomRotation90H\x00\x42\x14\n\x12preprocessing_step\"v\n\x0eNormalizeImage\x12\x17\n\x0foriginal_minval\x18\x01 \x01(\x02\x12\x17\n\x0foriginal_maxval\x18\x02 \x01(\x02\x12\x18\n\rtarget_minval\x18\x03 \x01(\x02:\x01\x30\x12\x18\n\rtarget_maxval\x18\x04 \x01(\x02:\x01\x31\"9\n\x14RandomHorizontalFlip\x12!\n\x19keypoint_flip_permutation\x18\x01 \x03(\x05\"7\n\x12RandomVerticalFlip\x12!\n\x19keypoint_flip_permutation\x18\x01 \x03(\x05\"\x12\n\x10RandomRotation90\"A\n\x15RandomPixelValueScale\x12\x13\n\x06minval\x18\x01 \x01(\x02:\x03\x30.9\x12\x13\n\x06maxval\x18\x02 \x01(\x02:\x03\x31.1\"L\n\x10RandomImageScale\x12\x1c\n\x0fmin_scale_ratio\x18\x01 \x01(\x02:\x03\x30.5\x12\x1a\n\x0fmax_scale_ratio\x18\x02 \x01(\x02:\x01\x32\"+\n\x0fRandomRGBtoGray\x12\x18\n\x0bprobability\x18\x01 \x01(\x02:\x03\x30.1\"0\n\x16RandomAdjustBrightness\x12\x16\n\tmax_delta\x18\x01 \x01(\x02:\x03\x30.2\"G\n\x14RandomAdjustContrast\x12\x16\n\tmin_delta\x18\x01 \x01(\x02:\x03\x30.8\x12\x17\n\tmax_delta\x18\x02 \x01(\x02:\x04\x31.25\"*\n\x0fRandomAdjustHue\x12\x17\n\tmax_delta\x18\x01 
\x01(\x02:\x04\x30.02\"I\n\x16RandomAdjustSaturation\x12\x16\n\tmin_delta\x18\x01 \x01(\x02:\x03\x30.8\x12\x17\n\tmax_delta\x18\x02 \x01(\x02:\x04\x31.25\",\n\x12RandomDistortColor\x12\x16\n\x0e\x63olor_ordering\x18\x01 \x01(\x05\"(\n\x11RandomJitterBoxes\x12\x13\n\x05ratio\x18\x01 \x01(\x02:\x04\x30.05\"\xd1\x01\n\x0fRandomCropImage\x12\x1d\n\x12min_object_covered\x18\x01 \x01(\x02:\x01\x31\x12\x1e\n\x10min_aspect_ratio\x18\x02 \x01(\x02:\x04\x30.75\x12\x1e\n\x10max_aspect_ratio\x18\x03 \x01(\x02:\x04\x31.33\x12\x15\n\x08min_area\x18\x04 \x01(\x02:\x03\x30.1\x12\x13\n\x08max_area\x18\x05 \x01(\x02:\x01\x31\x12\x1b\n\x0eoverlap_thresh\x18\x06 \x01(\x02:\x03\x30.3\x12\x16\n\x0brandom_coef\x18\x07 \x01(\x02:\x01\x30\"\x89\x01\n\x0eRandomPadImage\x12\x18\n\x10min_image_height\x18\x01 \x01(\x02\x12\x17\n\x0fmin_image_width\x18\x02 \x01(\x02\x12\x18\n\x10max_image_height\x18\x03 \x01(\x02\x12\x17\n\x0fmax_image_width\x18\x04 \x01(\x02\x12\x11\n\tpad_color\x18\x05 \x03(\x02\"\xa5\x02\n\x12RandomCropPadImage\x12\x1d\n\x12min_object_covered\x18\x01 \x01(\x02:\x01\x31\x12\x1e\n\x10min_aspect_ratio\x18\x02 \x01(\x02:\x04\x30.75\x12\x1e\n\x10max_aspect_ratio\x18\x03 \x01(\x02:\x04\x31.33\x12\x15\n\x08min_area\x18\x04 \x01(\x02:\x03\x30.1\x12\x13\n\x08max_area\x18\x05 \x01(\x02:\x01\x31\x12\x1b\n\x0eoverlap_thresh\x18\x06 \x01(\x02:\x03\x30.3\x12\x16\n\x0brandom_coef\x18\x07 \x01(\x02:\x01\x30\x12\x1d\n\x15min_padded_size_ratio\x18\x08 \x03(\x02\x12\x1d\n\x15max_padded_size_ratio\x18\t \x03(\x02\x12\x11\n\tpad_color\x18\n \x03(\x02\"O\n\x17RandomCropToAspectRatio\x12\x17\n\x0c\x61spect_ratio\x18\x01 \x01(\x02:\x01\x31\x12\x1b\n\x0eoverlap_thresh\x18\x02 \x01(\x02:\x03\x30.3\"o\n\x12RandomBlackPatches\x12\x1d\n\x11max_black_patches\x18\x01 \x01(\x05:\x02\x31\x30\x12\x18\n\x0bprobability\x18\x02 \x01(\x02:\x03\x30.5\x12 \n\x13size_to_image_ratio\x18\x03 \x01(\x02:\x03\x30.1\"A\n\x12RandomResizeMethod\x12\x15\n\rtarget_height\x18\x01 \x01(\x02\x12\x14\n\x0ctarget_width\x18\x02 
\x01(\x02\"\x1e\n\x1cScaleBoxesToPixelCoordinates\"\xc0\x01\n\x0bResizeImage\x12\x12\n\nnew_height\x18\x01 \x01(\x05\x12\x11\n\tnew_width\x18\x02 \x01(\x05\x12\x45\n\x06method\x18\x03 \x01(\x0e\x32+.object_detection.protos.ResizeImage.Method:\x08\x42ILINEAR\"C\n\x06Method\x12\x08\n\x04\x41REA\x10\x01\x12\x0b\n\x07\x42ICUBIC\x10\x02\x12\x0c\n\x08\x42ILINEAR\x10\x03\x12\x14\n\x10NEAREST_NEIGHBOR\x10\x04\"$\n\x13SubtractChannelMean\x12\r\n\x05means\x18\x01 \x03(\x02\"\xb9\x01\n\x16SSDRandomCropOperation\x12\x1a\n\x12min_object_covered\x18\x01 \x01(\x02\x12\x18\n\x10min_aspect_ratio\x18\x02 \x01(\x02\x12\x18\n\x10max_aspect_ratio\x18\x03 \x01(\x02\x12\x10\n\x08min_area\x18\x04 \x01(\x02\x12\x10\n\x08max_area\x18\x05 \x01(\x02\x12\x16\n\x0eoverlap_thresh\x18\x06 \x01(\x02\x12\x13\n\x0brandom_coef\x18\x07 \x01(\x02\"T\n\rSSDRandomCrop\x12\x43\n\noperations\x18\x01 \x03(\x0b\x32/.object_detection.protos.SSDRandomCropOperation\"\xb9\x02\n\x19SSDRandomCropPadOperation\x12\x1a\n\x12min_object_covered\x18\x01 \x01(\x02\x12\x18\n\x10min_aspect_ratio\x18\x02 \x01(\x02\x12\x18\n\x10max_aspect_ratio\x18\x03 \x01(\x02\x12\x10\n\x08min_area\x18\x04 \x01(\x02\x12\x10\n\x08max_area\x18\x05 \x01(\x02\x12\x16\n\x0eoverlap_thresh\x18\x06 \x01(\x02\x12\x13\n\x0brandom_coef\x18\x07 \x01(\x02\x12\x1d\n\x15min_padded_size_ratio\x18\x08 \x03(\x02\x12\x1d\n\x15max_padded_size_ratio\x18\t \x03(\x02\x12\x13\n\x0bpad_color_r\x18\n \x01(\x02\x12\x13\n\x0bpad_color_g\x18\x0b \x01(\x02\x12\x13\n\x0bpad_color_b\x18\x0c \x01(\x02\"Z\n\x10SSDRandomCropPad\x12\x46\n\noperations\x18\x01 \x03(\x0b\x32\x32.object_detection.protos.SSDRandomCropPadOperation\"\x95\x01\n&SSDRandomCropFixedAspectRatioOperation\x12\x1a\n\x12min_object_covered\x18\x01 \x01(\x02\x12\x10\n\x08min_area\x18\x04 \x01(\x02\x12\x10\n\x08max_area\x18\x05 \x01(\x02\x12\x16\n\x0eoverlap_thresh\x18\x06 \x01(\x02\x12\x13\n\x0brandom_coef\x18\x07 \x01(\x02\"\x8d\x01\n\x1dSSDRandomCropFixedAspectRatio\x12S\n\noperations\x18\x01 
\x03(\x0b\x32?.object_detection.protos.SSDRandomCropFixedAspectRatioOperation\x12\x17\n\x0c\x61spect_ratio\x18\x02 \x01(\x02:\x01\x31\"\x8a\x02\n)SSDRandomCropPadFixedAspectRatioOperation\x12\x1a\n\x12min_object_covered\x18\x01 \x01(\x02\x12\x18\n\x10min_aspect_ratio\x18\x02 \x01(\x02\x12\x18\n\x10max_aspect_ratio\x18\x03 \x01(\x02\x12\x10\n\x08min_area\x18\x04 \x01(\x02\x12\x10\n\x08max_area\x18\x05 \x01(\x02\x12\x16\n\x0eoverlap_thresh\x18\x06 \x01(\x02\x12\x13\n\x0brandom_coef\x18\x07 \x01(\x02\x12\x1d\n\x15min_padded_size_ratio\x18\x08 \x03(\x02\x12\x1d\n\x15max_padded_size_ratio\x18\t \x03(\x02\"\x93\x01\n SSDRandomCropPadFixedAspectRatio\x12V\n\noperations\x18\x01 \x03(\x0b\x32\x42.object_detection.protos.SSDRandomCropPadFixedAspectRatioOperation\x12\x17\n\x0c\x61spect_ratio\x18\x02 \x01(\x02:\x01\x31')
-)
-
-
-
-_RESIZEIMAGE_METHOD = _descriptor.EnumDescriptor(
- name='Method',
- full_name='object_detection.protos.ResizeImage.Method',
- filename=None,
- file=DESCRIPTOR,
- values=[
- _descriptor.EnumValueDescriptor(
- name='AREA', index=0, number=1,
- options=None,
- type=None),
- _descriptor.EnumValueDescriptor(
- name='BICUBIC', index=1, number=2,
- options=None,
- type=None),
- _descriptor.EnumValueDescriptor(
- name='BILINEAR', index=2, number=3,
- options=None,
- type=None),
- _descriptor.EnumValueDescriptor(
- name='NEAREST_NEIGHBOR', index=3, number=4,
- options=None,
- type=None),
- ],
- containing_type=None,
- options=None,
- serialized_start=4012,
- serialized_end=4079,
-)
-_sym_db.RegisterEnumDescriptor(_RESIZEIMAGE_METHOD)
-
-
-_PREPROCESSINGSTEP = _descriptor.Descriptor(
- name='PreprocessingStep',
- full_name='object_detection.protos.PreprocessingStep',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='normalize_image', full_name='object_detection.protos.PreprocessingStep.normalize_image', index=0,
- number=1, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='random_horizontal_flip', full_name='object_detection.protos.PreprocessingStep.random_horizontal_flip', index=1,
- number=2, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='random_pixel_value_scale', full_name='object_detection.protos.PreprocessingStep.random_pixel_value_scale', index=2,
- number=3, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='random_image_scale', full_name='object_detection.protos.PreprocessingStep.random_image_scale', index=3,
- number=4, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='random_rgb_to_gray', full_name='object_detection.protos.PreprocessingStep.random_rgb_to_gray', index=4,
- number=5, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='random_adjust_brightness', full_name='object_detection.protos.PreprocessingStep.random_adjust_brightness', index=5,
- number=6, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='random_adjust_contrast', full_name='object_detection.protos.PreprocessingStep.random_adjust_contrast', index=6,
- number=7, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='random_adjust_hue', full_name='object_detection.protos.PreprocessingStep.random_adjust_hue', index=7,
- number=8, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='random_adjust_saturation', full_name='object_detection.protos.PreprocessingStep.random_adjust_saturation', index=8,
- number=9, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='random_distort_color', full_name='object_detection.protos.PreprocessingStep.random_distort_color', index=9,
- number=10, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='random_jitter_boxes', full_name='object_detection.protos.PreprocessingStep.random_jitter_boxes', index=10,
- number=11, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='random_crop_image', full_name='object_detection.protos.PreprocessingStep.random_crop_image', index=11,
- number=12, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='random_pad_image', full_name='object_detection.protos.PreprocessingStep.random_pad_image', index=12,
- number=13, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='random_crop_pad_image', full_name='object_detection.protos.PreprocessingStep.random_crop_pad_image', index=13,
- number=14, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='random_crop_to_aspect_ratio', full_name='object_detection.protos.PreprocessingStep.random_crop_to_aspect_ratio', index=14,
- number=15, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='random_black_patches', full_name='object_detection.protos.PreprocessingStep.random_black_patches', index=15,
- number=16, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='random_resize_method', full_name='object_detection.protos.PreprocessingStep.random_resize_method', index=16,
- number=17, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='scale_boxes_to_pixel_coordinates', full_name='object_detection.protos.PreprocessingStep.scale_boxes_to_pixel_coordinates', index=17,
- number=18, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='resize_image', full_name='object_detection.protos.PreprocessingStep.resize_image', index=18,
- number=19, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='subtract_channel_mean', full_name='object_detection.protos.PreprocessingStep.subtract_channel_mean', index=19,
- number=20, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='ssd_random_crop', full_name='object_detection.protos.PreprocessingStep.ssd_random_crop', index=20,
- number=21, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='ssd_random_crop_pad', full_name='object_detection.protos.PreprocessingStep.ssd_random_crop_pad', index=21,
- number=22, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='ssd_random_crop_fixed_aspect_ratio', full_name='object_detection.protos.PreprocessingStep.ssd_random_crop_fixed_aspect_ratio', index=22,
- number=23, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='ssd_random_crop_pad_fixed_aspect_ratio', full_name='object_detection.protos.PreprocessingStep.ssd_random_crop_pad_fixed_aspect_ratio', index=23,
- number=24, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='random_vertical_flip', full_name='object_detection.protos.PreprocessingStep.random_vertical_flip', index=24,
- number=25, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='random_rotation90', full_name='object_detection.protos.PreprocessingStep.random_rotation90', index=25,
- number=26, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- _descriptor.OneofDescriptor(
- name='preprocessing_step', full_name='object_detection.protos.PreprocessingStep.preprocessing_step',
- index=0, containing_type=None, fields=[]),
- ],
- serialized_start=72,
- serialized_end=2167,
-)
-
-
-_NORMALIZEIMAGE = _descriptor.Descriptor(
- name='NormalizeImage',
- full_name='object_detection.protos.NormalizeImage',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='original_minval', full_name='object_detection.protos.NormalizeImage.original_minval', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='original_maxval', full_name='object_detection.protos.NormalizeImage.original_maxval', index=1,
- number=2, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='target_minval', full_name='object_detection.protos.NormalizeImage.target_minval', index=2,
- number=3, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='target_maxval', full_name='object_detection.protos.NormalizeImage.target_maxval', index=3,
- number=4, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(1),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=2169,
- serialized_end=2287,
-)
-
-
-_RANDOMHORIZONTALFLIP = _descriptor.Descriptor(
- name='RandomHorizontalFlip',
- full_name='object_detection.protos.RandomHorizontalFlip',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='keypoint_flip_permutation', full_name='object_detection.protos.RandomHorizontalFlip.keypoint_flip_permutation', index=0,
- number=1, type=5, cpp_type=1, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=2289,
- serialized_end=2346,
-)
-
-
-_RANDOMVERTICALFLIP = _descriptor.Descriptor(
- name='RandomVerticalFlip',
- full_name='object_detection.protos.RandomVerticalFlip',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='keypoint_flip_permutation', full_name='object_detection.protos.RandomVerticalFlip.keypoint_flip_permutation', index=0,
- number=1, type=5, cpp_type=1, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=2348,
- serialized_end=2403,
-)
-
-
-_RANDOMROTATION90 = _descriptor.Descriptor(
- name='RandomRotation90',
- full_name='object_detection.protos.RandomRotation90',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=2405,
- serialized_end=2423,
-)
-
-
-_RANDOMPIXELVALUESCALE = _descriptor.Descriptor(
- name='RandomPixelValueScale',
- full_name='object_detection.protos.RandomPixelValueScale',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='minval', full_name='object_detection.protos.RandomPixelValueScale.minval', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.9),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='maxval', full_name='object_detection.protos.RandomPixelValueScale.maxval', index=1,
- number=2, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(1.1),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=2425,
- serialized_end=2490,
-)
-
-
-_RANDOMIMAGESCALE = _descriptor.Descriptor(
- name='RandomImageScale',
- full_name='object_detection.protos.RandomImageScale',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='min_scale_ratio', full_name='object_detection.protos.RandomImageScale.min_scale_ratio', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.5),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='max_scale_ratio', full_name='object_detection.protos.RandomImageScale.max_scale_ratio', index=1,
- number=2, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(2),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=2492,
- serialized_end=2568,
-)
-
-
-_RANDOMRGBTOGRAY = _descriptor.Descriptor(
- name='RandomRGBtoGray',
- full_name='object_detection.protos.RandomRGBtoGray',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='probability', full_name='object_detection.protos.RandomRGBtoGray.probability', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.1),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=2570,
- serialized_end=2613,
-)
-
-
-_RANDOMADJUSTBRIGHTNESS = _descriptor.Descriptor(
- name='RandomAdjustBrightness',
- full_name='object_detection.protos.RandomAdjustBrightness',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='max_delta', full_name='object_detection.protos.RandomAdjustBrightness.max_delta', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.2),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=2615,
- serialized_end=2663,
-)
-
-
-_RANDOMADJUSTCONTRAST = _descriptor.Descriptor(
- name='RandomAdjustContrast',
- full_name='object_detection.protos.RandomAdjustContrast',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='min_delta', full_name='object_detection.protos.RandomAdjustContrast.min_delta', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.8),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='max_delta', full_name='object_detection.protos.RandomAdjustContrast.max_delta', index=1,
- number=2, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(1.25),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=2665,
- serialized_end=2736,
-)
-
-
-_RANDOMADJUSTHUE = _descriptor.Descriptor(
- name='RandomAdjustHue',
- full_name='object_detection.protos.RandomAdjustHue',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='max_delta', full_name='object_detection.protos.RandomAdjustHue.max_delta', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.02),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=2738,
- serialized_end=2780,
-)
-
-
-_RANDOMADJUSTSATURATION = _descriptor.Descriptor(
- name='RandomAdjustSaturation',
- full_name='object_detection.protos.RandomAdjustSaturation',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='min_delta', full_name='object_detection.protos.RandomAdjustSaturation.min_delta', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.8),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='max_delta', full_name='object_detection.protos.RandomAdjustSaturation.max_delta', index=1,
- number=2, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(1.25),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=2782,
- serialized_end=2855,
-)
-
-
-_RANDOMDISTORTCOLOR = _descriptor.Descriptor(
- name='RandomDistortColor',
- full_name='object_detection.protos.RandomDistortColor',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='color_ordering', full_name='object_detection.protos.RandomDistortColor.color_ordering', index=0,
- number=1, type=5, cpp_type=1, label=1,
- has_default_value=False, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=2857,
- serialized_end=2901,
-)
-
-
-_RANDOMJITTERBOXES = _descriptor.Descriptor(
- name='RandomJitterBoxes',
- full_name='object_detection.protos.RandomJitterBoxes',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='ratio', full_name='object_detection.protos.RandomJitterBoxes.ratio', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.05),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=2903,
- serialized_end=2943,
-)
-
-
-_RANDOMCROPIMAGE = _descriptor.Descriptor(
- name='RandomCropImage',
- full_name='object_detection.protos.RandomCropImage',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='min_object_covered', full_name='object_detection.protos.RandomCropImage.min_object_covered', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(1),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='min_aspect_ratio', full_name='object_detection.protos.RandomCropImage.min_aspect_ratio', index=1,
- number=2, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.75),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='max_aspect_ratio', full_name='object_detection.protos.RandomCropImage.max_aspect_ratio', index=2,
- number=3, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(1.33),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='min_area', full_name='object_detection.protos.RandomCropImage.min_area', index=3,
- number=4, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.1),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='max_area', full_name='object_detection.protos.RandomCropImage.max_area', index=4,
- number=5, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(1),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='overlap_thresh', full_name='object_detection.protos.RandomCropImage.overlap_thresh', index=5,
- number=6, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.3),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='random_coef', full_name='object_detection.protos.RandomCropImage.random_coef', index=6,
- number=7, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=2946,
- serialized_end=3155,
-)
-
-
-_RANDOMPADIMAGE = _descriptor.Descriptor(
- name='RandomPadImage',
- full_name='object_detection.protos.RandomPadImage',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='min_image_height', full_name='object_detection.protos.RandomPadImage.min_image_height', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='min_image_width', full_name='object_detection.protos.RandomPadImage.min_image_width', index=1,
- number=2, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='max_image_height', full_name='object_detection.protos.RandomPadImage.max_image_height', index=2,
- number=3, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='max_image_width', full_name='object_detection.protos.RandomPadImage.max_image_width', index=3,
- number=4, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='pad_color', full_name='object_detection.protos.RandomPadImage.pad_color', index=4,
- number=5, type=2, cpp_type=6, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=3158,
- serialized_end=3295,
-)
-
-
-_RANDOMCROPPADIMAGE = _descriptor.Descriptor(
- name='RandomCropPadImage',
- full_name='object_detection.protos.RandomCropPadImage',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='min_object_covered', full_name='object_detection.protos.RandomCropPadImage.min_object_covered', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(1),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='min_aspect_ratio', full_name='object_detection.protos.RandomCropPadImage.min_aspect_ratio', index=1,
- number=2, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.75),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='max_aspect_ratio', full_name='object_detection.protos.RandomCropPadImage.max_aspect_ratio', index=2,
- number=3, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(1.33),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='min_area', full_name='object_detection.protos.RandomCropPadImage.min_area', index=3,
- number=4, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.1),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='max_area', full_name='object_detection.protos.RandomCropPadImage.max_area', index=4,
- number=5, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(1),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='overlap_thresh', full_name='object_detection.protos.RandomCropPadImage.overlap_thresh', index=5,
- number=6, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.3),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='random_coef', full_name='object_detection.protos.RandomCropPadImage.random_coef', index=6,
- number=7, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='min_padded_size_ratio', full_name='object_detection.protos.RandomCropPadImage.min_padded_size_ratio', index=7,
- number=8, type=2, cpp_type=6, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='max_padded_size_ratio', full_name='object_detection.protos.RandomCropPadImage.max_padded_size_ratio', index=8,
- number=9, type=2, cpp_type=6, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='pad_color', full_name='object_detection.protos.RandomCropPadImage.pad_color', index=9,
- number=10, type=2, cpp_type=6, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=3298,
- serialized_end=3591,
-)
-
-
-_RANDOMCROPTOASPECTRATIO = _descriptor.Descriptor(
- name='RandomCropToAspectRatio',
- full_name='object_detection.protos.RandomCropToAspectRatio',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='aspect_ratio', full_name='object_detection.protos.RandomCropToAspectRatio.aspect_ratio', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(1),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='overlap_thresh', full_name='object_detection.protos.RandomCropToAspectRatio.overlap_thresh', index=1,
- number=2, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.3),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=3593,
- serialized_end=3672,
-)
-
-
-_RANDOMBLACKPATCHES = _descriptor.Descriptor(
- name='RandomBlackPatches',
- full_name='object_detection.protos.RandomBlackPatches',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='max_black_patches', full_name='object_detection.protos.RandomBlackPatches.max_black_patches', index=0,
- number=1, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=10,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='probability', full_name='object_detection.protos.RandomBlackPatches.probability', index=1,
- number=2, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.5),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='size_to_image_ratio', full_name='object_detection.protos.RandomBlackPatches.size_to_image_ratio', index=2,
- number=3, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.1),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=3674,
- serialized_end=3785,
-)
-
-
-_RANDOMRESIZEMETHOD = _descriptor.Descriptor(
- name='RandomResizeMethod',
- full_name='object_detection.protos.RandomResizeMethod',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='target_height', full_name='object_detection.protos.RandomResizeMethod.target_height', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='target_width', full_name='object_detection.protos.RandomResizeMethod.target_width', index=1,
- number=2, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=3787,
- serialized_end=3852,
-)
-
-
-_SCALEBOXESTOPIXELCOORDINATES = _descriptor.Descriptor(
- name='ScaleBoxesToPixelCoordinates',
- full_name='object_detection.protos.ScaleBoxesToPixelCoordinates',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=3854,
- serialized_end=3884,
-)
-
-
-_RESIZEIMAGE = _descriptor.Descriptor(
- name='ResizeImage',
- full_name='object_detection.protos.ResizeImage',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='new_height', full_name='object_detection.protos.ResizeImage.new_height', index=0,
- number=1, type=5, cpp_type=1, label=1,
- has_default_value=False, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='new_width', full_name='object_detection.protos.ResizeImage.new_width', index=1,
- number=2, type=5, cpp_type=1, label=1,
- has_default_value=False, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='method', full_name='object_detection.protos.ResizeImage.method', index=2,
- number=3, type=14, cpp_type=8, label=1,
- has_default_value=True, default_value=3,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- _RESIZEIMAGE_METHOD,
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=3887,
- serialized_end=4079,
-)
-
-
-_SUBTRACTCHANNELMEAN = _descriptor.Descriptor(
- name='SubtractChannelMean',
- full_name='object_detection.protos.SubtractChannelMean',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='means', full_name='object_detection.protos.SubtractChannelMean.means', index=0,
- number=1, type=2, cpp_type=6, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=4081,
- serialized_end=4117,
-)
-
-
-_SSDRANDOMCROPOPERATION = _descriptor.Descriptor(
- name='SSDRandomCropOperation',
- full_name='object_detection.protos.SSDRandomCropOperation',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='min_object_covered', full_name='object_detection.protos.SSDRandomCropOperation.min_object_covered', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='min_aspect_ratio', full_name='object_detection.protos.SSDRandomCropOperation.min_aspect_ratio', index=1,
- number=2, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='max_aspect_ratio', full_name='object_detection.protos.SSDRandomCropOperation.max_aspect_ratio', index=2,
- number=3, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='min_area', full_name='object_detection.protos.SSDRandomCropOperation.min_area', index=3,
- number=4, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='max_area', full_name='object_detection.protos.SSDRandomCropOperation.max_area', index=4,
- number=5, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='overlap_thresh', full_name='object_detection.protos.SSDRandomCropOperation.overlap_thresh', index=5,
- number=6, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='random_coef', full_name='object_detection.protos.SSDRandomCropOperation.random_coef', index=6,
- number=7, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=4120,
- serialized_end=4305,
-)
-
-
-_SSDRANDOMCROP = _descriptor.Descriptor(
- name='SSDRandomCrop',
- full_name='object_detection.protos.SSDRandomCrop',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='operations', full_name='object_detection.protos.SSDRandomCrop.operations', index=0,
- number=1, type=11, cpp_type=10, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=4307,
- serialized_end=4391,
-)
-
-
-_SSDRANDOMCROPPADOPERATION = _descriptor.Descriptor(
- name='SSDRandomCropPadOperation',
- full_name='object_detection.protos.SSDRandomCropPadOperation',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='min_object_covered', full_name='object_detection.protos.SSDRandomCropPadOperation.min_object_covered', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='min_aspect_ratio', full_name='object_detection.protos.SSDRandomCropPadOperation.min_aspect_ratio', index=1,
- number=2, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='max_aspect_ratio', full_name='object_detection.protos.SSDRandomCropPadOperation.max_aspect_ratio', index=2,
- number=3, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='min_area', full_name='object_detection.protos.SSDRandomCropPadOperation.min_area', index=3,
- number=4, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='max_area', full_name='object_detection.protos.SSDRandomCropPadOperation.max_area', index=4,
- number=5, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='overlap_thresh', full_name='object_detection.protos.SSDRandomCropPadOperation.overlap_thresh', index=5,
- number=6, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='random_coef', full_name='object_detection.protos.SSDRandomCropPadOperation.random_coef', index=6,
- number=7, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='min_padded_size_ratio', full_name='object_detection.protos.SSDRandomCropPadOperation.min_padded_size_ratio', index=7,
- number=8, type=2, cpp_type=6, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='max_padded_size_ratio', full_name='object_detection.protos.SSDRandomCropPadOperation.max_padded_size_ratio', index=8,
- number=9, type=2, cpp_type=6, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='pad_color_r', full_name='object_detection.protos.SSDRandomCropPadOperation.pad_color_r', index=9,
- number=10, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='pad_color_g', full_name='object_detection.protos.SSDRandomCropPadOperation.pad_color_g', index=10,
- number=11, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='pad_color_b', full_name='object_detection.protos.SSDRandomCropPadOperation.pad_color_b', index=11,
- number=12, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=4394,
- serialized_end=4707,
-)
-
-
-_SSDRANDOMCROPPAD = _descriptor.Descriptor(
- name='SSDRandomCropPad',
- full_name='object_detection.protos.SSDRandomCropPad',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='operations', full_name='object_detection.protos.SSDRandomCropPad.operations', index=0,
- number=1, type=11, cpp_type=10, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=4709,
- serialized_end=4799,
-)
-
-
-_SSDRANDOMCROPFIXEDASPECTRATIOOPERATION = _descriptor.Descriptor(
- name='SSDRandomCropFixedAspectRatioOperation',
- full_name='object_detection.protos.SSDRandomCropFixedAspectRatioOperation',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='min_object_covered', full_name='object_detection.protos.SSDRandomCropFixedAspectRatioOperation.min_object_covered', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='min_area', full_name='object_detection.protos.SSDRandomCropFixedAspectRatioOperation.min_area', index=1,
- number=4, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='max_area', full_name='object_detection.protos.SSDRandomCropFixedAspectRatioOperation.max_area', index=2,
- number=5, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='overlap_thresh', full_name='object_detection.protos.SSDRandomCropFixedAspectRatioOperation.overlap_thresh', index=3,
- number=6, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='random_coef', full_name='object_detection.protos.SSDRandomCropFixedAspectRatioOperation.random_coef', index=4,
- number=7, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=4802,
- serialized_end=4951,
-)
-
-
-_SSDRANDOMCROPFIXEDASPECTRATIO = _descriptor.Descriptor(
- name='SSDRandomCropFixedAspectRatio',
- full_name='object_detection.protos.SSDRandomCropFixedAspectRatio',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='operations', full_name='object_detection.protos.SSDRandomCropFixedAspectRatio.operations', index=0,
- number=1, type=11, cpp_type=10, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='aspect_ratio', full_name='object_detection.protos.SSDRandomCropFixedAspectRatio.aspect_ratio', index=1,
- number=2, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(1),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=4954,
- serialized_end=5095,
-)
-
-
-_SSDRANDOMCROPPADFIXEDASPECTRATIOOPERATION = _descriptor.Descriptor(
- name='SSDRandomCropPadFixedAspectRatioOperation',
- full_name='object_detection.protos.SSDRandomCropPadFixedAspectRatioOperation',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='min_object_covered', full_name='object_detection.protos.SSDRandomCropPadFixedAspectRatioOperation.min_object_covered', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='min_aspect_ratio', full_name='object_detection.protos.SSDRandomCropPadFixedAspectRatioOperation.min_aspect_ratio', index=1,
- number=2, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='max_aspect_ratio', full_name='object_detection.protos.SSDRandomCropPadFixedAspectRatioOperation.max_aspect_ratio', index=2,
- number=3, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='min_area', full_name='object_detection.protos.SSDRandomCropPadFixedAspectRatioOperation.min_area', index=3,
- number=4, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='max_area', full_name='object_detection.protos.SSDRandomCropPadFixedAspectRatioOperation.max_area', index=4,
- number=5, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='overlap_thresh', full_name='object_detection.protos.SSDRandomCropPadFixedAspectRatioOperation.overlap_thresh', index=5,
- number=6, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='random_coef', full_name='object_detection.protos.SSDRandomCropPadFixedAspectRatioOperation.random_coef', index=6,
- number=7, type=2, cpp_type=6, label=1,
- has_default_value=False, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='min_padded_size_ratio', full_name='object_detection.protos.SSDRandomCropPadFixedAspectRatioOperation.min_padded_size_ratio', index=7,
- number=8, type=2, cpp_type=6, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='max_padded_size_ratio', full_name='object_detection.protos.SSDRandomCropPadFixedAspectRatioOperation.max_padded_size_ratio', index=8,
- number=9, type=2, cpp_type=6, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=5098,
- serialized_end=5364,
-)
-
-
-_SSDRANDOMCROPPADFIXEDASPECTRATIO = _descriptor.Descriptor(
- name='SSDRandomCropPadFixedAspectRatio',
- full_name='object_detection.protos.SSDRandomCropPadFixedAspectRatio',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='operations', full_name='object_detection.protos.SSDRandomCropPadFixedAspectRatio.operations', index=0,
- number=1, type=11, cpp_type=10, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='aspect_ratio', full_name='object_detection.protos.SSDRandomCropPadFixedAspectRatio.aspect_ratio', index=1,
- number=2, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(1),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=5367,
- serialized_end=5514,
-)
-
-_PREPROCESSINGSTEP.fields_by_name['normalize_image'].message_type = _NORMALIZEIMAGE
-_PREPROCESSINGSTEP.fields_by_name['random_horizontal_flip'].message_type = _RANDOMHORIZONTALFLIP
-_PREPROCESSINGSTEP.fields_by_name['random_pixel_value_scale'].message_type = _RANDOMPIXELVALUESCALE
-_PREPROCESSINGSTEP.fields_by_name['random_image_scale'].message_type = _RANDOMIMAGESCALE
-_PREPROCESSINGSTEP.fields_by_name['random_rgb_to_gray'].message_type = _RANDOMRGBTOGRAY
-_PREPROCESSINGSTEP.fields_by_name['random_adjust_brightness'].message_type = _RANDOMADJUSTBRIGHTNESS
-_PREPROCESSINGSTEP.fields_by_name['random_adjust_contrast'].message_type = _RANDOMADJUSTCONTRAST
-_PREPROCESSINGSTEP.fields_by_name['random_adjust_hue'].message_type = _RANDOMADJUSTHUE
-_PREPROCESSINGSTEP.fields_by_name['random_adjust_saturation'].message_type = _RANDOMADJUSTSATURATION
-_PREPROCESSINGSTEP.fields_by_name['random_distort_color'].message_type = _RANDOMDISTORTCOLOR
-_PREPROCESSINGSTEP.fields_by_name['random_jitter_boxes'].message_type = _RANDOMJITTERBOXES
-_PREPROCESSINGSTEP.fields_by_name['random_crop_image'].message_type = _RANDOMCROPIMAGE
-_PREPROCESSINGSTEP.fields_by_name['random_pad_image'].message_type = _RANDOMPADIMAGE
-_PREPROCESSINGSTEP.fields_by_name['random_crop_pad_image'].message_type = _RANDOMCROPPADIMAGE
-_PREPROCESSINGSTEP.fields_by_name['random_crop_to_aspect_ratio'].message_type = _RANDOMCROPTOASPECTRATIO
-_PREPROCESSINGSTEP.fields_by_name['random_black_patches'].message_type = _RANDOMBLACKPATCHES
-_PREPROCESSINGSTEP.fields_by_name['random_resize_method'].message_type = _RANDOMRESIZEMETHOD
-_PREPROCESSINGSTEP.fields_by_name['scale_boxes_to_pixel_coordinates'].message_type = _SCALEBOXESTOPIXELCOORDINATES
-_PREPROCESSINGSTEP.fields_by_name['resize_image'].message_type = _RESIZEIMAGE
-_PREPROCESSINGSTEP.fields_by_name['subtract_channel_mean'].message_type = _SUBTRACTCHANNELMEAN
-_PREPROCESSINGSTEP.fields_by_name['ssd_random_crop'].message_type = _SSDRANDOMCROP
-_PREPROCESSINGSTEP.fields_by_name['ssd_random_crop_pad'].message_type = _SSDRANDOMCROPPAD
-_PREPROCESSINGSTEP.fields_by_name['ssd_random_crop_fixed_aspect_ratio'].message_type = _SSDRANDOMCROPFIXEDASPECTRATIO
-_PREPROCESSINGSTEP.fields_by_name['ssd_random_crop_pad_fixed_aspect_ratio'].message_type = _SSDRANDOMCROPPADFIXEDASPECTRATIO
-_PREPROCESSINGSTEP.fields_by_name['random_vertical_flip'].message_type = _RANDOMVERTICALFLIP
-_PREPROCESSINGSTEP.fields_by_name['random_rotation90'].message_type = _RANDOMROTATION90
-_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append(
- _PREPROCESSINGSTEP.fields_by_name['normalize_image'])
-_PREPROCESSINGSTEP.fields_by_name['normalize_image'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step']
-_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append(
- _PREPROCESSINGSTEP.fields_by_name['random_horizontal_flip'])
-_PREPROCESSINGSTEP.fields_by_name['random_horizontal_flip'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step']
-_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append(
- _PREPROCESSINGSTEP.fields_by_name['random_pixel_value_scale'])
-_PREPROCESSINGSTEP.fields_by_name['random_pixel_value_scale'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step']
-_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append(
- _PREPROCESSINGSTEP.fields_by_name['random_image_scale'])
-_PREPROCESSINGSTEP.fields_by_name['random_image_scale'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step']
-_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append(
- _PREPROCESSINGSTEP.fields_by_name['random_rgb_to_gray'])
-_PREPROCESSINGSTEP.fields_by_name['random_rgb_to_gray'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step']
-_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append(
- _PREPROCESSINGSTEP.fields_by_name['random_adjust_brightness'])
-_PREPROCESSINGSTEP.fields_by_name['random_adjust_brightness'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step']
-_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append(
- _PREPROCESSINGSTEP.fields_by_name['random_adjust_contrast'])
-_PREPROCESSINGSTEP.fields_by_name['random_adjust_contrast'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step']
-_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append(
- _PREPROCESSINGSTEP.fields_by_name['random_adjust_hue'])
-_PREPROCESSINGSTEP.fields_by_name['random_adjust_hue'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step']
-_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append(
- _PREPROCESSINGSTEP.fields_by_name['random_adjust_saturation'])
-_PREPROCESSINGSTEP.fields_by_name['random_adjust_saturation'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step']
-_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append(
- _PREPROCESSINGSTEP.fields_by_name['random_distort_color'])
-_PREPROCESSINGSTEP.fields_by_name['random_distort_color'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step']
-_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append(
- _PREPROCESSINGSTEP.fields_by_name['random_jitter_boxes'])
-_PREPROCESSINGSTEP.fields_by_name['random_jitter_boxes'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step']
-_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append(
- _PREPROCESSINGSTEP.fields_by_name['random_crop_image'])
-_PREPROCESSINGSTEP.fields_by_name['random_crop_image'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step']
-_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append(
- _PREPROCESSINGSTEP.fields_by_name['random_pad_image'])
-_PREPROCESSINGSTEP.fields_by_name['random_pad_image'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step']
-_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append(
- _PREPROCESSINGSTEP.fields_by_name['random_crop_pad_image'])
-_PREPROCESSINGSTEP.fields_by_name['random_crop_pad_image'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step']
-_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append(
- _PREPROCESSINGSTEP.fields_by_name['random_crop_to_aspect_ratio'])
-_PREPROCESSINGSTEP.fields_by_name['random_crop_to_aspect_ratio'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step']
-_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append(
- _PREPROCESSINGSTEP.fields_by_name['random_black_patches'])
-_PREPROCESSINGSTEP.fields_by_name['random_black_patches'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step']
-_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append(
- _PREPROCESSINGSTEP.fields_by_name['random_resize_method'])
-_PREPROCESSINGSTEP.fields_by_name['random_resize_method'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step']
-_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append(
- _PREPROCESSINGSTEP.fields_by_name['scale_boxes_to_pixel_coordinates'])
-_PREPROCESSINGSTEP.fields_by_name['scale_boxes_to_pixel_coordinates'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step']
-_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append(
- _PREPROCESSINGSTEP.fields_by_name['resize_image'])
-_PREPROCESSINGSTEP.fields_by_name['resize_image'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step']
-_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append(
- _PREPROCESSINGSTEP.fields_by_name['subtract_channel_mean'])
-_PREPROCESSINGSTEP.fields_by_name['subtract_channel_mean'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step']
-_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append(
- _PREPROCESSINGSTEP.fields_by_name['ssd_random_crop'])
-_PREPROCESSINGSTEP.fields_by_name['ssd_random_crop'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step']
-_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append(
- _PREPROCESSINGSTEP.fields_by_name['ssd_random_crop_pad'])
-_PREPROCESSINGSTEP.fields_by_name['ssd_random_crop_pad'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step']
-_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append(
- _PREPROCESSINGSTEP.fields_by_name['ssd_random_crop_fixed_aspect_ratio'])
-_PREPROCESSINGSTEP.fields_by_name['ssd_random_crop_fixed_aspect_ratio'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step']
-_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append(
- _PREPROCESSINGSTEP.fields_by_name['ssd_random_crop_pad_fixed_aspect_ratio'])
-_PREPROCESSINGSTEP.fields_by_name['ssd_random_crop_pad_fixed_aspect_ratio'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step']
-_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append(
- _PREPROCESSINGSTEP.fields_by_name['random_vertical_flip'])
-_PREPROCESSINGSTEP.fields_by_name['random_vertical_flip'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step']
-_PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step'].fields.append(
- _PREPROCESSINGSTEP.fields_by_name['random_rotation90'])
-_PREPROCESSINGSTEP.fields_by_name['random_rotation90'].containing_oneof = _PREPROCESSINGSTEP.oneofs_by_name['preprocessing_step']
-_RESIZEIMAGE.fields_by_name['method'].enum_type = _RESIZEIMAGE_METHOD
-_RESIZEIMAGE_METHOD.containing_type = _RESIZEIMAGE
-_SSDRANDOMCROP.fields_by_name['operations'].message_type = _SSDRANDOMCROPOPERATION
-_SSDRANDOMCROPPAD.fields_by_name['operations'].message_type = _SSDRANDOMCROPPADOPERATION
-_SSDRANDOMCROPFIXEDASPECTRATIO.fields_by_name['operations'].message_type = _SSDRANDOMCROPFIXEDASPECTRATIOOPERATION
-_SSDRANDOMCROPPADFIXEDASPECTRATIO.fields_by_name['operations'].message_type = _SSDRANDOMCROPPADFIXEDASPECTRATIOOPERATION
-DESCRIPTOR.message_types_by_name['PreprocessingStep'] = _PREPROCESSINGSTEP
-DESCRIPTOR.message_types_by_name['NormalizeImage'] = _NORMALIZEIMAGE
-DESCRIPTOR.message_types_by_name['RandomHorizontalFlip'] = _RANDOMHORIZONTALFLIP
-DESCRIPTOR.message_types_by_name['RandomVerticalFlip'] = _RANDOMVERTICALFLIP
-DESCRIPTOR.message_types_by_name['RandomRotation90'] = _RANDOMROTATION90
-DESCRIPTOR.message_types_by_name['RandomPixelValueScale'] = _RANDOMPIXELVALUESCALE
-DESCRIPTOR.message_types_by_name['RandomImageScale'] = _RANDOMIMAGESCALE
-DESCRIPTOR.message_types_by_name['RandomRGBtoGray'] = _RANDOMRGBTOGRAY
-DESCRIPTOR.message_types_by_name['RandomAdjustBrightness'] = _RANDOMADJUSTBRIGHTNESS
-DESCRIPTOR.message_types_by_name['RandomAdjustContrast'] = _RANDOMADJUSTCONTRAST
-DESCRIPTOR.message_types_by_name['RandomAdjustHue'] = _RANDOMADJUSTHUE
-DESCRIPTOR.message_types_by_name['RandomAdjustSaturation'] = _RANDOMADJUSTSATURATION
-DESCRIPTOR.message_types_by_name['RandomDistortColor'] = _RANDOMDISTORTCOLOR
-DESCRIPTOR.message_types_by_name['RandomJitterBoxes'] = _RANDOMJITTERBOXES
-DESCRIPTOR.message_types_by_name['RandomCropImage'] = _RANDOMCROPIMAGE
-DESCRIPTOR.message_types_by_name['RandomPadImage'] = _RANDOMPADIMAGE
-DESCRIPTOR.message_types_by_name['RandomCropPadImage'] = _RANDOMCROPPADIMAGE
-DESCRIPTOR.message_types_by_name['RandomCropToAspectRatio'] = _RANDOMCROPTOASPECTRATIO
-DESCRIPTOR.message_types_by_name['RandomBlackPatches'] = _RANDOMBLACKPATCHES
-DESCRIPTOR.message_types_by_name['RandomResizeMethod'] = _RANDOMRESIZEMETHOD
-DESCRIPTOR.message_types_by_name['ScaleBoxesToPixelCoordinates'] = _SCALEBOXESTOPIXELCOORDINATES
-DESCRIPTOR.message_types_by_name['ResizeImage'] = _RESIZEIMAGE
-DESCRIPTOR.message_types_by_name['SubtractChannelMean'] = _SUBTRACTCHANNELMEAN
-DESCRIPTOR.message_types_by_name['SSDRandomCropOperation'] = _SSDRANDOMCROPOPERATION
-DESCRIPTOR.message_types_by_name['SSDRandomCrop'] = _SSDRANDOMCROP
-DESCRIPTOR.message_types_by_name['SSDRandomCropPadOperation'] = _SSDRANDOMCROPPADOPERATION
-DESCRIPTOR.message_types_by_name['SSDRandomCropPad'] = _SSDRANDOMCROPPAD
-DESCRIPTOR.message_types_by_name['SSDRandomCropFixedAspectRatioOperation'] = _SSDRANDOMCROPFIXEDASPECTRATIOOPERATION
-DESCRIPTOR.message_types_by_name['SSDRandomCropFixedAspectRatio'] = _SSDRANDOMCROPFIXEDASPECTRATIO
-DESCRIPTOR.message_types_by_name['SSDRandomCropPadFixedAspectRatioOperation'] = _SSDRANDOMCROPPADFIXEDASPECTRATIOOPERATION
-DESCRIPTOR.message_types_by_name['SSDRandomCropPadFixedAspectRatio'] = _SSDRANDOMCROPPADFIXEDASPECTRATIO
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-PreprocessingStep = _reflection.GeneratedProtocolMessageType('PreprocessingStep', (_message.Message,), dict(
- DESCRIPTOR = _PREPROCESSINGSTEP,
- __module__ = 'object_detection.protos.preprocessor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.PreprocessingStep)
- ))
-_sym_db.RegisterMessage(PreprocessingStep)
-
-NormalizeImage = _reflection.GeneratedProtocolMessageType('NormalizeImage', (_message.Message,), dict(
- DESCRIPTOR = _NORMALIZEIMAGE,
- __module__ = 'object_detection.protos.preprocessor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.NormalizeImage)
- ))
-_sym_db.RegisterMessage(NormalizeImage)
-
-RandomHorizontalFlip = _reflection.GeneratedProtocolMessageType('RandomHorizontalFlip', (_message.Message,), dict(
- DESCRIPTOR = _RANDOMHORIZONTALFLIP,
- __module__ = 'object_detection.protos.preprocessor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.RandomHorizontalFlip)
- ))
-_sym_db.RegisterMessage(RandomHorizontalFlip)
-
-RandomVerticalFlip = _reflection.GeneratedProtocolMessageType('RandomVerticalFlip', (_message.Message,), dict(
- DESCRIPTOR = _RANDOMVERTICALFLIP,
- __module__ = 'object_detection.protos.preprocessor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.RandomVerticalFlip)
- ))
-_sym_db.RegisterMessage(RandomVerticalFlip)
-
-RandomRotation90 = _reflection.GeneratedProtocolMessageType('RandomRotation90', (_message.Message,), dict(
- DESCRIPTOR = _RANDOMROTATION90,
- __module__ = 'object_detection.protos.preprocessor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.RandomRotation90)
- ))
-_sym_db.RegisterMessage(RandomRotation90)
-
-RandomPixelValueScale = _reflection.GeneratedProtocolMessageType('RandomPixelValueScale', (_message.Message,), dict(
- DESCRIPTOR = _RANDOMPIXELVALUESCALE,
- __module__ = 'object_detection.protos.preprocessor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.RandomPixelValueScale)
- ))
-_sym_db.RegisterMessage(RandomPixelValueScale)
-
-RandomImageScale = _reflection.GeneratedProtocolMessageType('RandomImageScale', (_message.Message,), dict(
- DESCRIPTOR = _RANDOMIMAGESCALE,
- __module__ = 'object_detection.protos.preprocessor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.RandomImageScale)
- ))
-_sym_db.RegisterMessage(RandomImageScale)
-
-RandomRGBtoGray = _reflection.GeneratedProtocolMessageType('RandomRGBtoGray', (_message.Message,), dict(
- DESCRIPTOR = _RANDOMRGBTOGRAY,
- __module__ = 'object_detection.protos.preprocessor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.RandomRGBtoGray)
- ))
-_sym_db.RegisterMessage(RandomRGBtoGray)
-
-RandomAdjustBrightness = _reflection.GeneratedProtocolMessageType('RandomAdjustBrightness', (_message.Message,), dict(
- DESCRIPTOR = _RANDOMADJUSTBRIGHTNESS,
- __module__ = 'object_detection.protos.preprocessor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.RandomAdjustBrightness)
- ))
-_sym_db.RegisterMessage(RandomAdjustBrightness)
-
-RandomAdjustContrast = _reflection.GeneratedProtocolMessageType('RandomAdjustContrast', (_message.Message,), dict(
- DESCRIPTOR = _RANDOMADJUSTCONTRAST,
- __module__ = 'object_detection.protos.preprocessor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.RandomAdjustContrast)
- ))
-_sym_db.RegisterMessage(RandomAdjustContrast)
-
-RandomAdjustHue = _reflection.GeneratedProtocolMessageType('RandomAdjustHue', (_message.Message,), dict(
- DESCRIPTOR = _RANDOMADJUSTHUE,
- __module__ = 'object_detection.protos.preprocessor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.RandomAdjustHue)
- ))
-_sym_db.RegisterMessage(RandomAdjustHue)
-
-RandomAdjustSaturation = _reflection.GeneratedProtocolMessageType('RandomAdjustSaturation', (_message.Message,), dict(
- DESCRIPTOR = _RANDOMADJUSTSATURATION,
- __module__ = 'object_detection.protos.preprocessor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.RandomAdjustSaturation)
- ))
-_sym_db.RegisterMessage(RandomAdjustSaturation)
-
-RandomDistortColor = _reflection.GeneratedProtocolMessageType('RandomDistortColor', (_message.Message,), dict(
- DESCRIPTOR = _RANDOMDISTORTCOLOR,
- __module__ = 'object_detection.protos.preprocessor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.RandomDistortColor)
- ))
-_sym_db.RegisterMessage(RandomDistortColor)
-
-RandomJitterBoxes = _reflection.GeneratedProtocolMessageType('RandomJitterBoxes', (_message.Message,), dict(
- DESCRIPTOR = _RANDOMJITTERBOXES,
- __module__ = 'object_detection.protos.preprocessor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.RandomJitterBoxes)
- ))
-_sym_db.RegisterMessage(RandomJitterBoxes)
-
-RandomCropImage = _reflection.GeneratedProtocolMessageType('RandomCropImage', (_message.Message,), dict(
- DESCRIPTOR = _RANDOMCROPIMAGE,
- __module__ = 'object_detection.protos.preprocessor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.RandomCropImage)
- ))
-_sym_db.RegisterMessage(RandomCropImage)
-
-RandomPadImage = _reflection.GeneratedProtocolMessageType('RandomPadImage', (_message.Message,), dict(
- DESCRIPTOR = _RANDOMPADIMAGE,
- __module__ = 'object_detection.protos.preprocessor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.RandomPadImage)
- ))
-_sym_db.RegisterMessage(RandomPadImage)
-
-RandomCropPadImage = _reflection.GeneratedProtocolMessageType('RandomCropPadImage', (_message.Message,), dict(
- DESCRIPTOR = _RANDOMCROPPADIMAGE,
- __module__ = 'object_detection.protos.preprocessor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.RandomCropPadImage)
- ))
-_sym_db.RegisterMessage(RandomCropPadImage)
-
-RandomCropToAspectRatio = _reflection.GeneratedProtocolMessageType('RandomCropToAspectRatio', (_message.Message,), dict(
- DESCRIPTOR = _RANDOMCROPTOASPECTRATIO,
- __module__ = 'object_detection.protos.preprocessor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.RandomCropToAspectRatio)
- ))
-_sym_db.RegisterMessage(RandomCropToAspectRatio)
-
-RandomBlackPatches = _reflection.GeneratedProtocolMessageType('RandomBlackPatches', (_message.Message,), dict(
- DESCRIPTOR = _RANDOMBLACKPATCHES,
- __module__ = 'object_detection.protos.preprocessor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.RandomBlackPatches)
- ))
-_sym_db.RegisterMessage(RandomBlackPatches)
-
-RandomResizeMethod = _reflection.GeneratedProtocolMessageType('RandomResizeMethod', (_message.Message,), dict(
- DESCRIPTOR = _RANDOMRESIZEMETHOD,
- __module__ = 'object_detection.protos.preprocessor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.RandomResizeMethod)
- ))
-_sym_db.RegisterMessage(RandomResizeMethod)
-
-ScaleBoxesToPixelCoordinates = _reflection.GeneratedProtocolMessageType('ScaleBoxesToPixelCoordinates', (_message.Message,), dict(
- DESCRIPTOR = _SCALEBOXESTOPIXELCOORDINATES,
- __module__ = 'object_detection.protos.preprocessor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.ScaleBoxesToPixelCoordinates)
- ))
-_sym_db.RegisterMessage(ScaleBoxesToPixelCoordinates)
-
-ResizeImage = _reflection.GeneratedProtocolMessageType('ResizeImage', (_message.Message,), dict(
- DESCRIPTOR = _RESIZEIMAGE,
- __module__ = 'object_detection.protos.preprocessor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.ResizeImage)
- ))
-_sym_db.RegisterMessage(ResizeImage)
-
-SubtractChannelMean = _reflection.GeneratedProtocolMessageType('SubtractChannelMean', (_message.Message,), dict(
- DESCRIPTOR = _SUBTRACTCHANNELMEAN,
- __module__ = 'object_detection.protos.preprocessor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.SubtractChannelMean)
- ))
-_sym_db.RegisterMessage(SubtractChannelMean)
-
-SSDRandomCropOperation = _reflection.GeneratedProtocolMessageType('SSDRandomCropOperation', (_message.Message,), dict(
- DESCRIPTOR = _SSDRANDOMCROPOPERATION,
- __module__ = 'object_detection.protos.preprocessor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.SSDRandomCropOperation)
- ))
-_sym_db.RegisterMessage(SSDRandomCropOperation)
-
-SSDRandomCrop = _reflection.GeneratedProtocolMessageType('SSDRandomCrop', (_message.Message,), dict(
- DESCRIPTOR = _SSDRANDOMCROP,
- __module__ = 'object_detection.protos.preprocessor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.SSDRandomCrop)
- ))
-_sym_db.RegisterMessage(SSDRandomCrop)
-
-SSDRandomCropPadOperation = _reflection.GeneratedProtocolMessageType('SSDRandomCropPadOperation', (_message.Message,), dict(
- DESCRIPTOR = _SSDRANDOMCROPPADOPERATION,
- __module__ = 'object_detection.protos.preprocessor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.SSDRandomCropPadOperation)
- ))
-_sym_db.RegisterMessage(SSDRandomCropPadOperation)
-
-SSDRandomCropPad = _reflection.GeneratedProtocolMessageType('SSDRandomCropPad', (_message.Message,), dict(
- DESCRIPTOR = _SSDRANDOMCROPPAD,
- __module__ = 'object_detection.protos.preprocessor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.SSDRandomCropPad)
- ))
-_sym_db.RegisterMessage(SSDRandomCropPad)
-
-SSDRandomCropFixedAspectRatioOperation = _reflection.GeneratedProtocolMessageType('SSDRandomCropFixedAspectRatioOperation', (_message.Message,), dict(
- DESCRIPTOR = _SSDRANDOMCROPFIXEDASPECTRATIOOPERATION,
- __module__ = 'object_detection.protos.preprocessor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.SSDRandomCropFixedAspectRatioOperation)
- ))
-_sym_db.RegisterMessage(SSDRandomCropFixedAspectRatioOperation)
-
-SSDRandomCropFixedAspectRatio = _reflection.GeneratedProtocolMessageType('SSDRandomCropFixedAspectRatio', (_message.Message,), dict(
- DESCRIPTOR = _SSDRANDOMCROPFIXEDASPECTRATIO,
- __module__ = 'object_detection.protos.preprocessor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.SSDRandomCropFixedAspectRatio)
- ))
-_sym_db.RegisterMessage(SSDRandomCropFixedAspectRatio)
-
-SSDRandomCropPadFixedAspectRatioOperation = _reflection.GeneratedProtocolMessageType('SSDRandomCropPadFixedAspectRatioOperation', (_message.Message,), dict(
- DESCRIPTOR = _SSDRANDOMCROPPADFIXEDASPECTRATIOOPERATION,
- __module__ = 'object_detection.protos.preprocessor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.SSDRandomCropPadFixedAspectRatioOperation)
- ))
-_sym_db.RegisterMessage(SSDRandomCropPadFixedAspectRatioOperation)
-
-SSDRandomCropPadFixedAspectRatio = _reflection.GeneratedProtocolMessageType('SSDRandomCropPadFixedAspectRatio', (_message.Message,), dict(
- DESCRIPTOR = _SSDRANDOMCROPPADFIXEDASPECTRATIO,
- __module__ = 'object_detection.protos.preprocessor_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.SSDRandomCropPadFixedAspectRatio)
- ))
-_sym_db.RegisterMessage(SSDRandomCropPadFixedAspectRatio)
-
-
-# @@protoc_insertion_point(module_scope)
diff --git a/object_detection/protos/region_similarity_calculator.proto b/object_detection/protos/region_similarity_calculator.proto
deleted file mode 100644
index e82424e2..00000000
--- a/object_detection/protos/region_similarity_calculator.proto
+++ /dev/null
@@ -1,25 +0,0 @@
-syntax = "proto2";
-
-package object_detection.protos;
-
-// Configuration proto for region similarity calculators. See
-// core/region_similarity_calculator.py for details.
-message RegionSimilarityCalculator {
- oneof region_similarity {
- NegSqDistSimilarity neg_sq_dist_similarity = 1;
- IouSimilarity iou_similarity = 2;
- IoaSimilarity ioa_similarity = 3;
- }
-}
-
-// Configuration for negative squared distance similarity calculator.
-message NegSqDistSimilarity {
-}
-
-// Configuration for intersection-over-union (IOU) similarity calculator.
-message IouSimilarity {
-}
-
-// Configuration for intersection-over-area (IOA) similarity calculator.
-message IoaSimilarity {
-}
diff --git a/object_detection/protos/region_similarity_calculator_pb2.py b/object_detection/protos/region_similarity_calculator_pb2.py
deleted file mode 100644
index 9e260d84..00000000
--- a/object_detection/protos/region_similarity_calculator_pb2.py
+++ /dev/null
@@ -1,194 +0,0 @@
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: object_detection/protos/region_similarity_calculator.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf import descriptor_pb2
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='object_detection/protos/region_similarity_calculator.proto',
- package='object_detection.protos',
- syntax='proto2',
- serialized_pb=_b('\n:object_detection/protos/region_similarity_calculator.proto\x12\x17object_detection.protos\"\x85\x02\n\x1aRegionSimilarityCalculator\x12N\n\x16neg_sq_dist_similarity\x18\x01 \x01(\x0b\x32,.object_detection.protos.NegSqDistSimilarityH\x00\x12@\n\x0eiou_similarity\x18\x02 \x01(\x0b\x32&.object_detection.protos.IouSimilarityH\x00\x12@\n\x0eioa_similarity\x18\x03 \x01(\x0b\x32&.object_detection.protos.IoaSimilarityH\x00\x42\x13\n\x11region_similarity\"\x15\n\x13NegSqDistSimilarity\"\x0f\n\rIouSimilarity\"\x0f\n\rIoaSimilarity')
-)
-
-
-
-
-_REGIONSIMILARITYCALCULATOR = _descriptor.Descriptor(
- name='RegionSimilarityCalculator',
- full_name='object_detection.protos.RegionSimilarityCalculator',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='neg_sq_dist_similarity', full_name='object_detection.protos.RegionSimilarityCalculator.neg_sq_dist_similarity', index=0,
- number=1, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='iou_similarity', full_name='object_detection.protos.RegionSimilarityCalculator.iou_similarity', index=1,
- number=2, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='ioa_similarity', full_name='object_detection.protos.RegionSimilarityCalculator.ioa_similarity', index=2,
- number=3, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- _descriptor.OneofDescriptor(
- name='region_similarity', full_name='object_detection.protos.RegionSimilarityCalculator.region_similarity',
- index=0, containing_type=None, fields=[]),
- ],
- serialized_start=88,
- serialized_end=349,
-)
-
-
-_NEGSQDISTSIMILARITY = _descriptor.Descriptor(
- name='NegSqDistSimilarity',
- full_name='object_detection.protos.NegSqDistSimilarity',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=351,
- serialized_end=372,
-)
-
-
-_IOUSIMILARITY = _descriptor.Descriptor(
- name='IouSimilarity',
- full_name='object_detection.protos.IouSimilarity',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=374,
- serialized_end=389,
-)
-
-
-_IOASIMILARITY = _descriptor.Descriptor(
- name='IoaSimilarity',
- full_name='object_detection.protos.IoaSimilarity',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=391,
- serialized_end=406,
-)
-
-_REGIONSIMILARITYCALCULATOR.fields_by_name['neg_sq_dist_similarity'].message_type = _NEGSQDISTSIMILARITY
-_REGIONSIMILARITYCALCULATOR.fields_by_name['iou_similarity'].message_type = _IOUSIMILARITY
-_REGIONSIMILARITYCALCULATOR.fields_by_name['ioa_similarity'].message_type = _IOASIMILARITY
-_REGIONSIMILARITYCALCULATOR.oneofs_by_name['region_similarity'].fields.append(
- _REGIONSIMILARITYCALCULATOR.fields_by_name['neg_sq_dist_similarity'])
-_REGIONSIMILARITYCALCULATOR.fields_by_name['neg_sq_dist_similarity'].containing_oneof = _REGIONSIMILARITYCALCULATOR.oneofs_by_name['region_similarity']
-_REGIONSIMILARITYCALCULATOR.oneofs_by_name['region_similarity'].fields.append(
- _REGIONSIMILARITYCALCULATOR.fields_by_name['iou_similarity'])
-_REGIONSIMILARITYCALCULATOR.fields_by_name['iou_similarity'].containing_oneof = _REGIONSIMILARITYCALCULATOR.oneofs_by_name['region_similarity']
-_REGIONSIMILARITYCALCULATOR.oneofs_by_name['region_similarity'].fields.append(
- _REGIONSIMILARITYCALCULATOR.fields_by_name['ioa_similarity'])
-_REGIONSIMILARITYCALCULATOR.fields_by_name['ioa_similarity'].containing_oneof = _REGIONSIMILARITYCALCULATOR.oneofs_by_name['region_similarity']
-DESCRIPTOR.message_types_by_name['RegionSimilarityCalculator'] = _REGIONSIMILARITYCALCULATOR
-DESCRIPTOR.message_types_by_name['NegSqDistSimilarity'] = _NEGSQDISTSIMILARITY
-DESCRIPTOR.message_types_by_name['IouSimilarity'] = _IOUSIMILARITY
-DESCRIPTOR.message_types_by_name['IoaSimilarity'] = _IOASIMILARITY
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-RegionSimilarityCalculator = _reflection.GeneratedProtocolMessageType('RegionSimilarityCalculator', (_message.Message,), dict(
- DESCRIPTOR = _REGIONSIMILARITYCALCULATOR,
- __module__ = 'object_detection.protos.region_similarity_calculator_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.RegionSimilarityCalculator)
- ))
-_sym_db.RegisterMessage(RegionSimilarityCalculator)
-
-NegSqDistSimilarity = _reflection.GeneratedProtocolMessageType('NegSqDistSimilarity', (_message.Message,), dict(
- DESCRIPTOR = _NEGSQDISTSIMILARITY,
- __module__ = 'object_detection.protos.region_similarity_calculator_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.NegSqDistSimilarity)
- ))
-_sym_db.RegisterMessage(NegSqDistSimilarity)
-
-IouSimilarity = _reflection.GeneratedProtocolMessageType('IouSimilarity', (_message.Message,), dict(
- DESCRIPTOR = _IOUSIMILARITY,
- __module__ = 'object_detection.protos.region_similarity_calculator_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.IouSimilarity)
- ))
-_sym_db.RegisterMessage(IouSimilarity)
-
-IoaSimilarity = _reflection.GeneratedProtocolMessageType('IoaSimilarity', (_message.Message,), dict(
- DESCRIPTOR = _IOASIMILARITY,
- __module__ = 'object_detection.protos.region_similarity_calculator_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.IoaSimilarity)
- ))
-_sym_db.RegisterMessage(IoaSimilarity)
-
-
-# @@protoc_insertion_point(module_scope)
diff --git a/object_detection/protos/square_box_coder.proto b/object_detection/protos/square_box_coder.proto
deleted file mode 100644
index 41575eb4..00000000
--- a/object_detection/protos/square_box_coder.proto
+++ /dev/null
@@ -1,14 +0,0 @@
-syntax = "proto2";
-
-package object_detection.protos;
-
-// Configuration proto for SquareBoxCoder. See
-// box_coders/square_box_coder.py for details.
-message SquareBoxCoder {
- // Scale factor for anchor encoded box center.
- optional float y_scale = 1 [default = 10.0];
- optional float x_scale = 2 [default = 10.0];
-
- // Scale factor for anchor encoded box length.
- optional float length_scale = 3 [default = 5.0];
-}
diff --git a/object_detection/protos/square_box_coder_pb2.py b/object_detection/protos/square_box_coder_pb2.py
deleted file mode 100644
index a75e3d06..00000000
--- a/object_detection/protos/square_box_coder_pb2.py
+++ /dev/null
@@ -1,83 +0,0 @@
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: object_detection/protos/square_box_coder.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf import descriptor_pb2
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='object_detection/protos/square_box_coder.proto',
- package='object_detection.protos',
- syntax='proto2',
- serialized_pb=_b('\n.object_detection/protos/square_box_coder.proto\x12\x17object_detection.protos\"S\n\x0eSquareBoxCoder\x12\x13\n\x07y_scale\x18\x01 \x01(\x02:\x02\x31\x30\x12\x13\n\x07x_scale\x18\x02 \x01(\x02:\x02\x31\x30\x12\x17\n\x0clength_scale\x18\x03 \x01(\x02:\x01\x35')
-)
-
-
-
-
-_SQUAREBOXCODER = _descriptor.Descriptor(
- name='SquareBoxCoder',
- full_name='object_detection.protos.SquareBoxCoder',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='y_scale', full_name='object_detection.protos.SquareBoxCoder.y_scale', index=0,
- number=1, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(10),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='x_scale', full_name='object_detection.protos.SquareBoxCoder.x_scale', index=1,
- number=2, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(10),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='length_scale', full_name='object_detection.protos.SquareBoxCoder.length_scale', index=2,
- number=3, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(5),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=75,
- serialized_end=158,
-)
-
-DESCRIPTOR.message_types_by_name['SquareBoxCoder'] = _SQUAREBOXCODER
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-SquareBoxCoder = _reflection.GeneratedProtocolMessageType('SquareBoxCoder', (_message.Message,), dict(
- DESCRIPTOR = _SQUAREBOXCODER,
- __module__ = 'object_detection.protos.square_box_coder_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.SquareBoxCoder)
- ))
-_sym_db.RegisterMessage(SquareBoxCoder)
-
-
-# @@protoc_insertion_point(module_scope)
diff --git a/object_detection/protos/ssd.proto b/object_detection/protos/ssd.proto
deleted file mode 100644
index 067c2fff..00000000
--- a/object_detection/protos/ssd.proto
+++ /dev/null
@@ -1,85 +0,0 @@
-syntax = "proto2";
-package object_detection.protos;
-
-import "object_detection/protos/anchor_generator.proto";
-import "object_detection/protos/box_coder.proto";
-import "object_detection/protos/box_predictor.proto";
-import "object_detection/protos/hyperparams.proto";
-import "object_detection/protos/image_resizer.proto";
-import "object_detection/protos/matcher.proto";
-import "object_detection/protos/losses.proto";
-import "object_detection/protos/post_processing.proto";
-import "object_detection/protos/region_similarity_calculator.proto";
-
-// Configuration for Single Shot Detection (SSD) models.
-message Ssd {
-
- // Number of classes to predict.
- optional int32 num_classes = 1;
-
- // Image resizer for preprocessing the input image.
- optional ImageResizer image_resizer = 2;
-
- // Feature extractor config.
- optional SsdFeatureExtractor feature_extractor = 3;
-
- // Box coder to encode the boxes.
- optional BoxCoder box_coder = 4;
-
- // Matcher to match groundtruth with anchors.
- optional Matcher matcher = 5;
-
- // Region similarity calculator to compute similarity of boxes.
- optional RegionSimilarityCalculator similarity_calculator = 6;
-
- // Box predictor to attach to the features.
- optional BoxPredictor box_predictor = 7;
-
- // Anchor generator to compute anchors.
- optional AnchorGenerator anchor_generator = 8;
-
- // Post processing to apply on the predictions.
- optional PostProcessing post_processing = 9;
-
- // Whether to normalize the loss by number of groundtruth boxes that match to
- // the anchors.
- optional bool normalize_loss_by_num_matches = 10 [default=true];
-
- // Loss configuration for training.
- optional Loss loss = 11;
-}
-
-
-message SsdFeatureExtractor {
- // Type of ssd feature extractor.
- optional string type = 1;
-
- // The factor to alter the depth of the channels in the feature extractor.
- optional float depth_multiplier = 2 [default=1.0];
-
- // Minimum number of the channels in the feature extractor.
- optional int32 min_depth = 3 [default=16];
-
- // Hyperparameters for the feature extractor.
- optional Hyperparams conv_hyperparams = 4;
-
- // The nearest multiple to zero-pad the input height and width dimensions to.
- // For example, if pad_to_multiple = 2, input dimensions are zero-padded
- // until the resulting dimensions are even.
- optional int32 pad_to_multiple = 5 [default = 1];
-
- // Whether to update batch norm parameters during training or not.
- // When training with a relative small batch size (e.g. 1), it is
- // desirable to disable batch norm update and use pretrained batch norm
- // params.
- //
- // Note: Some feature extractors are used with canned arg_scopes
- // (e.g resnet arg scopes). In these cases training behavior of batch norm
- // variables may depend on both values of `batch_norm_trainable` and
- // `is_training`.
- //
- // When canned arg_scopes are used with feature extractors `conv_hyperparams`
- // will apply only to the additional layers that are added and are outside the
- // canned arg_scope.
- optional bool batch_norm_trainable = 6 [default=true];
-}
diff --git a/object_detection/protos/ssd_anchor_generator.proto b/object_detection/protos/ssd_anchor_generator.proto
deleted file mode 100644
index d969ddf7..00000000
--- a/object_detection/protos/ssd_anchor_generator.proto
+++ /dev/null
@@ -1,55 +0,0 @@
-syntax = "proto2";
-
-package object_detection.protos;
-
-// Configuration proto for SSD anchor generator described in
-// https://arxiv.org/abs/1512.02325. See
-// anchor_generators/multiple_grid_anchor_generator.py for details.
-message SsdAnchorGenerator {
- // Number of grid layers to create anchors for.
- optional int32 num_layers = 1 [default = 6];
-
- // Scale of anchors corresponding to finest resolution.
- optional float min_scale = 2 [default = 0.2];
-
- // Scale of anchors corresponding to coarsest resolution
- optional float max_scale = 3 [default = 0.95];
-
- // Can be used to override min_scale->max_scale, with an explicitly defined
- // set of scales. If empty, then min_scale->max_scale is used.
- repeated float scales = 12;
-
- // Aspect ratios for anchors at each grid point.
- repeated float aspect_ratios = 4;
-
- // When this aspect ratio is greater than 0, then an additional
- // anchor, with an interpolated scale is added with this aspect ratio.
- optional float interpolated_scale_aspect_ratio = 13 [default = 1.0];
-
- // Whether to use the following aspect ratio and scale combination for the
- // layer with the finest resolution : (scale=0.1, aspect_ratio=1.0),
- // (scale=min_scale, aspect_ration=2.0), (scale=min_scale, aspect_ratio=0.5).
- optional bool reduce_boxes_in_lowest_layer = 5 [default = true];
-
- // The base anchor size in height dimension.
- optional float base_anchor_height = 6 [default = 1.0];
-
- // The base anchor size in width dimension.
- optional float base_anchor_width = 7 [default = 1.0];
-
- // Anchor stride in height dimension in pixels for each layer. The length of
- // this field is expected to be equal to the value of num_layers.
- repeated int32 height_stride = 8;
-
- // Anchor stride in width dimension in pixels for each layer. The length of
- // this field is expected to be equal to the value of num_layers.
- repeated int32 width_stride = 9;
-
- // Anchor height offset in pixels for each layer. The length of this field is
- // expected to be equal to the value of num_layers.
- repeated int32 height_offset = 10;
-
- // Anchor width offset in pixels for each layer. The length of this field is
- // expected to be equal to the value of num_layers.
- repeated int32 width_offset = 11;
-}
diff --git a/object_detection/protos/ssd_anchor_generator_pb2.py b/object_detection/protos/ssd_anchor_generator_pb2.py
deleted file mode 100644
index 25fe45a0..00000000
--- a/object_detection/protos/ssd_anchor_generator_pb2.py
+++ /dev/null
@@ -1,153 +0,0 @@
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: object_detection/protos/ssd_anchor_generator.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf import descriptor_pb2
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='object_detection/protos/ssd_anchor_generator.proto',
- package='object_detection.protos',
- syntax='proto2',
- serialized_pb=_b('\n2object_detection/protos/ssd_anchor_generator.proto\x12\x17object_detection.protos\"\xf2\x02\n\x12SsdAnchorGenerator\x12\x15\n\nnum_layers\x18\x01 \x01(\x05:\x01\x36\x12\x16\n\tmin_scale\x18\x02 \x01(\x02:\x03\x30.2\x12\x17\n\tmax_scale\x18\x03 \x01(\x02:\x04\x30.95\x12\x0e\n\x06scales\x18\x0c \x03(\x02\x12\x15\n\raspect_ratios\x18\x04 \x03(\x02\x12*\n\x1finterpolated_scale_aspect_ratio\x18\r \x01(\x02:\x01\x31\x12*\n\x1creduce_boxes_in_lowest_layer\x18\x05 \x01(\x08:\x04true\x12\x1d\n\x12\x62\x61se_anchor_height\x18\x06 \x01(\x02:\x01\x31\x12\x1c\n\x11\x62\x61se_anchor_width\x18\x07 \x01(\x02:\x01\x31\x12\x15\n\rheight_stride\x18\x08 \x03(\x05\x12\x14\n\x0cwidth_stride\x18\t \x03(\x05\x12\x15\n\rheight_offset\x18\n \x03(\x05\x12\x14\n\x0cwidth_offset\x18\x0b \x03(\x05')
-)
-
-
-
-
-_SSDANCHORGENERATOR = _descriptor.Descriptor(
- name='SsdAnchorGenerator',
- full_name='object_detection.protos.SsdAnchorGenerator',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='num_layers', full_name='object_detection.protos.SsdAnchorGenerator.num_layers', index=0,
- number=1, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=6,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='min_scale', full_name='object_detection.protos.SsdAnchorGenerator.min_scale', index=1,
- number=2, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.2),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='max_scale', full_name='object_detection.protos.SsdAnchorGenerator.max_scale', index=2,
- number=3, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0.95),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='scales', full_name='object_detection.protos.SsdAnchorGenerator.scales', index=3,
- number=12, type=2, cpp_type=6, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='aspect_ratios', full_name='object_detection.protos.SsdAnchorGenerator.aspect_ratios', index=4,
- number=4, type=2, cpp_type=6, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='interpolated_scale_aspect_ratio', full_name='object_detection.protos.SsdAnchorGenerator.interpolated_scale_aspect_ratio', index=5,
- number=13, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(1),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='reduce_boxes_in_lowest_layer', full_name='object_detection.protos.SsdAnchorGenerator.reduce_boxes_in_lowest_layer', index=6,
- number=5, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=True,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='base_anchor_height', full_name='object_detection.protos.SsdAnchorGenerator.base_anchor_height', index=7,
- number=6, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(1),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='base_anchor_width', full_name='object_detection.protos.SsdAnchorGenerator.base_anchor_width', index=8,
- number=7, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(1),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='height_stride', full_name='object_detection.protos.SsdAnchorGenerator.height_stride', index=9,
- number=8, type=5, cpp_type=1, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='width_stride', full_name='object_detection.protos.SsdAnchorGenerator.width_stride', index=10,
- number=9, type=5, cpp_type=1, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='height_offset', full_name='object_detection.protos.SsdAnchorGenerator.height_offset', index=11,
- number=10, type=5, cpp_type=1, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='width_offset', full_name='object_detection.protos.SsdAnchorGenerator.width_offset', index=12,
- number=11, type=5, cpp_type=1, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=80,
- serialized_end=450,
-)
-
-DESCRIPTOR.message_types_by_name['SsdAnchorGenerator'] = _SSDANCHORGENERATOR
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-SsdAnchorGenerator = _reflection.GeneratedProtocolMessageType('SsdAnchorGenerator', (_message.Message,), dict(
- DESCRIPTOR = _SSDANCHORGENERATOR,
- __module__ = 'object_detection.protos.ssd_anchor_generator_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.SsdAnchorGenerator)
- ))
-_sym_db.RegisterMessage(SsdAnchorGenerator)
-
-
-# @@protoc_insertion_point(module_scope)
diff --git a/object_detection/protos/ssd_pb2.py b/object_detection/protos/ssd_pb2.py
deleted file mode 100644
index bc4ec4a1..00000000
--- a/object_detection/protos/ssd_pb2.py
+++ /dev/null
@@ -1,233 +0,0 @@
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: object_detection/protos/ssd.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf import descriptor_pb2
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-from object_detection.protos import anchor_generator_pb2 as object__detection_dot_protos_dot_anchor__generator__pb2
-from object_detection.protos import box_coder_pb2 as object__detection_dot_protos_dot_box__coder__pb2
-from object_detection.protos import box_predictor_pb2 as object__detection_dot_protos_dot_box__predictor__pb2
-from object_detection.protos import hyperparams_pb2 as object__detection_dot_protos_dot_hyperparams__pb2
-from object_detection.protos import image_resizer_pb2 as object__detection_dot_protos_dot_image__resizer__pb2
-from object_detection.protos import matcher_pb2 as object__detection_dot_protos_dot_matcher__pb2
-from object_detection.protos import losses_pb2 as object__detection_dot_protos_dot_losses__pb2
-from object_detection.protos import post_processing_pb2 as object__detection_dot_protos_dot_post__processing__pb2
-from object_detection.protos import region_similarity_calculator_pb2 as object__detection_dot_protos_dot_region__similarity__calculator__pb2
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='object_detection/protos/ssd.proto',
- package='object_detection.protos',
- syntax='proto2',
- serialized_pb=_b('\n!object_detection/protos/ssd.proto\x12\x17object_detection.protos\x1a.object_detection/protos/anchor_generator.proto\x1a\'object_detection/protos/box_coder.proto\x1a+object_detection/protos/box_predictor.proto\x1a)object_detection/protos/hyperparams.proto\x1a+object_detection/protos/image_resizer.proto\x1a%object_detection/protos/matcher.proto\x1a$object_detection/protos/losses.proto\x1a-object_detection/protos/post_processing.proto\x1a:object_detection/protos/region_similarity_calculator.proto\"\xfc\x04\n\x03Ssd\x12\x13\n\x0bnum_classes\x18\x01 \x01(\x05\x12<\n\rimage_resizer\x18\x02 \x01(\x0b\x32%.object_detection.protos.ImageResizer\x12G\n\x11\x66\x65\x61ture_extractor\x18\x03 \x01(\x0b\x32,.object_detection.protos.SsdFeatureExtractor\x12\x34\n\tbox_coder\x18\x04 \x01(\x0b\x32!.object_detection.protos.BoxCoder\x12\x31\n\x07matcher\x18\x05 \x01(\x0b\x32 .object_detection.protos.Matcher\x12R\n\x15similarity_calculator\x18\x06 \x01(\x0b\x32\x33.object_detection.protos.RegionSimilarityCalculator\x12<\n\rbox_predictor\x18\x07 \x01(\x0b\x32%.object_detection.protos.BoxPredictor\x12\x42\n\x10\x61nchor_generator\x18\x08 \x01(\x0b\x32(.object_detection.protos.AnchorGenerator\x12@\n\x0fpost_processing\x18\t \x01(\x0b\x32\'.object_detection.protos.PostProcessing\x12+\n\x1dnormalize_loss_by_num_matches\x18\n \x01(\x08:\x04true\x12+\n\x04loss\x18\x0b \x01(\x0b\x32\x1d.object_detection.protos.Loss\"\xd7\x01\n\x13SsdFeatureExtractor\x12\x0c\n\x04type\x18\x01 \x01(\t\x12\x1b\n\x10\x64\x65pth_multiplier\x18\x02 \x01(\x02:\x01\x31\x12\x15\n\tmin_depth\x18\x03 \x01(\x05:\x02\x31\x36\x12>\n\x10\x63onv_hyperparams\x18\x04 \x01(\x0b\x32$.object_detection.protos.Hyperparams\x12\x1a\n\x0fpad_to_multiple\x18\x05 \x01(\x05:\x01\x31\x12\"\n\x14\x62\x61tch_norm_trainable\x18\x06 \x01(\x08:\x04true')
- ,
- dependencies=[object__detection_dot_protos_dot_anchor__generator__pb2.DESCRIPTOR,object__detection_dot_protos_dot_box__coder__pb2.DESCRIPTOR,object__detection_dot_protos_dot_box__predictor__pb2.DESCRIPTOR,object__detection_dot_protos_dot_hyperparams__pb2.DESCRIPTOR,object__detection_dot_protos_dot_image__resizer__pb2.DESCRIPTOR,object__detection_dot_protos_dot_matcher__pb2.DESCRIPTOR,object__detection_dot_protos_dot_losses__pb2.DESCRIPTOR,object__detection_dot_protos_dot_post__processing__pb2.DESCRIPTOR,object__detection_dot_protos_dot_region__similarity__calculator__pb2.DESCRIPTOR,])
-
-
-
-
-_SSD = _descriptor.Descriptor(
- name='Ssd',
- full_name='object_detection.protos.Ssd',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='num_classes', full_name='object_detection.protos.Ssd.num_classes', index=0,
- number=1, type=5, cpp_type=1, label=1,
- has_default_value=False, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='image_resizer', full_name='object_detection.protos.Ssd.image_resizer', index=1,
- number=2, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='feature_extractor', full_name='object_detection.protos.Ssd.feature_extractor', index=2,
- number=3, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='box_coder', full_name='object_detection.protos.Ssd.box_coder', index=3,
- number=4, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='matcher', full_name='object_detection.protos.Ssd.matcher', index=4,
- number=5, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='similarity_calculator', full_name='object_detection.protos.Ssd.similarity_calculator', index=5,
- number=6, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='box_predictor', full_name='object_detection.protos.Ssd.box_predictor', index=6,
- number=7, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='anchor_generator', full_name='object_detection.protos.Ssd.anchor_generator', index=7,
- number=8, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='post_processing', full_name='object_detection.protos.Ssd.post_processing', index=8,
- number=9, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='normalize_loss_by_num_matches', full_name='object_detection.protos.Ssd.normalize_loss_by_num_matches', index=9,
- number=10, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=True,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='loss', full_name='object_detection.protos.Ssd.loss', index=10,
- number=11, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=469,
- serialized_end=1105,
-)
-
-
-_SSDFEATUREEXTRACTOR = _descriptor.Descriptor(
- name='SsdFeatureExtractor',
- full_name='object_detection.protos.SsdFeatureExtractor',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='type', full_name='object_detection.protos.SsdFeatureExtractor.type', index=0,
- number=1, type=9, cpp_type=9, label=1,
- has_default_value=False, default_value=_b("").decode('utf-8'),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='depth_multiplier', full_name='object_detection.protos.SsdFeatureExtractor.depth_multiplier', index=1,
- number=2, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(1),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='min_depth', full_name='object_detection.protos.SsdFeatureExtractor.min_depth', index=2,
- number=3, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=16,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='conv_hyperparams', full_name='object_detection.protos.SsdFeatureExtractor.conv_hyperparams', index=3,
- number=4, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='pad_to_multiple', full_name='object_detection.protos.SsdFeatureExtractor.pad_to_multiple', index=4,
- number=5, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=1,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='batch_norm_trainable', full_name='object_detection.protos.SsdFeatureExtractor.batch_norm_trainable', index=5,
- number=6, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=True,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=1108,
- serialized_end=1323,
-)
-
-_SSD.fields_by_name['image_resizer'].message_type = object__detection_dot_protos_dot_image__resizer__pb2._IMAGERESIZER
-_SSD.fields_by_name['feature_extractor'].message_type = _SSDFEATUREEXTRACTOR
-_SSD.fields_by_name['box_coder'].message_type = object__detection_dot_protos_dot_box__coder__pb2._BOXCODER
-_SSD.fields_by_name['matcher'].message_type = object__detection_dot_protos_dot_matcher__pb2._MATCHER
-_SSD.fields_by_name['similarity_calculator'].message_type = object__detection_dot_protos_dot_region__similarity__calculator__pb2._REGIONSIMILARITYCALCULATOR
-_SSD.fields_by_name['box_predictor'].message_type = object__detection_dot_protos_dot_box__predictor__pb2._BOXPREDICTOR
-_SSD.fields_by_name['anchor_generator'].message_type = object__detection_dot_protos_dot_anchor__generator__pb2._ANCHORGENERATOR
-_SSD.fields_by_name['post_processing'].message_type = object__detection_dot_protos_dot_post__processing__pb2._POSTPROCESSING
-_SSD.fields_by_name['loss'].message_type = object__detection_dot_protos_dot_losses__pb2._LOSS
-_SSDFEATUREEXTRACTOR.fields_by_name['conv_hyperparams'].message_type = object__detection_dot_protos_dot_hyperparams__pb2._HYPERPARAMS
-DESCRIPTOR.message_types_by_name['Ssd'] = _SSD
-DESCRIPTOR.message_types_by_name['SsdFeatureExtractor'] = _SSDFEATUREEXTRACTOR
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-Ssd = _reflection.GeneratedProtocolMessageType('Ssd', (_message.Message,), dict(
- DESCRIPTOR = _SSD,
- __module__ = 'object_detection.protos.ssd_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.Ssd)
- ))
-_sym_db.RegisterMessage(Ssd)
-
-SsdFeatureExtractor = _reflection.GeneratedProtocolMessageType('SsdFeatureExtractor', (_message.Message,), dict(
- DESCRIPTOR = _SSDFEATUREEXTRACTOR,
- __module__ = 'object_detection.protos.ssd_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.SsdFeatureExtractor)
- ))
-_sym_db.RegisterMessage(SsdFeatureExtractor)
-
-
-# @@protoc_insertion_point(module_scope)
diff --git a/object_detection/protos/string_int_label_map.proto b/object_detection/protos/string_int_label_map.proto
deleted file mode 100644
index 0894183b..00000000
--- a/object_detection/protos/string_int_label_map.proto
+++ /dev/null
@@ -1,24 +0,0 @@
-// Message to store the mapping from class label strings to class id. Datasets
-// use string labels to represent classes while the object detection framework
-// works with class ids. This message maps them so they can be converted back
-// and forth as needed.
-syntax = "proto2";
-
-package object_detection.protos;
-
-message StringIntLabelMapItem {
- // String name. The most common practice is to set this to a MID or synsets
- // id.
- optional string name = 1;
-
- // Integer id that maps to the string name above. Label ids should start from
- // 1.
- optional int32 id = 2;
-
- // Human readable string label.
- optional string display_name = 3;
-};
-
-message StringIntLabelMap {
- repeated StringIntLabelMapItem item = 1;
-};
diff --git a/object_detection/protos/string_int_label_map_pb2.py b/object_detection/protos/string_int_label_map_pb2.py
deleted file mode 100644
index 381d5526..00000000
--- a/object_detection/protos/string_int_label_map_pb2.py
+++ /dev/null
@@ -1,123 +0,0 @@
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: object_detection/protos/string_int_label_map.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf import descriptor_pb2
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='object_detection/protos/string_int_label_map.proto',
- package='object_detection.protos',
- syntax='proto2',
- serialized_pb=_b('\n2object_detection/protos/string_int_label_map.proto\x12\x17object_detection.protos\"G\n\x15StringIntLabelMapItem\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\n\n\x02id\x18\x02 \x01(\x05\x12\x14\n\x0c\x64isplay_name\x18\x03 \x01(\t\"Q\n\x11StringIntLabelMap\x12<\n\x04item\x18\x01 \x03(\x0b\x32..object_detection.protos.StringIntLabelMapItem')
-)
-
-
-
-
-_STRINGINTLABELMAPITEM = _descriptor.Descriptor(
- name='StringIntLabelMapItem',
- full_name='object_detection.protos.StringIntLabelMapItem',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='name', full_name='object_detection.protos.StringIntLabelMapItem.name', index=0,
- number=1, type=9, cpp_type=9, label=1,
- has_default_value=False, default_value=_b("").decode('utf-8'),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='id', full_name='object_detection.protos.StringIntLabelMapItem.id', index=1,
- number=2, type=5, cpp_type=1, label=1,
- has_default_value=False, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='display_name', full_name='object_detection.protos.StringIntLabelMapItem.display_name', index=2,
- number=3, type=9, cpp_type=9, label=1,
- has_default_value=False, default_value=_b("").decode('utf-8'),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=79,
- serialized_end=150,
-)
-
-
-_STRINGINTLABELMAP = _descriptor.Descriptor(
- name='StringIntLabelMap',
- full_name='object_detection.protos.StringIntLabelMap',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='item', full_name='object_detection.protos.StringIntLabelMap.item', index=0,
- number=1, type=11, cpp_type=10, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=152,
- serialized_end=233,
-)
-
-_STRINGINTLABELMAP.fields_by_name['item'].message_type = _STRINGINTLABELMAPITEM
-DESCRIPTOR.message_types_by_name['StringIntLabelMapItem'] = _STRINGINTLABELMAPITEM
-DESCRIPTOR.message_types_by_name['StringIntLabelMap'] = _STRINGINTLABELMAP
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-StringIntLabelMapItem = _reflection.GeneratedProtocolMessageType('StringIntLabelMapItem', (_message.Message,), dict(
- DESCRIPTOR = _STRINGINTLABELMAPITEM,
- __module__ = 'object_detection.protos.string_int_label_map_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.StringIntLabelMapItem)
- ))
-_sym_db.RegisterMessage(StringIntLabelMapItem)
-
-StringIntLabelMap = _reflection.GeneratedProtocolMessageType('StringIntLabelMap', (_message.Message,), dict(
- DESCRIPTOR = _STRINGINTLABELMAP,
- __module__ = 'object_detection.protos.string_int_label_map_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.StringIntLabelMap)
- ))
-_sym_db.RegisterMessage(StringIntLabelMap)
-
-
-# @@protoc_insertion_point(module_scope)
diff --git a/object_detection/protos/train.proto b/object_detection/protos/train.proto
deleted file mode 100644
index ae905c78..00000000
--- a/object_detection/protos/train.proto
+++ /dev/null
@@ -1,69 +0,0 @@
-syntax = "proto2";
-
-package object_detection.protos;
-
-import "object_detection/protos/optimizer.proto";
-import "object_detection/protos/preprocessor.proto";
-
-// Message for configuring DetectionModel training jobs (train.py).
-message TrainConfig {
- // Input queue batch size.
- optional uint32 batch_size = 1 [default=32];
-
- // Data augmentation options.
- repeated PreprocessingStep data_augmentation_options = 2;
-
- // Whether to synchronize replicas during training.
- optional bool sync_replicas = 3 [default=false];
-
- // How frequently to keep checkpoints.
- optional uint32 keep_checkpoint_every_n_hours = 4 [default=1000];
-
- // Optimizer used to train the DetectionModel.
- optional Optimizer optimizer = 5;
-
- // If greater than 0, clips gradients by this value.
- optional float gradient_clipping_by_norm = 6 [default=0.0];
-
- // Checkpoint to restore variables from. Typically used to load feature
- // extractor variables trained outside of object detection.
- optional string fine_tune_checkpoint = 7 [default=""];
-
- // Specifies if the finetune checkpoint is from an object detection model.
- // If from an object detection model, the model being trained should have
- // the same parameters with the exception of the num_classes parameter.
- // If false, it assumes the checkpoint was a object classification model.
- optional bool from_detection_checkpoint = 8 [default=false];
-
- // Number of steps to train the DetectionModel for. If 0, will train the model
- // indefinitely.
- optional uint32 num_steps = 9 [default=0];
-
- // Number of training steps between replica startup.
- // This flag must be set to 0 if sync_replicas is set to true.
- optional float startup_delay_steps = 10 [default=15];
-
- // If greater than 0, multiplies the gradient of bias variables by this
- // amount.
- optional float bias_grad_multiplier = 11 [default=0];
-
- // Variables that should not be updated during training.
- repeated string freeze_variables = 12;
-
- // Number of replicas to aggregate before making parameter updates.
- optional int32 replicas_to_aggregate = 13 [default=1];
-
- // Maximum number of elements to store within a queue.
- optional int32 batch_queue_capacity = 14 [default=150];
-
- // Number of threads to use for batching.
- optional int32 num_batch_queue_threads = 15 [default=8];
-
- // Maximum capacity of the queue used to prefetch assembled batches.
- optional int32 prefetch_queue_capacity = 16 [default=5];
-
- // If true, boxes with the same coordinates will be merged together.
- // This is useful when each box can have multiple labels.
- // Note that only Sigmoid classification losses should be used.
- optional bool merge_multiple_label_boxes = 17 [default=false];
-}
diff --git a/object_detection/protos/train_pb2.py b/object_detection/protos/train_pb2.py
deleted file mode 100644
index d1309252..00000000
--- a/object_detection/protos/train_pb2.py
+++ /dev/null
@@ -1,186 +0,0 @@
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: object_detection/protos/train.proto
-
-import sys
-_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf import descriptor_pb2
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-from object_detection.protos import optimizer_pb2 as object__detection_dot_protos_dot_optimizer__pb2
-from object_detection.protos import preprocessor_pb2 as object__detection_dot_protos_dot_preprocessor__pb2
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name='object_detection/protos/train.proto',
- package='object_detection.protos',
- syntax='proto2',
- serialized_pb=_b('\n#object_detection/protos/train.proto\x12\x17object_detection.protos\x1a\'object_detection/protos/optimizer.proto\x1a*object_detection/protos/preprocessor.proto\"\x90\x05\n\x0bTrainConfig\x12\x16\n\nbatch_size\x18\x01 \x01(\r:\x02\x33\x32\x12M\n\x19\x64\x61ta_augmentation_options\x18\x02 \x03(\x0b\x32*.object_detection.protos.PreprocessingStep\x12\x1c\n\rsync_replicas\x18\x03 \x01(\x08:\x05\x66\x61lse\x12+\n\x1dkeep_checkpoint_every_n_hours\x18\x04 \x01(\r:\x04\x31\x30\x30\x30\x12\x35\n\toptimizer\x18\x05 \x01(\x0b\x32\".object_detection.protos.Optimizer\x12$\n\x19gradient_clipping_by_norm\x18\x06 \x01(\x02:\x01\x30\x12\x1e\n\x14\x66ine_tune_checkpoint\x18\x07 \x01(\t:\x00\x12(\n\x19\x66rom_detection_checkpoint\x18\x08 \x01(\x08:\x05\x66\x61lse\x12\x14\n\tnum_steps\x18\t \x01(\r:\x01\x30\x12\x1f\n\x13startup_delay_steps\x18\n \x01(\x02:\x02\x31\x35\x12\x1f\n\x14\x62ias_grad_multiplier\x18\x0b \x01(\x02:\x01\x30\x12\x18\n\x10\x66reeze_variables\x18\x0c \x03(\t\x12 \n\x15replicas_to_aggregate\x18\r \x01(\x05:\x01\x31\x12!\n\x14\x62\x61tch_queue_capacity\x18\x0e \x01(\x05:\x03\x31\x35\x30\x12\"\n\x17num_batch_queue_threads\x18\x0f \x01(\x05:\x01\x38\x12\"\n\x17prefetch_queue_capacity\x18\x10 \x01(\x05:\x01\x35\x12)\n\x1amerge_multiple_label_boxes\x18\x11 \x01(\x08:\x05\x66\x61lse')
- ,
- dependencies=[object__detection_dot_protos_dot_optimizer__pb2.DESCRIPTOR,object__detection_dot_protos_dot_preprocessor__pb2.DESCRIPTOR,])
-
-
-
-
-_TRAINCONFIG = _descriptor.Descriptor(
- name='TrainConfig',
- full_name='object_detection.protos.TrainConfig',
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- fields=[
- _descriptor.FieldDescriptor(
- name='batch_size', full_name='object_detection.protos.TrainConfig.batch_size', index=0,
- number=1, type=13, cpp_type=3, label=1,
- has_default_value=True, default_value=32,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='data_augmentation_options', full_name='object_detection.protos.TrainConfig.data_augmentation_options', index=1,
- number=2, type=11, cpp_type=10, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='sync_replicas', full_name='object_detection.protos.TrainConfig.sync_replicas', index=2,
- number=3, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=False,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='keep_checkpoint_every_n_hours', full_name='object_detection.protos.TrainConfig.keep_checkpoint_every_n_hours', index=3,
- number=4, type=13, cpp_type=3, label=1,
- has_default_value=True, default_value=1000,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='optimizer', full_name='object_detection.protos.TrainConfig.optimizer', index=4,
- number=5, type=11, cpp_type=10, label=1,
- has_default_value=False, default_value=None,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='gradient_clipping_by_norm', full_name='object_detection.protos.TrainConfig.gradient_clipping_by_norm', index=5,
- number=6, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='fine_tune_checkpoint', full_name='object_detection.protos.TrainConfig.fine_tune_checkpoint', index=6,
- number=7, type=9, cpp_type=9, label=1,
- has_default_value=True, default_value=_b("").decode('utf-8'),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='from_detection_checkpoint', full_name='object_detection.protos.TrainConfig.from_detection_checkpoint', index=7,
- number=8, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=False,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='num_steps', full_name='object_detection.protos.TrainConfig.num_steps', index=8,
- number=9, type=13, cpp_type=3, label=1,
- has_default_value=True, default_value=0,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='startup_delay_steps', full_name='object_detection.protos.TrainConfig.startup_delay_steps', index=9,
- number=10, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(15),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='bias_grad_multiplier', full_name='object_detection.protos.TrainConfig.bias_grad_multiplier', index=10,
- number=11, type=2, cpp_type=6, label=1,
- has_default_value=True, default_value=float(0),
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='freeze_variables', full_name='object_detection.protos.TrainConfig.freeze_variables', index=11,
- number=12, type=9, cpp_type=9, label=3,
- has_default_value=False, default_value=[],
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='replicas_to_aggregate', full_name='object_detection.protos.TrainConfig.replicas_to_aggregate', index=12,
- number=13, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=1,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='batch_queue_capacity', full_name='object_detection.protos.TrainConfig.batch_queue_capacity', index=13,
- number=14, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=150,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='num_batch_queue_threads', full_name='object_detection.protos.TrainConfig.num_batch_queue_threads', index=14,
- number=15, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=8,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='prefetch_queue_capacity', full_name='object_detection.protos.TrainConfig.prefetch_queue_capacity', index=15,
- number=16, type=5, cpp_type=1, label=1,
- has_default_value=True, default_value=5,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- _descriptor.FieldDescriptor(
- name='merge_multiple_label_boxes', full_name='object_detection.protos.TrainConfig.merge_multiple_label_boxes', index=16,
- number=17, type=8, cpp_type=7, label=1,
- has_default_value=True, default_value=False,
- message_type=None, enum_type=None, containing_type=None,
- is_extension=False, extension_scope=None,
- options=None, file=DESCRIPTOR),
- ],
- extensions=[
- ],
- nested_types=[],
- enum_types=[
- ],
- options=None,
- is_extendable=False,
- syntax='proto2',
- extension_ranges=[],
- oneofs=[
- ],
- serialized_start=150,
- serialized_end=806,
-)
-
-_TRAINCONFIG.fields_by_name['data_augmentation_options'].message_type = object__detection_dot_protos_dot_preprocessor__pb2._PREPROCESSINGSTEP
-_TRAINCONFIG.fields_by_name['optimizer'].message_type = object__detection_dot_protos_dot_optimizer__pb2._OPTIMIZER
-DESCRIPTOR.message_types_by_name['TrainConfig'] = _TRAINCONFIG
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-TrainConfig = _reflection.GeneratedProtocolMessageType('TrainConfig', (_message.Message,), dict(
- DESCRIPTOR = _TRAINCONFIG,
- __module__ = 'object_detection.protos.train_pb2'
- # @@protoc_insertion_point(class_scope:object_detection.protos.TrainConfig)
- ))
-_sym_db.RegisterMessage(TrainConfig)
-
-
-# @@protoc_insertion_point(module_scope)
diff --git a/object_detection/samples/cloud/cloud.yml b/object_detection/samples/cloud/cloud.yml
deleted file mode 100644
index 495876a1..00000000
--- a/object_detection/samples/cloud/cloud.yml
+++ /dev/null
@@ -1,11 +0,0 @@
-trainingInput:
- runtimeVersion: "1.0"
- scaleTier: CUSTOM
- masterType: standard_gpu
- workerCount: 5
- workerType: standard_gpu
- parameterServerCount: 3
- parameterServerType: standard
-
-
-
diff --git a/object_detection/samples/configs/faster_rcnn_inception_resnet_v2_atrous_coco.config b/object_detection/samples/configs/faster_rcnn_inception_resnet_v2_atrous_coco.config
deleted file mode 100644
index 8cee5b1a..00000000
--- a/object_detection/samples/configs/faster_rcnn_inception_resnet_v2_atrous_coco.config
+++ /dev/null
@@ -1,147 +0,0 @@
-# Faster R-CNN with Inception Resnet v2, Atrous version;
-# Configured for MSCOCO Dataset.
-# Users should configure the fine_tune_checkpoint field in the train config as
-# well as the label_map_path and input_path fields in the train_input_reader and
-# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
-# should be configured.
-
-model {
- faster_rcnn {
- num_classes: 90
- image_resizer {
- keep_aspect_ratio_resizer {
- min_dimension: 600
- max_dimension: 1024
- }
- }
- feature_extractor {
- type: 'faster_rcnn_inception_resnet_v2'
- first_stage_features_stride: 8
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.25, 0.5, 1.0, 2.0]
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 8
- width_stride: 8
- }
- }
- first_stage_atrous_rate: 2
- first_stage_box_predictor_conv_hyperparams {
- op: CONV
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.01
- }
- }
- }
- first_stage_nms_score_threshold: 0.0
- first_stage_nms_iou_threshold: 0.7
- first_stage_max_proposals: 300
- first_stage_localization_loss_weight: 2.0
- first_stage_objectness_loss_weight: 1.0
- initial_crop_size: 17
- maxpool_kernel_size: 1
- maxpool_stride: 1
- second_stage_box_predictor {
- mask_rcnn_box_predictor {
- use_dropout: false
- dropout_keep_probability: 1.0
- fc_hyperparams {
- op: FC
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- variance_scaling_initializer {
- factor: 1.0
- uniform: true
- mode: FAN_AVG
- }
- }
- }
- }
- }
- second_stage_post_processing {
- batch_non_max_suppression {
- score_threshold: 0.0
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 100
- }
- score_converter: SOFTMAX
- }
- second_stage_localization_loss_weight: 2.0
- second_stage_classification_loss_weight: 1.0
- }
-}
-
-train_config: {
- batch_size: 1
- optimizer {
- momentum_optimizer: {
- learning_rate: {
- manual_step_learning_rate {
- initial_learning_rate: 0.0003
- schedule {
- step: 0
- learning_rate: .0003
- }
- schedule {
- step: 900000
- learning_rate: .00003
- }
- schedule {
- step: 1200000
- learning_rate: .000003
- }
- }
- }
- momentum_optimizer_value: 0.9
- }
- use_moving_average: false
- }
- gradient_clipping_by_norm: 10.0
- fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
- from_detection_checkpoint: true
- # Note: The below line limits the training process to 200K steps, which we
- # empirically found to be sufficient enough to train the pets dataset. This
- # effectively bypasses the learning rate schedule (the learning rate will
- # never decay). Remove the below line to train indefinitely.
- num_steps: 200000
- data_augmentation_options {
- random_horizontal_flip {
- }
- }
-}
-
-train_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
-}
-
-eval_config: {
- num_examples: 8000
- # Note: The below line limits the evaluation process to 10 evaluations.
- # Remove the below line to evaluate indefinitely.
- max_evals: 10
-}
-
-eval_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
- shuffle: false
- num_readers: 1
- num_epochs: 1
-}
diff --git a/object_detection/samples/configs/faster_rcnn_inception_resnet_v2_atrous_pets.config b/object_detection/samples/configs/faster_rcnn_inception_resnet_v2_atrous_pets.config
deleted file mode 100644
index b9211464..00000000
--- a/object_detection/samples/configs/faster_rcnn_inception_resnet_v2_atrous_pets.config
+++ /dev/null
@@ -1,146 +0,0 @@
-# Faster R-CNN with Inception Resnet v2, Atrous version;
-# Configured for Oxford-IIIT Pets Dataset.
-# Users should configure the fine_tune_checkpoint field in the train config as
-# well as the label_map_path and input_path fields in the train_input_reader and
-# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
-# should be configured.
-
-model {
- faster_rcnn {
- num_classes: 37
- image_resizer {
- keep_aspect_ratio_resizer {
- min_dimension: 600
- max_dimension: 1024
- }
- }
- feature_extractor {
- type: 'faster_rcnn_inception_resnet_v2'
- first_stage_features_stride: 8
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.25, 0.5, 1.0, 2.0]
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 8
- width_stride: 8
- }
- }
- first_stage_atrous_rate: 2
- first_stage_box_predictor_conv_hyperparams {
- op: CONV
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.01
- }
- }
- }
- first_stage_nms_score_threshold: 0.0
- first_stage_nms_iou_threshold: 0.7
- first_stage_max_proposals: 300
- first_stage_localization_loss_weight: 2.0
- first_stage_objectness_loss_weight: 1.0
- initial_crop_size: 17
- maxpool_kernel_size: 1
- maxpool_stride: 1
- second_stage_box_predictor {
- mask_rcnn_box_predictor {
- use_dropout: false
- dropout_keep_probability: 1.0
- fc_hyperparams {
- op: FC
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- variance_scaling_initializer {
- factor: 1.0
- uniform: true
- mode: FAN_AVG
- }
- }
- }
- }
- }
- second_stage_post_processing {
- batch_non_max_suppression {
- score_threshold: 0.0
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 100
- }
- score_converter: SOFTMAX
- }
- second_stage_localization_loss_weight: 2.0
- second_stage_classification_loss_weight: 1.0
- }
-}
-
-train_config: {
- batch_size: 1
- optimizer {
- momentum_optimizer: {
- learning_rate: {
- manual_step_learning_rate {
- initial_learning_rate: 0.0003
- schedule {
- step: 0
- learning_rate: .0003
- }
- schedule {
- step: 900000
- learning_rate: .00003
- }
- schedule {
- step: 1200000
- learning_rate: .000003
- }
- }
- }
- momentum_optimizer_value: 0.9
- }
- use_moving_average: false
- }
- gradient_clipping_by_norm: 10.0
- fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
- from_detection_checkpoint: true
- # Note: The below line limits the training process to 200K steps, which we
- # empirically found to be sufficient enough to train the pets dataset. This
- # effectively bypasses the learning rate schedule (the learning rate will
- # never decay). Remove the below line to train indefinitely.
- num_steps: 200000
- data_augmentation_options {
- random_horizontal_flip {
- }
- }
-}
-
-train_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/pet_train.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
-}
-
-eval_config: {
- num_examples: 2000
- # Note: The below line limits the evaluation process to 10 evaluations.
- # Remove the below line to evaluate indefinitely.
- max_evals: 10
-}
-
-eval_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/pet_val.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
- shuffle: false
- num_readers: 1
-}
diff --git a/object_detection/samples/configs/faster_rcnn_inception_v2_coco.config b/object_detection/samples/configs/faster_rcnn_inception_v2_coco.config
deleted file mode 100644
index 8950aa18..00000000
--- a/object_detection/samples/configs/faster_rcnn_inception_v2_coco.config
+++ /dev/null
@@ -1,145 +0,0 @@
-# Faster R-CNN with Inception v2, configuration for MSCOCO Dataset.
-# Users should configure the fine_tune_checkpoint field in the train config as
-# well as the label_map_path and input_path fields in the train_input_reader and
-# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
-# should be configured.
-
-
-model {
- faster_rcnn {
- num_classes: 90
- image_resizer {
- keep_aspect_ratio_resizer {
- min_dimension: 600
- max_dimension: 1024
- }
- }
- feature_extractor {
- type: 'faster_rcnn_inception_v2'
- first_stage_features_stride: 16
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.25, 0.5, 1.0, 2.0]
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 16
- width_stride: 16
- }
- }
- first_stage_box_predictor_conv_hyperparams {
- op: CONV
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.01
- }
- }
- }
- first_stage_nms_score_threshold: 0.0
- first_stage_nms_iou_threshold: 0.7
- first_stage_max_proposals: 300
- first_stage_localization_loss_weight: 2.0
- first_stage_objectness_loss_weight: 1.0
- initial_crop_size: 14
- maxpool_kernel_size: 2
- maxpool_stride: 2
- second_stage_box_predictor {
- mask_rcnn_box_predictor {
- use_dropout: false
- dropout_keep_probability: 1.0
- fc_hyperparams {
- op: FC
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- variance_scaling_initializer {
- factor: 1.0
- uniform: true
- mode: FAN_AVG
- }
- }
- }
- }
- }
- second_stage_post_processing {
- batch_non_max_suppression {
- score_threshold: 0.0
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 300
- }
- score_converter: SOFTMAX
- }
- second_stage_localization_loss_weight: 2.0
- second_stage_classification_loss_weight: 1.0
- }
-}
-
-train_config: {
- batch_size: 1
- optimizer {
- momentum_optimizer: {
- learning_rate: {
- manual_step_learning_rate {
- initial_learning_rate: 0.0002
- schedule {
- step: 0
- learning_rate: .0002
- }
- schedule {
- step: 900000
- learning_rate: .00002
- }
- schedule {
- step: 1200000
- learning_rate: .000002
- }
- }
- }
- momentum_optimizer_value: 0.9
- }
- use_moving_average: false
- }
- gradient_clipping_by_norm: 10.0
- fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
- from_detection_checkpoint: true
- # Note: The below line limits the training process to 200K steps, which we
- # empirically found to be sufficient enough to train the COCO dataset. This
- # effectively bypasses the learning rate schedule (the learning rate will
- # never decay). Remove the below line to train indefinitely.
- num_steps: 200000
- data_augmentation_options {
- random_horizontal_flip {
- }
- }
-}
-
-train_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
-}
-
-eval_config: {
- num_examples: 8000
- # Note: The below line limits the evaluation process to 10 evaluations.
- # Remove the below line to evaluate indefinitely.
- max_evals: 10
-}
-
-eval_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
- shuffle: false
- num_readers: 1
-}
diff --git a/object_detection/samples/configs/faster_rcnn_inception_v2_pets.config b/object_detection/samples/configs/faster_rcnn_inception_v2_pets.config
deleted file mode 100644
index 0c3e8bb2..00000000
--- a/object_detection/samples/configs/faster_rcnn_inception_v2_pets.config
+++ /dev/null
@@ -1,145 +0,0 @@
-# Faster R-CNN with Inception v2, configured for Oxford-IIIT Pets Dataset.
-# Users should configure the fine_tune_checkpoint field in the train config as
-# well as the label_map_path and input_path fields in the train_input_reader and
-# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
-# should be configured.
-
-model {
- faster_rcnn {
- num_classes: 37
- image_resizer {
- keep_aspect_ratio_resizer {
- min_dimension: 600
- max_dimension: 1024
- }
- }
- feature_extractor {
- type: 'faster_rcnn_inception_v2'
- first_stage_features_stride: 16
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.25, 0.5, 1.0, 2.0]
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 16
- width_stride: 16
- }
- }
- first_stage_box_predictor_conv_hyperparams {
- op: CONV
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.01
- }
- }
- }
- first_stage_nms_score_threshold: 0.0
- first_stage_nms_iou_threshold: 0.7
- first_stage_max_proposals: 300
- first_stage_localization_loss_weight: 2.0
- first_stage_objectness_loss_weight: 1.0
- initial_crop_size: 14
- maxpool_kernel_size: 2
- maxpool_stride: 2
- second_stage_box_predictor {
- mask_rcnn_box_predictor {
- use_dropout: false
- dropout_keep_probability: 1.0
- fc_hyperparams {
- op: FC
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- variance_scaling_initializer {
- factor: 1.0
- uniform: true
- mode: FAN_AVG
- }
- }
- }
- }
- }
- second_stage_post_processing {
- batch_non_max_suppression {
- score_threshold: 0.0
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 300
- }
- score_converter: SOFTMAX
- }
- second_stage_localization_loss_weight: 2.0
- second_stage_classification_loss_weight: 1.0
- }
-}
-
-train_config: {
- batch_size: 1
- optimizer {
- momentum_optimizer: {
- learning_rate: {
- manual_step_learning_rate {
- initial_learning_rate: 0.0002
- schedule {
- step: 0
- learning_rate: .0002
- }
- schedule {
- step: 900000
- learning_rate: .00002
- }
- schedule {
- step: 1200000
- learning_rate: .000002
- }
- }
- }
- momentum_optimizer_value: 0.9
- }
- use_moving_average: false
- }
- gradient_clipping_by_norm: 10.0
- fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
- from_detection_checkpoint: true
- # Note: The below line limits the training process to 200K steps, which we
- # empirically found to be sufficient enough to train the pets dataset. This
- # effectively bypasses the learning rate schedule (the learning rate will
- # never decay). Remove the below line to train indefinitely.
- num_steps: 200000
- data_augmentation_options {
- random_horizontal_flip {
- }
- }
-}
-
-
-train_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/pet_train.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
-}
-
-eval_config: {
- num_examples: 2000
- # Note: The below line limits the evaluation process to 10 evaluations.
- # Remove the below line to evaluate indefinitely.
- max_evals: 10
-}
-
-eval_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/pet_val.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
- shuffle: false
- num_readers: 1
-}
diff --git a/object_detection/samples/configs/faster_rcnn_nas_coco.config b/object_detection/samples/configs/faster_rcnn_nas_coco.config
deleted file mode 100644
index a32cb033..00000000
--- a/object_detection/samples/configs/faster_rcnn_nas_coco.config
+++ /dev/null
@@ -1,148 +0,0 @@
-# Faster R-CNN with NASNet-A featurization
-# Configured for MSCOCO Dataset.
-# Users should configure the fine_tune_checkpoint field in the train config as
-# well as the label_map_path and input_path fields in the train_input_reader and
-# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
-# should be configured.
-model {
- faster_rcnn {
- num_classes: 90
- image_resizer {
- # TODO: Only fixed_shape_resizer is currently supported for NASNet
- # featurization. The reason for this is that nasnet.py only supports
- # inputs with fully known shapes. We need to update nasnet.py to handle
- # shapes not known at compile time.
- fixed_shape_resizer {
- height: 1200
- width: 1200
- }
- }
- feature_extractor {
- type: 'faster_rcnn_nas'
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.25, 0.5, 1.0, 2.0]
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 16
- width_stride: 16
- }
- }
- first_stage_box_predictor_conv_hyperparams {
- op: CONV
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.01
- }
- }
- }
- first_stage_nms_score_threshold: 0.0
- first_stage_nms_iou_threshold: 0.7
- first_stage_max_proposals: 50
- first_stage_localization_loss_weight: 2.0
- first_stage_objectness_loss_weight: 1.0
- initial_crop_size: 17
- maxpool_kernel_size: 1
- maxpool_stride: 1
- second_stage_box_predictor {
- mask_rcnn_box_predictor {
- use_dropout: false
- dropout_keep_probability: 1.0
- fc_hyperparams {
- op: FC
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- variance_scaling_initializer {
- factor: 1.0
- uniform: true
- mode: FAN_AVG
- }
- }
- }
- }
- }
- second_stage_post_processing {
- batch_non_max_suppression {
- score_threshold: 0.0
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 100
- }
- score_converter: SOFTMAX
- }
- second_stage_localization_loss_weight: 2.0
- second_stage_classification_loss_weight: 1.0
- }
-}
-
-train_config: {
- batch_size: 1
- optimizer {
- momentum_optimizer: {
- learning_rate: {
- manual_step_learning_rate {
- initial_learning_rate: 0.0003
- schedule {
- step: 0
- learning_rate: .0003
- }
- schedule {
- step: 900000
- learning_rate: .00003
- }
- schedule {
- step: 1200000
- learning_rate: .000003
- }
- }
- }
- momentum_optimizer_value: 0.9
- }
- use_moving_average: false
- }
- gradient_clipping_by_norm: 10.0
- fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
- from_detection_checkpoint: true
- # Note: The below line limits the training process to 200K steps, which we
- # empirically found to be sufficient enough to train the pets dataset. This
- # effectively bypasses the learning rate schedule (the learning rate will
- # never decay). Remove the below line to train indefinitely.
- num_steps: 200000
- data_augmentation_options {
- random_horizontal_flip {
- }
- }
-}
-
-train_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
-}
-
-eval_config: {
- metrics_set: "pascal_voc_metrics"
- num_examples: 8000
- # Note: The below line limits the evaluation process to 10 evaluations.
- # Remove the below line to evaluate indefinitely.
- max_evals: 10}
-
-eval_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
- shuffle: false
- num_readers: 1
- num_epochs: 1
-}
diff --git a/object_detection/samples/configs/faster_rcnn_resnet101_coco.config b/object_detection/samples/configs/faster_rcnn_resnet101_coco.config
deleted file mode 100644
index ed11bb94..00000000
--- a/object_detection/samples/configs/faster_rcnn_resnet101_coco.config
+++ /dev/null
@@ -1,145 +0,0 @@
-# Faster R-CNN with Resnet-101 (v1) configuration for MSCOCO Dataset.
-# Users should configure the fine_tune_checkpoint field in the train config as
-# well as the label_map_path and input_path fields in the train_input_reader and
-# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
-# should be configured.
-
-model {
- faster_rcnn {
- num_classes: 90
- image_resizer {
- keep_aspect_ratio_resizer {
- min_dimension: 600
- max_dimension: 1024
- }
- }
- feature_extractor {
- type: 'faster_rcnn_resnet101'
- first_stage_features_stride: 16
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.25, 0.5, 1.0, 2.0]
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 16
- width_stride: 16
- }
- }
- first_stage_box_predictor_conv_hyperparams {
- op: CONV
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.01
- }
- }
- }
- first_stage_nms_score_threshold: 0.0
- first_stage_nms_iou_threshold: 0.7
- first_stage_max_proposals: 300
- first_stage_localization_loss_weight: 2.0
- first_stage_objectness_loss_weight: 1.0
- initial_crop_size: 14
- maxpool_kernel_size: 2
- maxpool_stride: 2
- second_stage_box_predictor {
- mask_rcnn_box_predictor {
- use_dropout: false
- dropout_keep_probability: 1.0
- fc_hyperparams {
- op: FC
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- variance_scaling_initializer {
- factor: 1.0
- uniform: true
- mode: FAN_AVG
- }
- }
- }
- }
- }
- second_stage_post_processing {
- batch_non_max_suppression {
- score_threshold: 0.0
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 300
- }
- score_converter: SOFTMAX
- }
- second_stage_localization_loss_weight: 2.0
- second_stage_classification_loss_weight: 1.0
- }
-}
-
-train_config: {
- batch_size: 1
- optimizer {
- momentum_optimizer: {
- learning_rate: {
- manual_step_learning_rate {
- initial_learning_rate: 0.0003
- schedule {
- step: 0
- learning_rate: .0003
- }
- schedule {
- step: 900000
- learning_rate: .00003
- }
- schedule {
- step: 1200000
- learning_rate: .000003
- }
- }
- }
- momentum_optimizer_value: 0.9
- }
- use_moving_average: false
- }
- gradient_clipping_by_norm: 10.0
- fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
- from_detection_checkpoint: true
- # Note: The below line limits the training process to 200K steps, which we
- # empirically found to be sufficient enough to train the pets dataset. This
- # effectively bypasses the learning rate schedule (the learning rate will
- # never decay). Remove the below line to train indefinitely.
- num_steps: 200000
- data_augmentation_options {
- random_horizontal_flip {
- }
- }
-}
-
-train_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
-}
-
-eval_config: {
- num_examples: 8000
- # Note: The below line limits the evaluation process to 10 evaluations.
- # Remove the below line to evaluate indefinitely.
- max_evals: 10
-}
-
-eval_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
- shuffle: false
- num_readers: 1
- num_epochs: 1
-}
diff --git a/object_detection/samples/configs/faster_rcnn_resnet101_kitti.config b/object_detection/samples/configs/faster_rcnn_resnet101_kitti.config
deleted file mode 100644
index 196d047c..00000000
--- a/object_detection/samples/configs/faster_rcnn_resnet101_kitti.config
+++ /dev/null
@@ -1,143 +0,0 @@
-# Faster R-CNN with Resnet-101 (v1)
-# Trained on KITTI dataset (cars and pedestrian), initialized from COCO
-# detection checkpoint.
-# Users should configure the fine_tune_checkpoint field in the train config as
-# well as the label_map_path and input_path fields in the train_input_reader and
-# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
-# should be configured.
-
-model {
- faster_rcnn {
- num_classes: 2
- image_resizer {
- keep_aspect_ratio_resizer {
- # Raw KITTI images have a resolution of 1242x375, if we wish to resize
- # them to have a height of 600 then their width should be
- # 1242/(375/600)=1987.2
- min_dimension: 600
- max_dimension: 1987
- }
- }
- feature_extractor {
- type: 'faster_rcnn_resnet101'
- first_stage_features_stride: 16
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.25, 0.5, 1.0, 2.0]
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 16
- width_stride: 16
- }
- }
- first_stage_box_predictor_conv_hyperparams {
- op: CONV
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.01
- }
- }
- }
- first_stage_nms_score_threshold: 0.0
- first_stage_nms_iou_threshold: 0.7
- first_stage_max_proposals: 300
- first_stage_localization_loss_weight: 2.0
- first_stage_objectness_loss_weight: 1.0
- initial_crop_size: 14
- maxpool_kernel_size: 2
- maxpool_stride: 2
- second_stage_box_predictor {
- mask_rcnn_box_predictor {
- use_dropout: false
- dropout_keep_probability: 1.0
- fc_hyperparams {
- op: FC
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- variance_scaling_initializer {
- factor: 1.0
- uniform: true
- mode: FAN_AVG
- }
- }
- }
- }
- }
- second_stage_post_processing {
- batch_non_max_suppression {
- score_threshold: 0.0
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 300
- }
- score_converter: SOFTMAX
- }
- second_stage_localization_loss_weight: 2.0
- second_stage_classification_loss_weight: 1.0
- }
-}
-
-train_config: {
- batch_size: 1
- optimizer {
- momentum_optimizer: {
- learning_rate: {
- manual_step_learning_rate {
- initial_learning_rate: 0.0001
- schedule {
- step: 0
- learning_rate: .0001
- }
- schedule {
- step: 500000
- learning_rate: .00001
- }
- schedule {
- step: 700000
- learning_rate: .000001
- }
- }
- }
- momentum_optimizer_value: 0.9
- }
- use_moving_average: false
- }
- gradient_clipping_by_norm: 10.0
- fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
- from_detection_checkpoint: true
- num_steps: 800000
- data_augmentation_options {
- random_horizontal_flip {
- }
- }
-}
-
-train_input_reader: {
- label_map_path: "PATH_TO_BE_CONFIGURED/kitti_label_map.pbtxt"
- tf_record_input_reader: {
- input_path: "PATH_TO_BE_CONFIGURED/kitti_train.tfrecord"
- }
-}
-
-eval_config: {
- metrics_set: "coco_metrics"
- use_moving_averages: false
- num_examples: 500
-}
-
-eval_input_reader: {
- label_map_path: "PATH_TO_BE_CONFIGURED/kitti_label_map.pbtxt"
- tf_record_input_reader: {
- input_path: "PATH_TO_BE_CONFIGURED/kitti_val.tfrecord"
- }
-}
-
diff --git a/object_detection/samples/configs/faster_rcnn_resnet101_pets.config b/object_detection/samples/configs/faster_rcnn_resnet101_pets.config
deleted file mode 100644
index 0a61d641..00000000
--- a/object_detection/samples/configs/faster_rcnn_resnet101_pets.config
+++ /dev/null
@@ -1,144 +0,0 @@
-# Faster R-CNN with Resnet-101 (v1) configured for the Oxford-IIIT Pet Dataset.
-# Users should configure the fine_tune_checkpoint field in the train config as
-# well as the label_map_path and input_path fields in the train_input_reader and
-# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
-# should be configured.
-
-model {
- faster_rcnn {
- num_classes: 37
- image_resizer {
- keep_aspect_ratio_resizer {
- min_dimension: 600
- max_dimension: 1024
- }
- }
- feature_extractor {
- type: 'faster_rcnn_resnet101'
- first_stage_features_stride: 16
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.25, 0.5, 1.0, 2.0]
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 16
- width_stride: 16
- }
- }
- first_stage_box_predictor_conv_hyperparams {
- op: CONV
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.01
- }
- }
- }
- first_stage_nms_score_threshold: 0.0
- first_stage_nms_iou_threshold: 0.7
- first_stage_max_proposals: 300
- first_stage_localization_loss_weight: 2.0
- first_stage_objectness_loss_weight: 1.0
- initial_crop_size: 14
- maxpool_kernel_size: 2
- maxpool_stride: 2
- second_stage_box_predictor {
- mask_rcnn_box_predictor {
- use_dropout: false
- dropout_keep_probability: 1.0
- fc_hyperparams {
- op: FC
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- variance_scaling_initializer {
- factor: 1.0
- uniform: true
- mode: FAN_AVG
- }
- }
- }
- }
- }
- second_stage_post_processing {
- batch_non_max_suppression {
- score_threshold: 0.0
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 300
- }
- score_converter: SOFTMAX
- }
- second_stage_localization_loss_weight: 2.0
- second_stage_classification_loss_weight: 1.0
- }
-}
-
-train_config: {
- batch_size: 1
- optimizer {
- momentum_optimizer: {
- learning_rate: {
- manual_step_learning_rate {
- initial_learning_rate: 0.0003
- schedule {
- step: 0
- learning_rate: .0003
- }
- schedule {
- step: 900000
- learning_rate: .00003
- }
- schedule {
- step: 1200000
- learning_rate: .000003
- }
- }
- }
- momentum_optimizer_value: 0.9
- }
- use_moving_average: false
- }
- gradient_clipping_by_norm: 10.0
- fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
- from_detection_checkpoint: true
- # Note: The below line limits the training process to 200K steps, which we
- # empirically found to be sufficient enough to train the pets dataset. This
- # effectively bypasses the learning rate schedule (the learning rate will
- # never decay). Remove the below line to train indefinitely.
- num_steps: 200000
- data_augmentation_options {
- random_horizontal_flip {
- }
- }
-}
-
-train_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/pet_train.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
-}
-
-eval_config: {
- num_examples: 2000
- # Note: The below line limits the evaluation process to 10 evaluations.
- # Remove the below line to evaluate indefinitely.
- max_evals: 10
-}
-
-eval_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/pet_val.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
- shuffle: false
- num_readers: 1
-}
diff --git a/object_detection/samples/configs/faster_rcnn_resnet101_voc07.config b/object_detection/samples/configs/faster_rcnn_resnet101_voc07.config
deleted file mode 100644
index e2362241..00000000
--- a/object_detection/samples/configs/faster_rcnn_resnet101_voc07.config
+++ /dev/null
@@ -1,137 +0,0 @@
-# Faster R-CNN with Resnet-101 (v1), configured for Pascal VOC Dataset.
-# Users should configure the fine_tune_checkpoint field in the train config as
-# well as the label_map_path and input_path fields in the train_input_reader and
-# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
-# should be configured.
-
-model {
- faster_rcnn {
- num_classes: 20
- image_resizer {
- keep_aspect_ratio_resizer {
- min_dimension: 600
- max_dimension: 1024
- }
- }
- feature_extractor {
- type: 'faster_rcnn_resnet101'
- first_stage_features_stride: 16
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.25, 0.5, 1.0, 2.0]
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 16
- width_stride: 16
- }
- }
- first_stage_box_predictor_conv_hyperparams {
- op: CONV
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.01
- }
- }
- }
- first_stage_nms_score_threshold: 0.0
- first_stage_nms_iou_threshold: 0.7
- first_stage_max_proposals: 300
- first_stage_localization_loss_weight: 2.0
- first_stage_objectness_loss_weight: 1.0
- initial_crop_size: 14
- maxpool_kernel_size: 2
- maxpool_stride: 2
- second_stage_box_predictor {
- mask_rcnn_box_predictor {
- use_dropout: false
- dropout_keep_probability: 1.0
- fc_hyperparams {
- op: FC
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- variance_scaling_initializer {
- factor: 1.0
- uniform: true
- mode: FAN_AVG
- }
- }
- }
- }
- }
- second_stage_post_processing {
- batch_non_max_suppression {
- score_threshold: 0.0
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 300
- }
- score_converter: SOFTMAX
- }
- second_stage_localization_loss_weight: 2.0
- second_stage_classification_loss_weight: 1.0
- }
-}
-
-train_config: {
- batch_size: 1
- optimizer {
- momentum_optimizer: {
- learning_rate: {
- manual_step_learning_rate {
- initial_learning_rate: 0.0001
- schedule {
- step: 0
- learning_rate: .0001
- }
- schedule {
- step: 500000
- learning_rate: .00001
- }
- schedule {
- step: 700000
- learning_rate: .000001
- }
- }
- }
- momentum_optimizer_value: 0.9
- }
- use_moving_average: false
- }
- gradient_clipping_by_norm: 10.0
- fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
- from_detection_checkpoint: true
- num_steps: 800000
- data_augmentation_options {
- random_horizontal_flip {
- }
- }
-}
-
-train_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/pascal_train.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/pascal_label_map.pbtxt"
-}
-
-eval_config: {
- num_examples: 4952
-}
-
-eval_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/pascal_val.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/pascal_label_map.pbtxt"
- shuffle: false
- num_readers: 1
-}
diff --git a/object_detection/samples/configs/faster_rcnn_resnet152_coco.config b/object_detection/samples/configs/faster_rcnn_resnet152_coco.config
deleted file mode 100644
index d537b08f..00000000
--- a/object_detection/samples/configs/faster_rcnn_resnet152_coco.config
+++ /dev/null
@@ -1,145 +0,0 @@
-# Faster R-CNN with Resnet-152 (v1), configuration for MSCOCO Dataset.
-# Users should configure the fine_tune_checkpoint field in the train config as
-# well as the label_map_path and input_path fields in the train_input_reader and
-# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
-# should be configured.
-
-model {
- faster_rcnn {
- num_classes: 90
- image_resizer {
- keep_aspect_ratio_resizer {
- min_dimension: 600
- max_dimension: 1024
- }
- }
- feature_extractor {
- type: 'faster_rcnn_resnet152'
- first_stage_features_stride: 16
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.25, 0.5, 1.0, 2.0]
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 16
- width_stride: 16
- }
- }
- first_stage_box_predictor_conv_hyperparams {
- op: CONV
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.01
- }
- }
- }
- first_stage_nms_score_threshold: 0.0
- first_stage_nms_iou_threshold: 0.7
- first_stage_max_proposals: 300
- first_stage_localization_loss_weight: 2.0
- first_stage_objectness_loss_weight: 1.0
- initial_crop_size: 14
- maxpool_kernel_size: 2
- maxpool_stride: 2
- second_stage_box_predictor {
- mask_rcnn_box_predictor {
- use_dropout: false
- dropout_keep_probability: 1.0
- fc_hyperparams {
- op: FC
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- variance_scaling_initializer {
- factor: 1.0
- uniform: true
- mode: FAN_AVG
- }
- }
- }
- }
- }
- second_stage_post_processing {
- batch_non_max_suppression {
- score_threshold: 0.0
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 300
- }
- score_converter: SOFTMAX
- }
- second_stage_localization_loss_weight: 2.0
- second_stage_classification_loss_weight: 1.0
- }
-}
-
-train_config: {
- batch_size: 1
- optimizer {
- momentum_optimizer: {
- learning_rate: {
- manual_step_learning_rate {
- initial_learning_rate: 0.0003
- schedule {
- step: 0
- learning_rate: .0003
- }
- schedule {
- step: 900000
- learning_rate: .00003
- }
- schedule {
- step: 1200000
- learning_rate: .000003
- }
- }
- }
- momentum_optimizer_value: 0.9
- }
- use_moving_average: false
- }
- gradient_clipping_by_norm: 10.0
- fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
- from_detection_checkpoint: true
- # Note: The below line limits the training process to 200K steps, which we
- # empirically found to be sufficient enough to train the pets dataset. This
- # effectively bypasses the learning rate schedule (the learning rate will
- # never decay). Remove the below line to train indefinitely.
- num_steps: 200000
- data_augmentation_options {
- random_horizontal_flip {
- }
- }
-}
-
-train_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
-}
-
-eval_config: {
- num_examples: 8000
- # Note: The below line limits the evaluation process to 10 evaluations.
- # Remove the below line to evaluate indefinitely.
- max_evals: 10
-}
-
-eval_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
- shuffle: false
- num_readers: 1
- num_epochs: 1
-}
diff --git a/object_detection/samples/configs/faster_rcnn_resnet152_pets.config b/object_detection/samples/configs/faster_rcnn_resnet152_pets.config
deleted file mode 100644
index bc6f1aa7..00000000
--- a/object_detection/samples/configs/faster_rcnn_resnet152_pets.config
+++ /dev/null
@@ -1,144 +0,0 @@
-# Faster R-CNN with Resnet-152 (v1), configured for Oxford-IIIT Pets Dataset.
-# Users should configure the fine_tune_checkpoint field in the train config as
-# well as the label_map_path and input_path fields in the train_input_reader and
-# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
-# should be configured.
-
-model {
- faster_rcnn {
- num_classes: 37
- image_resizer {
- keep_aspect_ratio_resizer {
- min_dimension: 600
- max_dimension: 1024
- }
- }
- feature_extractor {
- type: 'faster_rcnn_resnet152'
- first_stage_features_stride: 16
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.25, 0.5, 1.0, 2.0]
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 16
- width_stride: 16
- }
- }
- first_stage_box_predictor_conv_hyperparams {
- op: CONV
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.01
- }
- }
- }
- first_stage_nms_score_threshold: 0.0
- first_stage_nms_iou_threshold: 0.7
- first_stage_max_proposals: 300
- first_stage_localization_loss_weight: 2.0
- first_stage_objectness_loss_weight: 1.0
- initial_crop_size: 14
- maxpool_kernel_size: 2
- maxpool_stride: 2
- second_stage_box_predictor {
- mask_rcnn_box_predictor {
- use_dropout: false
- dropout_keep_probability: 1.0
- fc_hyperparams {
- op: FC
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- variance_scaling_initializer {
- factor: 1.0
- uniform: true
- mode: FAN_AVG
- }
- }
- }
- }
- }
- second_stage_post_processing {
- batch_non_max_suppression {
- score_threshold: 0.0
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 300
- }
- score_converter: SOFTMAX
- }
- second_stage_localization_loss_weight: 2.0
- second_stage_classification_loss_weight: 1.0
- }
-}
-
-train_config: {
- batch_size: 1
- optimizer {
- momentum_optimizer: {
- learning_rate: {
- manual_step_learning_rate {
- initial_learning_rate: 0.0003
- schedule {
- step: 0
- learning_rate: .0003
- }
- schedule {
- step: 900000
- learning_rate: .00003
- }
- schedule {
- step: 1200000
- learning_rate: .000003
- }
- }
- }
- momentum_optimizer_value: 0.9
- }
- use_moving_average: false
- }
- gradient_clipping_by_norm: 10.0
- fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
- from_detection_checkpoint: true
- # Note: The below line limits the training process to 200K steps, which we
- # empirically found to be sufficient enough to train the pets dataset. This
- # effectively bypasses the learning rate schedule (the learning rate will
- # never decay). Remove the below line to train indefinitely.
- num_steps: 200000
- data_augmentation_options {
- random_horizontal_flip {
- }
- }
-}
-
-train_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/pet_train.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
-}
-
-eval_config: {
- num_examples: 2000
- # Note: The below line limits the evaluation process to 10 evaluations.
- # Remove the below line to evaluate indefinitely.
- max_evals: 10
-}
-
-eval_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/pet_val.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
- shuffle: false
- num_readers: 1
-}
diff --git a/object_detection/samples/configs/faster_rcnn_resnet50_coco.config b/object_detection/samples/configs/faster_rcnn_resnet50_coco.config
deleted file mode 100644
index e3257860..00000000
--- a/object_detection/samples/configs/faster_rcnn_resnet50_coco.config
+++ /dev/null
@@ -1,145 +0,0 @@
-# Faster R-CNN with Resnet-50 (v1), configuration for MSCOCO Dataset.
-# Users should configure the fine_tune_checkpoint field in the train config as
-# well as the label_map_path and input_path fields in the train_input_reader and
-# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
-# should be configured.
-
-model {
- faster_rcnn {
- num_classes: 90
- image_resizer {
- keep_aspect_ratio_resizer {
- min_dimension: 600
- max_dimension: 1024
- }
- }
- feature_extractor {
- type: 'faster_rcnn_resnet50'
- first_stage_features_stride: 16
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.25, 0.5, 1.0, 2.0]
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 16
- width_stride: 16
- }
- }
- first_stage_box_predictor_conv_hyperparams {
- op: CONV
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.01
- }
- }
- }
- first_stage_nms_score_threshold: 0.0
- first_stage_nms_iou_threshold: 0.7
- first_stage_max_proposals: 300
- first_stage_localization_loss_weight: 2.0
- first_stage_objectness_loss_weight: 1.0
- initial_crop_size: 14
- maxpool_kernel_size: 2
- maxpool_stride: 2
- second_stage_box_predictor {
- mask_rcnn_box_predictor {
- use_dropout: false
- dropout_keep_probability: 1.0
- fc_hyperparams {
- op: FC
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- variance_scaling_initializer {
- factor: 1.0
- uniform: true
- mode: FAN_AVG
- }
- }
- }
- }
- }
- second_stage_post_processing {
- batch_non_max_suppression {
- score_threshold: 0.0
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 300
- }
- score_converter: SOFTMAX
- }
- second_stage_localization_loss_weight: 2.0
- second_stage_classification_loss_weight: 1.0
- }
-}
-
-train_config: {
- batch_size: 1
- optimizer {
- momentum_optimizer: {
- learning_rate: {
- manual_step_learning_rate {
- initial_learning_rate: 0.0003
- schedule {
- step: 0
- learning_rate: .0003
- }
- schedule {
- step: 900000
- learning_rate: .00003
- }
- schedule {
- step: 1200000
- learning_rate: .000003
- }
- }
- }
- momentum_optimizer_value: 0.9
- }
- use_moving_average: false
- }
- gradient_clipping_by_norm: 10.0
- fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
- from_detection_checkpoint: true
- # Note: The below line limits the training process to 200K steps, which we
- # empirically found to be sufficient enough to train the pets dataset. This
- # effectively bypasses the learning rate schedule (the learning rate will
- # never decay). Remove the below line to train indefinitely.
- num_steps: 200000
- data_augmentation_options {
- random_horizontal_flip {
- }
- }
-}
-
-train_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
-}
-
-eval_config: {
- num_examples: 8000
- # Note: The below line limits the evaluation process to 10 evaluations.
- # Remove the below line to evaluate indefinitely.
- max_evals: 10
-}
-
-eval_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
- shuffle: false
- num_readers: 1
- num_epochs: 1
-}
diff --git a/object_detection/samples/configs/faster_rcnn_resnet50_pets.config b/object_detection/samples/configs/faster_rcnn_resnet50_pets.config
deleted file mode 100644
index 042ee13d..00000000
--- a/object_detection/samples/configs/faster_rcnn_resnet50_pets.config
+++ /dev/null
@@ -1,144 +0,0 @@
-# Faster R-CNN with Resnet-50 (v1), configured for Oxford-IIIT Pets Dataset.
-# Users should configure the fine_tune_checkpoint field in the train config as
-# well as the label_map_path and input_path fields in the train_input_reader and
-# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
-# should be configured.
-
-model {
- faster_rcnn {
- num_classes: 37
- image_resizer {
- keep_aspect_ratio_resizer {
- min_dimension: 600
- max_dimension: 1024
- }
- }
- feature_extractor {
- type: 'faster_rcnn_resnet50'
- first_stage_features_stride: 16
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.25, 0.5, 1.0, 2.0]
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 16
- width_stride: 16
- }
- }
- first_stage_box_predictor_conv_hyperparams {
- op: CONV
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.01
- }
- }
- }
- first_stage_nms_score_threshold: 0.0
- first_stage_nms_iou_threshold: 0.7
- first_stage_max_proposals: 300
- first_stage_localization_loss_weight: 2.0
- first_stage_objectness_loss_weight: 1.0
- initial_crop_size: 14
- maxpool_kernel_size: 2
- maxpool_stride: 2
- second_stage_box_predictor {
- mask_rcnn_box_predictor {
- use_dropout: false
- dropout_keep_probability: 1.0
- fc_hyperparams {
- op: FC
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- variance_scaling_initializer {
- factor: 1.0
- uniform: true
- mode: FAN_AVG
- }
- }
- }
- }
- }
- second_stage_post_processing {
- batch_non_max_suppression {
- score_threshold: 0.0
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 300
- }
- score_converter: SOFTMAX
- }
- second_stage_localization_loss_weight: 2.0
- second_stage_classification_loss_weight: 1.0
- }
-}
-
-train_config: {
- batch_size: 1
- optimizer {
- momentum_optimizer: {
- learning_rate: {
- manual_step_learning_rate {
- initial_learning_rate: 0.0003
- schedule {
- step: 0
- learning_rate: .0003
- }
- schedule {
- step: 900000
- learning_rate: .00003
- }
- schedule {
- step: 1200000
- learning_rate: .000003
- }
- }
- }
- momentum_optimizer_value: 0.9
- }
- use_moving_average: false
- }
- gradient_clipping_by_norm: 10.0
- fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
- from_detection_checkpoint: true
- # Note: The below line limits the training process to 200K steps, which we
- # empirically found to be sufficient enough to train the pets dataset. This
- # effectively bypasses the learning rate schedule (the learning rate will
- # never decay). Remove the below line to train indefinitely.
- num_steps: 200000
- data_augmentation_options {
- random_horizontal_flip {
- }
- }
-}
-
-train_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/pet_train.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
-}
-
-eval_config: {
- num_examples: 2000
- # Note: The below line limits the evaluation process to 10 evaluations.
- # Remove the below line to evaluate indefinitely.
- max_evals: 10
-}
-
-eval_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/pet_val.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
- shuffle: false
- num_readers: 1
-}
diff --git a/object_detection/samples/configs/rfcn_resnet101_coco.config b/object_detection/samples/configs/rfcn_resnet101_coco.config
deleted file mode 100644
index 6c383fa7..00000000
--- a/object_detection/samples/configs/rfcn_resnet101_coco.config
+++ /dev/null
@@ -1,142 +0,0 @@
-# R-FCN with Resnet-101 (v1), configuration for MSCOCO Dataset.
-# Users should configure the fine_tune_checkpoint field in the train config as
-# well as the label_map_path and input_path fields in the train_input_reader and
-# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
-# should be configured.
-
-model {
- faster_rcnn {
- num_classes: 90
- image_resizer {
- keep_aspect_ratio_resizer {
- min_dimension: 600
- max_dimension: 1024
- }
- }
- feature_extractor {
- type: 'faster_rcnn_resnet101'
- first_stage_features_stride: 16
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.25, 0.5, 1.0, 2.0]
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 16
- width_stride: 16
- }
- }
- first_stage_box_predictor_conv_hyperparams {
- op: CONV
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.01
- }
- }
- }
- first_stage_nms_score_threshold: 0.0
- first_stage_nms_iou_threshold: 0.7
- first_stage_max_proposals: 300
- first_stage_localization_loss_weight: 2.0
- first_stage_objectness_loss_weight: 1.0
- second_stage_box_predictor {
- rfcn_box_predictor {
- conv_hyperparams {
- op: CONV
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.01
- }
- }
- }
- crop_height: 18
- crop_width: 18
- num_spatial_bins_height: 3
- num_spatial_bins_width: 3
- }
- }
- second_stage_post_processing {
- batch_non_max_suppression {
- score_threshold: 0.0
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 300
- }
- score_converter: SOFTMAX
- }
- second_stage_localization_loss_weight: 2.0
- second_stage_classification_loss_weight: 1.0
- }
-}
-
-train_config: {
- batch_size: 1
- optimizer {
- momentum_optimizer: {
- learning_rate: {
- manual_step_learning_rate {
- initial_learning_rate: 0.0003
- schedule {
- step: 0
- learning_rate: .0003
- }
- schedule {
- step: 900000
- learning_rate: .00003
- }
- schedule {
- step: 1200000
- learning_rate: .000003
- }
- }
- }
- momentum_optimizer_value: 0.9
- }
- use_moving_average: false
- }
- gradient_clipping_by_norm: 10.0
- fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
- from_detection_checkpoint: true
- # Note: The below line limits the training process to 200K steps, which we
- # empirically found to be sufficient enough to train the pets dataset. This
- # effectively bypasses the learning rate schedule (the learning rate will
- # never decay). Remove the below line to train indefinitely.
- num_steps: 200000
- data_augmentation_options {
- random_horizontal_flip {
- }
- }
-}
-
-train_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
-}
-
-eval_config: {
- num_examples: 8000
- # Note: The below line limits the evaluation process to 10 evaluations.
- # Remove the below line to evaluate indefinitely.
- max_evals: 10
-}
-
-eval_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
- shuffle: false
- num_readers: 1
- num_epochs: 1
-}
diff --git a/object_detection/samples/configs/rfcn_resnet101_pets.config b/object_detection/samples/configs/rfcn_resnet101_pets.config
deleted file mode 100644
index 28f3d091..00000000
--- a/object_detection/samples/configs/rfcn_resnet101_pets.config
+++ /dev/null
@@ -1,141 +0,0 @@
-# R-FCN with Resnet-101 (v1), configured for Oxford-IIIT Pets Dataset.
-# Users should configure the fine_tune_checkpoint field in the train config as
-# well as the label_map_path and input_path fields in the train_input_reader and
-# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
-# should be configured.
-
-model {
- faster_rcnn {
- num_classes: 37
- image_resizer {
- keep_aspect_ratio_resizer {
- min_dimension: 600
- max_dimension: 1024
- }
- }
- feature_extractor {
- type: 'faster_rcnn_resnet101'
- first_stage_features_stride: 16
- }
- first_stage_anchor_generator {
- grid_anchor_generator {
- scales: [0.25, 0.5, 1.0, 2.0]
- aspect_ratios: [0.5, 1.0, 2.0]
- height_stride: 16
- width_stride: 16
- }
- }
- first_stage_box_predictor_conv_hyperparams {
- op: CONV
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.01
- }
- }
- }
- first_stage_nms_score_threshold: 0.0
- first_stage_nms_iou_threshold: 0.7
- first_stage_max_proposals: 300
- first_stage_localization_loss_weight: 2.0
- first_stage_objectness_loss_weight: 1.0
- second_stage_box_predictor {
- rfcn_box_predictor {
- conv_hyperparams {
- op: CONV
- regularizer {
- l2_regularizer {
- weight: 0.0
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.01
- }
- }
- }
- crop_height: 18
- crop_width: 18
- num_spatial_bins_height: 3
- num_spatial_bins_width: 3
- }
- }
- second_stage_post_processing {
- batch_non_max_suppression {
- score_threshold: 0.0
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 300
- }
- score_converter: SOFTMAX
- }
- second_stage_localization_loss_weight: 2.0
- second_stage_classification_loss_weight: 1.0
- }
-}
-
-train_config: {
- batch_size: 1
- optimizer {
- momentum_optimizer: {
- learning_rate: {
- manual_step_learning_rate {
- initial_learning_rate: 0.0003
- schedule {
- step: 0
- learning_rate: .0003
- }
- schedule {
- step: 900000
- learning_rate: .00003
- }
- schedule {
- step: 1200000
- learning_rate: .000003
- }
- }
- }
- momentum_optimizer_value: 0.9
- }
- use_moving_average: false
- }
- gradient_clipping_by_norm: 10.0
- fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
- from_detection_checkpoint: true
- # Note: The below line limits the training process to 200K steps, which we
- # empirically found to be sufficient enough to train the pets dataset. This
- # effectively bypasses the learning rate schedule (the learning rate will
- # never decay). Remove the below line to train indefinitely.
- num_steps: 200000
- data_augmentation_options {
- random_horizontal_flip {
- }
- }
-}
-
-train_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/pet_train.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
-}
-
-eval_config: {
- num_examples: 2000
- # Note: The below line limits the evaluation process to 10 evaluations.
- # Remove the below line to evaluate indefinitely.
- max_evals: 10
-}
-
-eval_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/pet_val.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
- shuffle: false
- num_readers: 1
-}
diff --git a/object_detection/samples/configs/ssd_inception_v2_coco.config b/object_detection/samples/configs/ssd_inception_v2_coco.config
deleted file mode 100644
index 62e7e4f2..00000000
--- a/object_detection/samples/configs/ssd_inception_v2_coco.config
+++ /dev/null
@@ -1,191 +0,0 @@
-# SSD with Inception v2 configuration for MSCOCO Dataset.
-# Users should configure the fine_tune_checkpoint field in the train config as
-# well as the label_map_path and input_path fields in the train_input_reader and
-# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
-# should be configured.
-
-model {
- ssd {
- num_classes: 90
- box_coder {
- faster_rcnn_box_coder {
- y_scale: 10.0
- x_scale: 10.0
- height_scale: 5.0
- width_scale: 5.0
- }
- }
- matcher {
- argmax_matcher {
- matched_threshold: 0.5
- unmatched_threshold: 0.5
- ignore_thresholds: false
- negatives_lower_than_unmatched: true
- force_match_for_each_row: true
- }
- }
- similarity_calculator {
- iou_similarity {
- }
- }
- anchor_generator {
- ssd_anchor_generator {
- num_layers: 6
- min_scale: 0.2
- max_scale: 0.95
- aspect_ratios: 1.0
- aspect_ratios: 2.0
- aspect_ratios: 0.5
- aspect_ratios: 3.0
- aspect_ratios: 0.3333
- reduce_boxes_in_lowest_layer: true
- }
- }
- image_resizer {
- fixed_shape_resizer {
- height: 300
- width: 300
- }
- }
- box_predictor {
- convolutional_box_predictor {
- min_depth: 0
- max_depth: 0
- num_layers_before_predictor: 0
- use_dropout: false
- dropout_keep_probability: 0.8
- kernel_size: 3
- box_code_size: 4
- apply_sigmoid_to_scores: false
- conv_hyperparams {
- activation: RELU_6,
- regularizer {
- l2_regularizer {
- weight: 0.00004
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.03
- mean: 0.0
- }
- }
- }
- }
- }
- feature_extractor {
- type: 'ssd_inception_v2'
- min_depth: 16
- depth_multiplier: 1.0
- conv_hyperparams {
- activation: RELU_6,
- regularizer {
- l2_regularizer {
- weight: 0.00004
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.03
- mean: 0.0
- }
- }
- batch_norm {
- train: true,
- scale: true,
- center: true,
- decay: 0.9997,
- epsilon: 0.001,
- }
- }
- }
- loss {
- classification_loss {
- weighted_sigmoid {
- anchorwise_output: true
- }
- }
- localization_loss {
- weighted_smooth_l1 {
- anchorwise_output: true
- }
- }
- hard_example_miner {
- num_hard_examples: 3000
- iou_threshold: 0.99
- loss_type: CLASSIFICATION
- max_negatives_per_positive: 3
- min_negatives_per_image: 0
- }
- classification_weight: 1.0
- localization_weight: 1.0
- }
- normalize_loss_by_num_matches: true
- post_processing {
- batch_non_max_suppression {
- score_threshold: 1e-8
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 100
- }
- score_converter: SIGMOID
- }
- }
-}
-
-train_config: {
- batch_size: 24
- optimizer {
- rms_prop_optimizer: {
- learning_rate: {
- exponential_decay_learning_rate {
- initial_learning_rate: 0.004
- decay_steps: 800720
- decay_factor: 0.95
- }
- }
- momentum_optimizer_value: 0.9
- decay: 0.9
- epsilon: 1.0
- }
- }
- fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
- from_detection_checkpoint: true
- # Note: The below line limits the training process to 200K steps, which we
- # empirically found to be sufficient enough to train the pets dataset. This
- # effectively bypasses the learning rate schedule (the learning rate will
- # never decay). Remove the below line to train indefinitely.
- num_steps: 200000
- data_augmentation_options {
- random_horizontal_flip {
- }
- }
- data_augmentation_options {
- ssd_random_crop {
- }
- }
-}
-
-train_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
-}
-
-eval_config: {
- num_examples: 8000
- # Note: The below line limits the evaluation process to 10 evaluations.
- # Remove the below line to evaluate indefinitely.
- max_evals: 10
-}
-
-eval_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
- shuffle: false
- num_readers: 1
- num_epochs: 1
-}
diff --git a/object_detection/samples/configs/ssd_inception_v2_pets.config b/object_detection/samples/configs/ssd_inception_v2_pets.config
deleted file mode 100644
index 41b15880..00000000
--- a/object_detection/samples/configs/ssd_inception_v2_pets.config
+++ /dev/null
@@ -1,190 +0,0 @@
-# SSD with Inception v2 configured for Oxford-IIIT Pets Dataset.
-# Users should configure the fine_tune_checkpoint field in the train config as
-# well as the label_map_path and input_path fields in the train_input_reader and
-# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
-# should be configured.
-
-model {
- ssd {
- num_classes: 37
- box_coder {
- faster_rcnn_box_coder {
- y_scale: 10.0
- x_scale: 10.0
- height_scale: 5.0
- width_scale: 5.0
- }
- }
- matcher {
- argmax_matcher {
- matched_threshold: 0.5
- unmatched_threshold: 0.5
- ignore_thresholds: false
- negatives_lower_than_unmatched: true
- force_match_for_each_row: true
- }
- }
- similarity_calculator {
- iou_similarity {
- }
- }
- anchor_generator {
- ssd_anchor_generator {
- num_layers: 6
- min_scale: 0.2
- max_scale: 0.95
- aspect_ratios: 1.0
- aspect_ratios: 2.0
- aspect_ratios: 0.5
- aspect_ratios: 3.0
- aspect_ratios: 0.3333
- reduce_boxes_in_lowest_layer: true
- }
- }
- image_resizer {
- fixed_shape_resizer {
- height: 300
- width: 300
- }
- }
- box_predictor {
- convolutional_box_predictor {
- min_depth: 0
- max_depth: 0
- num_layers_before_predictor: 0
- use_dropout: false
- dropout_keep_probability: 0.8
- kernel_size: 3
- box_code_size: 4
- apply_sigmoid_to_scores: false
- conv_hyperparams {
- activation: RELU_6,
- regularizer {
- l2_regularizer {
- weight: 0.00004
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.03
- mean: 0.0
- }
- }
- }
- }
- }
- feature_extractor {
- type: 'ssd_inception_v2'
- min_depth: 16
- depth_multiplier: 1.0
- conv_hyperparams {
- activation: RELU_6,
- regularizer {
- l2_regularizer {
- weight: 0.00004
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.03
- mean: 0.0
- }
- }
- batch_norm {
- train: true,
- scale: true,
- center: true,
- decay: 0.9997,
- epsilon: 0.001,
- }
- }
- }
- loss {
- classification_loss {
- weighted_sigmoid {
- anchorwise_output: true
- }
- }
- localization_loss {
- weighted_smooth_l1 {
- anchorwise_output: true
- }
- }
- hard_example_miner {
- num_hard_examples: 3000
- iou_threshold: 0.99
- loss_type: CLASSIFICATION
- max_negatives_per_positive: 3
- min_negatives_per_image: 0
- }
- classification_weight: 1.0
- localization_weight: 1.0
- }
- normalize_loss_by_num_matches: true
- post_processing {
- batch_non_max_suppression {
- score_threshold: 1e-8
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 100
- }
- score_converter: SIGMOID
- }
- }
-}
-
-train_config: {
- batch_size: 24
- optimizer {
- rms_prop_optimizer: {
- learning_rate: {
- exponential_decay_learning_rate {
- initial_learning_rate: 0.004
- decay_steps: 800720
- decay_factor: 0.95
- }
- }
- momentum_optimizer_value: 0.9
- decay: 0.9
- epsilon: 1.0
- }
- }
- fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
- from_detection_checkpoint: true
- # Note: The below line limits the training process to 200K steps, which we
- # empirically found to be sufficient enough to train the pets dataset. This
- # effectively bypasses the learning rate schedule (the learning rate will
- # never decay). Remove the below line to train indefinitely.
- num_steps: 200000
- data_augmentation_options {
- random_horizontal_flip {
- }
- }
- data_augmentation_options {
- ssd_random_crop {
- }
- }
-}
-
-train_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/pet_train.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
-}
-
-eval_config: {
- num_examples: 2000
- # Note: The below line limits the evaluation process to 10 evaluations.
- # Remove the below line to evaluate indefinitely.
- max_evals: 10
-}
-
-eval_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/pet_val.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
- shuffle: false
- num_readers: 1
-}
diff --git a/object_detection/samples/configs/ssd_mobilenet_v1_coco.config b/object_detection/samples/configs/ssd_mobilenet_v1_coco.config
deleted file mode 100644
index d46a5432..00000000
--- a/object_detection/samples/configs/ssd_mobilenet_v1_coco.config
+++ /dev/null
@@ -1,197 +0,0 @@
-# SSD with Mobilenet v1 configuration for MSCOCO Dataset.
-# Users should configure the fine_tune_checkpoint field in the train config as
-# well as the label_map_path and input_path fields in the train_input_reader and
-# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
-# should be configured.
-
-model {
- ssd {
- num_classes: 90
- box_coder {
- faster_rcnn_box_coder {
- y_scale: 10.0
- x_scale: 10.0
- height_scale: 5.0
- width_scale: 5.0
- }
- }
- matcher {
- argmax_matcher {
- matched_threshold: 0.5
- unmatched_threshold: 0.5
- ignore_thresholds: false
- negatives_lower_than_unmatched: true
- force_match_for_each_row: true
- }
- }
- similarity_calculator {
- iou_similarity {
- }
- }
- anchor_generator {
- ssd_anchor_generator {
- num_layers: 6
- min_scale: 0.2
- max_scale: 0.95
- aspect_ratios: 1.0
- aspect_ratios: 2.0
- aspect_ratios: 0.5
- aspect_ratios: 3.0
- aspect_ratios: 0.3333
- }
- }
- image_resizer {
- fixed_shape_resizer {
- height: 300
- width: 300
- }
- }
- box_predictor {
- convolutional_box_predictor {
- min_depth: 0
- max_depth: 0
- num_layers_before_predictor: 0
- use_dropout: false
- dropout_keep_probability: 0.8
- kernel_size: 1
- box_code_size: 4
- apply_sigmoid_to_scores: false
- conv_hyperparams {
- activation: RELU_6,
- regularizer {
- l2_regularizer {
- weight: 0.00004
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.03
- mean: 0.0
- }
- }
- batch_norm {
- train: true,
- scale: true,
- center: true,
- decay: 0.9997,
- epsilon: 0.001,
- }
- }
- }
- }
- feature_extractor {
- type: 'ssd_mobilenet_v1'
- min_depth: 16
- depth_multiplier: 1.0
- conv_hyperparams {
- activation: RELU_6,
- regularizer {
- l2_regularizer {
- weight: 0.00004
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.03
- mean: 0.0
- }
- }
- batch_norm {
- train: true,
- scale: true,
- center: true,
- decay: 0.9997,
- epsilon: 0.001,
- }
- }
- }
- loss {
- classification_loss {
- weighted_sigmoid {
- anchorwise_output: true
- }
- }
- localization_loss {
- weighted_smooth_l1 {
- anchorwise_output: true
- }
- }
- hard_example_miner {
- num_hard_examples: 3000
- iou_threshold: 0.99
- loss_type: CLASSIFICATION
- max_negatives_per_positive: 3
- min_negatives_per_image: 0
- }
- classification_weight: 1.0
- localization_weight: 1.0
- }
- normalize_loss_by_num_matches: true
- post_processing {
- batch_non_max_suppression {
- score_threshold: 1e-8
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 100
- }
- score_converter: SIGMOID
- }
- }
-}
-
-train_config: {
- batch_size: 24
- optimizer {
- rms_prop_optimizer: {
- learning_rate: {
- exponential_decay_learning_rate {
- initial_learning_rate: 0.004
- decay_steps: 800720
- decay_factor: 0.95
- }
- }
- momentum_optimizer_value: 0.9
- decay: 0.9
- epsilon: 1.0
- }
- }
- fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
- from_detection_checkpoint: true
- # Note: The below line limits the training process to 200K steps, which we
- # empirically found to be sufficient enough to train the pets dataset. This
- # effectively bypasses the learning rate schedule (the learning rate will
- # never decay). Remove the below line to train indefinitely.
- num_steps: 200000
- data_augmentation_options {
- random_horizontal_flip {
- }
- }
- data_augmentation_options {
- ssd_random_crop {
- }
- }
-}
-
-train_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
-}
-
-eval_config: {
- num_examples: 8000
- # Note: The below line limits the evaluation process to 10 evaluations.
- # Remove the below line to evaluate indefinitely.
- max_evals: 10
-}
-
-eval_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
- shuffle: false
- num_readers: 1
- num_epochs: 1
-}
diff --git a/object_detection/samples/configs/ssd_mobilenet_v1_pets.config b/object_detection/samples/configs/ssd_mobilenet_v1_pets.config
deleted file mode 100644
index a6741357..00000000
--- a/object_detection/samples/configs/ssd_mobilenet_v1_pets.config
+++ /dev/null
@@ -1,196 +0,0 @@
-# SSD with Mobilenet v1, configured for Oxford-IIIT Pets Dataset.
-# Users should configure the fine_tune_checkpoint field in the train config as
-# well as the label_map_path and input_path fields in the train_input_reader and
-# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
-# should be configured.
-
-model {
- ssd {
- num_classes: 37
- box_coder {
- faster_rcnn_box_coder {
- y_scale: 10.0
- x_scale: 10.0
- height_scale: 5.0
- width_scale: 5.0
- }
- }
- matcher {
- argmax_matcher {
- matched_threshold: 0.5
- unmatched_threshold: 0.5
- ignore_thresholds: false
- negatives_lower_than_unmatched: true
- force_match_for_each_row: true
- }
- }
- similarity_calculator {
- iou_similarity {
- }
- }
- anchor_generator {
- ssd_anchor_generator {
- num_layers: 6
- min_scale: 0.2
- max_scale: 0.95
- aspect_ratios: 1.0
- aspect_ratios: 2.0
- aspect_ratios: 0.5
- aspect_ratios: 3.0
- aspect_ratios: 0.3333
- }
- }
- image_resizer {
- fixed_shape_resizer {
- height: 300
- width: 300
- }
- }
- box_predictor {
- convolutional_box_predictor {
- min_depth: 0
- max_depth: 0
- num_layers_before_predictor: 0
- use_dropout: false
- dropout_keep_probability: 0.8
- kernel_size: 1
- box_code_size: 4
- apply_sigmoid_to_scores: false
- conv_hyperparams {
- activation: RELU_6,
- regularizer {
- l2_regularizer {
- weight: 0.00004
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.03
- mean: 0.0
- }
- }
- batch_norm {
- train: true,
- scale: true,
- center: true,
- decay: 0.9997,
- epsilon: 0.001,
- }
- }
- }
- }
- feature_extractor {
- type: 'ssd_mobilenet_v1'
- min_depth: 16
- depth_multiplier: 1.0
- conv_hyperparams {
- activation: RELU_6,
- regularizer {
- l2_regularizer {
- weight: 0.00004
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.03
- mean: 0.0
- }
- }
- batch_norm {
- train: true,
- scale: true,
- center: true,
- decay: 0.9997,
- epsilon: 0.001,
- }
- }
- }
- loss {
- classification_loss {
- weighted_sigmoid {
- anchorwise_output: true
- }
- }
- localization_loss {
- weighted_smooth_l1 {
- anchorwise_output: true
- }
- }
- hard_example_miner {
- num_hard_examples: 3000
- iou_threshold: 0.99
- loss_type: CLASSIFICATION
- max_negatives_per_positive: 3
- min_negatives_per_image: 0
- }
- classification_weight: 1.0
- localization_weight: 1.0
- }
- normalize_loss_by_num_matches: true
- post_processing {
- batch_non_max_suppression {
- score_threshold: 1e-8
- iou_threshold: 0.6
- max_detections_per_class: 100
- max_total_detections: 100
- }
- score_converter: SIGMOID
- }
- }
-}
-
-train_config: {
- batch_size: 24
- optimizer {
- rms_prop_optimizer: {
- learning_rate: {
- exponential_decay_learning_rate {
- initial_learning_rate: 0.004
- decay_steps: 800720
- decay_factor: 0.95
- }
- }
- momentum_optimizer_value: 0.9
- decay: 0.9
- epsilon: 1.0
- }
- }
- fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
- from_detection_checkpoint: true
- # Note: The below line limits the training process to 200K steps, which we
- # empirically found to be sufficient enough to train the pets dataset. This
- # effectively bypasses the learning rate schedule (the learning rate will
- # never decay). Remove the below line to train indefinitely.
- num_steps: 200000
- data_augmentation_options {
- random_horizontal_flip {
- }
- }
- data_augmentation_options {
- ssd_random_crop {
- }
- }
-}
-
-train_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/pet_train.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
-}
-
-eval_config: {
- num_examples: 2000
- # Note: The below line limits the evaluation process to 10 evaluations.
- # Remove the below line to evaluate indefinitely.
- max_evals: 10
-}
-
-eval_input_reader: {
- tf_record_input_reader {
- input_path: "PATH_TO_BE_CONFIGURED/pet_val.record"
- }
- label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
- shuffle: false
- num_readers: 1
-}
diff --git a/object_detection/test_images/image1.jpg b/object_detection/test_images/image1.jpg
deleted file mode 100644
index 8b20d8af..00000000
Binary files a/object_detection/test_images/image1.jpg and /dev/null differ
diff --git a/object_detection/test_images/image2.jpg b/object_detection/test_images/image2.jpg
deleted file mode 100644
index 9eb325ac..00000000
Binary files a/object_detection/test_images/image2.jpg and /dev/null differ
diff --git a/object_detection/test_images/image_info.txt b/object_detection/test_images/image_info.txt
deleted file mode 100644
index 6f805cbc..00000000
--- a/object_detection/test_images/image_info.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-
-Image provenance:
-image1.jpg: https://commons.wikimedia.org/wiki/File:Baegle_dwa.jpg
-image2.jpg: Michael Miley,
- https://www.flickr.com/photos/mike_miley/4678754542/in/photolist-88rQHL-88oBVp-88oC2B-88rS6J-88rSqm-88oBLv-88oBC4
-
diff --git a/object_detection/train.py b/object_detection/train.py
deleted file mode 100644
index faab1acc..00000000
--- a/object_detection/train.py
+++ /dev/null
@@ -1,163 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-r"""Training executable for detection models.
-
-This executable is used to train DetectionModels. There are two ways of
-configuring the training job:
-
-1) A single pipeline_pb2.TrainEvalPipelineConfig configuration file
-can be specified by --pipeline_config_path.
-
-Example usage:
- ./train \
- --logtostderr \
- --train_dir=path/to/train_dir \
- --pipeline_config_path=pipeline_config.pbtxt
-
-2) Three configuration files can be provided: a model_pb2.DetectionModel
-configuration file to define what type of DetectionModel is being trained, an
-input_reader_pb2.InputReader file to specify what training data will be used and
-a train_pb2.TrainConfig file to configure training parameters.
-
-Example usage:
- ./train \
- --logtostderr \
- --train_dir=path/to/train_dir \
- --model_config_path=model_config.pbtxt \
- --train_config_path=train_config.pbtxt \
- --input_config_path=train_input_config.pbtxt
-"""
-
-import functools
-import json
-import os
-import tensorflow as tf
-
-from object_detection import trainer
-from object_detection.builders import input_reader_builder
-from object_detection.builders import model_builder
-from object_detection.utils import config_util
-
-tf.logging.set_verbosity(tf.logging.INFO)
-
-flags = tf.app.flags
-flags.DEFINE_string('master', '', 'Name of the TensorFlow master to use.')
-flags.DEFINE_integer('task', 0, 'task id')
-flags.DEFINE_integer('num_clones', 1, 'Number of clones to deploy per worker.')
-flags.DEFINE_boolean('clone_on_cpu', False,
- 'Force clones to be deployed on CPU. Note that even if '
- 'set to False (allowing ops to run on gpu), some ops may '
- 'still be run on the CPU if they have no GPU kernel.')
-flags.DEFINE_integer('worker_replicas', 1, 'Number of worker+trainer '
- 'replicas.')
-flags.DEFINE_integer('ps_tasks', 0,
- 'Number of parameter server tasks. If None, does not use '
- 'a parameter server.')
-flags.DEFINE_string('train_dir', '',
- 'Directory to save the checkpoints and training summaries.')
-
-flags.DEFINE_string('pipeline_config_path', '',
- 'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
- 'file. If provided, other configs are ignored')
-
-flags.DEFINE_string('train_config_path', '',
- 'Path to a train_pb2.TrainConfig config file.')
-flags.DEFINE_string('input_config_path', '',
- 'Path to an input_reader_pb2.InputReader config file.')
-flags.DEFINE_string('model_config_path', '',
- 'Path to a model_pb2.DetectionModel config file.')
-
-FLAGS = flags.FLAGS
-
-
-def main(_):
- assert FLAGS.train_dir, '`train_dir` is missing.'
- if FLAGS.task == 0: tf.gfile.MakeDirs(FLAGS.train_dir)
- if FLAGS.pipeline_config_path:
- configs = config_util.get_configs_from_pipeline_file(
- FLAGS.pipeline_config_path)
- if FLAGS.task == 0:
- tf.gfile.Copy(FLAGS.pipeline_config_path,
- os.path.join(FLAGS.train_dir, 'pipeline.config'),
- overwrite=True)
- else:
- configs = config_util.get_configs_from_multiple_files(
- model_config_path=FLAGS.model_config_path,
- train_config_path=FLAGS.train_config_path,
- train_input_config_path=FLAGS.input_config_path)
- if FLAGS.task == 0:
- for name, config in [('model.config', FLAGS.model_config_path),
- ('train.config', FLAGS.train_config_path),
- ('input.config', FLAGS.input_config_path)]:
- tf.gfile.Copy(config, os.path.join(FLAGS.train_dir, name),
- overwrite=True)
-
- model_config = configs['model']
- train_config = configs['train_config']
- input_config = configs['train_input_config']
-
- model_fn = functools.partial(
- model_builder.build,
- model_config=model_config,
- is_training=True)
-
- create_input_dict_fn = functools.partial(
- input_reader_builder.build, input_config)
-
- env = json.loads(os.environ.get('TF_CONFIG', '{}'))
- cluster_data = env.get('cluster', None)
- cluster = tf.train.ClusterSpec(cluster_data) if cluster_data else None
- task_data = env.get('task', None) or {'type': 'master', 'index': 0}
- task_info = type('TaskSpec', (object,), task_data)
-
- # Parameters for a single worker.
- ps_tasks = 0
- worker_replicas = 1
- worker_job_name = 'lonely_worker'
- task = 0
- is_chief = True
- master = ''
-
- if cluster_data and 'worker' in cluster_data:
- # Number of total worker replicas include "worker"s and the "master".
- worker_replicas = len(cluster_data['worker']) + 1
- if cluster_data and 'ps' in cluster_data:
- ps_tasks = len(cluster_data['ps'])
-
- if worker_replicas > 1 and ps_tasks < 1:
- raise ValueError('At least 1 ps task is needed for distributed training.')
-
- if worker_replicas >= 1 and ps_tasks > 0:
- # Set up distributed training.
- server = tf.train.Server(tf.train.ClusterSpec(cluster), protocol='grpc',
- job_name=task_info.type,
- task_index=task_info.index)
- if task_info.type == 'ps':
- server.join()
- return
-
- worker_job_name = '%s/task:%d' % (task_info.type, task_info.index)
- task = task_info.index
- is_chief = (task_info.type == 'master')
- master = server.target
-
- trainer.train(create_input_dict_fn, model_fn, train_config, master, task,
- FLAGS.num_clones, worker_replicas, FLAGS.clone_on_cpu, ps_tasks,
- worker_job_name, is_chief, FLAGS.train_dir)
-
-
-if __name__ == '__main__':
- tf.app.run()
diff --git a/object_detection/trainer.py b/object_detection/trainer.py
deleted file mode 100644
index ea91777b..00000000
--- a/object_detection/trainer.py
+++ /dev/null
@@ -1,332 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Detection model trainer.
-
-This file provides a generic training method that can be used to train a
-DetectionModel.
-"""
-
-import functools
-
-import tensorflow as tf
-
-from object_detection.builders import optimizer_builder
-from object_detection.builders import preprocessor_builder
-from object_detection.core import batcher
-from object_detection.core import preprocessor
-from object_detection.core import standard_fields as fields
-from object_detection.utils import ops as util_ops
-from object_detection.utils import variables_helper
-from deployment import model_deploy
-
-slim = tf.contrib.slim
-
-
-def create_input_queue(batch_size_per_clone, create_tensor_dict_fn,
- batch_queue_capacity, num_batch_queue_threads,
- prefetch_queue_capacity, data_augmentation_options):
- """Sets up reader, prefetcher and returns input queue.
-
- Args:
- batch_size_per_clone: batch size to use per clone.
- create_tensor_dict_fn: function to create tensor dictionary.
- batch_queue_capacity: maximum number of elements to store within a queue.
- num_batch_queue_threads: number of threads to use for batching.
- prefetch_queue_capacity: maximum capacity of the queue used to prefetch
- assembled batches.
- data_augmentation_options: a list of tuples, where each tuple contains a
- data augmentation function and a dictionary containing arguments and their
- values (see preprocessor.py).
-
- Returns:
- input queue: a batcher.BatchQueue object holding enqueued tensor_dicts
- (which hold images, boxes and targets). To get a batch of tensor_dicts,
- call input_queue.Dequeue().
- """
- tensor_dict = create_tensor_dict_fn()
-
- tensor_dict[fields.InputDataFields.image] = tf.expand_dims(
- tensor_dict[fields.InputDataFields.image], 0)
-
- images = tensor_dict[fields.InputDataFields.image]
- float_images = tf.to_float(images)
- tensor_dict[fields.InputDataFields.image] = float_images
-
- include_instance_masks = (fields.InputDataFields.groundtruth_instance_masks
- in tensor_dict)
- include_keypoints = (fields.InputDataFields.groundtruth_keypoints
- in tensor_dict)
- if data_augmentation_options:
- tensor_dict = preprocessor.preprocess(
- tensor_dict, data_augmentation_options,
- func_arg_map=preprocessor.get_default_func_arg_map(
- include_instance_masks=include_instance_masks,
- include_keypoints=include_keypoints))
-
- input_queue = batcher.BatchQueue(
- tensor_dict,
- batch_size=batch_size_per_clone,
- batch_queue_capacity=batch_queue_capacity,
- num_batch_queue_threads=num_batch_queue_threads,
- prefetch_queue_capacity=prefetch_queue_capacity)
- return input_queue
-
-
-def get_inputs(input_queue, num_classes, merge_multiple_label_boxes=False):
- """Dequeues batch and constructs inputs to object detection model.
-
- Args:
- input_queue: BatchQueue object holding enqueued tensor_dicts.
- num_classes: Number of classes.
- merge_multiple_label_boxes: Whether to merge boxes with multiple labels
- or not. Defaults to false. Merged boxes are represented with a single
- box and a k-hot encoding of the multiple labels associated with the
- boxes.
-
- Returns:
- images: a list of 3-D float tensor of images.
- image_keys: a list of string keys for the images.
- locations_list: a list of tensors of shape [num_boxes, 4]
- containing the corners of the groundtruth boxes.
- classes_list: a list of padded one-hot tensors containing target classes.
- masks_list: a list of 3-D float tensors of shape [num_boxes, image_height,
- image_width] containing instance masks for objects if present in the
- input_queue. Else returns None.
- keypoints_list: a list of 3-D float tensors of shape [num_boxes,
- num_keypoints, 2] containing keypoints for objects if present in the
- input queue. Else returns None.
- """
- read_data_list = input_queue.dequeue()
- label_id_offset = 1
- def extract_images_and_targets(read_data):
- """Extract images and targets from the input dict."""
- image = read_data[fields.InputDataFields.image]
- key = ''
- if fields.InputDataFields.source_id in read_data:
- key = read_data[fields.InputDataFields.source_id]
- location_gt = read_data[fields.InputDataFields.groundtruth_boxes]
- classes_gt = tf.cast(read_data[fields.InputDataFields.groundtruth_classes],
- tf.int32)
- classes_gt -= label_id_offset
- if merge_multiple_label_boxes:
- location_gt, classes_gt, _ = util_ops.merge_boxes_with_multiple_labels(
- location_gt, classes_gt, num_classes)
- else:
- classes_gt = util_ops.padded_one_hot_encoding(
- indices=classes_gt, depth=num_classes, left_pad=0)
- masks_gt = read_data.get(fields.InputDataFields.groundtruth_instance_masks)
- keypoints_gt = read_data.get(fields.InputDataFields.groundtruth_keypoints)
- if (merge_multiple_label_boxes and (
- masks_gt is not None or keypoints_gt is not None)):
- raise NotImplementedError('Multi-label support is only for boxes.')
- return image, key, location_gt, classes_gt, masks_gt, keypoints_gt
-
- return zip(*map(extract_images_and_targets, read_data_list))
-
-
-def _create_losses(input_queue, create_model_fn, train_config):
- """Creates loss function for a DetectionModel.
-
- Args:
- input_queue: BatchQueue object holding enqueued tensor_dicts.
- create_model_fn: A function to create the DetectionModel.
- train_config: a train_pb2.TrainConfig protobuf.
- """
- detection_model = create_model_fn()
- (images, _, groundtruth_boxes_list, groundtruth_classes_list,
- groundtruth_masks_list, groundtruth_keypoints_list) = get_inputs(
- input_queue,
- detection_model.num_classes,
- train_config.merge_multiple_label_boxes)
- images = [detection_model.preprocess(image) for image in images]
- images = tf.concat(images, 0)
- if any(mask is None for mask in groundtruth_masks_list):
- groundtruth_masks_list = None
- if any(keypoints is None for keypoints in groundtruth_keypoints_list):
- groundtruth_keypoints_list = None
-
- detection_model.provide_groundtruth(groundtruth_boxes_list,
- groundtruth_classes_list,
- groundtruth_masks_list,
- groundtruth_keypoints_list)
- prediction_dict = detection_model.predict(images)
-
- losses_dict = detection_model.loss(prediction_dict)
- for loss_tensor in losses_dict.values():
- tf.losses.add_loss(loss_tensor)
-
-
-def train(create_tensor_dict_fn, create_model_fn, train_config, master, task,
- num_clones, worker_replicas, clone_on_cpu, ps_tasks, worker_job_name,
- is_chief, train_dir):
- """Training function for detection models.
-
- Args:
- create_tensor_dict_fn: a function to create a tensor input dictionary.
- create_model_fn: a function that creates a DetectionModel and generates
- losses.
- train_config: a train_pb2.TrainConfig protobuf.
- master: BNS name of the TensorFlow master to use.
- task: The task id of this training instance.
- num_clones: The number of clones to run per machine.
- worker_replicas: The number of work replicas to train with.
- clone_on_cpu: True if clones should be forced to run on CPU.
- ps_tasks: Number of parameter server tasks.
- worker_job_name: Name of the worker job.
- is_chief: Whether this replica is the chief replica.
- train_dir: Directory to write checkpoints and training summaries to.
- """
-
- detection_model = create_model_fn()
- data_augmentation_options = [
- preprocessor_builder.build(step)
- for step in train_config.data_augmentation_options]
-
- with tf.Graph().as_default():
- # Build a configuration specifying multi-GPU and multi-replicas.
- deploy_config = model_deploy.DeploymentConfig(
- num_clones=num_clones,
- clone_on_cpu=clone_on_cpu,
- replica_id=task,
- num_replicas=worker_replicas,
- num_ps_tasks=ps_tasks,
- worker_job_name=worker_job_name)
-
- # Place the global step on the device storing the variables.
- with tf.device(deploy_config.variables_device()):
- global_step = slim.create_global_step()
-
- with tf.device(deploy_config.inputs_device()):
- input_queue = create_input_queue(
- train_config.batch_size // num_clones, create_tensor_dict_fn,
- train_config.batch_queue_capacity,
- train_config.num_batch_queue_threads,
- train_config.prefetch_queue_capacity, data_augmentation_options)
-
- # Gather initial summaries.
- # TODO(rathodv): See if summaries can be added/extracted from global tf
- # collections so that they don't have to be passed around.
- summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))
- global_summaries = set([])
-
- model_fn = functools.partial(_create_losses,
- create_model_fn=create_model_fn,
- train_config=train_config)
- clones = model_deploy.create_clones(deploy_config, model_fn, [input_queue])
- first_clone_scope = clones[0].scope
-
- # Gather update_ops from the first clone. These contain, for example,
- # the updates for the batch_norm variables created by model_fn.
- update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)
-
- with tf.device(deploy_config.optimizer_device()):
- training_optimizer = optimizer_builder.build(train_config.optimizer,
- global_summaries)
-
- sync_optimizer = None
- if train_config.sync_replicas:
- training_optimizer = tf.SyncReplicasOptimizer(
- training_optimizer,
- replicas_to_aggregate=train_config.replicas_to_aggregate,
- total_num_replicas=train_config.worker_replicas)
- sync_optimizer = training_optimizer
-
- # Create ops required to initialize the model from a given checkpoint.
- init_fn = None
- if train_config.fine_tune_checkpoint:
- var_map = detection_model.restore_map(
- from_detection_checkpoint=train_config.from_detection_checkpoint)
- available_var_map = (variables_helper.
- get_variables_available_in_checkpoint(
- var_map, train_config.fine_tune_checkpoint))
- init_saver = tf.train.Saver(available_var_map)
- def initializer_fn(sess):
- init_saver.restore(sess, train_config.fine_tune_checkpoint)
- init_fn = initializer_fn
-
- with tf.device(deploy_config.optimizer_device()):
- total_loss, grads_and_vars = model_deploy.optimize_clones(
- clones, training_optimizer, regularization_losses=None)
- total_loss = tf.check_numerics(total_loss, 'LossTensor is inf or nan.')
-
- # Optionally multiply bias gradients by train_config.bias_grad_multiplier.
- if train_config.bias_grad_multiplier:
- biases_regex_list = ['.*/biases']
- grads_and_vars = variables_helper.multiply_gradients_matching_regex(
- grads_and_vars,
- biases_regex_list,
- multiplier=train_config.bias_grad_multiplier)
-
- # Optionally freeze some layers by setting their gradients to be zero.
- if train_config.freeze_variables:
- grads_and_vars = variables_helper.freeze_gradients_matching_regex(
- grads_and_vars, train_config.freeze_variables)
-
- # Optionally clip gradients
- if train_config.gradient_clipping_by_norm > 0:
- with tf.name_scope('clip_grads'):
- grads_and_vars = slim.learning.clip_gradient_norms(
- grads_and_vars, train_config.gradient_clipping_by_norm)
-
- # Create gradient updates.
- grad_updates = training_optimizer.apply_gradients(grads_and_vars,
- global_step=global_step)
- update_ops.append(grad_updates)
-
- update_op = tf.group(*update_ops)
- with tf.control_dependencies([update_op]):
- train_tensor = tf.identity(total_loss, name='train_op')
-
- # Add summaries.
- for model_var in slim.get_model_variables():
- global_summaries.add(tf.summary.histogram(model_var.op.name, model_var))
- for loss_tensor in tf.losses.get_losses():
- global_summaries.add(tf.summary.scalar(loss_tensor.op.name, loss_tensor))
- global_summaries.add(
- tf.summary.scalar('TotalLoss', tf.losses.get_total_loss()))
-
- # Add the summaries from the first clone. These contain the summaries
- # created by model_fn and either optimize_clones() or _gather_clone_loss().
- summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
- first_clone_scope))
- summaries |= global_summaries
-
- # Merge all summaries together.
- summary_op = tf.summary.merge(list(summaries), name='summary_op')
-
- # Soft placement allows placing on CPU ops without GPU implementation.
- session_config = tf.ConfigProto(allow_soft_placement=True,
- log_device_placement=False)
-
- # Save checkpoints regularly.
- keep_checkpoint_every_n_hours = train_config.keep_checkpoint_every_n_hours
- saver = tf.train.Saver(
- keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours)
-
- slim.learning.train(
- train_tensor,
- logdir=train_dir,
- master=master,
- is_chief=is_chief,
- session_config=session_config,
- startup_delay_steps=train_config.startup_delay_steps,
- init_fn=init_fn,
- summary_op=summary_op,
- number_of_steps=(
- train_config.num_steps if train_config.num_steps else None),
- save_summaries_secs=120,
- sync_optimizer=sync_optimizer,
- saver=saver)
diff --git a/object_detection/trainer_test.py b/object_detection/trainer_test.py
deleted file mode 100644
index caa8c1eb..00000000
--- a/object_detection/trainer_test.py
+++ /dev/null
@@ -1,204 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.trainer."""
-
-import tensorflow as tf
-
-from google.protobuf import text_format
-
-from object_detection import trainer
-from object_detection.core import losses
-from object_detection.core import model
-from object_detection.core import standard_fields as fields
-from object_detection.protos import train_pb2
-
-
-NUMBER_OF_CLASSES = 2
-
-
-def get_input_function():
- """A function to get test inputs. Returns an image with one box."""
- image = tf.random_uniform([32, 32, 3], dtype=tf.float32)
- key = tf.constant('image_000000')
- class_label = tf.random_uniform(
- [1], minval=0, maxval=NUMBER_OF_CLASSES, dtype=tf.int32)
- box_label = tf.random_uniform(
- [1, 4], minval=0.4, maxval=0.6, dtype=tf.float32)
-
- return {
- fields.InputDataFields.image: image,
- fields.InputDataFields.key: key,
- fields.InputDataFields.groundtruth_classes: class_label,
- fields.InputDataFields.groundtruth_boxes: box_label
- }
-
-
-class FakeDetectionModel(model.DetectionModel):
- """A simple (and poor) DetectionModel for use in test."""
-
- def __init__(self):
- super(FakeDetectionModel, self).__init__(num_classes=NUMBER_OF_CLASSES)
- self._classification_loss = losses.WeightedSigmoidClassificationLoss(
- anchorwise_output=True)
- self._localization_loss = losses.WeightedSmoothL1LocalizationLoss(
- anchorwise_output=True)
-
- def preprocess(self, inputs):
- """Input preprocessing, resizes images to 28x28.
-
- Args:
- inputs: a [batch, height_in, width_in, channels] float32 tensor
- representing a batch of images with values between 0 and 255.0.
-
- Returns:
- preprocessed_inputs: a [batch, 28, 28, channels] float32 tensor.
- """
- return tf.image.resize_images(inputs, [28, 28])
-
- def predict(self, preprocessed_inputs):
- """Prediction tensors from inputs tensor.
-
- Args:
- preprocessed_inputs: a [batch, 28, 28, channels] float32 tensor.
-
- Returns:
- prediction_dict: a dictionary holding prediction tensors to be
- passed to the Loss or Postprocess functions.
- """
- flattened_inputs = tf.contrib.layers.flatten(preprocessed_inputs)
- class_prediction = tf.contrib.layers.fully_connected(
- flattened_inputs, self._num_classes)
- box_prediction = tf.contrib.layers.fully_connected(flattened_inputs, 4)
-
- return {
- 'class_predictions_with_background': tf.reshape(
- class_prediction, [-1, 1, self._num_classes]),
- 'box_encodings': tf.reshape(box_prediction, [-1, 1, 4])
- }
-
- def postprocess(self, prediction_dict, **params):
- """Convert predicted output tensors to final detections. Unused.
-
- Args:
- prediction_dict: a dictionary holding prediction tensors.
- **params: Additional keyword arguments for specific implementations of
- DetectionModel.
-
- Returns:
- detections: a dictionary with empty fields.
- """
- return {
- 'detection_boxes': None,
- 'detection_scores': None,
- 'detection_classes': None,
- 'num_detections': None
- }
-
- def loss(self, prediction_dict):
- """Compute scalar loss tensors with respect to provided groundtruth.
-
- Calling this function requires that groundtruth tensors have been
- provided via the provide_groundtruth function.
-
- Args:
- prediction_dict: a dictionary holding predicted tensors
-
- Returns:
- a dictionary mapping strings (loss names) to scalar tensors representing
- loss values.
- """
- batch_reg_targets = tf.stack(
- self.groundtruth_lists(fields.BoxListFields.boxes))
- batch_cls_targets = tf.stack(
- self.groundtruth_lists(fields.BoxListFields.classes))
- weights = tf.constant(
- 1.0, dtype=tf.float32,
- shape=[len(self.groundtruth_lists(fields.BoxListFields.boxes)), 1])
-
- location_losses = self._localization_loss(
- prediction_dict['box_encodings'], batch_reg_targets,
- weights=weights)
- cls_losses = self._classification_loss(
- prediction_dict['class_predictions_with_background'], batch_cls_targets,
- weights=weights)
-
- loss_dict = {
- 'localization_loss': tf.reduce_sum(location_losses),
- 'classification_loss': tf.reduce_sum(cls_losses),
- }
- return loss_dict
-
- def restore_map(self, from_detection_checkpoint=True):
- """Returns a map of variables to load from a foreign checkpoint.
-
- Args:
- from_detection_checkpoint: whether to restore from a full detection
- checkpoint (with compatible variable names) or to restore from a
- classification checkpoint for initialization prior to training.
-
- Returns:
- A dict mapping variable names to variables.
- """
- return {var.op.name: var for var in tf.global_variables()}
-
-
-class TrainerTest(tf.test.TestCase):
-
- def test_configure_trainer_and_train_two_steps(self):
- train_config_text_proto = """
- optimizer {
- adam_optimizer {
- learning_rate {
- constant_learning_rate {
- learning_rate: 0.01
- }
- }
- }
- }
- data_augmentation_options {
- random_adjust_brightness {
- max_delta: 0.2
- }
- }
- data_augmentation_options {
- random_adjust_contrast {
- min_delta: 0.7
- max_delta: 1.1
- }
- }
- num_steps: 2
- """
- train_config = train_pb2.TrainConfig()
- text_format.Merge(train_config_text_proto, train_config)
-
- train_dir = self.get_temp_dir()
-
- trainer.train(create_tensor_dict_fn=get_input_function,
- create_model_fn=FakeDetectionModel,
- train_config=train_config,
- master='',
- task=0,
- num_clones=1,
- worker_replicas=1,
- clone_on_cpu=True,
- ps_tasks=0,
- worker_job_name='worker',
- is_chief=True,
- train_dir=train_dir)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/utils/BUILD b/object_detection/utils/BUILD
deleted file mode 100644
index 7e511c95..00000000
--- a/object_detection/utils/BUILD
+++ /dev/null
@@ -1,324 +0,0 @@
-# Tensorflow Object Detection API: Utility functions.
-
-package(
- default_visibility = ["//visibility:public"],
-)
-
-licenses(["notice"])
-
-# Apache 2.0
-
-py_library(
- name = "category_util",
- srcs = ["category_util.py"],
- deps = ["//tensorflow"],
-)
-
-py_library(
- name = "config_util",
- srcs = ["config_util.py"],
- deps = [
- "//tensorflow",
- "//tensorflow_models/object_detection/protos:eval_py_pb2",
- "//tensorflow_models/object_detection/protos:input_reader_py_pb2",
- "//tensorflow_models/object_detection/protos:model_py_pb2",
- "//tensorflow_models/object_detection/protos:pipeline_py_pb2",
- "//tensorflow_models/object_detection/protos:train_py_pb2",
- ],
-)
-
-py_library(
- name = "dataset_util",
- srcs = ["dataset_util.py"],
- deps = [
- "//tensorflow",
- ],
-)
-
-py_library(
- name = "label_map_util",
- srcs = ["label_map_util.py"],
- deps = [
- "//third_party/py/google/protobuf",
- "//tensorflow",
- "//tensorflow_models/object_detection/protos:string_int_label_map_py_pb2",
- ],
-)
-
-py_library(
- name = "learning_schedules",
- srcs = ["learning_schedules.py"],
- deps = [
- "//tensorflow",
- ],
-)
-
-py_library(
- name = "metrics",
- srcs = ["metrics.py"],
- deps = ["//third_party/py/numpy"],
-)
-
-py_library(
- name = "np_box_list",
- srcs = ["np_box_list.py"],
- deps = ["//tensorflow"],
-)
-
-py_library(
- name = "np_box_list_ops",
- srcs = ["np_box_list_ops.py"],
- deps = [
- ":np_box_list",
- ":np_box_ops",
- "//tensorflow",
- ],
-)
-
-py_library(
- name = "np_box_ops",
- srcs = ["np_box_ops.py"],
- deps = ["//tensorflow"],
-)
-
-py_library(
- name = "object_detection_evaluation",
- srcs = ["object_detection_evaluation.py"],
- deps = [
- ":label_map_util",
- ":metrics",
- ":per_image_evaluation",
- "//tensorflow",
- "//tensorflow_models/object_detection/core:standard_fields",
- ],
-)
-
-py_library(
- name = "ops",
- srcs = ["ops.py"],
- deps = [
- ":static_shape",
- "//tensorflow",
- "//tensorflow_models/object_detection/core:box_list",
- "//tensorflow_models/object_detection/core:box_list_ops",
- "//tensorflow_models/object_detection/core:standard_fields",
- ],
-)
-
-py_library(
- name = "per_image_evaluation",
- srcs = ["per_image_evaluation.py"],
- deps = [
- ":np_box_list",
- ":np_box_list_ops",
- "//tensorflow",
- ],
-)
-
-py_library(
- name = "shape_utils",
- srcs = ["shape_utils.py"],
- deps = ["//tensorflow"],
-)
-
-py_library(
- name = "static_shape",
- srcs = ["static_shape.py"],
- deps = [],
-)
-
-py_library(
- name = "test_utils",
- srcs = ["test_utils.py"],
- deps = [
- "//tensorflow",
- "//tensorflow_models/object_detection/core:anchor_generator",
- "//tensorflow_models/object_detection/core:box_coder",
- "//tensorflow_models/object_detection/core:box_list",
- "//tensorflow_models/object_detection/core:box_predictor",
- "//tensorflow_models/object_detection/core:matcher",
- "//tensorflow_models/object_detection/utils:shape_utils",
- ],
-)
-
-py_library(
- name = "variables_helper",
- srcs = ["variables_helper.py"],
- deps = [
- "//tensorflow",
- ],
-)
-
-py_library(
- name = "visualization_utils",
- srcs = ["visualization_utils.py"],
- deps = [
- "//third_party/py/PIL:pil",
- "//third_party/py/matplotlib",
- "//third_party/py/six",
- "//tensorflow",
- ],
-)
-
-py_test(
- name = "category_util_test",
- srcs = ["category_util_test.py"],
- deps = [
- ":category_util",
- "//tensorflow",
- ],
-)
-
-py_test(
- name = "config_util_test",
- srcs = ["config_util_test.py"],
- deps = [
- ":config_util",
- "//tensorflow:tensorflow_google",
- "//tensorflow_models/object_detection/protos:input_reader_py_pb2",
- "//tensorflow_models/object_detection/protos:model_py_pb2",
- "//tensorflow_models/object_detection/protos:pipeline_py_pb2",
- "//tensorflow_models/object_detection/protos:train_py_pb2",
- ],
-)
-
-py_test(
- name = "dataset_util_test",
- srcs = ["dataset_util_test.py"],
- deps = [
- ":dataset_util",
- "//tensorflow",
- ],
-)
-
-py_test(
- name = "label_map_util_test",
- srcs = ["label_map_util_test.py"],
- deps = [
- ":label_map_util",
- "//tensorflow",
- ],
-)
-
-py_test(
- name = "learning_schedules_test",
- srcs = ["learning_schedules_test.py"],
- deps = [
- ":learning_schedules",
- "//tensorflow",
- ],
-)
-
-py_test(
- name = "metrics_test",
- srcs = ["metrics_test.py"],
- deps = [
- ":metrics",
- "//tensorflow",
- ],
-)
-
-py_test(
- name = "np_box_list_test",
- srcs = ["np_box_list_test.py"],
- deps = [
- ":np_box_list",
- "//tensorflow",
- ],
-)
-
-py_test(
- name = "np_box_list_ops_test",
- srcs = ["np_box_list_ops_test.py"],
- deps = [
- ":np_box_list",
- ":np_box_list_ops",
- "//tensorflow",
- ],
-)
-
-py_test(
- name = "np_box_ops_test",
- srcs = ["np_box_ops_test.py"],
- deps = [
- ":np_box_ops",
- "//tensorflow",
- ],
-)
-
-py_test(
- name = "object_detection_evaluation_test",
- srcs = ["object_detection_evaluation_test.py"],
- deps = [
- ":object_detection_evaluation",
- "//tensorflow",
- "//tensorflow_models/object_detection/core:standard_fields",
- ],
-)
-
-py_test(
- name = "ops_test",
- srcs = ["ops_test.py"],
- deps = [
- ":ops",
- "//tensorflow",
- "//tensorflow_models/object_detection/core:standard_fields",
- ],
-)
-
-py_test(
- name = "per_image_evaluation_test",
- srcs = ["per_image_evaluation_test.py"],
- deps = [
- ":per_image_evaluation",
- "//tensorflow",
- ],
-)
-
-py_test(
- name = "shape_utils_test",
- srcs = ["shape_utils_test.py"],
- deps = [
- ":shape_utils",
- "//tensorflow",
- ],
-)
-
-py_test(
- name = "static_shape_test",
- srcs = ["static_shape_test.py"],
- deps = [
- ":static_shape",
- "//tensorflow",
- ],
-)
-
-py_test(
- name = "test_utils_test",
- srcs = ["test_utils_test.py"],
- deps = [
- ":test_utils",
- "//tensorflow",
- ],
-)
-
-py_test(
- name = "variables_helper_test",
- srcs = ["variables_helper_test.py"],
- deps = [
- ":variables_helper",
- "//tensorflow",
- ],
-)
-
-py_test(
- name = "visualization_utils_test",
- srcs = ["visualization_utils_test.py"],
- data = [
- "//tensorflow_models/object_detection/test_images:image1.jpg",
- ],
- deps = [
- ":visualization_utils",
- "//third_party/py/PIL:pil",
- ],
-)
diff --git a/object_detection/utils/__init__.py b/object_detection/utils/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/object_detection/utils/__pycache__/__init__.cpython-35.pyc b/object_detection/utils/__pycache__/__init__.cpython-35.pyc
deleted file mode 100644
index d87697a2..00000000
Binary files a/object_detection/utils/__pycache__/__init__.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/utils/__pycache__/dataset_util.cpython-35.pyc b/object_detection/utils/__pycache__/dataset_util.cpython-35.pyc
deleted file mode 100644
index 9d680ab0..00000000
Binary files a/object_detection/utils/__pycache__/dataset_util.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/utils/__pycache__/label_map_util.cpython-35.pyc b/object_detection/utils/__pycache__/label_map_util.cpython-35.pyc
deleted file mode 100644
index a26f73b2..00000000
Binary files a/object_detection/utils/__pycache__/label_map_util.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/utils/__pycache__/ops.cpython-35.pyc b/object_detection/utils/__pycache__/ops.cpython-35.pyc
deleted file mode 100644
index 66cbd75d..00000000
Binary files a/object_detection/utils/__pycache__/ops.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/utils/__pycache__/shape_utils.cpython-35.pyc b/object_detection/utils/__pycache__/shape_utils.cpython-35.pyc
deleted file mode 100644
index 0b99fd7f..00000000
Binary files a/object_detection/utils/__pycache__/shape_utils.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/utils/__pycache__/static_shape.cpython-35.pyc b/object_detection/utils/__pycache__/static_shape.cpython-35.pyc
deleted file mode 100644
index 2e303314..00000000
Binary files a/object_detection/utils/__pycache__/static_shape.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/utils/__pycache__/visualization_utils.cpython-35.pyc b/object_detection/utils/__pycache__/visualization_utils.cpython-35.pyc
deleted file mode 100644
index c7fd5b5a..00000000
Binary files a/object_detection/utils/__pycache__/visualization_utils.cpython-35.pyc and /dev/null differ
diff --git a/object_detection/utils/category_util.py b/object_detection/utils/category_util.py
deleted file mode 100644
index fdd9c1c1..00000000
--- a/object_detection/utils/category_util.py
+++ /dev/null
@@ -1,72 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Functions for importing/exporting Object Detection categories."""
-import csv
-
-import tensorflow as tf
-
-
-def load_categories_from_csv_file(csv_path):
- """Loads categories from a csv file.
-
- The CSV file should have one comma delimited numeric category id and string
- category name pair per line. For example:
-
- 0,"cat"
- 1,"dog"
- 2,"bird"
- ...
-
- Args:
- csv_path: Path to the csv file to be parsed into categories.
- Returns:
- categories: A list of dictionaries representing all possible categories.
- The categories will contain an integer 'id' field and a string
- 'name' field.
- Raises:
- ValueError: If the csv file is incorrectly formatted.
- """
- categories = []
-
- with tf.gfile.Open(csv_path, 'r') as csvfile:
- reader = csv.reader(csvfile, delimiter=',', quotechar='"')
- for row in reader:
- if not row:
- continue
-
- if len(row) != 2:
- raise ValueError('Expected 2 fields per row in csv: %s' % ','.join(row))
-
- category_id = int(row[0])
- category_name = row[1]
- categories.append({'id': category_id, 'name': category_name})
-
- return categories
-
-
-def save_categories_to_csv_file(categories, csv_path):
- """Saves categories to a csv file.
-
- Args:
- categories: A list of dictionaries representing categories to save to file.
- Each category must contain an 'id' and 'name' field.
- csv_path: Path to the csv file to be parsed into categories.
- """
- categories.sort(key=lambda x: x['id'])
- with tf.gfile.Open(csv_path, 'w') as csvfile:
- writer = csv.writer(csvfile, delimiter=',', quotechar='"')
- for category in categories:
- writer.writerow([category['id'], category['name']])
diff --git a/object_detection/utils/category_util_test.py b/object_detection/utils/category_util_test.py
deleted file mode 100644
index 9c99079e..00000000
--- a/object_detection/utils/category_util_test.py
+++ /dev/null
@@ -1,54 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.utils.category_util."""
-import os
-
-import tensorflow as tf
-
-from object_detection.utils import category_util
-
-
-class EvalUtilTest(tf.test.TestCase):
-
- def test_load_categories_from_csv_file(self):
- csv_data = """
- 0,"cat"
- 1,"dog"
- 2,"bird"
- """.strip(' ')
- csv_path = os.path.join(self.get_temp_dir(), 'test.csv')
- with tf.gfile.Open(csv_path, 'wb') as f:
- f.write(csv_data)
-
- categories = category_util.load_categories_from_csv_file(csv_path)
- self.assertTrue({'id': 0, 'name': 'cat'} in categories)
- self.assertTrue({'id': 1, 'name': 'dog'} in categories)
- self.assertTrue({'id': 2, 'name': 'bird'} in categories)
-
- def test_save_categories_to_csv_file(self):
- categories = [
- {'id': 0, 'name': 'cat'},
- {'id': 1, 'name': 'dog'},
- {'id': 2, 'name': 'bird'},
- ]
- csv_path = os.path.join(self.get_temp_dir(), 'test.csv')
- category_util.save_categories_to_csv_file(categories, csv_path)
- saved_categories = category_util.load_categories_from_csv_file(csv_path)
- self.assertEqual(saved_categories, categories)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/utils/config_util.py b/object_detection/utils/config_util.py
deleted file mode 100644
index 1bf30089..00000000
--- a/object_detection/utils/config_util.py
+++ /dev/null
@@ -1,452 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Functions for reading and updating configuration files."""
-
-import tensorflow as tf
-
-from google.protobuf import text_format
-
-from object_detection.protos import eval_pb2
-from object_detection.protos import input_reader_pb2
-from object_detection.protos import model_pb2
-from object_detection.protos import pipeline_pb2
-from object_detection.protos import train_pb2
-
-
-def get_configs_from_pipeline_file(pipeline_config_path):
- """Reads configuration from a pipeline_pb2.TrainEvalPipelineConfig.
-
- Args:
- pipeline_config_path: Path to pipeline_pb2.TrainEvalPipelineConfig text
- proto.
-
- Returns:
- Dictionary of configuration objects. Keys are `model`, `train_config`,
- `train_input_config`, `eval_config`, `eval_input_config`. Value are the
- corresponding config objects.
- """
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- with tf.gfile.GFile(pipeline_config_path, "r") as f:
- proto_str = f.read()
- text_format.Merge(proto_str, pipeline_config)
-
- configs = {}
- configs["model"] = pipeline_config.model
- configs["train_config"] = pipeline_config.train_config
- configs["train_input_config"] = pipeline_config.train_input_reader
- configs["eval_config"] = pipeline_config.eval_config
- configs["eval_input_config"] = pipeline_config.eval_input_reader
-
- return configs
-
-
-def create_pipeline_proto_from_configs(configs):
- """Creates a pipeline_pb2.TrainEvalPipelineConfig from configs dictionary.
-
- This function nearly performs the inverse operation of
- get_configs_from_pipeline_file(). Instead of returning a file path, it returns
- a `TrainEvalPipelineConfig` object.
-
- Args:
- configs: Dictionary of configs. See get_configs_from_pipeline_file().
-
- Returns:
- A fully populated pipeline_pb2.TrainEvalPipelineConfig.
- """
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- pipeline_config.model.CopyFrom(configs["model"])
- pipeline_config.train_config.CopyFrom(configs["train_config"])
- pipeline_config.train_input_reader.CopyFrom(configs["train_input_config"])
- pipeline_config.eval_config.CopyFrom(configs["eval_config"])
- pipeline_config.eval_input_reader.CopyFrom(configs["eval_input_config"])
- return pipeline_config
-
-
-def get_configs_from_multiple_files(model_config_path="",
- train_config_path="",
- train_input_config_path="",
- eval_config_path="",
- eval_input_config_path=""):
- """Reads training configuration from multiple config files.
-
- Args:
- model_config_path: Path to model_pb2.DetectionModel.
- train_config_path: Path to train_pb2.TrainConfig.
- train_input_config_path: Path to input_reader_pb2.InputReader.
- eval_config_path: Path to eval_pb2.EvalConfig.
- eval_input_config_path: Path to input_reader_pb2.InputReader.
-
- Returns:
- Dictionary of configuration objects. Keys are `model`, `train_config`,
- `train_input_config`, `eval_config`, `eval_input_config`. Key/Values are
- returned only for valid (non-empty) strings.
- """
- configs = {}
- if model_config_path:
- model_config = model_pb2.DetectionModel()
- with tf.gfile.GFile(model_config_path, "r") as f:
- text_format.Merge(f.read(), model_config)
- configs["model"] = model_config
-
- if train_config_path:
- train_config = train_pb2.TrainConfig()
- with tf.gfile.GFile(train_config_path, "r") as f:
- text_format.Merge(f.read(), train_config)
- configs["train_config"] = train_config
-
- if train_input_config_path:
- train_input_config = input_reader_pb2.InputReader()
- with tf.gfile.GFile(train_input_config_path, "r") as f:
- text_format.Merge(f.read(), train_input_config)
- configs["train_input_config"] = train_input_config
-
- if eval_config_path:
- eval_config = eval_pb2.EvalConfig()
- with tf.gfile.GFile(eval_config_path, "r") as f:
- text_format.Merge(f.read(), eval_config)
- configs["eval_config"] = eval_config
-
- if eval_input_config_path:
- eval_input_config = input_reader_pb2.InputReader()
- with tf.gfile.GFile(eval_input_config_path, "r") as f:
- text_format.Merge(f.read(), eval_input_config)
- configs["eval_input_config"] = eval_input_config
-
- return configs
-
-
-def get_number_of_classes(model_config):
- """Returns the number of classes for a detection model.
-
- Args:
- model_config: A model_pb2.DetectionModel.
-
- Returns:
- Number of classes.
-
- Raises:
- ValueError: If the model type is not recognized.
- """
- meta_architecture = model_config.WhichOneof("model")
- if meta_architecture == "faster_rcnn":
- return model_config.faster_rcnn.num_classes
- if meta_architecture == "ssd":
- return model_config.ssd.num_classes
-
- raise ValueError("Expected the model to be one of 'faster_rcnn' or 'ssd'.")
-
-
-def get_optimizer_type(train_config):
- """Returns the optimizer type for training.
-
- Args:
- train_config: A train_pb2.TrainConfig.
-
- Returns:
- The type of the optimizer
- """
- return train_config.optimizer.WhichOneof("optimizer")
-
-
-def get_learning_rate_type(optimizer_config):
- """Returns the learning rate type for training.
-
- Args:
- optimizer_config: An optimizer_pb2.Optimizer.
-
- Returns:
- The type of the learning rate.
- """
- return optimizer_config.learning_rate.WhichOneof("learning_rate")
-
-
-def merge_external_params_with_configs(configs, hparams=None, **kwargs):
- """Updates `configs` dictionary based on supplied parameters.
-
- This utility is for modifying specific fields in the object detection configs.
- Say that one would like to experiment with different learning rates, momentum
- values, or batch sizes. Rather than creating a new config text file for each
- experiment, one can use a single base config file, and update particular
- values.
-
- Args:
- configs: Dictionary of configuration objects. See outputs from
- get_configs_from_pipeline_file() or get_configs_from_multiple_files().
- hparams: A `HParams`.
- **kwargs: Extra keyword arguments that are treated the same way as
- attribute/value pairs in `hparams`. Note that hyperparameters with the
- same names will override keyword arguments.
-
- Returns:
- `configs` dictionary.
- """
-
- if hparams:
- kwargs.update(hparams.values())
- for key, value in kwargs.items():
- if key == "learning_rate":
- _update_initial_learning_rate(configs, value)
- tf.logging.info("Overwriting learning rate: %f", value)
- if key == "batch_size":
- _update_batch_size(configs, value)
- tf.logging.info("Overwriting batch size: %d", value)
- if key == "momentum_optimizer_value":
- _update_momentum_optimizer_value(configs, value)
- tf.logging.info("Overwriting momentum optimizer value: %f", value)
- if key == "classification_localization_weight_ratio":
- # Localization weight is fixed to 1.0.
- _update_classification_localization_weight_ratio(configs, value)
- if key == "focal_loss_gamma":
- _update_focal_loss_gamma(configs, value)
- if key == "focal_loss_alpha":
- _update_focal_loss_alpha(configs, value)
- if key == "train_steps":
- _update_train_steps(configs, value)
- tf.logging.info("Overwriting train steps: %d", value)
- if key == "eval_steps":
- _update_eval_steps(configs, value)
- tf.logging.info("Overwriting eval steps: %d", value)
- if key == "train_input_path":
- _update_input_path(configs["train_input_config"], value)
- tf.logging.info("Overwriting train input path: %s", value)
- if key == "eval_input_path":
- _update_input_path(configs["eval_input_config"], value)
- tf.logging.info("Overwriting eval input path: %s", value)
- if key == "label_map_path":
- if value:
- _update_label_map_path(configs, value)
- tf.logging.info("Overwriting label map path: %s", value)
- return configs
-
-
-def _update_initial_learning_rate(configs, learning_rate):
- """Updates `configs` to reflect the new initial learning rate.
-
- The configs dictionary is updated in place, and hence not returned.
-
- Args:
- configs: Dictionary of configuration objects. See outputs from
- get_configs_from_pipeline_file() or get_configs_from_multiple_files().
- learning_rate: Initial learning rate for optimizer.
-
- Raises:
- TypeError: if optimizer type is not supported, or if learning rate type is
- not supported.
- """
-
- optimizer_type = get_optimizer_type(configs["train_config"])
- if optimizer_type == "rms_prop_optimizer":
- optimizer_config = configs["train_config"].optimizer.rms_prop_optimizer
- elif optimizer_type == "momentum_optimizer":
- optimizer_config = configs["train_config"].optimizer.momentum_optimizer
- elif optimizer_type == "adam_optimizer":
- optimizer_config = configs["train_config"].optimizer.adam_optimizer
- else:
- raise TypeError("Optimizer %s is not supported." % optimizer_type)
-
- learning_rate_type = get_learning_rate_type(optimizer_config)
- if learning_rate_type == "constant_learning_rate":
- constant_lr = optimizer_config.learning_rate.constant_learning_rate
- constant_lr.learning_rate = learning_rate
- elif learning_rate_type == "exponential_decay_learning_rate":
- exponential_lr = (
- optimizer_config.learning_rate.exponential_decay_learning_rate)
- exponential_lr.initial_learning_rate = learning_rate
- elif learning_rate_type == "manual_step_learning_rate":
- manual_lr = optimizer_config.learning_rate.manual_step_learning_rate
- original_learning_rate = manual_lr.initial_learning_rate
- learning_rate_scaling = float(learning_rate) / original_learning_rate
- manual_lr.initial_learning_rate = learning_rate
- for schedule in manual_lr.schedule:
- schedule.learning_rate *= learning_rate_scaling
- else:
- raise TypeError("Learning rate %s is not supported." % learning_rate_type)
-
-
-def _update_batch_size(configs, batch_size):
- """Updates `configs` to reflect the new training batch size.
-
- The configs dictionary is updated in place, and hence not returned.
-
- Args:
- configs: Dictionary of configuration objects. See outputs from
- get_configs_from_pipeline_file() or get_configs_from_multiple_files().
- batch_size: Batch size to use for training (Ideally a power of 2). Inputs
- are rounded, and capped to be 1 or greater.
- """
- configs["train_config"].batch_size = max(1, int(round(batch_size)))
-
-
-def _update_momentum_optimizer_value(configs, momentum):
- """Updates `configs` to reflect the new momentum value.
-
- Momentum is only supported for RMSPropOptimizer and MomentumOptimizer. For any
- other optimizer, no changes take place. The configs dictionary is updated in
- place, and hence not returned.
-
- Args:
- configs: Dictionary of configuration objects. See outputs from
- get_configs_from_pipeline_file() or get_configs_from_multiple_files().
- momentum: New momentum value. Values are clipped at 0.0 and 1.0.
-
- Raises:
- TypeError: If the optimizer type is not `rms_prop_optimizer` or
- `momentum_optimizer`.
- """
- optimizer_type = get_optimizer_type(configs["train_config"])
- if optimizer_type == "rms_prop_optimizer":
- optimizer_config = configs["train_config"].optimizer.rms_prop_optimizer
- elif optimizer_type == "momentum_optimizer":
- optimizer_config = configs["train_config"].optimizer.momentum_optimizer
- else:
- raise TypeError("Optimizer type must be one of `rms_prop_optimizer` or "
- "`momentum_optimizer`.")
-
- optimizer_config.momentum_optimizer_value = min(max(0.0, momentum), 1.0)
-
-
-def _update_classification_localization_weight_ratio(configs, ratio):
- """Updates the classification/localization weight loss ratio.
-
- Detection models usually define a loss weight for both classification and
- objectness. This function updates the weights such that the ratio between
- classification weight to localization weight is the ratio provided.
- Arbitrarily, localization weight is set to 1.0.
-
- Note that in the case of Faster R-CNN, this same ratio is applied to the first
- stage objectness loss weight relative to localization loss weight.
-
- The configs dictionary is updated in place, and hence not returned.
-
- Args:
- configs: Dictionary of configuration objects. See outputs from
- get_configs_from_pipeline_file() or get_configs_from_multiple_files().
- ratio: Desired ratio of classification (and/or objectness) loss weight to
- localization loss weight.
- """
- meta_architecture = configs["model"].WhichOneof("model")
- if meta_architecture == "faster_rcnn":
- model = configs["model"].faster_rcnn
- model.first_stage_localization_loss_weight = 1.0
- model.first_stage_objectness_loss_weight = ratio
- model.second_stage_localization_loss_weight = 1.0
- model.second_stage_classification_loss_weight = ratio
- if meta_architecture == "ssd":
- model = configs["model"].ssd
- model.loss.localization_weight = 1.0
- model.loss.classification_weight = ratio
-
-
-def _get_classification_loss(model_config):
- """Returns the classification loss for a model."""
- meta_architecture = model_config.WhichOneof("model")
- if meta_architecture == "faster_rcnn":
- model = model_config.faster_rcnn
- classification_loss = model.second_stage_classification_loss
- if meta_architecture == "ssd":
- model = model_config.ssd
- classification_loss = model.loss.classification_loss
- else:
- raise TypeError("Did not recognize the model architecture.")
- return classification_loss
-
-
-def _update_focal_loss_gamma(configs, gamma):
- """Updates the gamma value for a sigmoid focal loss.
-
- The configs dictionary is updated in place, and hence not returned.
-
- Args:
- configs: Dictionary of configuration objects. See outputs from
- get_configs_from_pipeline_file() or get_configs_from_multiple_files().
- gamma: Exponent term in focal loss.
-
- Raises:
- TypeError: If the classification loss is not `weighted_sigmoid_focal`.
- """
- classification_loss = _get_classification_loss(configs["model"])
- classification_loss_type = classification_loss.WhichOneof(
- "classification_loss")
- if classification_loss_type != "weighted_sigmoid_focal":
- raise TypeError("Classification loss must be `weighted_sigmoid_focal`.")
- classification_loss.weighted_sigmoid_focal.gamma = gamma
-
-
-def _update_focal_loss_alpha(configs, alpha):
- """Updates the alpha value for a sigmoid focal loss.
-
- The configs dictionary is updated in place, and hence not returned.
-
- Args:
- configs: Dictionary of configuration objects. See outputs from
- get_configs_from_pipeline_file() or get_configs_from_multiple_files().
- alpha: Class weight multiplier for sigmoid loss.
-
- Raises:
- TypeError: If the classification loss is not `weighted_sigmoid_focal`.
- """
- classification_loss = _get_classification_loss(configs["model"])
- classification_loss_type = classification_loss.WhichOneof(
- "classification_loss")
- if classification_loss_type != "weighted_sigmoid_focal":
- raise TypeError("Classification loss must be `weighted_sigmoid_focal`.")
- classification_loss.weighted_sigmoid_focal.alpha = alpha
-
-
-def _update_train_steps(configs, train_steps):
- """Updates `configs` to reflect new number of training steps."""
- configs["train_config"].num_steps = int(train_steps)
-
-
-def _update_eval_steps(configs, eval_steps):
- """Updates `configs` to reflect new number of eval steps per evaluation."""
- configs["eval_config"].num_examples = int(eval_steps)
-
-
-def _update_input_path(input_config, input_path):
- """Updates input configuration to reflect a new input path.
-
- The input_config object is updated in place, and hence not returned.
-
- Args:
- input_config: A input_reader_pb2.InputReader.
- input_path: A path to data or list of paths.
-
- Raises:
- TypeError: if input reader type is not `tf_record_input_reader`.
- """
- input_reader_type = input_config.WhichOneof("input_reader")
- if input_reader_type == "tf_record_input_reader":
- input_config.tf_record_input_reader.ClearField("input_path")
- if isinstance(input_path, list):
- input_config.tf_record_input_reader.input_path.extend(input_path)
- else:
- input_config.tf_record_input_reader.input_path.append(input_path)
- else:
- raise TypeError("Input reader type must be `tf_record_input_reader`.")
-
-
-def _update_label_map_path(configs, label_map_path):
- """Updates the label map path for both train and eval input readers.
-
- The configs dictionary is updated in place, and hence not returned.
-
- Args:
- configs: Dictionary of configuration objects. See outputs from
- get_configs_from_pipeline_file() or get_configs_from_multiple_files().
- label_map_path: New path to `StringIntLabelMap` pbtxt file.
- """
- configs["train_input_config"].label_map_path = label_map_path
- configs["eval_input_config"].label_map_path = label_map_path
diff --git a/object_detection/utils/config_util_test.py b/object_detection/utils/config_util_test.py
deleted file mode 100644
index 075509e8..00000000
--- a/object_detection/utils/config_util_test.py
+++ /dev/null
@@ -1,401 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for object_detection.utils.config_util."""
-
-import os
-
-import google3
-import tensorflow.google as tf
-
-from google.protobuf import text_format
-
-from object_detection.protos import eval_pb2
-from object_detection.protos import input_reader_pb2
-from object_detection.protos import model_pb2
-from object_detection.protos import pipeline_pb2
-from object_detection.protos import train_pb2
-from object_detection.utils import config_util
-
-
-def _write_config(config, config_path):
- """Writes a config object to disk."""
- config_text = text_format.MessageToString(config)
- with tf.gfile.Open(config_path, "wb") as f:
- f.write(config_text)
-
-
-def _update_optimizer_with_constant_learning_rate(optimizer, learning_rate):
- """Adds a new constant learning rate."""
- constant_lr = optimizer.learning_rate.constant_learning_rate
- constant_lr.learning_rate = learning_rate
-
-
-def _update_optimizer_with_exponential_decay_learning_rate(
- optimizer, learning_rate):
- """Adds a new exponential decay learning rate."""
- exponential_lr = optimizer.learning_rate.exponential_decay_learning_rate
- exponential_lr.initial_learning_rate = learning_rate
-
-
-def _update_optimizer_with_manual_step_learning_rate(
- optimizer, initial_learning_rate, learning_rate_scaling):
- """Adds a learning rate schedule."""
- manual_lr = optimizer.learning_rate.manual_step_learning_rate
- manual_lr.initial_learning_rate = initial_learning_rate
- for i in range(3):
- schedule = manual_lr.schedule.add()
- schedule.learning_rate = initial_learning_rate * learning_rate_scaling**i
-
-
-class ConfigUtilTest(tf.test.TestCase):
-
- def test_get_configs_from_pipeline_file(self):
- """Test that proto configs can be read from pipeline config file."""
- pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
-
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- pipeline_config.model.faster_rcnn.num_classes = 10
- pipeline_config.train_config.batch_size = 32
- pipeline_config.train_input_reader.label_map_path = "path/to/label_map"
- pipeline_config.eval_config.num_examples = 20
- pipeline_config.eval_input_reader.queue_capacity = 100
-
- _write_config(pipeline_config, pipeline_config_path)
-
- configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
- self.assertProtoEquals(pipeline_config.model, configs["model"])
- self.assertProtoEquals(pipeline_config.train_config,
- configs["train_config"])
- self.assertProtoEquals(pipeline_config.train_input_reader,
- configs["train_input_config"])
- self.assertProtoEquals(pipeline_config.eval_config,
- configs["eval_config"])
- self.assertProtoEquals(pipeline_config.eval_input_reader,
- configs["eval_input_config"])
-
- def test_create_pipeline_proto_from_configs(self):
- """Tests that proto can be reconstructed from configs dictionary."""
- pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
-
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- pipeline_config.model.faster_rcnn.num_classes = 10
- pipeline_config.train_config.batch_size = 32
- pipeline_config.train_input_reader.label_map_path = "path/to/label_map"
- pipeline_config.eval_config.num_examples = 20
- pipeline_config.eval_input_reader.queue_capacity = 100
- _write_config(pipeline_config, pipeline_config_path)
-
- configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
- pipeline_config_reconstructed = (
- config_util.create_pipeline_proto_from_configs(configs))
- self.assertEqual(pipeline_config, pipeline_config_reconstructed)
-
- def test_get_configs_from_multiple_files(self):
- """Tests that proto configs can be read from multiple files."""
- temp_dir = self.get_temp_dir()
-
- # Write model config file.
- model_config_path = os.path.join(temp_dir, "model.config")
- model = model_pb2.DetectionModel()
- model.faster_rcnn.num_classes = 10
- _write_config(model, model_config_path)
-
- # Write train config file.
- train_config_path = os.path.join(temp_dir, "train.config")
- train_config = train_config = train_pb2.TrainConfig()
- train_config.batch_size = 32
- _write_config(train_config, train_config_path)
-
- # Write train input config file.
- train_input_config_path = os.path.join(temp_dir, "train_input.config")
- train_input_config = input_reader_pb2.InputReader()
- train_input_config.label_map_path = "path/to/label_map"
- _write_config(train_input_config, train_input_config_path)
-
- # Write eval config file.
- eval_config_path = os.path.join(temp_dir, "eval.config")
- eval_config = eval_pb2.EvalConfig()
- eval_config.num_examples = 20
- _write_config(eval_config, eval_config_path)
-
- # Write eval input config file.
- eval_input_config_path = os.path.join(temp_dir, "eval_input.config")
- eval_input_config = input_reader_pb2.InputReader()
- eval_input_config.label_map_path = "path/to/another/label_map"
- _write_config(eval_input_config, eval_input_config_path)
-
- configs = config_util.get_configs_from_multiple_files(
- model_config_path=model_config_path,
- train_config_path=train_config_path,
- train_input_config_path=train_input_config_path,
- eval_config_path=eval_config_path,
- eval_input_config_path=eval_input_config_path)
- self.assertProtoEquals(model, configs["model"])
- self.assertProtoEquals(train_config, configs["train_config"])
- self.assertProtoEquals(train_input_config,
- configs["train_input_config"])
- self.assertProtoEquals(eval_config, configs["eval_config"])
- self.assertProtoEquals(eval_input_config,
- configs["eval_input_config"])
-
- def _assertOptimizerWithNewLearningRate(self, optimizer_name):
- """Asserts successful updating of all learning rate schemes."""
- original_learning_rate = 0.7
- learning_rate_scaling = 0.1
- hparams = tf.HParams(learning_rate=0.15)
- pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
-
- # Constant learning rate.
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- optimizer = getattr(pipeline_config.train_config.optimizer, optimizer_name)
- _update_optimizer_with_constant_learning_rate(optimizer,
- original_learning_rate)
- _write_config(pipeline_config, pipeline_config_path)
-
- configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
- configs = config_util.merge_external_params_with_configs(configs, hparams)
- optimizer = getattr(configs["train_config"].optimizer, optimizer_name)
- constant_lr = optimizer.learning_rate.constant_learning_rate
- self.assertAlmostEqual(hparams.learning_rate, constant_lr.learning_rate)
-
- # Exponential decay learning rate.
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- optimizer = getattr(pipeline_config.train_config.optimizer, optimizer_name)
- _update_optimizer_with_exponential_decay_learning_rate(
- optimizer, original_learning_rate)
- _write_config(pipeline_config, pipeline_config_path)
-
- configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
- configs = config_util.merge_external_params_with_configs(configs, hparams)
- optimizer = getattr(configs["train_config"].optimizer, optimizer_name)
- exponential_lr = optimizer.learning_rate.exponential_decay_learning_rate
- self.assertAlmostEqual(hparams.learning_rate,
- exponential_lr.initial_learning_rate)
-
- # Manual step learning rate.
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- optimizer = getattr(pipeline_config.train_config.optimizer, optimizer_name)
- _update_optimizer_with_manual_step_learning_rate(
- optimizer, original_learning_rate, learning_rate_scaling)
- _write_config(pipeline_config, pipeline_config_path)
-
- configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
- configs = config_util.merge_external_params_with_configs(configs, hparams)
- optimizer = getattr(configs["train_config"].optimizer, optimizer_name)
- manual_lr = optimizer.learning_rate.manual_step_learning_rate
- self.assertAlmostEqual(hparams.learning_rate,
- manual_lr.initial_learning_rate)
- for i, schedule in enumerate(manual_lr.schedule):
- self.assertAlmostEqual(hparams.learning_rate * learning_rate_scaling**i,
- schedule.learning_rate)
-
- def testRMSPropWithNewLearingRate(self):
- """Tests new learning rates for RMSProp Optimizer."""
- self._assertOptimizerWithNewLearningRate("rms_prop_optimizer")
-
- def testMomentumOptimizerWithNewLearningRate(self):
- """Tests new learning rates for Momentum Optimizer."""
- self._assertOptimizerWithNewLearningRate("momentum_optimizer")
-
- def testAdamOptimizerWithNewLearningRate(self):
- """Tests new learning rates for Adam Optimizer."""
- self._assertOptimizerWithNewLearningRate("adam_optimizer")
-
- def testNewBatchSize(self):
- """Tests that batch size is updated appropriately."""
- original_batch_size = 2
- hparams = tf.HParams(batch_size=16)
- pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
-
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- pipeline_config.train_config.batch_size = original_batch_size
- _write_config(pipeline_config, pipeline_config_path)
-
- configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
- configs = config_util.merge_external_params_with_configs(configs, hparams)
- new_batch_size = configs["train_config"].batch_size
- self.assertEqual(16, new_batch_size)
-
- def testNewBatchSizeWithClipping(self):
- """Tests that batch size is clipped to 1 from below."""
- original_batch_size = 2
- hparams = tf.HParams(batch_size=0.5)
- pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
-
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- pipeline_config.train_config.batch_size = original_batch_size
- _write_config(pipeline_config, pipeline_config_path)
-
- configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
- configs = config_util.merge_external_params_with_configs(configs, hparams)
- new_batch_size = configs["train_config"].batch_size
- self.assertEqual(1, new_batch_size) # Clipped to 1.0.
-
- def testNewMomentumOptimizerValue(self):
- """Tests that new momentum value is updated appropriately."""
- original_momentum_value = 0.4
- hparams = tf.HParams(momentum_optimizer_value=1.1)
- pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
-
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- optimizer_config = pipeline_config.train_config.optimizer.rms_prop_optimizer
- optimizer_config.momentum_optimizer_value = original_momentum_value
- _write_config(pipeline_config, pipeline_config_path)
-
- configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
- configs = config_util.merge_external_params_with_configs(configs, hparams)
- optimizer_config = configs["train_config"].optimizer.rms_prop_optimizer
- new_momentum_value = optimizer_config.momentum_optimizer_value
- self.assertAlmostEqual(1.0, new_momentum_value) # Clipped to 1.0.
-
- def testNewClassificationLocalizationWeightRatio(self):
- """Tests that the loss weight ratio is updated appropriately."""
- original_localization_weight = 0.1
- original_classification_weight = 0.2
- new_weight_ratio = 5.0
- hparams = tf.HParams(
- classification_localization_weight_ratio=new_weight_ratio)
- pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
-
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- pipeline_config.model.ssd.loss.localization_weight = (
- original_localization_weight)
- pipeline_config.model.ssd.loss.classification_weight = (
- original_classification_weight)
- _write_config(pipeline_config, pipeline_config_path)
-
- configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
- configs = config_util.merge_external_params_with_configs(configs, hparams)
- loss = configs["model"].ssd.loss
- self.assertAlmostEqual(1.0, loss.localization_weight)
- self.assertAlmostEqual(new_weight_ratio, loss.classification_weight)
-
- def testNewFocalLossParameters(self):
- """Tests that the loss weight ratio is updated appropriately."""
- original_alpha = 1.0
- original_gamma = 1.0
- new_alpha = 0.3
- new_gamma = 2.0
- hparams = tf.HParams(focal_loss_alpha=new_alpha, focal_loss_gamma=new_gamma)
- pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
-
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- classification_loss = pipeline_config.model.ssd.loss.classification_loss
- classification_loss.weighted_sigmoid_focal.alpha = original_alpha
- classification_loss.weighted_sigmoid_focal.gamma = original_gamma
- _write_config(pipeline_config, pipeline_config_path)
-
- configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
- configs = config_util.merge_external_params_with_configs(configs, hparams)
- classification_loss = configs["model"].ssd.loss.classification_loss
- self.assertAlmostEqual(new_alpha,
- classification_loss.weighted_sigmoid_focal.alpha)
- self.assertAlmostEqual(new_gamma,
- classification_loss.weighted_sigmoid_focal.gamma)
-
- def testMergingKeywordArguments(self):
- """Tests that keyword arguments get merged as do hyperparameters."""
- original_num_train_steps = 100
- original_num_eval_steps = 5
- desired_num_train_steps = 10
- desired_num_eval_steps = 1
- pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
-
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- pipeline_config.train_config.num_steps = original_num_train_steps
- pipeline_config.eval_config.num_examples = original_num_eval_steps
- _write_config(pipeline_config, pipeline_config_path)
-
- configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
- configs = config_util.merge_external_params_with_configs(
- configs,
- train_steps=desired_num_train_steps,
- eval_steps=desired_num_eval_steps)
- train_steps = configs["train_config"].num_steps
- eval_steps = configs["eval_config"].num_examples
- self.assertEqual(desired_num_train_steps, train_steps)
- self.assertEqual(desired_num_eval_steps, eval_steps)
-
- def testGetNumberOfClasses(self):
- """Tests that number of classes can be retrieved."""
- pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- pipeline_config.model.faster_rcnn.num_classes = 20
- _write_config(pipeline_config, pipeline_config_path)
-
- configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
- number_of_classes = config_util.get_number_of_classes(configs["model"])
- self.assertEqual(20, number_of_classes)
-
- def testNewTrainInputPath(self):
- """Tests that train input path can be overwritten with single file."""
- original_train_path = ["path/to/data"]
- new_train_path = "another/path/to/data"
- pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
-
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- reader_config = pipeline_config.train_input_reader.tf_record_input_reader
- reader_config.input_path.extend(original_train_path)
- _write_config(pipeline_config, pipeline_config_path)
-
- configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
- configs = config_util.merge_external_params_with_configs(
- configs, train_input_path=new_train_path)
- reader_config = configs["train_input_config"].tf_record_input_reader
- final_path = reader_config.input_path
- self.assertEqual([new_train_path], final_path)
-
- def testNewTrainInputPathList(self):
- """Tests that train input path can be overwritten with multiple files."""
- original_train_path = ["path/to/data"]
- new_train_path = ["another/path/to/data", "yet/another/path/to/data"]
- pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
-
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- reader_config = pipeline_config.train_input_reader.tf_record_input_reader
- reader_config.input_path.extend(original_train_path)
- _write_config(pipeline_config, pipeline_config_path)
-
- configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
- configs = config_util.merge_external_params_with_configs(
- configs, train_input_path=new_train_path)
- reader_config = configs["train_input_config"].tf_record_input_reader
- final_path = reader_config.input_path
- self.assertEqual(new_train_path, final_path)
-
- def testNewLabelMapPath(self):
- """Tests that label map path can be overwritten in input readers."""
- original_label_map_path = "path/to/original/label_map"
- new_label_map_path = "path//to/new/label_map"
- pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
-
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- train_input_reader = pipeline_config.train_input_reader
- train_input_reader.label_map_path = original_label_map_path
- eval_input_reader = pipeline_config.eval_input_reader
- eval_input_reader.label_map_path = original_label_map_path
- _write_config(pipeline_config, pipeline_config_path)
-
- configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
- configs = config_util.merge_external_params_with_configs(
- configs, label_map_path=new_label_map_path)
- self.assertEqual(new_label_map_path,
- configs["train_input_config"].label_map_path)
- self.assertEqual(new_label_map_path,
- configs["eval_input_config"].label_map_path)
-
-
-if __name__ == "__main__":
- tf.test.main()
diff --git a/object_detection/utils/dataset_util.py b/object_detection/utils/dataset_util.py
deleted file mode 100644
index 014a9118..00000000
--- a/object_detection/utils/dataset_util.py
+++ /dev/null
@@ -1,86 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Utility functions for creating TFRecord data sets."""
-
-import tensorflow as tf
-
-
-def int64_feature(value):
- return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
-
-
-def int64_list_feature(value):
- return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
-
-
-def bytes_feature(value):
- return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
-
-
-def bytes_list_feature(value):
- return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
-
-
-def float_list_feature(value):
- return tf.train.Feature(float_list=tf.train.FloatList(value=value))
-
-
-def read_examples_list(path):
- """Read list of training or validation examples.
-
- The file is assumed to contain a single example per line where the first
- token in the line is an identifier that allows us to find the image and
- annotation xml for that example.
-
- For example, the line:
- xyz 3
- would allow us to find files xyz.jpg and xyz.xml (the 3 would be ignored).
-
- Args:
- path: absolute path to examples list file.
-
- Returns:
- list of example identifiers (strings).
- """
- with tf.gfile.GFile(path) as fid:
- lines = fid.readlines()
- return [line.strip().split(' ')[0] for line in lines]
-
-
-def recursive_parse_xml_to_dict(xml):
- """Recursively parses XML contents to python dict.
-
- We assume that `object` tags are the only ones that can appear
- multiple times at the same level of a tree.
-
- Args:
- xml: xml tree obtained by parsing XML file contents using lxml.etree
-
- Returns:
- Python dictionary holding XML contents.
- """
- if not xml:
- return {xml.tag: xml.text}
- result = {}
- for child in xml:
- child_result = recursive_parse_xml_to_dict(child)
- if child.tag != 'object':
- result[child.tag] = child_result[child.tag]
- else:
- if child.tag not in result:
- result[child.tag] = []
- result[child.tag].append(child_result[child.tag])
- return {xml.tag: result}
diff --git a/object_detection/utils/dataset_util_test.py b/object_detection/utils/dataset_util_test.py
deleted file mode 100644
index 99cfb2cd..00000000
--- a/object_detection/utils/dataset_util_test.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.utils.dataset_util."""
-
-import os
-import tensorflow as tf
-
-from object_detection.utils import dataset_util
-
-
-class DatasetUtilTest(tf.test.TestCase):
-
- def test_read_examples_list(self):
- example_list_data = """example1 1\nexample2 2"""
- example_list_path = os.path.join(self.get_temp_dir(), 'examples.txt')
- with tf.gfile.Open(example_list_path, 'wb') as f:
- f.write(example_list_data)
-
- examples = dataset_util.read_examples_list(example_list_path)
- self.assertListEqual(['example1', 'example2'], examples)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/utils/label_map_util.py b/object_detection/utils/label_map_util.py
deleted file mode 100644
index bf7bae63..00000000
--- a/object_detection/utils/label_map_util.py
+++ /dev/null
@@ -1,166 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Label map utility functions."""
-
-import logging
-
-import tensorflow as tf
-from google.protobuf import text_format
-from object_detection.protos import string_int_label_map_pb2
-
-
-def _validate_label_map(label_map):
- """Checks if a label map is valid.
-
- Args:
- label_map: StringIntLabelMap to validate.
-
- Raises:
- ValueError: if label map is invalid.
- """
- for item in label_map.item:
- if item.id < 1:
- raise ValueError('Label map ids should be >= 1.')
-
-
-def create_category_index(categories):
- """Creates dictionary of COCO compatible categories keyed by category id.
-
- Args:
- categories: a list of dicts, each of which has the following keys:
- 'id': (required) an integer id uniquely identifying this category.
- 'name': (required) string representing category name
- e.g., 'cat', 'dog', 'pizza'.
-
- Returns:
- category_index: a dict containing the same entries as categories, but keyed
- by the 'id' field of each category.
- """
- category_index = {}
- for cat in categories:
- category_index[cat['id']] = cat
- return category_index
-
-
-def convert_label_map_to_categories(label_map,
- max_num_classes,
- use_display_name=True):
- """Loads label map proto and returns categories list compatible with eval.
-
- This function loads a label map and returns a list of dicts, each of which
- has the following keys:
- 'id': (required) an integer id uniquely identifying this category.
- 'name': (required) string representing category name
- e.g., 'cat', 'dog', 'pizza'.
- We only allow class into the list if its id-label_id_offset is
- between 0 (inclusive) and max_num_classes (exclusive).
- If there are several items mapping to the same id in the label map,
- we will only keep the first one in the categories list.
-
- Args:
- label_map: a StringIntLabelMapProto or None. If None, a default categories
- list is created with max_num_classes categories.
- max_num_classes: maximum number of (consecutive) label indices to include.
- use_display_name: (boolean) choose whether to load 'display_name' field
- as category name. If False or if the display_name field does not exist,
- uses 'name' field as category names instead.
- Returns:
- categories: a list of dictionaries representing all possible categories.
- """
- categories = []
- list_of_ids_already_added = []
- if not label_map:
- label_id_offset = 1
- for class_id in range(max_num_classes):
- categories.append({
- 'id': class_id + label_id_offset,
- 'name': 'category_{}'.format(class_id + label_id_offset)
- })
- return categories
- for item in label_map.item:
- if not 0 < item.id <= max_num_classes:
- logging.info('Ignore item %d since it falls outside of requested '
- 'label range.', item.id)
- continue
- if use_display_name and item.HasField('display_name'):
- name = item.display_name
- else:
- name = item.name
- if item.id not in list_of_ids_already_added:
- list_of_ids_already_added.append(item.id)
- categories.append({'id': item.id, 'name': name})
- return categories
-
-
-def load_labelmap(path):
- """Loads label map proto.
-
- Args:
- path: path to StringIntLabelMap proto text file.
- Returns:
- a StringIntLabelMapProto
- """
- with tf.gfile.GFile(path, 'r') as fid:
- label_map_string = fid.read()
- label_map = string_int_label_map_pb2.StringIntLabelMap()
- try:
- text_format.Merge(label_map_string, label_map)
- except text_format.ParseError:
- label_map.ParseFromString(label_map_string)
- _validate_label_map(label_map)
- return label_map
-
-
-def get_label_map_dict(label_map_path, use_display_name=False):
- """Reads a label map and returns a dictionary of label names to id.
-
- Args:
- label_map_path: path to label_map.
- use_display_name: whether to use the label map items' display names as keys.
-
- Returns:
- A dictionary mapping label names to id.
- """
- label_map = load_labelmap(label_map_path)
- label_map_dict = {}
- for item in label_map.item:
- if use_display_name:
- label_map_dict[item.display_name] = item.id
- else:
- label_map_dict[item.name] = item.id
- return label_map_dict
-
-
-def create_category_index_from_labelmap(label_map_path):
- """Reads a label map and returns a category index.
-
- Args:
- label_map_path: Path to `StringIntLabelMap` proto text file.
-
- Returns:
- A category index, which is a dictionary that maps integer ids to dicts
- containing categories, e.g.
- {1: {'id': 1, 'name': 'dog'}, 2: {'id': 2, 'name': 'cat'}, ...}
- """
- label_map = load_labelmap(label_map_path)
- max_num_classes = max(item.id for item in label_map.item)
- categories = convert_label_map_to_categories(label_map, max_num_classes)
- return create_category_index(categories)
-
-
-def create_class_agnostic_category_index():
- """Creates a category index with a single `object` class."""
- return {1: {'id': 1, 'name': 'object'}}
diff --git a/object_detection/utils/label_map_util_test.py b/object_detection/utils/label_map_util_test.py
deleted file mode 100644
index 8671754c..00000000
--- a/object_detection/utils/label_map_util_test.py
+++ /dev/null
@@ -1,217 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.utils.label_map_util."""
-
-import os
-import tensorflow as tf
-
-from google.protobuf import text_format
-from object_detection.protos import string_int_label_map_pb2
-from object_detection.utils import label_map_util
-
-
-class LabelMapUtilTest(tf.test.TestCase):
-
- def _generate_label_map(self, num_classes):
- label_map_proto = string_int_label_map_pb2.StringIntLabelMap()
- for i in range(1, num_classes + 1):
- item = label_map_proto.item.add()
- item.id = i
- item.name = 'label_' + str(i)
- item.display_name = str(i)
- return label_map_proto
-
- def test_get_label_map_dict(self):
- label_map_string = """
- item {
- id:2
- name:'cat'
- }
- item {
- id:1
- name:'dog'
- }
- """
- label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
- with tf.gfile.Open(label_map_path, 'wb') as f:
- f.write(label_map_string)
-
- label_map_dict = label_map_util.get_label_map_dict(label_map_path)
- self.assertEqual(label_map_dict['dog'], 1)
- self.assertEqual(label_map_dict['cat'], 2)
-
- def test_get_label_map_dict_display(self):
- label_map_string = """
- item {
- id:2
- display_name:'cat'
- }
- item {
- id:1
- display_name:'dog'
- }
- """
- label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
- with tf.gfile.Open(label_map_path, 'wb') as f:
- f.write(label_map_string)
-
- label_map_dict = label_map_util.get_label_map_dict(
- label_map_path, use_display_name=True)
- self.assertEqual(label_map_dict['dog'], 1)
- self.assertEqual(label_map_dict['cat'], 2)
-
- def test_load_bad_label_map(self):
- label_map_string = """
- item {
- id:0
- name:'class that should not be indexed at zero'
- }
- item {
- id:2
- name:'cat'
- }
- item {
- id:1
- name:'dog'
- }
- """
- label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
- with tf.gfile.Open(label_map_path, 'wb') as f:
- f.write(label_map_string)
-
- with self.assertRaises(ValueError):
- label_map_util.load_labelmap(label_map_path)
-
- def test_keep_categories_with_unique_id(self):
- label_map_proto = string_int_label_map_pb2.StringIntLabelMap()
- label_map_string = """
- item {
- id:2
- name:'cat'
- }
- item {
- id:1
- name:'child'
- }
- item {
- id:1
- name:'person'
- }
- item {
- id:1
- name:'n00007846'
- }
- """
- text_format.Merge(label_map_string, label_map_proto)
- categories = label_map_util.convert_label_map_to_categories(
- label_map_proto, max_num_classes=3)
- self.assertListEqual([{
- 'id': 2,
- 'name': u'cat'
- }, {
- 'id': 1,
- 'name': u'child'
- }], categories)
-
- def test_convert_label_map_to_categories_no_label_map(self):
- categories = label_map_util.convert_label_map_to_categories(
- None, max_num_classes=3)
- expected_categories_list = [{
- 'name': u'category_1',
- 'id': 1
- }, {
- 'name': u'category_2',
- 'id': 2
- }, {
- 'name': u'category_3',
- 'id': 3
- }]
- self.assertListEqual(expected_categories_list, categories)
-
- def test_convert_label_map_to_coco_categories(self):
- label_map_proto = self._generate_label_map(num_classes=4)
- categories = label_map_util.convert_label_map_to_categories(
- label_map_proto, max_num_classes=3)
- expected_categories_list = [{
- 'name': u'1',
- 'id': 1
- }, {
- 'name': u'2',
- 'id': 2
- }, {
- 'name': u'3',
- 'id': 3
- }]
- self.assertListEqual(expected_categories_list, categories)
-
- def test_convert_label_map_to_coco_categories_with_few_classes(self):
- label_map_proto = self._generate_label_map(num_classes=4)
- cat_no_offset = label_map_util.convert_label_map_to_categories(
- label_map_proto, max_num_classes=2)
- expected_categories_list = [{
- 'name': u'1',
- 'id': 1
- }, {
- 'name': u'2',
- 'id': 2
- }]
- self.assertListEqual(expected_categories_list, cat_no_offset)
-
- def test_create_category_index(self):
- categories = [{'name': u'1', 'id': 1}, {'name': u'2', 'id': 2}]
- category_index = label_map_util.create_category_index(categories)
- self.assertDictEqual({
- 1: {
- 'name': u'1',
- 'id': 1
- },
- 2: {
- 'name': u'2',
- 'id': 2
- }
- }, category_index)
-
- def test_create_category_index_from_labelmap(self):
- label_map_string = """
- item {
- id:2
- name:'cat'
- }
- item {
- id:1
- name:'dog'
- }
- """
- label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
- with tf.gfile.Open(label_map_path, 'wb') as f:
- f.write(label_map_string)
-
- category_index = label_map_util.create_category_index_from_labelmap(
- label_map_path)
- self.assertDictEqual({
- 1: {
- 'name': u'dog',
- 'id': 1
- },
- 2: {
- 'name': u'cat',
- 'id': 2
- }
- }, category_index)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/utils/learning_schedules.py b/object_detection/utils/learning_schedules.py
deleted file mode 100644
index 14583527..00000000
--- a/object_detection/utils/learning_schedules.py
+++ /dev/null
@@ -1,151 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Library of common learning rate schedules."""
-
-import numpy as np
-import tensorflow as tf
-
-
-def exponential_decay_with_burnin(global_step,
- learning_rate_base,
- learning_rate_decay_steps,
- learning_rate_decay_factor,
- burnin_learning_rate=0.0,
- burnin_steps=0):
- """Exponential decay schedule with burn-in period.
-
- In this schedule, learning rate is fixed at burnin_learning_rate
- for a fixed period, before transitioning to a regular exponential
- decay schedule.
-
- Args:
- global_step: int tensor representing global step.
- learning_rate_base: base learning rate.
- learning_rate_decay_steps: steps to take between decaying the learning rate.
- Note that this includes the number of burn-in steps.
- learning_rate_decay_factor: multiplicative factor by which to decay
- learning rate.
- burnin_learning_rate: initial learning rate during burn-in period. If
- 0.0 (which is the default), then the burn-in learning rate is simply
- set to learning_rate_base.
- burnin_steps: number of steps to use burnin learning rate.
-
- Returns:
- a (scalar) float tensor representing learning rate
- """
- if burnin_learning_rate == 0:
- burnin_learning_rate = learning_rate_base
- post_burnin_learning_rate = tf.train.exponential_decay(
- learning_rate_base,
- global_step,
- learning_rate_decay_steps,
- learning_rate_decay_factor,
- staircase=True)
- return tf.cond(
- tf.less(global_step, burnin_steps),
- lambda: tf.convert_to_tensor(burnin_learning_rate),
- lambda: post_burnin_learning_rate)
-
-
-def cosine_decay_with_warmup(global_step,
- learning_rate_base,
- total_steps,
- warmup_learning_rate=0.0,
- warmup_steps=0):
- """Cosine decay schedule with warm up period.
-
- Cosine annealing learning rate as described in:
- Loshchilov and Hutter, SGDR: Stochastic Gradient Descent with Warm Restarts.
- ICLR 2017. https://arxiv.org/abs/1608.03983
- In this schedule, the learning rate grows linearly from warmup_learning_rate
- to learning_rate_base for warmup_steps, then transitions to a cosine decay
- schedule.
-
- Args:
- global_step: int64 (scalar) tensor representing global step.
- learning_rate_base: base learning rate.
- total_steps: total number of training steps.
- warmup_learning_rate: initial learning rate for warm up.
- warmup_steps: number of warmup steps.
-
- Returns:
- a (scalar) float tensor representing learning rate.
-
- Raises:
- ValueError: if warmup_learning_rate is larger than learning_rate_base,
- or if warmup_steps is larger than total_steps.
- """
- if learning_rate_base < warmup_learning_rate:
- raise ValueError('learning_rate_base must be larger '
- 'or equal to warmup_learning_rate.')
- if total_steps < warmup_steps:
- raise ValueError('total_steps must be larger or equal to '
- 'warmup_steps.')
- learning_rate = 0.5 * learning_rate_base * (
- 1 + tf.cos(np.pi * tf.cast(
- global_step - warmup_steps, tf.float32
- ) / float(total_steps - warmup_steps)))
- if warmup_steps > 0:
- slope = (learning_rate_base - warmup_learning_rate) / warmup_steps
- pre_cosine_learning_rate = slope * tf.cast(
- global_step, tf.float32) + warmup_learning_rate
- learning_rate = tf.cond(
- tf.less(global_step, warmup_steps), lambda: pre_cosine_learning_rate,
- lambda: learning_rate)
- return learning_rate
-
-
-def manual_stepping(global_step, boundaries, rates):
- """Manually stepped learning rate schedule.
-
- This function provides fine grained control over learning rates. One must
- specify a sequence of learning rates as well as a set of integer steps
- at which the current learning rate must transition to the next. For example,
- if boundaries = [5, 10] and rates = [.1, .01, .001], then the learning
- rate returned by this function is .1 for global_step=0,...,4, .01 for
- global_step=5...9, and .001 for global_step=10 and onward.
-
- Args:
- global_step: int64 (scalar) tensor representing global step.
- boundaries: a list of global steps at which to switch learning
- rates. This list is assumed to consist of increasing positive integers.
- rates: a list of (float) learning rates corresponding to intervals between
- the boundaries. The length of this list must be exactly
- len(boundaries) + 1.
-
- Returns:
- a (scalar) float tensor representing learning rate
- Raises:
- ValueError: if one of the following checks fails:
- 1. boundaries is a strictly increasing list of positive integers
- 2. len(rates) == len(boundaries) + 1
- """
- if any([b < 0 for b in boundaries]) or any(
- [not isinstance(b, int) for b in boundaries]):
- raise ValueError('boundaries must be a list of positive integers')
- if any([bnext <= b for bnext, b in zip(boundaries[1:], boundaries[:-1])]):
- raise ValueError('Entries in boundaries must be strictly increasing.')
- if any([not isinstance(r, float) for r in rates]):
- raise ValueError('Learning rates must be floats')
- if len(rates) != len(boundaries) + 1:
- raise ValueError('Number of provided learning rates must exceed '
- 'number of boundary points by exactly 1.')
- step_boundaries = tf.constant(boundaries, tf.int64)
- learning_rates = tf.constant(rates, tf.float32)
- unreached_boundaries = tf.reshape(
- tf.where(tf.greater(step_boundaries, global_step)), [-1])
- unreached_boundaries = tf.concat([unreached_boundaries, [len(boundaries)]], 0)
- index = tf.reshape(tf.reduce_min(unreached_boundaries), [1])
- return tf.reshape(tf.slice(learning_rates, index, [1]), [])
diff --git a/object_detection/utils/learning_schedules_test.py b/object_detection/utils/learning_schedules_test.py
deleted file mode 100644
index 8fdc8915..00000000
--- a/object_detection/utils/learning_schedules_test.py
+++ /dev/null
@@ -1,78 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.utils.learning_schedules."""
-import tensorflow as tf
-
-from object_detection.utils import learning_schedules
-
-
-class LearningSchedulesTest(tf.test.TestCase):
-
- def testExponentialDecayWithBurnin(self):
- global_step = tf.placeholder(tf.int32, [])
- learning_rate_base = 1.0
- learning_rate_decay_steps = 3
- learning_rate_decay_factor = .1
- burnin_learning_rate = .5
- burnin_steps = 2
- exp_rates = [.5, .5, 1, .1, .1, .1, .01, .01]
- learning_rate = learning_schedules.exponential_decay_with_burnin(
- global_step, learning_rate_base, learning_rate_decay_steps,
- learning_rate_decay_factor, burnin_learning_rate, burnin_steps)
- with self.test_session() as sess:
- output_rates = []
- for input_global_step in range(8):
- output_rate = sess.run(learning_rate,
- feed_dict={global_step: input_global_step})
- output_rates.append(output_rate)
- self.assertAllClose(output_rates, exp_rates)
-
- def testCosineDecayWithWarmup(self):
- global_step = tf.placeholder(tf.int32, [])
- learning_rate_base = 1.0
- total_steps = 100
- warmup_learning_rate = 0.1
- warmup_steps = 9
- input_global_steps = [0, 4, 8, 9, 100]
- exp_rates = [0.1, 0.5, 0.9, 1.0, 0]
- learning_rate = learning_schedules.cosine_decay_with_warmup(
- global_step, learning_rate_base, total_steps,
- warmup_learning_rate, warmup_steps)
- with self.test_session() as sess:
- output_rates = []
- for input_global_step in input_global_steps:
- output_rate = sess.run(learning_rate,
- feed_dict={global_step: input_global_step})
- output_rates.append(output_rate)
- self.assertAllClose(output_rates, exp_rates)
-
- def testManualStepping(self):
- global_step = tf.placeholder(tf.int64, [])
- boundaries = [2, 3, 7]
- rates = [1.0, 2.0, 3.0, 4.0]
- exp_rates = [1.0, 1.0, 2.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0]
- learning_rate = learning_schedules.manual_stepping(global_step, boundaries,
- rates)
- with self.test_session() as sess:
- output_rates = []
- for input_global_step in range(10):
- output_rate = sess.run(learning_rate,
- feed_dict={global_step: input_global_step})
- output_rates.append(output_rate)
- self.assertAllClose(output_rates, exp_rates)
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/utils/metrics.py b/object_detection/utils/metrics.py
deleted file mode 100644
index 719f1549..00000000
--- a/object_detection/utils/metrics.py
+++ /dev/null
@@ -1,144 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Functions for computing metrics like precision, recall, CorLoc and etc."""
-from __future__ import division
-
-import numpy as np
-
-
-def compute_precision_recall(scores, labels, num_gt):
- """Compute precision and recall.
-
- Args:
- scores: A float numpy array representing detection score
- labels: A boolean numpy array representing true/false positive labels
- num_gt: Number of ground truth instances
-
- Raises:
- ValueError: if the input is not of the correct format
-
- Returns:
- precision: Fraction of positive instances over detected ones. This value is
- None if no ground truth labels are present.
- recall: Fraction of detected positive instance over all positive instances.
- This value is None if no ground truth labels are present.
-
- """
- if not isinstance(
- labels, np.ndarray) or labels.dtype != np.bool or len(labels.shape) != 1:
- raise ValueError("labels must be single dimension bool numpy array")
-
- if not isinstance(
- scores, np.ndarray) or len(scores.shape) != 1:
- raise ValueError("scores must be single dimension numpy array")
-
- if num_gt < np.sum(labels):
- raise ValueError("Number of true positives must be smaller than num_gt.")
-
- if len(scores) != len(labels):
- raise ValueError("scores and labels must be of the same size.")
-
- if num_gt == 0:
- return None, None
-
- sorted_indices = np.argsort(scores)
- sorted_indices = sorted_indices[::-1]
- labels = labels.astype(int)
- true_positive_labels = labels[sorted_indices]
- false_positive_labels = 1 - true_positive_labels
- cum_true_positives = np.cumsum(true_positive_labels)
- cum_false_positives = np.cumsum(false_positive_labels)
- precision = cum_true_positives.astype(float) / (
- cum_true_positives + cum_false_positives)
- recall = cum_true_positives.astype(float) / num_gt
- return precision, recall
-
-
-def compute_average_precision(precision, recall):
- """Compute Average Precision according to the definition in VOCdevkit.
-
- Precision is modified to ensure that it does not decrease as recall
- decrease.
-
- Args:
- precision: A float [N, 1] numpy array of precisions
- recall: A float [N, 1] numpy array of recalls
-
- Raises:
- ValueError: if the input is not of the correct format
-
- Returns:
- average_precison: The area under the precision recall curve. NaN if
- precision and recall are None.
-
- """
- if precision is None:
- if recall is not None:
- raise ValueError("If precision is None, recall must also be None")
- return np.NAN
-
- if not isinstance(precision, np.ndarray) or not isinstance(recall,
- np.ndarray):
- raise ValueError("precision and recall must be numpy array")
- if precision.dtype != np.float or recall.dtype != np.float:
- raise ValueError("input must be float numpy array.")
- if len(precision) != len(recall):
- raise ValueError("precision and recall must be of the same size.")
- if not precision.size:
- return 0.0
- if np.amin(precision) < 0 or np.amax(precision) > 1:
- raise ValueError("Precision must be in the range of [0, 1].")
- if np.amin(recall) < 0 or np.amax(recall) > 1:
- raise ValueError("recall must be in the range of [0, 1].")
- if not all(recall[i] <= recall[i + 1] for i in range(len(recall) - 1)):
- raise ValueError("recall must be a non-decreasing array")
-
- recall = np.concatenate([[0], recall, [1]])
- precision = np.concatenate([[0], precision, [0]])
-
- # Preprocess precision to be a non-decreasing array
- for i in range(len(precision) - 2, -1, -1):
- precision[i] = np.maximum(precision[i], precision[i + 1])
-
- indices = np.where(recall[1:] != recall[:-1])[0] + 1
- average_precision = np.sum(
- (recall[indices] - recall[indices - 1]) * precision[indices])
- return average_precision
-
-
-def compute_cor_loc(num_gt_imgs_per_class,
- num_images_correctly_detected_per_class):
- """Compute CorLoc according to the definition in the following paper.
-
- https://www.robots.ox.ac.uk/~vgg/rg/papers/deselaers-eccv10.pdf
-
- Returns nans if there are no ground truth images for a class.
-
- Args:
- num_gt_imgs_per_class: 1D array, representing number of images containing
- at least one object instance of a particular class
- num_images_correctly_detected_per_class: 1D array, representing number of
- images that are correctly detected at least one object instance of a
- particular class
-
- Returns:
- corloc_per_class: A float numpy array represents the corloc score of each
- class
- """
- return np.where(
- num_gt_imgs_per_class == 0,
- np.nan,
- num_images_correctly_detected_per_class / num_gt_imgs_per_class)
diff --git a/object_detection/utils/metrics_test.py b/object_detection/utils/metrics_test.py
deleted file mode 100644
index a2064bbf..00000000
--- a/object_detection/utils/metrics_test.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.metrics."""
-
-import numpy as np
-import tensorflow as tf
-
-from object_detection.utils import metrics
-
-
-class MetricsTest(tf.test.TestCase):
-
- def test_compute_cor_loc(self):
- num_gt_imgs_per_class = np.array([100, 1, 5, 1, 1], dtype=int)
- num_images_correctly_detected_per_class = np.array([10, 0, 1, 0, 0],
- dtype=int)
- corloc = metrics.compute_cor_loc(num_gt_imgs_per_class,
- num_images_correctly_detected_per_class)
- expected_corloc = np.array([0.1, 0, 0.2, 0, 0], dtype=float)
- self.assertTrue(np.allclose(corloc, expected_corloc))
-
- def test_compute_cor_loc_nans(self):
- num_gt_imgs_per_class = np.array([100, 0, 0, 1, 1], dtype=int)
- num_images_correctly_detected_per_class = np.array([10, 0, 1, 0, 0],
- dtype=int)
- corloc = metrics.compute_cor_loc(num_gt_imgs_per_class,
- num_images_correctly_detected_per_class)
- expected_corloc = np.array([0.1, np.nan, np.nan, 0, 0], dtype=float)
- self.assertAllClose(corloc, expected_corloc)
-
- def test_compute_precision_recall(self):
- num_gt = 10
- scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float)
- labels = np.array([0, 1, 1, 0, 0, 1], dtype=bool)
- accumulated_tp_count = np.array([0, 1, 1, 2, 2, 3], dtype=float)
- expected_precision = accumulated_tp_count / np.array([1, 2, 3, 4, 5, 6])
- expected_recall = accumulated_tp_count / num_gt
- precision, recall = metrics.compute_precision_recall(scores, labels, num_gt)
- self.assertAllClose(precision, expected_precision)
- self.assertAllClose(recall, expected_recall)
-
- def test_compute_average_precision(self):
- precision = np.array([0.8, 0.76, 0.9, 0.65, 0.7, 0.5, 0.55, 0], dtype=float)
- recall = np.array([0.3, 0.3, 0.4, 0.4, 0.45, 0.45, 0.5, 0.5], dtype=float)
- processed_precision = np.array([0.9, 0.9, 0.9, 0.7, 0.7, 0.55, 0.55, 0],
- dtype=float)
- recall_interval = np.array([0.3, 0, 0.1, 0, 0.05, 0, 0.05, 0], dtype=float)
- expected_mean_ap = np.sum(recall_interval * processed_precision)
- mean_ap = metrics.compute_average_precision(precision, recall)
- self.assertAlmostEqual(expected_mean_ap, mean_ap)
-
- def test_compute_precision_recall_and_ap_no_groundtruth(self):
- num_gt = 0
- scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float)
- labels = np.array([0, 0, 0, 0, 0, 0], dtype=bool)
- expected_precision = None
- expected_recall = None
- precision, recall = metrics.compute_precision_recall(scores, labels, num_gt)
- self.assertEquals(precision, expected_precision)
- self.assertEquals(recall, expected_recall)
- ap = metrics.compute_average_precision(precision, recall)
- self.assertTrue(np.isnan(ap))
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/utils/np_box_list.py b/object_detection/utils/np_box_list.py
deleted file mode 100644
index 70263ca3..00000000
--- a/object_detection/utils/np_box_list.py
+++ /dev/null
@@ -1,133 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Numpy BoxList classes and functions."""
-
-import numpy as np
-
-
-class BoxList(object):
- """Box collection.
-
- BoxList represents a list of bounding boxes as numpy array, where each
- bounding box is represented as a row of 4 numbers,
- [y_min, x_min, y_max, x_max]. It is assumed that all bounding boxes within a
- given list correspond to a single image.
-
- Optionally, users can add additional related fields (such as
- objectness/classification scores).
- """
-
- def __init__(self, data):
- """Constructs box collection.
-
- Args:
- data: a numpy array of shape [N, 4] representing box coordinates
-
- Raises:
- ValueError: if bbox data is not a numpy array
- ValueError: if invalid dimensions for bbox data
- """
- if not isinstance(data, np.ndarray):
- raise ValueError('data must be a numpy array.')
- if len(data.shape) != 2 or data.shape[1] != 4:
- raise ValueError('Invalid dimensions for box data.')
- if data.dtype != np.float32 and data.dtype != np.float64:
- raise ValueError('Invalid data type for box data: float is required.')
- if not self._is_valid_boxes(data):
- raise ValueError('Invalid box data. data must be a numpy array of '
- 'N*[y_min, x_min, y_max, x_max]')
- self.data = {'boxes': data}
-
- def num_boxes(self):
- """Return number of boxes held in collections."""
- return self.data['boxes'].shape[0]
-
- def get_extra_fields(self):
- """Return all non-box fields."""
- return [k for k in self.data.keys() if k != 'boxes']
-
- def has_field(self, field):
- return field in self.data
-
- def add_field(self, field, field_data):
- """Add data to a specified field.
-
- Args:
- field: a string parameter used to speficy a related field to be accessed.
- field_data: a numpy array of [N, ...] representing the data associated
- with the field.
- Raises:
- ValueError: if the field is already exist or the dimension of the field
- data does not matches the number of boxes.
- """
- if self.has_field(field):
- raise ValueError('Field ' + field + 'already exists')
- if len(field_data.shape) < 1 or field_data.shape[0] != self.num_boxes():
- raise ValueError('Invalid dimensions for field data')
- self.data[field] = field_data
-
- def get(self):
- """Convenience function for accesssing box coordinates.
-
- Returns:
- a numpy array of shape [N, 4] representing box corners
- """
- return self.get_field('boxes')
-
- def get_field(self, field):
- """Accesses data associated with the specified field in the box collection.
-
- Args:
- field: a string parameter used to speficy a related field to be accessed.
-
- Returns:
- a numpy 1-d array representing data of an associated field
-
- Raises:
- ValueError: if invalid field
- """
- if not self.has_field(field):
- raise ValueError('field {} does not exist'.format(field))
- return self.data[field]
-
- def get_coordinates(self):
- """Get corner coordinates of boxes.
-
- Returns:
- a list of 4 1-d numpy arrays [y_min, x_min, y_max, x_max]
- """
- box_coordinates = self.get()
- y_min = box_coordinates[:, 0]
- x_min = box_coordinates[:, 1]
- y_max = box_coordinates[:, 2]
- x_max = box_coordinates[:, 3]
- return [y_min, x_min, y_max, x_max]
-
- def _is_valid_boxes(self, data):
- """Check whether data fullfills the format of N*[ymin, xmin, ymax, xmin].
-
- Args:
- data: a numpy array of shape [N, 4] representing box coordinates
-
- Returns:
- a boolean indicating whether all ymax of boxes are equal or greater than
- ymin, and all xmax of boxes are equal or greater than xmin.
- """
- if data.shape[0] > 0:
- for i in range(data.shape[0]):
- if data[i, 0] > data[i, 2] or data[i, 1] > data[i, 3]:
- return False
- return True
diff --git a/object_detection/utils/np_box_list_ops.py b/object_detection/utils/np_box_list_ops.py
deleted file mode 100644
index cb9fee85..00000000
--- a/object_detection/utils/np_box_list_ops.py
+++ /dev/null
@@ -1,555 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Bounding Box List operations for Numpy BoxLists.
-
-Example box operations that are supported:
- * Areas: compute bounding box areas
- * IOU: pairwise intersection-over-union scores
-"""
-
-import numpy as np
-
-from object_detection.utils import np_box_list
-from object_detection.utils import np_box_ops
-
-
-class SortOrder(object):
- """Enum class for sort order.
-
- Attributes:
- ascend: ascend order.
- descend: descend order.
- """
- ASCEND = 1
- DESCEND = 2
-
-
-def area(boxlist):
- """Computes area of boxes.
-
- Args:
- boxlist: BoxList holding N boxes
-
- Returns:
- a numpy array with shape [N*1] representing box areas
- """
- y_min, x_min, y_max, x_max = boxlist.get_coordinates()
- return (y_max - y_min) * (x_max - x_min)
-
-
-def intersection(boxlist1, boxlist2):
- """Compute pairwise intersection areas between boxes.
-
- Args:
- boxlist1: BoxList holding N boxes
- boxlist2: BoxList holding M boxes
-
- Returns:
- a numpy array with shape [N*M] representing pairwise intersection area
- """
- return np_box_ops.intersection(boxlist1.get(), boxlist2.get())
-
-
-def iou(boxlist1, boxlist2):
- """Computes pairwise intersection-over-union between box collections.
-
- Args:
- boxlist1: BoxList holding N boxes
- boxlist2: BoxList holding M boxes
-
- Returns:
- a numpy array with shape [N, M] representing pairwise iou scores.
- """
- return np_box_ops.iou(boxlist1.get(), boxlist2.get())
-
-
-def ioa(boxlist1, boxlist2):
- """Computes pairwise intersection-over-area between box collections.
-
- Intersection-over-area (ioa) between two boxes box1 and box2 is defined as
- their intersection area over box2's area. Note that ioa is not symmetric,
- that is, IOA(box1, box2) != IOA(box2, box1).
-
- Args:
- boxlist1: BoxList holding N boxes
- boxlist2: BoxList holding M boxes
-
- Returns:
- a numpy array with shape [N, M] representing pairwise ioa scores.
- """
- return np_box_ops.ioa(boxlist1.get(), boxlist2.get())
-
-
-def gather(boxlist, indices, fields=None):
- """Gather boxes from BoxList according to indices and return new BoxList.
-
- By default, Gather returns boxes corresponding to the input index list, as
- well as all additional fields stored in the boxlist (indexing into the
- first dimension). However one can optionally only gather from a
- subset of fields.
-
- Args:
- boxlist: BoxList holding N boxes
- indices: a 1-d numpy array of type int_
- fields: (optional) list of fields to also gather from. If None (default),
- all fields are gathered from. Pass an empty fields list to only gather
- the box coordinates.
-
- Returns:
- subboxlist: a BoxList corresponding to the subset of the input BoxList
- specified by indices
-
- Raises:
- ValueError: if specified field is not contained in boxlist or if the
- indices are not of type int_
- """
- if indices.size:
- if np.amax(indices) >= boxlist.num_boxes() or np.amin(indices) < 0:
- raise ValueError('indices are out of valid range.')
- subboxlist = np_box_list.BoxList(boxlist.get()[indices, :])
- if fields is None:
- fields = boxlist.get_extra_fields()
- for field in fields:
- extra_field_data = boxlist.get_field(field)
- subboxlist.add_field(field, extra_field_data[indices, ...])
- return subboxlist
-
-
-def sort_by_field(boxlist, field, order=SortOrder.DESCEND):
- """Sort boxes and associated fields according to a scalar field.
-
- A common use case is reordering the boxes according to descending scores.
-
- Args:
- boxlist: BoxList holding N boxes.
- field: A BoxList field for sorting and reordering the BoxList.
- order: (Optional) 'descend' or 'ascend'. Default is descend.
-
- Returns:
- sorted_boxlist: A sorted BoxList with the field in the specified order.
-
- Raises:
- ValueError: if specified field does not exist or is not of single dimension.
- ValueError: if the order is not either descend or ascend.
- """
- if not boxlist.has_field(field):
- raise ValueError('Field ' + field + ' does not exist')
- if len(boxlist.get_field(field).shape) != 1:
- raise ValueError('Field ' + field + 'should be single dimension.')
- if order != SortOrder.DESCEND and order != SortOrder.ASCEND:
- raise ValueError('Invalid sort order')
-
- field_to_sort = boxlist.get_field(field)
- sorted_indices = np.argsort(field_to_sort)
- if order == SortOrder.DESCEND:
- sorted_indices = sorted_indices[::-1]
- return gather(boxlist, sorted_indices)
-
-
-def non_max_suppression(boxlist,
- max_output_size=10000,
- iou_threshold=1.0,
- score_threshold=-10.0):
- """Non maximum suppression.
-
- This op greedily selects a subset of detection bounding boxes, pruning
- away boxes that have high IOU (intersection over union) overlap (> thresh)
- with already selected boxes. In each iteration, the detected bounding box with
- highest score in the available pool is selected.
-
- Args:
- boxlist: BoxList holding N boxes. Must contain a 'scores' field
- representing detection scores. All scores belong to the same class.
- max_output_size: maximum number of retained boxes
- iou_threshold: intersection over union threshold.
- score_threshold: minimum score threshold. Remove the boxes with scores
- less than this value. Default value is set to -10. A very
- low threshold to pass pretty much all the boxes, unless
- the user sets a different score threshold.
-
- Returns:
- a BoxList holding M boxes where M <= max_output_size
- Raises:
- ValueError: if 'scores' field does not exist
- ValueError: if threshold is not in [0, 1]
- ValueError: if max_output_size < 0
- """
- if not boxlist.has_field('scores'):
- raise ValueError('Field scores does not exist')
- if iou_threshold < 0. or iou_threshold > 1.0:
- raise ValueError('IOU threshold must be in [0, 1]')
- if max_output_size < 0:
- raise ValueError('max_output_size must be bigger than 0.')
-
- boxlist = filter_scores_greater_than(boxlist, score_threshold)
- if boxlist.num_boxes() == 0:
- return boxlist
-
- boxlist = sort_by_field(boxlist, 'scores')
-
- # Prevent further computation if NMS is disabled.
- if iou_threshold == 1.0:
- if boxlist.num_boxes() > max_output_size:
- selected_indices = np.arange(max_output_size)
- return gather(boxlist, selected_indices)
- else:
- return boxlist
-
- boxes = boxlist.get()
- num_boxes = boxlist.num_boxes()
- # is_index_valid is True only for all remaining valid boxes,
- is_index_valid = np.full(num_boxes, 1, dtype=bool)
- selected_indices = []
- num_output = 0
- for i in xrange(num_boxes):
- if num_output < max_output_size:
- if is_index_valid[i]:
- num_output += 1
- selected_indices.append(i)
- is_index_valid[i] = False
- valid_indices = np.where(is_index_valid)[0]
- if valid_indices.size == 0:
- break
-
- intersect_over_union = np_box_ops.iou(
- np.expand_dims(boxes[i, :], axis=0), boxes[valid_indices, :])
- intersect_over_union = np.squeeze(intersect_over_union, axis=0)
- is_index_valid[valid_indices] = np.logical_and(
- is_index_valid[valid_indices],
- intersect_over_union <= iou_threshold)
- return gather(boxlist, np.array(selected_indices))
-
-
-def multi_class_non_max_suppression(boxlist, score_thresh, iou_thresh,
- max_output_size):
- """Multi-class version of non maximum suppression.
-
- This op greedily selects a subset of detection bounding boxes, pruning
- away boxes that have high IOU (intersection over union) overlap (> thresh)
- with already selected boxes. It operates independently for each class for
- which scores are provided (via the scores field of the input box_list),
- pruning boxes with score less than a provided threshold prior to
- applying NMS.
-
- Args:
- boxlist: BoxList holding N boxes. Must contain a 'scores' field
- representing detection scores. This scores field is a tensor that can
- be 1 dimensional (in the case of a single class) or 2-dimensional, which
- which case we assume that it takes the shape [num_boxes, num_classes].
- We further assume that this rank is known statically and that
- scores.shape[1] is also known (i.e., the number of classes is fixed
- and known at graph construction time).
- score_thresh: scalar threshold for score (low scoring boxes are removed).
- iou_thresh: scalar threshold for IOU (boxes that that high IOU overlap
- with previously selected boxes are removed).
- max_output_size: maximum number of retained boxes per class.
-
- Returns:
- a BoxList holding M boxes with a rank-1 scores field representing
- corresponding scores for each box with scores sorted in decreasing order
- and a rank-1 classes field representing a class label for each box.
- Raises:
- ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have
- a valid scores field.
- """
- if not 0 <= iou_thresh <= 1.0:
- raise ValueError('thresh must be between 0 and 1')
- if not isinstance(boxlist, np_box_list.BoxList):
- raise ValueError('boxlist must be a BoxList')
- if not boxlist.has_field('scores'):
- raise ValueError('input boxlist must have \'scores\' field')
- scores = boxlist.get_field('scores')
- if len(scores.shape) == 1:
- scores = np.reshape(scores, [-1, 1])
- elif len(scores.shape) == 2:
- if scores.shape[1] is None:
- raise ValueError('scores field must have statically defined second '
- 'dimension')
- else:
- raise ValueError('scores field must be of rank 1 or 2')
- num_boxes = boxlist.num_boxes()
- num_scores = scores.shape[0]
- num_classes = scores.shape[1]
-
- if num_boxes != num_scores:
- raise ValueError('Incorrect scores field length: actual vs expected.')
-
- selected_boxes_list = []
- for class_idx in range(num_classes):
- boxlist_and_class_scores = np_box_list.BoxList(boxlist.get())
- class_scores = np.reshape(scores[0:num_scores, class_idx], [-1])
- boxlist_and_class_scores.add_field('scores', class_scores)
- boxlist_filt = filter_scores_greater_than(boxlist_and_class_scores,
- score_thresh)
- nms_result = non_max_suppression(boxlist_filt,
- max_output_size=max_output_size,
- iou_threshold=iou_thresh,
- score_threshold=score_thresh)
- nms_result.add_field(
- 'classes', np.zeros_like(nms_result.get_field('scores')) + class_idx)
- selected_boxes_list.append(nms_result)
- selected_boxes = concatenate(selected_boxes_list)
- sorted_boxes = sort_by_field(selected_boxes, 'scores')
- return sorted_boxes
-
-
-def scale(boxlist, y_scale, x_scale):
- """Scale box coordinates in x and y dimensions.
-
- Args:
- boxlist: BoxList holding N boxes
- y_scale: float
- x_scale: float
-
- Returns:
- boxlist: BoxList holding N boxes
- """
- y_min, x_min, y_max, x_max = np.array_split(boxlist.get(), 4, axis=1)
- y_min = y_scale * y_min
- y_max = y_scale * y_max
- x_min = x_scale * x_min
- x_max = x_scale * x_max
- scaled_boxlist = np_box_list.BoxList(np.hstack([y_min, x_min, y_max, x_max]))
-
- fields = boxlist.get_extra_fields()
- for field in fields:
- extra_field_data = boxlist.get_field(field)
- scaled_boxlist.add_field(field, extra_field_data)
-
- return scaled_boxlist
-
-
-def clip_to_window(boxlist, window):
- """Clip bounding boxes to a window.
-
- This op clips input bounding boxes (represented by bounding box
- corners) to a window, optionally filtering out boxes that do not
- overlap at all with the window.
-
- Args:
- boxlist: BoxList holding M_in boxes
- window: a numpy array of shape [4] representing the
- [y_min, x_min, y_max, x_max] window to which the op
- should clip boxes.
-
- Returns:
- a BoxList holding M_out boxes where M_out <= M_in
- """
- y_min, x_min, y_max, x_max = np.array_split(boxlist.get(), 4, axis=1)
- win_y_min = window[0]
- win_x_min = window[1]
- win_y_max = window[2]
- win_x_max = window[3]
- y_min_clipped = np.fmax(np.fmin(y_min, win_y_max), win_y_min)
- y_max_clipped = np.fmax(np.fmin(y_max, win_y_max), win_y_min)
- x_min_clipped = np.fmax(np.fmin(x_min, win_x_max), win_x_min)
- x_max_clipped = np.fmax(np.fmin(x_max, win_x_max), win_x_min)
- clipped = np_box_list.BoxList(
- np.hstack([y_min_clipped, x_min_clipped, y_max_clipped, x_max_clipped]))
- clipped = _copy_extra_fields(clipped, boxlist)
- areas = area(clipped)
- nonzero_area_indices = np.reshape(np.nonzero(np.greater(areas, 0.0)),
- [-1]).astype(np.int32)
- return gather(clipped, nonzero_area_indices)
-
-
-def prune_non_overlapping_boxes(boxlist1, boxlist2, minoverlap=0.0):
- """Prunes the boxes in boxlist1 that overlap less than thresh with boxlist2.
-
- For each box in boxlist1, we want its IOA to be more than minoverlap with
- at least one of the boxes in boxlist2. If it does not, we remove it.
-
- Args:
- boxlist1: BoxList holding N boxes.
- boxlist2: BoxList holding M boxes.
- minoverlap: Minimum required overlap between boxes, to count them as
- overlapping.
-
- Returns:
- A pruned boxlist with size [N', 4].
- """
- intersection_over_area = ioa(boxlist2, boxlist1) # [M, N] tensor
- intersection_over_area = np.amax(intersection_over_area, axis=0) # [N] tensor
- keep_bool = np.greater_equal(intersection_over_area, np.array(minoverlap))
- keep_inds = np.nonzero(keep_bool)[0]
- new_boxlist1 = gather(boxlist1, keep_inds)
- return new_boxlist1
-
-
-def prune_outside_window(boxlist, window):
- """Prunes bounding boxes that fall outside a given window.
-
- This function prunes bounding boxes that even partially fall outside the given
- window. See also ClipToWindow which only prunes bounding boxes that fall
- completely outside the window, and clips any bounding boxes that partially
- overflow.
-
- Args:
- boxlist: a BoxList holding M_in boxes.
- window: a numpy array of size 4, representing [ymin, xmin, ymax, xmax]
- of the window.
-
- Returns:
- pruned_corners: a tensor with shape [M_out, 4] where M_out <= M_in.
- valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes
- in the input tensor.
- """
-
- y_min, x_min, y_max, x_max = np.array_split(boxlist.get(), 4, axis=1)
- win_y_min = window[0]
- win_x_min = window[1]
- win_y_max = window[2]
- win_x_max = window[3]
- coordinate_violations = np.hstack([np.less(y_min, win_y_min),
- np.less(x_min, win_x_min),
- np.greater(y_max, win_y_max),
- np.greater(x_max, win_x_max)])
- valid_indices = np.reshape(
- np.where(np.logical_not(np.max(coordinate_violations, axis=1))), [-1])
- return gather(boxlist, valid_indices), valid_indices
-
-
-def concatenate(boxlists, fields=None):
- """Concatenate list of BoxLists.
-
- This op concatenates a list of input BoxLists into a larger BoxList. It also
- handles concatenation of BoxList fields as long as the field tensor shapes
- are equal except for the first dimension.
-
- Args:
- boxlists: list of BoxList objects
- fields: optional list of fields to also concatenate. By default, all
- fields from the first BoxList in the list are included in the
- concatenation.
-
- Returns:
- a BoxList with number of boxes equal to
- sum([boxlist.num_boxes() for boxlist in BoxList])
- Raises:
- ValueError: if boxlists is invalid (i.e., is not a list, is empty, or
- contains non BoxList objects), or if requested fields are not contained in
- all boxlists
- """
- if not isinstance(boxlists, list):
- raise ValueError('boxlists should be a list')
- if not boxlists:
- raise ValueError('boxlists should have nonzero length')
- for boxlist in boxlists:
- if not isinstance(boxlist, np_box_list.BoxList):
- raise ValueError('all elements of boxlists should be BoxList objects')
- concatenated = np_box_list.BoxList(
- np.vstack([boxlist.get() for boxlist in boxlists]))
- if fields is None:
- fields = boxlists[0].get_extra_fields()
- for field in fields:
- first_field_shape = boxlists[0].get_field(field).shape
- first_field_shape = first_field_shape[1:]
- for boxlist in boxlists:
- if not boxlist.has_field(field):
- raise ValueError('boxlist must contain all requested fields')
- field_shape = boxlist.get_field(field).shape
- field_shape = field_shape[1:]
- if field_shape != first_field_shape:
- raise ValueError('field %s must have same shape for all boxlists '
- 'except for the 0th dimension.' % field)
- concatenated_field = np.concatenate(
- [boxlist.get_field(field) for boxlist in boxlists], axis=0)
- concatenated.add_field(field, concatenated_field)
- return concatenated
-
-
-def filter_scores_greater_than(boxlist, thresh):
- """Filter to keep only boxes with score exceeding a given threshold.
-
- This op keeps the collection of boxes whose corresponding scores are
- greater than the input threshold.
-
- Args:
- boxlist: BoxList holding N boxes. Must contain a 'scores' field
- representing detection scores.
- thresh: scalar threshold
-
- Returns:
- a BoxList holding M boxes where M <= N
-
- Raises:
- ValueError: if boxlist not a BoxList object or if it does not
- have a scores field
- """
- if not isinstance(boxlist, np_box_list.BoxList):
- raise ValueError('boxlist must be a BoxList')
- if not boxlist.has_field('scores'):
- raise ValueError('input boxlist must have \'scores\' field')
- scores = boxlist.get_field('scores')
- if len(scores.shape) > 2:
- raise ValueError('Scores should have rank 1 or 2')
- if len(scores.shape) == 2 and scores.shape[1] != 1:
- raise ValueError('Scores should have rank 1 or have shape '
- 'consistent with [None, 1]')
- high_score_indices = np.reshape(np.where(np.greater(scores, thresh)),
- [-1]).astype(np.int32)
- return gather(boxlist, high_score_indices)
-
-
-def change_coordinate_frame(boxlist, window):
- """Change coordinate frame of the boxlist to be relative to window's frame.
-
- Given a window of the form [ymin, xmin, ymax, xmax],
- changes bounding box coordinates from boxlist to be relative to this window
- (e.g., the min corner maps to (0,0) and the max corner maps to (1,1)).
-
- An example use case is data augmentation: where we are given groundtruth
- boxes (boxlist) and would like to randomly crop the image to some
- window (window). In this case we need to change the coordinate frame of
- each groundtruth box to be relative to this new window.
-
- Args:
- boxlist: A BoxList object holding N boxes.
- window: a size 4 1-D numpy array.
-
- Returns:
- Returns a BoxList object with N boxes.
- """
- win_height = window[2] - window[0]
- win_width = window[3] - window[1]
- boxlist_new = scale(
- np_box_list.BoxList(boxlist.get() -
- [window[0], window[1], window[0], window[1]]),
- 1.0 / win_height, 1.0 / win_width)
- _copy_extra_fields(boxlist_new, boxlist)
-
- return boxlist_new
-
-
-def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from):
- """Copies the extra fields of boxlist_to_copy_from to boxlist_to_copy_to.
-
- Args:
- boxlist_to_copy_to: BoxList to which extra fields are copied.
- boxlist_to_copy_from: BoxList from which fields are copied.
-
- Returns:
- boxlist_to_copy_to with extra fields.
- """
- for field in boxlist_to_copy_from.get_extra_fields():
- boxlist_to_copy_to.add_field(field, boxlist_to_copy_from.get_field(field))
- return boxlist_to_copy_to
-
-
-def _update_valid_indices_by_removing_high_iou_boxes(
- selected_indices, is_index_valid, intersect_over_union, threshold):
- max_iou = np.max(intersect_over_union[:, selected_indices], axis=1)
- return np.logical_and(is_index_valid, max_iou <= threshold)
diff --git a/object_detection/utils/np_box_list_ops_test.py b/object_detection/utils/np_box_list_ops_test.py
deleted file mode 100644
index 24a2cc8c..00000000
--- a/object_detection/utils/np_box_list_ops_test.py
+++ /dev/null
@@ -1,414 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.utils.np_box_list_ops."""
-
-import numpy as np
-import tensorflow as tf
-
-from object_detection.utils import np_box_list
-from object_detection.utils import np_box_list_ops
-
-
-class AreaRelatedTest(tf.test.TestCase):
-
- def setUp(self):
- boxes1 = np.array([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]],
- dtype=float)
- boxes2 = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
- [0.0, 0.0, 20.0, 20.0]],
- dtype=float)
- self.boxlist1 = np_box_list.BoxList(boxes1)
- self.boxlist2 = np_box_list.BoxList(boxes2)
-
- def test_area(self):
- areas = np_box_list_ops.area(self.boxlist1)
- expected_areas = np.array([6.0, 5.0], dtype=float)
- self.assertAllClose(expected_areas, areas)
-
- def test_intersection(self):
- intersection = np_box_list_ops.intersection(self.boxlist1, self.boxlist2)
- expected_intersection = np.array([[2.0, 0.0, 6.0], [1.0, 0.0, 5.0]],
- dtype=float)
- self.assertAllClose(intersection, expected_intersection)
-
- def test_iou(self):
- iou = np_box_list_ops.iou(self.boxlist1, self.boxlist2)
- expected_iou = np.array([[2.0 / 16.0, 0.0, 6.0 / 400.0],
- [1.0 / 16.0, 0.0, 5.0 / 400.0]],
- dtype=float)
- self.assertAllClose(iou, expected_iou)
-
- def test_ioa(self):
- boxlist1 = np_box_list.BoxList(
- np.array(
- [[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]], dtype=
- np.float32))
- boxlist2 = np_box_list.BoxList(
- np.array(
- [[0.5, 0.25, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]], dtype=np.float32))
- ioa21 = np_box_list_ops.ioa(boxlist2, boxlist1)
- expected_ioa21 = np.array([[0.5, 0.0],
- [1.0, 1.0]],
- dtype=np.float32)
- self.assertAllClose(ioa21, expected_ioa21)
-
- def test_scale(self):
- boxlist = np_box_list.BoxList(
- np.array(
- [[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]], dtype=
- np.float32))
- boxlist_scaled = np_box_list_ops.scale(boxlist, 2.0, 3.0)
- expected_boxlist_scaled = np_box_list.BoxList(
- np.array(
- [[0.5, 0.75, 1.5, 2.25], [0.0, 0.0, 1.0, 2.25]], dtype=np.float32))
- self.assertAllClose(expected_boxlist_scaled.get(), boxlist_scaled.get())
-
- def test_clip_to_window(self):
- boxlist = np_box_list.BoxList(
- np.array(
- [[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75],
- [-0.2, -0.3, 0.7, 1.5]],
- dtype=np.float32))
- boxlist_clipped = np_box_list_ops.clip_to_window(boxlist,
- [0.0, 0.0, 1.0, 1.0])
- expected_boxlist_clipped = np_box_list.BoxList(
- np.array(
- [[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75],
- [0.0, 0.0, 0.7, 1.0]],
- dtype=np.float32))
- self.assertAllClose(expected_boxlist_clipped.get(), boxlist_clipped.get())
-
- def test_prune_outside_window(self):
- boxlist = np_box_list.BoxList(
- np.array(
- [[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75],
- [-0.2, -0.3, 0.7, 1.5]],
- dtype=np.float32))
- boxlist_pruned, _ = np_box_list_ops.prune_outside_window(
- boxlist, [0.0, 0.0, 1.0, 1.0])
- expected_boxlist_pruned = np_box_list.BoxList(
- np.array(
- [[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]], dtype=
- np.float32))
- self.assertAllClose(expected_boxlist_pruned.get(), boxlist_pruned.get())
-
- def test_concatenate(self):
- boxlist1 = np_box_list.BoxList(
- np.array(
- [[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]], dtype=
- np.float32))
- boxlist2 = np_box_list.BoxList(
- np.array(
- [[0.5, 0.25, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]], dtype=np.float32))
- boxlists = [boxlist1, boxlist2]
- boxlist_concatenated = np_box_list_ops.concatenate(boxlists)
- boxlist_concatenated_expected = np_box_list.BoxList(
- np.array(
- [[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75],
- [0.5, 0.25, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]],
- dtype=np.float32))
- self.assertAllClose(boxlist_concatenated_expected.get(),
- boxlist_concatenated.get())
-
- def test_change_coordinate_frame(self):
- boxlist = np_box_list.BoxList(
- np.array(
- [[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]], dtype=
- np.float32))
- boxlist_coord = np_box_list_ops.change_coordinate_frame(
- boxlist, np.array([0, 0, 0.5, 0.5], dtype=np.float32))
- expected_boxlist_coord = np_box_list.BoxList(
- np.array([[0.5, 0.5, 1.5, 1.5], [0, 0, 1.0, 1.5]], dtype=np.float32))
- self.assertAllClose(boxlist_coord.get(), expected_boxlist_coord.get())
-
- def test_filter_scores_greater_than(self):
- boxlist = np_box_list.BoxList(
- np.array(
- [[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]], dtype=
- np.float32))
- boxlist.add_field('scores', np.array([0.8, 0.2], np.float32))
- boxlist_greater = np_box_list_ops.filter_scores_greater_than(boxlist, 0.5)
-
- expected_boxlist_greater = np_box_list.BoxList(
- np.array([[0.25, 0.25, 0.75, 0.75]], dtype=np.float32))
-
- self.assertAllClose(boxlist_greater.get(), expected_boxlist_greater.get())
-
-
-class GatherOpsTest(tf.test.TestCase):
-
- def setUp(self):
- boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
- [0.0, 0.0, 20.0, 20.0]],
- dtype=float)
- self.boxlist = np_box_list.BoxList(boxes)
- self.boxlist.add_field('scores', np.array([0.5, 0.7, 0.9], dtype=float))
- self.boxlist.add_field('labels',
- np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0],
- [0, 0, 0, 0, 1]],
- dtype=int))
-
- def test_gather_with_out_of_range_indices(self):
- indices = np.array([3, 1], dtype=int)
- boxlist = self.boxlist
- with self.assertRaises(ValueError):
- np_box_list_ops.gather(boxlist, indices)
-
- def test_gather_with_invalid_multidimensional_indices(self):
- indices = np.array([[0, 1], [1, 2]], dtype=int)
- boxlist = self.boxlist
- with self.assertRaises(ValueError):
- np_box_list_ops.gather(boxlist, indices)
-
- def test_gather_without_fields_specified(self):
- indices = np.array([2, 0, 1], dtype=int)
- boxlist = self.boxlist
- subboxlist = np_box_list_ops.gather(boxlist, indices)
-
- expected_scores = np.array([0.9, 0.5, 0.7], dtype=float)
- self.assertAllClose(expected_scores, subboxlist.get_field('scores'))
-
- expected_boxes = np.array([[0.0, 0.0, 20.0, 20.0], [3.0, 4.0, 6.0, 8.0],
- [14.0, 14.0, 15.0, 15.0]],
- dtype=float)
- self.assertAllClose(expected_boxes, subboxlist.get())
-
- expected_labels = np.array([[0, 0, 0, 0, 1], [0, 0, 0, 1, 0],
- [0, 1, 0, 0, 0]],
- dtype=int)
- self.assertAllClose(expected_labels, subboxlist.get_field('labels'))
-
- def test_gather_with_invalid_field_specified(self):
- indices = np.array([2, 0, 1], dtype=int)
- boxlist = self.boxlist
-
- with self.assertRaises(ValueError):
- np_box_list_ops.gather(boxlist, indices, 'labels')
-
- with self.assertRaises(ValueError):
- np_box_list_ops.gather(boxlist, indices, ['objectness'])
-
- def test_gather_with_fields_specified(self):
- indices = np.array([2, 0, 1], dtype=int)
- boxlist = self.boxlist
- subboxlist = np_box_list_ops.gather(boxlist, indices, ['labels'])
-
- self.assertFalse(subboxlist.has_field('scores'))
-
- expected_boxes = np.array([[0.0, 0.0, 20.0, 20.0], [3.0, 4.0, 6.0, 8.0],
- [14.0, 14.0, 15.0, 15.0]],
- dtype=float)
- self.assertAllClose(expected_boxes, subboxlist.get())
-
- expected_labels = np.array([[0, 0, 0, 0, 1], [0, 0, 0, 1, 0],
- [0, 1, 0, 0, 0]],
- dtype=int)
- self.assertAllClose(expected_labels, subboxlist.get_field('labels'))
-
-
-class SortByFieldTest(tf.test.TestCase):
-
- def setUp(self):
- boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
- [0.0, 0.0, 20.0, 20.0]],
- dtype=float)
- self.boxlist = np_box_list.BoxList(boxes)
- self.boxlist.add_field('scores', np.array([0.5, 0.9, 0.4], dtype=float))
- self.boxlist.add_field('labels',
- np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0],
- [0, 0, 0, 0, 1]],
- dtype=int))
-
- def test_with_invalid_field(self):
- with self.assertRaises(ValueError):
- np_box_list_ops.sort_by_field(self.boxlist, 'objectness')
- with self.assertRaises(ValueError):
- np_box_list_ops.sort_by_field(self.boxlist, 'labels')
-
- def test_with_invalid_sorting_order(self):
- with self.assertRaises(ValueError):
- np_box_list_ops.sort_by_field(self.boxlist, 'scores', 'Descending')
-
- def test_with_descending_sorting(self):
- sorted_boxlist = np_box_list_ops.sort_by_field(self.boxlist, 'scores')
-
- expected_boxes = np.array([[14.0, 14.0, 15.0, 15.0], [3.0, 4.0, 6.0, 8.0],
- [0.0, 0.0, 20.0, 20.0]],
- dtype=float)
- self.assertAllClose(expected_boxes, sorted_boxlist.get())
-
- expected_scores = np.array([0.9, 0.5, 0.4], dtype=float)
- self.assertAllClose(expected_scores, sorted_boxlist.get_field('scores'))
-
- def test_with_ascending_sorting(self):
- sorted_boxlist = np_box_list_ops.sort_by_field(
- self.boxlist, 'scores', np_box_list_ops.SortOrder.ASCEND)
-
- expected_boxes = np.array([[0.0, 0.0, 20.0, 20.0],
- [3.0, 4.0, 6.0, 8.0],
- [14.0, 14.0, 15.0, 15.0],],
- dtype=float)
- self.assertAllClose(expected_boxes, sorted_boxlist.get())
-
- expected_scores = np.array([0.4, 0.5, 0.9], dtype=float)
- self.assertAllClose(expected_scores, sorted_boxlist.get_field('scores'))
-
-
-class NonMaximumSuppressionTest(tf.test.TestCase):
-
- def setUp(self):
- self._boxes = np.array([[0, 0, 1, 1],
- [0, 0.1, 1, 1.1],
- [0, -0.1, 1, 0.9],
- [0, 10, 1, 11],
- [0, 10.1, 1, 11.1],
- [0, 100, 1, 101]],
- dtype=float)
- self._boxlist = np_box_list.BoxList(self._boxes)
-
- def test_with_no_scores_field(self):
- boxlist = np_box_list.BoxList(self._boxes)
- max_output_size = 3
- iou_threshold = 0.5
-
- with self.assertRaises(ValueError):
- np_box_list_ops.non_max_suppression(
- boxlist, max_output_size, iou_threshold)
-
- def test_nms_disabled_max_output_size_equals_three(self):
- boxlist = np_box_list.BoxList(self._boxes)
- boxlist.add_field('scores',
- np.array([.9, .75, .6, .95, .2, .3], dtype=float))
- max_output_size = 3
- iou_threshold = 1. # No NMS
-
- expected_boxes = np.array([[0, 10, 1, 11], [0, 0, 1, 1], [0, 0.1, 1, 1.1]],
- dtype=float)
- nms_boxlist = np_box_list_ops.non_max_suppression(
- boxlist, max_output_size, iou_threshold)
- self.assertAllClose(nms_boxlist.get(), expected_boxes)
-
- def test_select_from_three_clusters(self):
- boxlist = np_box_list.BoxList(self._boxes)
- boxlist.add_field('scores',
- np.array([.9, .75, .6, .95, .2, .3], dtype=float))
- max_output_size = 3
- iou_threshold = 0.5
-
- expected_boxes = np.array([[0, 10, 1, 11], [0, 0, 1, 1], [0, 100, 1, 101]],
- dtype=float)
- nms_boxlist = np_box_list_ops.non_max_suppression(
- boxlist, max_output_size, iou_threshold)
- self.assertAllClose(nms_boxlist.get(), expected_boxes)
-
- def test_select_at_most_two_from_three_clusters(self):
- boxlist = np_box_list.BoxList(self._boxes)
- boxlist.add_field('scores',
- np.array([.9, .75, .6, .95, .5, .3], dtype=float))
- max_output_size = 2
- iou_threshold = 0.5
-
- expected_boxes = np.array([[0, 10, 1, 11], [0, 0, 1, 1]], dtype=float)
- nms_boxlist = np_box_list_ops.non_max_suppression(
- boxlist, max_output_size, iou_threshold)
- self.assertAllClose(nms_boxlist.get(), expected_boxes)
-
- def test_select_at_most_thirty_from_three_clusters(self):
- boxlist = np_box_list.BoxList(self._boxes)
- boxlist.add_field('scores',
- np.array([.9, .75, .6, .95, .5, .3], dtype=float))
- max_output_size = 30
- iou_threshold = 0.5
-
- expected_boxes = np.array([[0, 10, 1, 11], [0, 0, 1, 1], [0, 100, 1, 101]],
- dtype=float)
- nms_boxlist = np_box_list_ops.non_max_suppression(
- boxlist, max_output_size, iou_threshold)
- self.assertAllClose(nms_boxlist.get(), expected_boxes)
-
- def test_select_from_ten_indentical_boxes(self):
- boxes = np.array(10 * [[0, 0, 1, 1]], dtype=float)
- boxlist = np_box_list.BoxList(boxes)
- boxlist.add_field('scores', np.array(10 * [0.8]))
- iou_threshold = .5
- max_output_size = 3
- expected_boxes = np.array([[0, 0, 1, 1]], dtype=float)
- nms_boxlist = np_box_list_ops.non_max_suppression(
- boxlist, max_output_size, iou_threshold)
- self.assertAllClose(nms_boxlist.get(), expected_boxes)
-
- def test_different_iou_threshold(self):
- boxes = np.array([[0, 0, 20, 100], [0, 0, 20, 80], [200, 200, 210, 300],
- [200, 200, 210, 250]],
- dtype=float)
- boxlist = np_box_list.BoxList(boxes)
- boxlist.add_field('scores', np.array([0.9, 0.8, 0.7, 0.6]))
- max_output_size = 4
-
- iou_threshold = .4
- expected_boxes = np.array([[0, 0, 20, 100],
- [200, 200, 210, 300],],
- dtype=float)
- nms_boxlist = np_box_list_ops.non_max_suppression(
- boxlist, max_output_size, iou_threshold)
- self.assertAllClose(nms_boxlist.get(), expected_boxes)
-
- iou_threshold = .5
- expected_boxes = np.array([[0, 0, 20, 100], [200, 200, 210, 300],
- [200, 200, 210, 250]],
- dtype=float)
- nms_boxlist = np_box_list_ops.non_max_suppression(
- boxlist, max_output_size, iou_threshold)
- self.assertAllClose(nms_boxlist.get(), expected_boxes)
-
- iou_threshold = .8
- expected_boxes = np.array([[0, 0, 20, 100], [0, 0, 20, 80],
- [200, 200, 210, 300], [200, 200, 210, 250]],
- dtype=float)
- nms_boxlist = np_box_list_ops.non_max_suppression(
- boxlist, max_output_size, iou_threshold)
- self.assertAllClose(nms_boxlist.get(), expected_boxes)
-
- def test_multiclass_nms(self):
- boxlist = np_box_list.BoxList(
- np.array(
- [[0.2, 0.4, 0.8, 0.8], [0.4, 0.2, 0.8, 0.8], [0.6, 0.0, 1.0, 1.0]],
- dtype=np.float32))
- scores = np.array([[-0.2, 0.1, 0.5, -0.4, 0.3],
- [0.7, -0.7, 0.6, 0.2, -0.9],
- [0.4, 0.34, -0.9, 0.2, 0.31]],
- dtype=np.float32)
- boxlist.add_field('scores', scores)
- boxlist_clean = np_box_list_ops.multi_class_non_max_suppression(
- boxlist, score_thresh=0.25, iou_thresh=0.1, max_output_size=3)
-
- scores_clean = boxlist_clean.get_field('scores')
- classes_clean = boxlist_clean.get_field('classes')
- boxes = boxlist_clean.get()
- expected_scores = np.array([0.7, 0.6, 0.34, 0.31])
- expected_classes = np.array([0, 2, 1, 4])
- expected_boxes = np.array([[0.4, 0.2, 0.8, 0.8],
- [0.4, 0.2, 0.8, 0.8],
- [0.6, 0.0, 1.0, 1.0],
- [0.6, 0.0, 1.0, 1.0]],
- dtype=np.float32)
- self.assertAllClose(scores_clean, expected_scores)
- self.assertAllClose(classes_clean, expected_classes)
- self.assertAllClose(boxes, expected_boxes)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/utils/np_box_list_test.py b/object_detection/utils/np_box_list_test.py
deleted file mode 100644
index bb0ee5d2..00000000
--- a/object_detection/utils/np_box_list_test.py
+++ /dev/null
@@ -1,135 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.utils.np_box_list_test."""
-
-import numpy as np
-import tensorflow as tf
-
-from object_detection.utils import np_box_list
-
-
-class BoxListTest(tf.test.TestCase):
-
- def test_invalid_box_data(self):
- with self.assertRaises(ValueError):
- np_box_list.BoxList([0, 0, 1, 1])
-
- with self.assertRaises(ValueError):
- np_box_list.BoxList(np.array([[0, 0, 1, 1]], dtype=int))
-
- with self.assertRaises(ValueError):
- np_box_list.BoxList(np.array([0, 1, 1, 3, 4], dtype=float))
-
- with self.assertRaises(ValueError):
- np_box_list.BoxList(np.array([[0, 1, 1, 3], [3, 1, 1, 5]], dtype=float))
-
- def test_has_field_with_existed_field(self):
- boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
- [0.0, 0.0, 20.0, 20.0]],
- dtype=float)
- boxlist = np_box_list.BoxList(boxes)
- self.assertTrue(boxlist.has_field('boxes'))
-
- def test_has_field_with_nonexisted_field(self):
- boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
- [0.0, 0.0, 20.0, 20.0]],
- dtype=float)
- boxlist = np_box_list.BoxList(boxes)
- self.assertFalse(boxlist.has_field('scores'))
-
- def test_get_field_with_existed_field(self):
- boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
- [0.0, 0.0, 20.0, 20.0]],
- dtype=float)
- boxlist = np_box_list.BoxList(boxes)
- self.assertTrue(np.allclose(boxlist.get_field('boxes'), boxes))
-
- def test_get_field_with_nonexited_field(self):
- boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
- [0.0, 0.0, 20.0, 20.0]],
- dtype=float)
- boxlist = np_box_list.BoxList(boxes)
- with self.assertRaises(ValueError):
- boxlist.get_field('scores')
-
-
-class AddExtraFieldTest(tf.test.TestCase):
-
- def setUp(self):
- boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
- [0.0, 0.0, 20.0, 20.0]],
- dtype=float)
- self.boxlist = np_box_list.BoxList(boxes)
-
- def test_add_already_existed_field(self):
- with self.assertRaises(ValueError):
- self.boxlist.add_field('boxes', np.array([[0, 0, 0, 1, 0]], dtype=float))
-
- def test_add_invalid_field_data(self):
- with self.assertRaises(ValueError):
- self.boxlist.add_field('scores', np.array([0.5, 0.7], dtype=float))
- with self.assertRaises(ValueError):
- self.boxlist.add_field('scores',
- np.array([0.5, 0.7, 0.9, 0.1], dtype=float))
-
- def test_add_single_dimensional_field_data(self):
- boxlist = self.boxlist
- scores = np.array([0.5, 0.7, 0.9], dtype=float)
- boxlist.add_field('scores', scores)
- self.assertTrue(np.allclose(scores, self.boxlist.get_field('scores')))
-
- def test_add_multi_dimensional_field_data(self):
- boxlist = self.boxlist
- labels = np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0], [0, 0, 0, 0, 1]],
- dtype=int)
- boxlist.add_field('labels', labels)
- self.assertTrue(np.allclose(labels, self.boxlist.get_field('labels')))
-
- def test_get_extra_fields(self):
- boxlist = self.boxlist
- self.assertSameElements(boxlist.get_extra_fields(), [])
-
- scores = np.array([0.5, 0.7, 0.9], dtype=float)
- boxlist.add_field('scores', scores)
- self.assertSameElements(boxlist.get_extra_fields(), ['scores'])
-
- labels = np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0], [0, 0, 0, 0, 1]],
- dtype=int)
- boxlist.add_field('labels', labels)
- self.assertSameElements(boxlist.get_extra_fields(), ['scores', 'labels'])
-
- def test_get_coordinates(self):
- y_min, x_min, y_max, x_max = self.boxlist.get_coordinates()
-
- expected_y_min = np.array([3.0, 14.0, 0.0], dtype=float)
- expected_x_min = np.array([4.0, 14.0, 0.0], dtype=float)
- expected_y_max = np.array([6.0, 15.0, 20.0], dtype=float)
- expected_x_max = np.array([8.0, 15.0, 20.0], dtype=float)
-
- self.assertTrue(np.allclose(y_min, expected_y_min))
- self.assertTrue(np.allclose(x_min, expected_x_min))
- self.assertTrue(np.allclose(y_max, expected_y_max))
- self.assertTrue(np.allclose(x_max, expected_x_max))
-
- def test_num_boxes(self):
- boxes = np.array([[0., 0., 100., 100.], [10., 30., 50., 70.]], dtype=float)
- boxlist = np_box_list.BoxList(boxes)
- expected_num_boxes = 2
- self.assertEquals(boxlist.num_boxes(), expected_num_boxes)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/utils/np_box_ops.py b/object_detection/utils/np_box_ops.py
deleted file mode 100644
index b4b46a75..00000000
--- a/object_detection/utils/np_box_ops.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Operations for [N, 4] numpy arrays representing bounding boxes.
-
-Example box operations that are supported:
- * Areas: compute bounding box areas
- * IOU: pairwise intersection-over-union scores
-"""
-import numpy as np
-
-
-def area(boxes):
- """Computes area of boxes.
-
- Args:
- boxes: Numpy array with shape [N, 4] holding N boxes
-
- Returns:
- a numpy array with shape [N*1] representing box areas
- """
- return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
-
-
-def intersection(boxes1, boxes2):
- """Compute pairwise intersection areas between boxes.
-
- Args:
- boxes1: a numpy array with shape [N, 4] holding N boxes
- boxes2: a numpy array with shape [M, 4] holding M boxes
-
- Returns:
- a numpy array with shape [N*M] representing pairwise intersection area
- """
- [y_min1, x_min1, y_max1, x_max1] = np.split(boxes1, 4, axis=1)
- [y_min2, x_min2, y_max2, x_max2] = np.split(boxes2, 4, axis=1)
-
- all_pairs_min_ymax = np.minimum(y_max1, np.transpose(y_max2))
- all_pairs_max_ymin = np.maximum(y_min1, np.transpose(y_min2))
- intersect_heights = np.maximum(
- np.zeros(all_pairs_max_ymin.shape),
- all_pairs_min_ymax - all_pairs_max_ymin)
- all_pairs_min_xmax = np.minimum(x_max1, np.transpose(x_max2))
- all_pairs_max_xmin = np.maximum(x_min1, np.transpose(x_min2))
- intersect_widths = np.maximum(
- np.zeros(all_pairs_max_xmin.shape),
- all_pairs_min_xmax - all_pairs_max_xmin)
- return intersect_heights * intersect_widths
-
-
-def iou(boxes1, boxes2):
- """Computes pairwise intersection-over-union between box collections.
-
- Args:
- boxes1: a numpy array with shape [N, 4] holding N boxes.
- boxes2: a numpy array with shape [M, 4] holding N boxes.
-
- Returns:
- a numpy array with shape [N, M] representing pairwise iou scores.
- """
- intersect = intersection(boxes1, boxes2)
- area1 = area(boxes1)
- area2 = area(boxes2)
- union = np.expand_dims(area1, axis=1) + np.expand_dims(
- area2, axis=0) - intersect
- return intersect / union
-
-
-def ioa(boxes1, boxes2):
- """Computes pairwise intersection-over-area between box collections.
-
- Intersection-over-area (ioa) between two boxes box1 and box2 is defined as
- their intersection area over box2's area. Note that ioa is not symmetric,
- that is, IOA(box1, box2) != IOA(box2, box1).
-
- Args:
- boxes1: a numpy array with shape [N, 4] holding N boxes.
- boxes2: a numpy array with shape [M, 4] holding N boxes.
-
- Returns:
- a numpy array with shape [N, M] representing pairwise ioa scores.
- """
- intersect = intersection(boxes1, boxes2)
- areas = np.expand_dims(area(boxes2), axis=0)
- return intersect / areas
diff --git a/object_detection/utils/np_box_ops_test.py b/object_detection/utils/np_box_ops_test.py
deleted file mode 100644
index 730f3d20..00000000
--- a/object_detection/utils/np_box_ops_test.py
+++ /dev/null
@@ -1,68 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.np_box_ops."""
-
-import numpy as np
-import tensorflow as tf
-
-from object_detection.utils import np_box_ops
-
-
-class BoxOpsTests(tf.test.TestCase):
-
- def setUp(self):
- boxes1 = np.array([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]],
- dtype=float)
- boxes2 = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
- [0.0, 0.0, 20.0, 20.0]],
- dtype=float)
- self.boxes1 = boxes1
- self.boxes2 = boxes2
-
- def testArea(self):
- areas = np_box_ops.area(self.boxes1)
- expected_areas = np.array([6.0, 5.0], dtype=float)
- self.assertAllClose(expected_areas, areas)
-
- def testIntersection(self):
- intersection = np_box_ops.intersection(self.boxes1, self.boxes2)
- expected_intersection = np.array([[2.0, 0.0, 6.0], [1.0, 0.0, 5.0]],
- dtype=float)
- self.assertAllClose(intersection, expected_intersection)
-
- def testIOU(self):
- iou = np_box_ops.iou(self.boxes1, self.boxes2)
- expected_iou = np.array([[2.0 / 16.0, 0.0, 6.0 / 400.0],
- [1.0 / 16.0, 0.0, 5.0 / 400.0]],
- dtype=float)
- self.assertAllClose(iou, expected_iou)
-
- def testIOA(self):
- boxes1 = np.array([[0.25, 0.25, 0.75, 0.75],
- [0.0, 0.0, 0.5, 0.75]],
- dtype=np.float32)
- boxes2 = np.array([[0.5, 0.25, 1.0, 1.0],
- [0.0, 0.0, 1.0, 1.0]],
- dtype=np.float32)
- ioa21 = np_box_ops.ioa(boxes2, boxes1)
- expected_ioa21 = np.array([[0.5, 0.0],
- [1.0, 1.0]],
- dtype=np.float32)
- self.assertAllClose(ioa21, expected_ioa21)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/utils/object_detection_evaluation.py b/object_detection/utils/object_detection_evaluation.py
deleted file mode 100644
index 5db1557d..00000000
--- a/object_detection/utils/object_detection_evaluation.py
+++ /dev/null
@@ -1,616 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""object_detection_evaluation module.
-
-ObjectDetectionEvaluation is a class which manages ground truth information of a
-object detection dataset, and computes frequently used detection metrics such as
-Precision, Recall, CorLoc of the provided detection results.
-It supports the following operations:
-1) Add ground truth information of images sequentially.
-2) Add detection result of images sequentially.
-3) Evaluate detection metrics on already inserted detection results.
-4) Write evaluation result into a pickle file for future processing or
- visualization.
-
-Note: This module operates on numpy boxes and box lists.
-"""
-
-from abc import ABCMeta
-from abc import abstractmethod
-import collections
-import logging
-import numpy as np
-
-from object_detection.core import standard_fields
-from object_detection.utils import label_map_util
-from object_detection.utils import metrics
-from object_detection.utils import per_image_evaluation
-
-
-class DetectionEvaluator(object):
- """Interface for object detection evalution classes.
-
- Example usage of the Evaluator:
- ------------------------------
- evaluator = DetectionEvaluator(categories)
-
- # Detections and groundtruth for image 1.
- evaluator.add_single_groundtruth_image_info(...)
- evaluator.add_single_detected_image_info(...)
-
- # Detections and groundtruth for image 2.
- evaluator.add_single_groundtruth_image_info(...)
- evaluator.add_single_detected_image_info(...)
-
- metrics_dict = evaluator.evaluate()
- """
- __metaclass__ = ABCMeta
-
- def __init__(self, categories):
- """Constructor.
-
- Args:
- categories: A list of dicts, each of which has the following keys -
- 'id': (required) an integer id uniquely identifying this category.
- 'name': (required) string representing category name e.g., 'cat', 'dog'.
- """
- self._categories = categories
-
- @abstractmethod
- def add_single_ground_truth_image_info(self, image_id, groundtruth_dict):
- """Adds groundtruth for a single image to be used for evaluation.
-
- Args:
- image_id: A unique string/integer identifier for the image.
- groundtruth_dict: A dictionary of groundtruth numpy arrays required
- for evaluations.
- """
- pass
-
- @abstractmethod
- def add_single_detected_image_info(self, image_id, detections_dict):
- """Adds detections for a single image to be used for evaluation.
-
- Args:
- image_id: A unique string/integer identifier for the image.
- detections_dict: A dictionary of detection numpy arrays required
- for evaluation.
- """
- pass
-
- @abstractmethod
- def evaluate(self):
- """Evaluates detections and returns a dictionary of metrics."""
- pass
-
- @abstractmethod
- def clear(self):
- """Clears the state to prepare for a fresh evaluation."""
- pass
-
-
-class ObjectDetectionEvaluator(DetectionEvaluator):
- """A class to evaluate detections."""
-
- def __init__(self,
- categories,
- matching_iou_threshold=0.5,
- evaluate_corlocs=False,
- metric_prefix=None,
- use_weighted_mean_ap=False):
- """Constructor.
-
- Args:
- categories: A list of dicts, each of which has the following keys -
- 'id': (required) an integer id uniquely identifying this category.
- 'name': (required) string representing category name e.g., 'cat', 'dog'.
- matching_iou_threshold: IOU threshold to use for matching groundtruth
- boxes to detection boxes.
- evaluate_corlocs: (optional) boolean which determines if corloc scores
- are to be returned or not.
- metric_prefix: (optional) string prefix for metric name; if None, no
- prefix is used.
- use_weighted_mean_ap: (optional) boolean which determines if the mean
- average precision is computed directly from the scores and tp_fp_labels
- of all classes.
- """
- super(ObjectDetectionEvaluator, self).__init__(categories)
- self._num_classes = max([cat['id'] for cat in categories])
- self._matching_iou_threshold = matching_iou_threshold
- self._use_weighted_mean_ap = use_weighted_mean_ap
- self._label_id_offset = 1
- self._evaluation = ObjectDetectionEvaluation(
- self._num_classes,
- matching_iou_threshold=self._matching_iou_threshold,
- use_weighted_mean_ap=self._use_weighted_mean_ap,
- label_id_offset=self._label_id_offset)
- self._image_ids = set([])
- self._evaluate_corlocs = evaluate_corlocs
- self._metric_prefix = (metric_prefix + '/') if metric_prefix else ''
-
- def add_single_ground_truth_image_info(self, image_id, groundtruth_dict):
- """Adds groundtruth for a single image to be used for evaluation.
-
- Args:
- image_id: A unique string/integer identifier for the image.
- groundtruth_dict: A dictionary containing -
- standard_fields.InputDataFields.groundtruth_boxes: float32 numpy array
- of shape [num_boxes, 4] containing `num_boxes` groundtruth boxes of
- the format [ymin, xmin, ymax, xmax] in absolute image coordinates.
- standard_fields.InputDataFields.groundtruth_classes: integer numpy array
- of shape [num_boxes] containing 1-indexed groundtruth classes for the
- boxes.
- standard_fields.InputDataFields.groundtruth_difficult: Optional length
- M numpy boolean array denoting whether a ground truth box is a
- difficult instance or not. This field is optional to support the case
- that no boxes are difficult.
-
- Raises:
- ValueError: On adding groundtruth for an image more than once.
- """
- if image_id in self._image_ids:
- raise ValueError('Image with id {} already added.'.format(image_id))
-
- groundtruth_classes = groundtruth_dict[
- standard_fields.InputDataFields.groundtruth_classes]
- groundtruth_classes -= self._label_id_offset
- # If the key is not present in the groundtruth_dict or the array is empty
- # (unless there are no annotations for the groundtruth on this image)
- # use values from the dictionary or insert None otherwise.
- if (standard_fields.InputDataFields.groundtruth_difficult in
- groundtruth_dict.keys() and
- (groundtruth_dict[standard_fields.InputDataFields.groundtruth_difficult]
- .size or not groundtruth_classes.size)):
- groundtruth_difficult = groundtruth_dict[
- standard_fields.InputDataFields.groundtruth_difficult]
- else:
- groundtruth_difficult = None
- if not len(self._image_ids) % 1000:
- logging.warn(
- 'image %s does not have groundtruth difficult flag specified',
- image_id)
- self._evaluation.add_single_ground_truth_image_info(
- image_id,
- groundtruth_dict[standard_fields.InputDataFields.groundtruth_boxes],
- groundtruth_classes,
- groundtruth_is_difficult_list=groundtruth_difficult)
- self._image_ids.update([image_id])
-
- def add_single_detected_image_info(self, image_id, detections_dict):
- """Adds detections for a single image to be used for evaluation.
-
- Args:
- image_id: A unique string/integer identifier for the image.
- detections_dict: A dictionary containing -
- standard_fields.DetectionResultFields.detection_boxes: float32 numpy
- array of shape [num_boxes, 4] containing `num_boxes` detection boxes
- of the format [ymin, xmin, ymax, xmax] in absolute image coordinates.
- standard_fields.DetectionResultFields.detection_scores: float32 numpy
- array of shape [num_boxes] containing detection scores for the boxes.
- standard_fields.DetectionResultFields.detection_classes: integer numpy
- array of shape [num_boxes] containing 1-indexed detection classes for
- the boxes.
- """
- detection_classes = detections_dict[
- standard_fields.DetectionResultFields.detection_classes]
- detection_classes -= self._label_id_offset
- self._evaluation.add_single_detected_image_info(
- image_id,
- detections_dict[standard_fields.DetectionResultFields.detection_boxes],
- detections_dict[standard_fields.DetectionResultFields.detection_scores],
- detection_classes)
-
- def evaluate(self):
- """Compute evaluation result.
-
- Returns:
- A dictionary of metrics with the following fields -
-
- 1. summary_metrics:
- 'Precision/mAP@IOU': mean average precision at
- the specified IOU threshold.
-
- 2. per_category_ap: category specific results with keys of the form
- 'PerformanceByCategory/mAP@IOU/category'.
- """
- (per_class_ap, mean_ap, _, _, per_class_corloc, mean_corloc) = (
- self._evaluation.evaluate())
- pascal_metrics = {
- self._metric_prefix +
- 'Precision/mAP@{}IOU'.format(self._matching_iou_threshold):
- mean_ap
- }
- if self._evaluate_corlocs:
- pascal_metrics[self._metric_prefix + 'Precision/meanCorLoc@{}IOU'.format(
- self._matching_iou_threshold)] = mean_corloc
- category_index = label_map_util.create_category_index(self._categories)
- for idx in range(per_class_ap.size):
- if idx + self._label_id_offset in category_index:
- display_name = (
- self._metric_prefix + 'PerformanceByCategory/AP@{}IOU/{}'.format(
- self._matching_iou_threshold,
- category_index[idx + self._label_id_offset]['name']))
- pascal_metrics[display_name] = per_class_ap[idx]
-
- # Optionally add CorLoc metrics.classes
- if self._evaluate_corlocs:
- display_name = (
- self._metric_prefix + 'PerformanceByCategory/CorLoc@{}IOU/{}'
- .format(self._matching_iou_threshold,
- category_index[idx + self._label_id_offset]['name']))
- pascal_metrics[display_name] = per_class_corloc[idx]
-
- return pascal_metrics
-
- def clear(self):
- """Clears the state to prepare for a fresh evaluation."""
- self._evaluation = ObjectDetectionEvaluation(
- self._num_classes,
- matching_iou_threshold=self._matching_iou_threshold,
- use_weighted_mean_ap=self._use_weighted_mean_ap,
- label_id_offset=self._label_id_offset)
- self._image_ids.clear()
-
-
-class PascalDetectionEvaluator(ObjectDetectionEvaluator):
- """A class to evaluate detections using PASCAL metrics."""
-
- def __init__(self, categories, matching_iou_threshold=0.5):
- super(PascalDetectionEvaluator, self).__init__(
- categories,
- matching_iou_threshold=matching_iou_threshold,
- evaluate_corlocs=False,
- metric_prefix='PASCAL',
- use_weighted_mean_ap=False)
-
-
-class WeightedPascalDetectionEvaluator(ObjectDetectionEvaluator):
- """A class to evaluate detections using weighted PASCAL metrics.
-
- Weighted PASCAL metrics computes the mean average precision as the average
- precision given the scores and tp_fp_labels of all classes. In comparison,
- PASCAL metrics computes the mean average precision as the mean of the
- per-class average precisions.
-
- This definition is very similar to the mean of the per-class average
- precisions weighted by class frequency. However, they are typically not the
- same as the average precision is not a linear function of the scores and
- tp_fp_labels.
- """
-
- def __init__(self, categories, matching_iou_threshold=0.5):
- super(WeightedPascalDetectionEvaluator, self).__init__(
- categories,
- matching_iou_threshold=matching_iou_threshold,
- evaluate_corlocs=False,
- metric_prefix='WeightedPASCAL',
- use_weighted_mean_ap=True)
-
-
-class OpenImagesDetectionEvaluator(ObjectDetectionEvaluator):
- """A class to evaluate detections using Open Images V2 metrics.
-
- Open Images V2 introduce group_of type of bounding boxes and this metric
- handles those boxes appropriately.
- """
-
- def __init__(self,
- categories,
- matching_iou_threshold=0.5,
- evaluate_corlocs=False):
- """Constructor.
-
- Args:
- categories: A list of dicts, each of which has the following keys -
- 'id': (required) an integer id uniquely identifying this category.
- 'name': (required) string representing category name e.g., 'cat', 'dog'.
- matching_iou_threshold: IOU threshold to use for matching groundtruth
- boxes to detection boxes.
- evaluate_corlocs: if True, additionally evaluates and returns CorLoc.
- """
- super(OpenImagesDetectionEvaluator, self).__init__(
- categories,
- matching_iou_threshold,
- evaluate_corlocs,
- metric_prefix='OpenImagesV2')
-
- def add_single_ground_truth_image_info(self, image_id, groundtruth_dict):
- """Adds groundtruth for a single image to be used for evaluation.
-
- Args:
- image_id: A unique string/integer identifier for the image.
- groundtruth_dict: A dictionary containing -
- standard_fields.InputDataFields.groundtruth_boxes: float32 numpy array
- of shape [num_boxes, 4] containing `num_boxes` groundtruth boxes of
- the format [ymin, xmin, ymax, xmax] in absolute image coordinates.
- standard_fields.InputDataFields.groundtruth_classes: integer numpy array
- of shape [num_boxes] containing 1-indexed groundtruth classes for the
- boxes.
- standard_fields.InputDataFields.groundtruth_group_of: Optional length
- M numpy boolean array denoting whether a groundtruth box contains a
- group of instances.
-
- Raises:
- ValueError: On adding groundtruth for an image more than once.
- """
- if image_id in self._image_ids:
- raise ValueError('Image with id {} already added.'.format(image_id))
-
- groundtruth_classes = groundtruth_dict[
- standard_fields.InputDataFields.groundtruth_classes]
- groundtruth_classes -= self._label_id_offset
- # If the key is not present in the groundtruth_dict or the array is empty
- # (unless there are no annotations for the groundtruth on this image)
- # use values from the dictionary or insert None otherwise.
- if (standard_fields.InputDataFields.groundtruth_group_of in
- groundtruth_dict.keys() and
- (groundtruth_dict[standard_fields.InputDataFields.groundtruth_group_of]
- .size or not groundtruth_classes.size)):
- groundtruth_group_of = groundtruth_dict[
- standard_fields.InputDataFields.groundtruth_group_of]
- else:
- groundtruth_group_of = None
- if not len(self._image_ids) % 1000:
- logging.warn(
- 'image %s does not have groundtruth group_of flag specified',
- image_id)
- self._evaluation.add_single_ground_truth_image_info(
- image_id,
- groundtruth_dict[standard_fields.InputDataFields.groundtruth_boxes],
- groundtruth_classes,
- groundtruth_is_difficult_list=None,
- groundtruth_is_group_of_list=groundtruth_group_of)
- self._image_ids.update([image_id])
-
-
-ObjectDetectionEvalMetrics = collections.namedtuple(
- 'ObjectDetectionEvalMetrics', [
- 'average_precisions', 'mean_ap', 'precisions', 'recalls', 'corlocs',
- 'mean_corloc'
- ])
-
-
-class ObjectDetectionEvaluation(object):
- """Internal implementation of Pascal object detection metrics."""
-
- def __init__(self,
- num_groundtruth_classes,
- matching_iou_threshold=0.5,
- nms_iou_threshold=1.0,
- nms_max_output_boxes=10000,
- use_weighted_mean_ap=False,
- label_id_offset=0):
- self.per_image_eval = per_image_evaluation.PerImageEvaluation(
- num_groundtruth_classes, matching_iou_threshold, nms_iou_threshold,
- nms_max_output_boxes)
- self.num_class = num_groundtruth_classes
- self.label_id_offset = label_id_offset
-
- self.groundtruth_boxes = {}
- self.groundtruth_class_labels = {}
- self.groundtruth_is_difficult_list = {}
- self.groundtruth_is_group_of_list = {}
- self.num_gt_instances_per_class = np.zeros(self.num_class, dtype=int)
- self.num_gt_imgs_per_class = np.zeros(self.num_class, dtype=int)
-
- self.detection_keys = set()
- self.scores_per_class = [[] for _ in range(self.num_class)]
- self.tp_fp_labels_per_class = [[] for _ in range(self.num_class)]
- self.num_images_correctly_detected_per_class = np.zeros(self.num_class)
- self.average_precision_per_class = np.empty(self.num_class, dtype=float)
- self.average_precision_per_class.fill(np.nan)
- self.precisions_per_class = []
- self.recalls_per_class = []
- self.corloc_per_class = np.ones(self.num_class, dtype=float)
-
- self.use_weighted_mean_ap = use_weighted_mean_ap
-
- def clear_detections(self):
- self.detection_keys = {}
- self.scores_per_class = [[] for _ in range(self.num_class)]
- self.tp_fp_labels_per_class = [[] for _ in range(self.num_class)]
- self.num_images_correctly_detected_per_class = np.zeros(self.num_class)
- self.average_precision_per_class = np.zeros(self.num_class, dtype=float)
- self.precisions_per_class = []
- self.recalls_per_class = []
- self.corloc_per_class = np.ones(self.num_class, dtype=float)
-
- def add_single_ground_truth_image_info(self,
- image_key,
- groundtruth_boxes,
- groundtruth_class_labels,
- groundtruth_is_difficult_list=None,
- groundtruth_is_group_of_list=None):
- """Adds groundtruth for a single image to be used for evaluation.
-
- Args:
- image_key: A unique string/integer identifier for the image.
- groundtruth_boxes: float32 numpy array of shape [num_boxes, 4]
- containing `num_boxes` groundtruth boxes of the format
- [ymin, xmin, ymax, xmax] in absolute image coordinates.
- groundtruth_class_labels: integer numpy array of shape [num_boxes]
- containing 0-indexed groundtruth classes for the boxes.
- groundtruth_is_difficult_list: A length M numpy boolean array denoting
- whether a ground truth box is a difficult instance or not. To support
- the case that no boxes are difficult, it is by default set as None.
- groundtruth_is_group_of_list: A length M numpy boolean array denoting
- whether a ground truth box is a group-of box or not. To support
- the case that no boxes are groups-of, it is by default set as None.
- """
- if image_key in self.groundtruth_boxes:
- logging.warn(
- 'image %s has already been added to the ground truth database.',
- image_key)
- return
-
- self.groundtruth_boxes[image_key] = groundtruth_boxes
- self.groundtruth_class_labels[image_key] = groundtruth_class_labels
- if groundtruth_is_difficult_list is None:
- num_boxes = groundtruth_boxes.shape[0]
- groundtruth_is_difficult_list = np.zeros(num_boxes, dtype=bool)
- self.groundtruth_is_difficult_list[
- image_key] = groundtruth_is_difficult_list.astype(dtype=bool)
- if groundtruth_is_group_of_list is None:
- num_boxes = groundtruth_boxes.shape[0]
- groundtruth_is_group_of_list = np.zeros(num_boxes, dtype=bool)
- self.groundtruth_is_group_of_list[
- image_key] = groundtruth_is_group_of_list.astype(dtype=bool)
-
- self._update_ground_truth_statistics(
- groundtruth_class_labels,
- groundtruth_is_difficult_list.astype(dtype=bool),
- groundtruth_is_group_of_list.astype(dtype=bool))
-
- def add_single_detected_image_info(self, image_key, detected_boxes,
- detected_scores, detected_class_labels):
- """Adds detections for a single image to be used for evaluation.
-
- Args:
- image_key: A unique string/integer identifier for the image.
- detected_boxes: float32 numpy array of shape [num_boxes, 4]
- containing `num_boxes` detection boxes of the format
- [ymin, xmin, ymax, xmax] in absolute image coordinates.
- detected_scores: float32 numpy array of shape [num_boxes] containing
- detection scores for the boxes.
- detected_class_labels: integer numpy array of shape [num_boxes] containing
- 0-indexed detection classes for the boxes.
-
- Raises:
- ValueError: if the number of boxes, scores and class labels differ in
- length.
- """
- if (len(detected_boxes) != len(detected_scores) or
- len(detected_boxes) != len(detected_class_labels)):
- raise ValueError('detected_boxes, detected_scores and '
- 'detected_class_labels should all have same lengths. Got'
- '[%d, %d, %d]' % len(detected_boxes),
- len(detected_scores), len(detected_class_labels))
-
- if image_key in self.detection_keys:
- logging.warn(
- 'image %s has already been added to the detection result database',
- image_key)
- return
-
- self.detection_keys.add(image_key)
- if image_key in self.groundtruth_boxes:
- groundtruth_boxes = self.groundtruth_boxes[image_key]
- groundtruth_class_labels = self.groundtruth_class_labels[image_key]
- groundtruth_is_difficult_list = self.groundtruth_is_difficult_list[
- image_key]
- groundtruth_is_group_of_list = self.groundtruth_is_group_of_list[
- image_key]
- else:
- groundtruth_boxes = np.empty(shape=[0, 4], dtype=float)
- groundtruth_class_labels = np.array([], dtype=int)
- groundtruth_is_difficult_list = np.array([], dtype=bool)
- groundtruth_is_group_of_list = np.array([], dtype=bool)
- scores, tp_fp_labels, is_class_correctly_detected_in_image = (
- self.per_image_eval.compute_object_detection_metrics(
- detected_boxes, detected_scores, detected_class_labels,
- groundtruth_boxes, groundtruth_class_labels,
- groundtruth_is_difficult_list, groundtruth_is_group_of_list))
-
- for i in range(self.num_class):
- if scores[i].shape[0] > 0:
- self.scores_per_class[i].append(scores[i])
- self.tp_fp_labels_per_class[i].append(tp_fp_labels[i])
- (self.num_images_correctly_detected_per_class
- ) += is_class_correctly_detected_in_image
-
- def _update_ground_truth_statistics(self, groundtruth_class_labels,
- groundtruth_is_difficult_list,
- groundtruth_is_group_of_list):
- """Update grouth truth statitistics.
-
- 1. Difficult boxes are ignored when counting the number of ground truth
- instances as done in Pascal VOC devkit.
- 2. Difficult boxes are treated as normal boxes when computing CorLoc related
- statitistics.
-
- Args:
- groundtruth_class_labels: An integer numpy array of length M,
- representing M class labels of object instances in ground truth
- groundtruth_is_difficult_list: A boolean numpy array of length M denoting
- whether a ground truth box is a difficult instance or not
- groundtruth_is_group_of_list: A boolean numpy array of length M denoting
- whether a ground truth box is a group-of box or not
- """
- for class_index in range(self.num_class):
- num_gt_instances = np.sum(groundtruth_class_labels[
- ~groundtruth_is_difficult_list
- & ~groundtruth_is_group_of_list] == class_index)
- self.num_gt_instances_per_class[class_index] += num_gt_instances
- if np.any(groundtruth_class_labels == class_index):
- self.num_gt_imgs_per_class[class_index] += 1
-
- def evaluate(self):
- """Compute evaluation result.
-
- Returns:
- A named tuple with the following fields -
- average_precision: float numpy array of average precision for
- each class.
- mean_ap: mean average precision of all classes, float scalar
- precisions: List of precisions, each precision is a float numpy
- array
- recalls: List of recalls, each recall is a float numpy array
- corloc: numpy float array
- mean_corloc: Mean CorLoc score for each class, float scalar
- """
- if (self.num_gt_instances_per_class == 0).any():
- logging.warn(
- 'The following classes have no ground truth examples: %s',
- np.squeeze(np.argwhere(self.num_gt_instances_per_class == 0)) +
- self.label_id_offset)
-
- if self.use_weighted_mean_ap:
- all_scores = np.array([], dtype=float)
- all_tp_fp_labels = np.array([], dtype=bool)
-
- for class_index in range(self.num_class):
- if self.num_gt_instances_per_class[class_index] == 0:
- continue
- if not self.scores_per_class[class_index]:
- scores = np.array([], dtype=float)
- tp_fp_labels = np.array([], dtype=bool)
- else:
- scores = np.concatenate(self.scores_per_class[class_index])
- tp_fp_labels = np.concatenate(self.tp_fp_labels_per_class[class_index])
- if self.use_weighted_mean_ap:
- all_scores = np.append(all_scores, scores)
- all_tp_fp_labels = np.append(all_tp_fp_labels, tp_fp_labels)
- precision, recall = metrics.compute_precision_recall(
- scores, tp_fp_labels, self.num_gt_instances_per_class[class_index])
- self.precisions_per_class.append(precision)
- self.recalls_per_class.append(recall)
- average_precision = metrics.compute_average_precision(precision, recall)
- self.average_precision_per_class[class_index] = average_precision
-
- self.corloc_per_class = metrics.compute_cor_loc(
- self.num_gt_imgs_per_class,
- self.num_images_correctly_detected_per_class)
-
- if self.use_weighted_mean_ap:
- num_gt_instances = np.sum(self.num_gt_instances_per_class)
- precision, recall = metrics.compute_precision_recall(
- all_scores, all_tp_fp_labels, num_gt_instances)
- mean_ap = metrics.compute_average_precision(precision, recall)
- else:
- mean_ap = np.nanmean(self.average_precision_per_class)
- mean_corloc = np.nanmean(self.corloc_per_class)
- return ObjectDetectionEvalMetrics(
- self.average_precision_per_class, mean_ap, self.precisions_per_class,
- self.recalls_per_class, self.corloc_per_class, mean_corloc)
diff --git a/object_detection/utils/object_detection_evaluation_test.py b/object_detection/utils/object_detection_evaluation_test.py
deleted file mode 100644
index fcaf80a2..00000000
--- a/object_detection/utils/object_detection_evaluation_test.py
+++ /dev/null
@@ -1,436 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.utils.object_detection_evaluation."""
-
-import numpy as np
-import tensorflow as tf
-
-from object_detection.core import standard_fields
-from object_detection.utils import object_detection_evaluation
-
-
-class OpenImagesV2EvaluationTest(tf.test.TestCase):
-
- def test_returns_correct_metric_values(self):
- categories = [{
- 'id': 1,
- 'name': 'cat'
- }, {
- 'id': 2,
- 'name': 'dog'
- }, {
- 'id': 3,
- 'name': 'elephant'
- }]
-
- oiv2_evaluator = object_detection_evaluation.OpenImagesDetectionEvaluator(
- categories)
- image_key1 = 'img1'
- groundtruth_boxes1 = np.array(
- [[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]], dtype=float)
- groundtruth_class_labels1 = np.array([1, 3, 1], dtype=int)
- oiv2_evaluator.add_single_ground_truth_image_info(image_key1, {
- standard_fields.InputDataFields.groundtruth_boxes:
- groundtruth_boxes1,
- standard_fields.InputDataFields.groundtruth_classes:
- groundtruth_class_labels1,
- standard_fields.InputDataFields.groundtruth_group_of:
- np.array([], dtype=bool)
- })
- image_key2 = 'img2'
- groundtruth_boxes2 = np.array(
- [[10, 10, 11, 11], [500, 500, 510, 510], [10, 10, 12, 12]], dtype=float)
- groundtruth_class_labels2 = np.array([1, 1, 3], dtype=int)
- groundtruth_is_group_of_list2 = np.array([False, True, False], dtype=bool)
- oiv2_evaluator.add_single_ground_truth_image_info(image_key2, {
- standard_fields.InputDataFields.groundtruth_boxes:
- groundtruth_boxes2,
- standard_fields.InputDataFields.groundtruth_classes:
- groundtruth_class_labels2,
- standard_fields.InputDataFields.groundtruth_group_of:
- groundtruth_is_group_of_list2
- })
- image_key3 = 'img3'
- groundtruth_boxes3 = np.array([[0, 0, 1, 1]], dtype=float)
- groundtruth_class_labels3 = np.array([2], dtype=int)
- oiv2_evaluator.add_single_ground_truth_image_info(image_key3, {
- standard_fields.InputDataFields.groundtruth_boxes:
- groundtruth_boxes3,
- standard_fields.InputDataFields.groundtruth_classes:
- groundtruth_class_labels3
- })
- # Add detections
- image_key = 'img2'
- detected_boxes = np.array(
- [[10, 10, 11, 11], [100, 100, 120, 120], [100, 100, 220, 220]],
- dtype=float)
- detected_class_labels = np.array([1, 1, 3], dtype=int)
- detected_scores = np.array([0.7, 0.8, 0.9], dtype=float)
- oiv2_evaluator.add_single_detected_image_info(image_key, {
- standard_fields.DetectionResultFields.detection_boxes:
- detected_boxes,
- standard_fields.DetectionResultFields.detection_scores:
- detected_scores,
- standard_fields.DetectionResultFields.detection_classes:
- detected_class_labels
- })
- metrics = oiv2_evaluator.evaluate()
- self.assertAlmostEqual(
- metrics['OpenImagesV2/PerformanceByCategory/AP@0.5IOU/dog'], 0.0)
- self.assertAlmostEqual(
- metrics['OpenImagesV2/PerformanceByCategory/AP@0.5IOU/elephant'], 0.0)
- self.assertAlmostEqual(
- metrics['OpenImagesV2/PerformanceByCategory/AP@0.5IOU/cat'], 0.16666666)
- self.assertAlmostEqual(metrics['OpenImagesV2/Precision/mAP@0.5IOU'],
- 0.05555555)
- oiv2_evaluator.clear()
- self.assertFalse(oiv2_evaluator._image_ids)
-
-
-class PascalEvaluationTest(tf.test.TestCase):
-
- def test_returns_correct_metric_values(self):
- categories = [{'id': 1, 'name': 'cat'},
- {'id': 2, 'name': 'dog'},
- {'id': 3, 'name': 'elephant'}]
- # Add groundtruth
- pascal_evaluator = object_detection_evaluation.PascalDetectionEvaluator(
- categories)
- image_key1 = 'img1'
- groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
- dtype=float)
- groundtruth_class_labels1 = np.array([1, 3, 1], dtype=int)
- pascal_evaluator.add_single_ground_truth_image_info(
- image_key1,
- {standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes1,
- standard_fields.InputDataFields.groundtruth_classes:
- groundtruth_class_labels1,
- standard_fields.InputDataFields.groundtruth_difficult:
- np.array([], dtype=bool)})
- image_key2 = 'img2'
- groundtruth_boxes2 = np.array([[10, 10, 11, 11], [500, 500, 510, 510],
- [10, 10, 12, 12]], dtype=float)
- groundtruth_class_labels2 = np.array([1, 1, 3], dtype=int)
- groundtruth_is_difficult_list2 = np.array([False, True, False], dtype=bool)
- pascal_evaluator.add_single_ground_truth_image_info(
- image_key2,
- {standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes2,
- standard_fields.InputDataFields.groundtruth_classes:
- groundtruth_class_labels2,
- standard_fields.InputDataFields.groundtruth_difficult:
- groundtruth_is_difficult_list2})
- image_key3 = 'img3'
- groundtruth_boxes3 = np.array([[0, 0, 1, 1]], dtype=float)
- groundtruth_class_labels3 = np.array([2], dtype=int)
- pascal_evaluator.add_single_ground_truth_image_info(
- image_key3,
- {standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes3,
- standard_fields.InputDataFields.groundtruth_classes:
- groundtruth_class_labels3})
-
- # Add detections
- image_key = 'img2'
- detected_boxes = np.array(
- [[10, 10, 11, 11], [100, 100, 120, 120], [100, 100, 220, 220]],
- dtype=float)
- detected_class_labels = np.array([1, 1, 3], dtype=int)
- detected_scores = np.array([0.7, 0.8, 0.9], dtype=float)
- pascal_evaluator.add_single_detected_image_info(
- image_key,
- {standard_fields.DetectionResultFields.detection_boxes: detected_boxes,
- standard_fields.DetectionResultFields.detection_scores:
- detected_scores,
- standard_fields.DetectionResultFields.detection_classes:
- detected_class_labels})
-
- metrics = pascal_evaluator.evaluate()
- self.assertAlmostEqual(
- metrics['PASCAL/PerformanceByCategory/AP@0.5IOU/dog'], 0.0)
- self.assertAlmostEqual(
- metrics['PASCAL/PerformanceByCategory/AP@0.5IOU/elephant'], 0.0)
- self.assertAlmostEqual(
- metrics['PASCAL/PerformanceByCategory/AP@0.5IOU/cat'], 0.16666666)
- self.assertAlmostEqual(metrics['PASCAL/Precision/mAP@0.5IOU'], 0.05555555)
- pascal_evaluator.clear()
- self.assertFalse(pascal_evaluator._image_ids)
-
- def test_value_error_on_duplicate_images(self):
- categories = [{'id': 1, 'name': 'cat'},
- {'id': 2, 'name': 'dog'},
- {'id': 3, 'name': 'elephant'}]
- # Add groundtruth
- pascal_evaluator = object_detection_evaluation.PascalDetectionEvaluator(
- categories)
- image_key1 = 'img1'
- groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
- dtype=float)
- groundtruth_class_labels1 = np.array([1, 3, 1], dtype=int)
- pascal_evaluator.add_single_ground_truth_image_info(
- image_key1,
- {standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes1,
- standard_fields.InputDataFields.groundtruth_classes:
- groundtruth_class_labels1})
- with self.assertRaises(ValueError):
- pascal_evaluator.add_single_ground_truth_image_info(
- image_key1,
- {standard_fields.InputDataFields.groundtruth_boxes:
- groundtruth_boxes1,
- standard_fields.InputDataFields.groundtruth_classes:
- groundtruth_class_labels1})
-
-
-class WeightedPascalEvaluationTest(tf.test.TestCase):
-
- def setUp(self):
- self.categories = [{'id': 1, 'name': 'cat'},
- {'id': 2, 'name': 'dog'},
- {'id': 3, 'name': 'elephant'}]
-
- def create_and_add_common_ground_truth(self):
- # Add groundtruth
- self.wp_eval = (
- object_detection_evaluation.WeightedPascalDetectionEvaluator(
- self.categories))
-
- image_key1 = 'img1'
- groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
- dtype=float)
- groundtruth_class_labels1 = np.array([1, 3, 1], dtype=int)
- self.wp_eval.add_single_ground_truth_image_info(
- image_key1,
- {standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes1,
- standard_fields.InputDataFields.groundtruth_classes:
- groundtruth_class_labels1})
- # add 'img2' separately
- image_key3 = 'img3'
- groundtruth_boxes3 = np.array([[0, 0, 1, 1]], dtype=float)
- groundtruth_class_labels3 = np.array([2], dtype=int)
- self.wp_eval.add_single_ground_truth_image_info(
- image_key3,
- {standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes3,
- standard_fields.InputDataFields.groundtruth_classes:
- groundtruth_class_labels3})
-
- def add_common_detected(self):
- image_key = 'img2'
- detected_boxes = np.array(
- [[10, 10, 11, 11], [100, 100, 120, 120], [100, 100, 220, 220]],
- dtype=float)
- detected_class_labels = np.array([1, 1, 3], dtype=int)
- detected_scores = np.array([0.7, 0.8, 0.9], dtype=float)
- self.wp_eval.add_single_detected_image_info(
- image_key,
- {standard_fields.DetectionResultFields.detection_boxes: detected_boxes,
- standard_fields.DetectionResultFields.detection_scores:
- detected_scores,
- standard_fields.DetectionResultFields.detection_classes:
- detected_class_labels})
-
- def test_returns_correct_metric_values(self):
- self.create_and_add_common_ground_truth()
- image_key2 = 'img2'
- groundtruth_boxes2 = np.array([[10, 10, 11, 11], [500, 500, 510, 510],
- [10, 10, 12, 12]], dtype=float)
- groundtruth_class_labels2 = np.array([1, 1, 3], dtype=int)
- self.wp_eval.add_single_ground_truth_image_info(
- image_key2,
- {standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes2,
- standard_fields.InputDataFields.groundtruth_classes:
- groundtruth_class_labels2
- })
- self.add_common_detected()
-
- metrics = self.wp_eval.evaluate()
- self.assertAlmostEqual(
- metrics[self.wp_eval._metric_prefix +
- 'PerformanceByCategory/AP@0.5IOU/dog'], 0.0)
- self.assertAlmostEqual(
- metrics[self.wp_eval._metric_prefix +
- 'PerformanceByCategory/AP@0.5IOU/elephant'], 0.0)
- self.assertAlmostEqual(
- metrics[self.wp_eval._metric_prefix +
- 'PerformanceByCategory/AP@0.5IOU/cat'], 0.5 / 4)
- self.assertAlmostEqual(metrics[self.wp_eval._metric_prefix +
- 'Precision/mAP@0.5IOU'],
- 1. / (4 + 1 + 2) / 3)
- self.wp_eval.clear()
- self.assertFalse(self.wp_eval._image_ids)
-
- def test_returns_correct_metric_values_with_difficult_list(self):
- self.create_and_add_common_ground_truth()
- image_key2 = 'img2'
- groundtruth_boxes2 = np.array([[10, 10, 11, 11], [500, 500, 510, 510],
- [10, 10, 12, 12]], dtype=float)
- groundtruth_class_labels2 = np.array([1, 1, 3], dtype=int)
- groundtruth_is_difficult_list2 = np.array([False, True, False], dtype=bool)
- self.wp_eval.add_single_ground_truth_image_info(
- image_key2,
- {standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes2,
- standard_fields.InputDataFields.groundtruth_classes:
- groundtruth_class_labels2,
- standard_fields.InputDataFields.groundtruth_difficult:
- groundtruth_is_difficult_list2
- })
- self.add_common_detected()
-
- metrics = self.wp_eval.evaluate()
- self.assertAlmostEqual(
- metrics[self.wp_eval._metric_prefix +
- 'PerformanceByCategory/AP@0.5IOU/dog'], 0.0)
- self.assertAlmostEqual(
- metrics[self.wp_eval._metric_prefix +
- 'PerformanceByCategory/AP@0.5IOU/elephant'], 0.0)
- self.assertAlmostEqual(
- metrics[self.wp_eval._metric_prefix +
- 'PerformanceByCategory/AP@0.5IOU/cat'], 0.5 / 3)
- self.assertAlmostEqual(metrics[self.wp_eval._metric_prefix +
- 'Precision/mAP@0.5IOU'],
- 1. / (3 + 1 + 2) / 3)
- self.wp_eval.clear()
- self.assertFalse(self.wp_eval._image_ids)
-
- def test_value_error_on_duplicate_images(self):
- # Add groundtruth
- self.wp_eval = (
- object_detection_evaluation.WeightedPascalDetectionEvaluator(
- self.categories))
- image_key1 = 'img1'
- groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
- dtype=float)
- groundtruth_class_labels1 = np.array([1, 3, 1], dtype=int)
- self.wp_eval.add_single_ground_truth_image_info(
- image_key1,
- {standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes1,
- standard_fields.InputDataFields.groundtruth_classes:
- groundtruth_class_labels1})
- with self.assertRaises(ValueError):
- self.wp_eval.add_single_ground_truth_image_info(
- image_key1,
- {standard_fields.InputDataFields.groundtruth_boxes:
- groundtruth_boxes1,
- standard_fields.InputDataFields.groundtruth_classes:
- groundtruth_class_labels1})
-
-
-class ObjectDetectionEvaluationTest(tf.test.TestCase):
-
- def setUp(self):
- num_groundtruth_classes = 3
- self.od_eval = object_detection_evaluation.ObjectDetectionEvaluation(
- num_groundtruth_classes)
-
- image_key1 = 'img1'
- groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
- dtype=float)
- groundtruth_class_labels1 = np.array([0, 2, 0], dtype=int)
- self.od_eval.add_single_ground_truth_image_info(
- image_key1, groundtruth_boxes1, groundtruth_class_labels1)
- image_key2 = 'img2'
- groundtruth_boxes2 = np.array([[10, 10, 11, 11], [500, 500, 510, 510],
- [10, 10, 12, 12]], dtype=float)
- groundtruth_class_labels2 = np.array([0, 0, 2], dtype=int)
- groundtruth_is_difficult_list2 = np.array([False, True, False], dtype=bool)
- groundtruth_is_group_of_list2 = np.array([False, False, True], dtype=bool)
- self.od_eval.add_single_ground_truth_image_info(
- image_key2, groundtruth_boxes2, groundtruth_class_labels2,
- groundtruth_is_difficult_list2, groundtruth_is_group_of_list2)
-
- image_key3 = 'img3'
- groundtruth_boxes3 = np.array([[0, 0, 1, 1]], dtype=float)
- groundtruth_class_labels3 = np.array([1], dtype=int)
- self.od_eval.add_single_ground_truth_image_info(
- image_key3, groundtruth_boxes3, groundtruth_class_labels3)
-
- image_key = 'img2'
- detected_boxes = np.array(
- [[10, 10, 11, 11], [100, 100, 120, 120], [100, 100, 220, 220]],
- dtype=float)
- detected_class_labels = np.array([0, 0, 2], dtype=int)
- detected_scores = np.array([0.7, 0.8, 0.9], dtype=float)
- self.od_eval.add_single_detected_image_info(
- image_key, detected_boxes, detected_scores, detected_class_labels)
-
- def test_add_single_ground_truth_image_info(self):
- expected_num_gt_instances_per_class = np.array([3, 1, 1], dtype=int)
- expected_num_gt_imgs_per_class = np.array([2, 1, 2], dtype=int)
- self.assertTrue(np.array_equal(expected_num_gt_instances_per_class,
- self.od_eval.num_gt_instances_per_class))
- self.assertTrue(np.array_equal(expected_num_gt_imgs_per_class,
- self.od_eval.num_gt_imgs_per_class))
- groundtruth_boxes2 = np.array([[10, 10, 11, 11], [500, 500, 510, 510],
- [10, 10, 12, 12]], dtype=float)
- self.assertTrue(np.allclose(self.od_eval.groundtruth_boxes['img2'],
- groundtruth_boxes2))
- groundtruth_is_difficult_list2 = np.array([False, True, False], dtype=bool)
- self.assertTrue(np.allclose(
- self.od_eval.groundtruth_is_difficult_list['img2'],
- groundtruth_is_difficult_list2))
- groundtruth_is_group_of_list2 = np.array([False, False, True], dtype=bool)
- self.assertTrue(
- np.allclose(self.od_eval.groundtruth_is_group_of_list['img2'],
- groundtruth_is_group_of_list2))
-
- groundtruth_class_labels1 = np.array([0, 2, 0], dtype=int)
- self.assertTrue(np.array_equal(self.od_eval.groundtruth_class_labels[
- 'img1'], groundtruth_class_labels1))
-
- def test_add_single_detected_image_info(self):
- expected_scores_per_class = [[np.array([0.8, 0.7], dtype=float)], [],
- [np.array([0.9], dtype=float)]]
- expected_tp_fp_labels_per_class = [[np.array([0, 1], dtype=bool)], [],
- [np.array([0], dtype=bool)]]
- expected_num_images_correctly_detected_per_class = np.array([0, 0, 0],
- dtype=int)
- for i in range(self.od_eval.num_class):
- for j in range(len(expected_scores_per_class[i])):
- self.assertTrue(np.allclose(expected_scores_per_class[i][j],
- self.od_eval.scores_per_class[i][j]))
- self.assertTrue(np.array_equal(expected_tp_fp_labels_per_class[i][
- j], self.od_eval.tp_fp_labels_per_class[i][j]))
- self.assertTrue(np.array_equal(
- expected_num_images_correctly_detected_per_class,
- self.od_eval.num_images_correctly_detected_per_class))
-
- def test_evaluate(self):
- (average_precision_per_class, mean_ap, precisions_per_class,
- recalls_per_class, corloc_per_class,
- mean_corloc) = self.od_eval.evaluate()
- expected_precisions_per_class = [np.array([0, 0.5], dtype=float),
- np.array([], dtype=float),
- np.array([0], dtype=float)]
- expected_recalls_per_class = [
- np.array([0, 1. / 3.], dtype=float), np.array([], dtype=float),
- np.array([0], dtype=float)
- ]
- expected_average_precision_per_class = np.array([1. / 6., 0, 0],
- dtype=float)
- expected_corloc_per_class = np.array([0, np.divide(0, 0), 0], dtype=float)
- expected_mean_ap = 1. / 18
- expected_mean_corloc = 0.0
- for i in range(self.od_eval.num_class):
- self.assertTrue(np.allclose(expected_precisions_per_class[i],
- precisions_per_class[i]))
- self.assertTrue(np.allclose(expected_recalls_per_class[i],
- recalls_per_class[i]))
- self.assertTrue(np.allclose(expected_average_precision_per_class,
- average_precision_per_class))
- self.assertTrue(np.allclose(expected_corloc_per_class, corloc_per_class))
- self.assertAlmostEqual(expected_mean_ap, mean_ap)
- self.assertAlmostEqual(expected_mean_corloc, mean_corloc)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/utils/ops.py b/object_detection/utils/ops.py
deleted file mode 100644
index 03325cfd..00000000
--- a/object_detection/utils/ops.py
+++ /dev/null
@@ -1,741 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""A module for helper tensorflow ops."""
-import math
-import numpy as np
-import six
-
-import tensorflow as tf
-
-from object_detection.core import box_list
-from object_detection.core import box_list_ops
-from object_detection.core import standard_fields as fields
-from object_detection.utils import static_shape
-
-
-def expanded_shape(orig_shape, start_dim, num_dims):
- """Inserts multiple ones into a shape vector.
-
- Inserts an all-1 vector of length num_dims at position start_dim into a shape.
- Can be combined with tf.reshape to generalize tf.expand_dims.
-
- Args:
- orig_shape: the shape into which the all-1 vector is added (int32 vector)
- start_dim: insertion position (int scalar)
- num_dims: length of the inserted all-1 vector (int scalar)
- Returns:
- An int32 vector of length tf.size(orig_shape) + num_dims.
- """
- with tf.name_scope('ExpandedShape'):
- start_dim = tf.expand_dims(start_dim, 0) # scalar to rank-1
- before = tf.slice(orig_shape, [0], start_dim)
- add_shape = tf.ones(tf.reshape(num_dims, [1]), dtype=tf.int32)
- after = tf.slice(orig_shape, start_dim, [-1])
- new_shape = tf.concat([before, add_shape, after], 0)
- return new_shape
-
-
-def normalized_to_image_coordinates(normalized_boxes, image_shape,
- parallel_iterations=32):
- """Converts a batch of boxes from normal to image coordinates.
-
- Args:
- normalized_boxes: a float32 tensor of shape [None, num_boxes, 4] in
- normalized coordinates.
- image_shape: a float32 tensor of shape [4] containing the image shape.
- parallel_iterations: parallelism for the map_fn op.
-
- Returns:
- absolute_boxes: a float32 tensor of shape [None, num_boxes, 4] containg the
- boxes in image coordinates.
- """
- def _to_absolute_coordinates(normalized_boxes):
- return box_list_ops.to_absolute_coordinates(
- box_list.BoxList(normalized_boxes),
- image_shape[1], image_shape[2], check_range=False).get()
-
- absolute_boxes = tf.map_fn(
- _to_absolute_coordinates,
- elems=(normalized_boxes),
- dtype=tf.float32,
- parallel_iterations=parallel_iterations,
- back_prop=True)
- return absolute_boxes
-
-
-def meshgrid(x, y):
- """Tiles the contents of x and y into a pair of grids.
-
- Multidimensional analog of numpy.meshgrid, giving the same behavior if x and y
- are vectors. Generally, this will give:
-
- xgrid(i1, ..., i_m, j_1, ..., j_n) = x(j_1, ..., j_n)
- ygrid(i1, ..., i_m, j_1, ..., j_n) = y(i_1, ..., i_m)
-
- Keep in mind that the order of the arguments and outputs is reverse relative
- to the order of the indices they go into, done for compatibility with numpy.
- The output tensors have the same shapes. Specifically:
-
- xgrid.get_shape() = y.get_shape().concatenate(x.get_shape())
- ygrid.get_shape() = y.get_shape().concatenate(x.get_shape())
-
- Args:
- x: A tensor of arbitrary shape and rank. xgrid will contain these values
- varying in its last dimensions.
- y: A tensor of arbitrary shape and rank. ygrid will contain these values
- varying in its first dimensions.
- Returns:
- A tuple of tensors (xgrid, ygrid).
- """
- with tf.name_scope('Meshgrid'):
- x = tf.convert_to_tensor(x)
- y = tf.convert_to_tensor(y)
- x_exp_shape = expanded_shape(tf.shape(x), 0, tf.rank(y))
- y_exp_shape = expanded_shape(tf.shape(y), tf.rank(y), tf.rank(x))
-
- xgrid = tf.tile(tf.reshape(x, x_exp_shape), y_exp_shape)
- ygrid = tf.tile(tf.reshape(y, y_exp_shape), x_exp_shape)
- new_shape = y.get_shape().concatenate(x.get_shape())
- xgrid.set_shape(new_shape)
- ygrid.set_shape(new_shape)
-
- return xgrid, ygrid
-
-
-def pad_to_multiple(tensor, multiple):
- """Returns the tensor zero padded to the specified multiple.
-
- Appends 0s to the end of the first and second dimension (height and width) of
- the tensor until both dimensions are a multiple of the input argument
- 'multiple'. E.g. given an input tensor of shape [1, 3, 5, 1] and an input
- multiple of 4, PadToMultiple will append 0s so that the resulting tensor will
- be of shape [1, 4, 8, 1].
-
- Args:
- tensor: rank 4 float32 tensor, where
- tensor -> [batch_size, height, width, channels].
- multiple: the multiple to pad to.
-
- Returns:
- padded_tensor: the tensor zero padded to the specified multiple.
- """
- tensor_shape = tensor.get_shape()
- batch_size = static_shape.get_batch_size(tensor_shape)
- tensor_height = static_shape.get_height(tensor_shape)
- tensor_width = static_shape.get_width(tensor_shape)
- tensor_depth = static_shape.get_depth(tensor_shape)
-
- if batch_size is None:
- batch_size = tf.shape(tensor)[0]
-
- if tensor_height is None:
- tensor_height = tf.shape(tensor)[1]
- padded_tensor_height = tf.to_int32(
- tf.ceil(tf.to_float(tensor_height) / tf.to_float(multiple))) * multiple
- else:
- padded_tensor_height = int(
- math.ceil(float(tensor_height) / multiple) * multiple)
-
- if tensor_width is None:
- tensor_width = tf.shape(tensor)[2]
- padded_tensor_width = tf.to_int32(
- tf.ceil(tf.to_float(tensor_width) / tf.to_float(multiple))) * multiple
- else:
- padded_tensor_width = int(
- math.ceil(float(tensor_width) / multiple) * multiple)
-
- if (padded_tensor_height == tensor_height and
- padded_tensor_width == tensor_width):
- return tensor
-
- if tensor_depth is None:
- tensor_depth = tf.shape(tensor)[3]
-
- # Use tf.concat instead of tf.pad to preserve static shape
- height_pad = tf.zeros([
- batch_size, padded_tensor_height - tensor_height, tensor_width,
- tensor_depth
- ])
- padded_tensor = tf.concat([tensor, height_pad], 1)
- width_pad = tf.zeros([
- batch_size, padded_tensor_height, padded_tensor_width - tensor_width,
- tensor_depth
- ])
- padded_tensor = tf.concat([padded_tensor, width_pad], 2)
-
- return padded_tensor
-
-
-def padded_one_hot_encoding(indices, depth, left_pad):
- """Returns a zero padded one-hot tensor.
-
- This function converts a sparse representation of indices (e.g., [4]) to a
- zero padded one-hot representation (e.g., [0, 0, 0, 0, 1] with depth = 4 and
- left_pad = 1). If `indices` is empty, the result will simply be a tensor of
- shape (0, depth + left_pad). If depth = 0, then this function just returns
- `None`.
-
- Args:
- indices: an integer tensor of shape [num_indices].
- depth: depth for the one-hot tensor (integer).
- left_pad: number of zeros to left pad the one-hot tensor with (integer).
-
- Returns:
- padded_onehot: a tensor with shape (num_indices, depth + left_pad). Returns
- `None` if the depth is zero.
-
- Raises:
- ValueError: if `indices` does not have rank 1 or if `left_pad` or `depth are
- either negative or non-integers.
-
- TODO: add runtime checks for depth and indices.
- """
- if depth < 0 or not isinstance(depth, (int, long) if six.PY2 else int):
- raise ValueError('`depth` must be a non-negative integer.')
- if left_pad < 0 or not isinstance(left_pad, (int, long) if six.PY2 else int):
- raise ValueError('`left_pad` must be a non-negative integer.')
- if depth == 0:
- return None
- if len(indices.get_shape().as_list()) != 1:
- raise ValueError('`indices` must have rank 1')
-
- def one_hot_and_pad():
- one_hot = tf.cast(tf.one_hot(tf.cast(indices, tf.int64), depth,
- on_value=1, off_value=0), tf.float32)
- return tf.pad(one_hot, [[0, 0], [left_pad, 0]], mode='CONSTANT')
- result = tf.cond(tf.greater(tf.size(indices), 0), one_hot_and_pad,
- lambda: tf.zeros((depth + left_pad, 0)))
- return tf.reshape(result, [-1, depth + left_pad])
-
-
-def dense_to_sparse_boxes(dense_locations, dense_num_boxes, num_classes):
- """Converts bounding boxes from dense to sparse form.
-
- Args:
- dense_locations: a [max_num_boxes, 4] tensor in which only the first k rows
- are valid bounding box location coordinates, where k is the sum of
- elements in dense_num_boxes.
- dense_num_boxes: a [max_num_classes] tensor indicating the counts of
- various bounding box classes e.g. [1, 0, 0, 2] means that the first
- bounding box is of class 0 and the second and third bounding boxes are
- of class 3. The sum of elements in this tensor is the number of valid
- bounding boxes.
- num_classes: number of classes
-
- Returns:
- box_locations: a [num_boxes, 4] tensor containing only valid bounding
- boxes (i.e. the first num_boxes rows of dense_locations)
- box_classes: a [num_boxes] tensor containing the classes of each bounding
- box (e.g. dense_num_boxes = [1, 0, 0, 2] => box_classes = [0, 3, 3]
- """
-
- num_valid_boxes = tf.reduce_sum(dense_num_boxes)
- box_locations = tf.slice(dense_locations,
- tf.constant([0, 0]), tf.stack([num_valid_boxes, 4]))
- tiled_classes = [tf.tile([i], tf.expand_dims(dense_num_boxes[i], 0))
- for i in range(num_classes)]
- box_classes = tf.concat(tiled_classes, 0)
- box_locations.set_shape([None, 4])
- return box_locations, box_classes
-
-
-def indices_to_dense_vector(indices,
- size,
- indices_value=1.,
- default_value=0,
- dtype=tf.float32):
- """Creates dense vector with indices set to specific value and rest to zeros.
-
- This function exists because it is unclear if it is safe to use
- tf.sparse_to_dense(indices, [size], 1, validate_indices=False)
- with indices which are not ordered.
- This function accepts a dynamic size (e.g. tf.shape(tensor)[0])
-
- Args:
- indices: 1d Tensor with integer indices which are to be set to
- indices_values.
- size: scalar with size (integer) of output Tensor.
- indices_value: values of elements specified by indices in the output vector
- default_value: values of other elements in the output vector.
- dtype: data type.
-
- Returns:
- dense 1D Tensor of shape [size] with indices set to indices_values and the
- rest set to default_value.
- """
- size = tf.to_int32(size)
- zeros = tf.ones([size], dtype=dtype) * default_value
- values = tf.ones_like(indices, dtype=dtype) * indices_value
-
- return tf.dynamic_stitch([tf.range(size), tf.to_int32(indices)],
- [zeros, values])
-
-
-def retain_groundtruth(tensor_dict, valid_indices):
- """Retains groundtruth by valid indices.
-
- Args:
- tensor_dict: a dictionary of following groundtruth tensors -
- fields.InputDataFields.groundtruth_boxes
- fields.InputDataFields.groundtruth_instance_masks
- fields.InputDataFields.groundtruth_classes
- fields.InputDataFields.groundtruth_is_crowd
- fields.InputDataFields.groundtruth_area
- fields.InputDataFields.groundtruth_label_types
- fields.InputDataFields.groundtruth_difficult
- valid_indices: a tensor with valid indices for the box-level groundtruth.
-
- Returns:
- a dictionary of tensors containing only the groundtruth for valid_indices.
-
- Raises:
- ValueError: If the shape of valid_indices is invalid.
- ValueError: field fields.InputDataFields.groundtruth_boxes is
- not present in tensor_dict.
- """
- input_shape = valid_indices.get_shape().as_list()
- if not (len(input_shape) == 1 or
- (len(input_shape) == 2 and input_shape[1] == 1)):
- raise ValueError('The shape of valid_indices is invalid.')
- valid_indices = tf.reshape(valid_indices, [-1])
- valid_dict = {}
- if fields.InputDataFields.groundtruth_boxes in tensor_dict:
- # Prevents reshape failure when num_boxes is 0.
- num_boxes = tf.maximum(tf.shape(
- tensor_dict[fields.InputDataFields.groundtruth_boxes])[0], 1)
- for key in tensor_dict:
- if key in [fields.InputDataFields.groundtruth_boxes,
- fields.InputDataFields.groundtruth_classes,
- fields.InputDataFields.groundtruth_instance_masks]:
- valid_dict[key] = tf.gather(tensor_dict[key], valid_indices)
- # Input decoder returns empty tensor when these fields are not provided.
- # Needs to reshape into [num_boxes, -1] for tf.gather() to work.
- elif key in [fields.InputDataFields.groundtruth_is_crowd,
- fields.InputDataFields.groundtruth_area,
- fields.InputDataFields.groundtruth_difficult,
- fields.InputDataFields.groundtruth_label_types]:
- valid_dict[key] = tf.reshape(
- tf.gather(tf.reshape(tensor_dict[key], [num_boxes, -1]),
- valid_indices), [-1])
- # Fields that are not associated with boxes.
- else:
- valid_dict[key] = tensor_dict[key]
- else:
- raise ValueError('%s not present in input tensor dict.' % (
- fields.InputDataFields.groundtruth_boxes))
- return valid_dict
-
-
-def retain_groundtruth_with_positive_classes(tensor_dict):
- """Retains only groundtruth with positive class ids.
-
- Args:
- tensor_dict: a dictionary of following groundtruth tensors -
- fields.InputDataFields.groundtruth_boxes
- fields.InputDataFields.groundtruth_classes
- fields.InputDataFields.groundtruth_is_crowd
- fields.InputDataFields.groundtruth_area
- fields.InputDataFields.groundtruth_label_types
- fields.InputDataFields.groundtruth_difficult
-
- Returns:
- a dictionary of tensors containing only the groundtruth with positive
- classes.
-
- Raises:
- ValueError: If groundtruth_classes tensor is not in tensor_dict.
- """
- if fields.InputDataFields.groundtruth_classes not in tensor_dict:
- raise ValueError('`groundtruth classes` not in tensor_dict.')
- keep_indices = tf.where(tf.greater(
- tensor_dict[fields.InputDataFields.groundtruth_classes], 0))
- return retain_groundtruth(tensor_dict, keep_indices)
-
-
-def replace_nan_groundtruth_label_scores_with_ones(label_scores):
- """Replaces nan label scores with 1.0.
-
- Args:
- label_scores: a tensor containing object annoation label scores.
-
- Returns:
- a tensor where NaN label scores have been replaced by ones.
- """
- return tf.where(
- tf.is_nan(label_scores), tf.ones(tf.shape(label_scores)), label_scores)
-
-
-def filter_groundtruth_with_crowd_boxes(tensor_dict):
- """Filters out groundtruth with boxes corresponding to crowd.
-
- Args:
- tensor_dict: a dictionary of following groundtruth tensors -
- fields.InputDataFields.groundtruth_boxes
- fields.InputDataFields.groundtruth_classes
- fields.InputDataFields.groundtruth_is_crowd
- fields.InputDataFields.groundtruth_area
- fields.InputDataFields.groundtruth_label_types
-
- Returns:
- a dictionary of tensors containing only the groundtruth that have bounding
- boxes.
- """
- if fields.InputDataFields.groundtruth_is_crowd in tensor_dict:
- is_crowd = tensor_dict[fields.InputDataFields.groundtruth_is_crowd]
- is_not_crowd = tf.logical_not(is_crowd)
- is_not_crowd_indices = tf.where(is_not_crowd)
- tensor_dict = retain_groundtruth(tensor_dict, is_not_crowd_indices)
- return tensor_dict
-
-
-def filter_groundtruth_with_nan_box_coordinates(tensor_dict):
- """Filters out groundtruth with no bounding boxes.
-
- Args:
- tensor_dict: a dictionary of following groundtruth tensors -
- fields.InputDataFields.groundtruth_boxes
- fields.InputDataFields.groundtruth_instance_masks
- fields.InputDataFields.groundtruth_classes
- fields.InputDataFields.groundtruth_is_crowd
- fields.InputDataFields.groundtruth_area
- fields.InputDataFields.groundtruth_label_types
-
- Returns:
- a dictionary of tensors containing only the groundtruth that have bounding
- boxes.
- """
- groundtruth_boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
- nan_indicator_vector = tf.greater(tf.reduce_sum(tf.to_int32(
- tf.is_nan(groundtruth_boxes)), reduction_indices=[1]), 0)
- valid_indicator_vector = tf.logical_not(nan_indicator_vector)
- valid_indices = tf.where(valid_indicator_vector)
-
- return retain_groundtruth(tensor_dict, valid_indices)
-
-
-def normalize_to_target(inputs,
- target_norm_value,
- dim,
- epsilon=1e-7,
- trainable=True,
- scope='NormalizeToTarget',
- summarize=True):
- """L2 normalizes the inputs across the specified dimension to a target norm.
-
- This op implements the L2 Normalization layer introduced in
- Liu, Wei, et al. "SSD: Single Shot MultiBox Detector."
- and Liu, Wei, Andrew Rabinovich, and Alexander C. Berg.
- "Parsenet: Looking wider to see better." and is useful for bringing
- activations from multiple layers in a convnet to a standard scale.
-
- Note that the rank of `inputs` must be known and the dimension to which
- normalization is to be applied should be statically defined.
-
- TODO: Add option to scale by L2 norm of the entire input.
-
- Args:
- inputs: A `Tensor` of arbitrary size.
- target_norm_value: A float value that specifies an initial target norm or
- a list of floats (whose length must be equal to the depth along the
- dimension to be normalized) specifying a per-dimension multiplier
- after normalization.
- dim: The dimension along which the input is normalized.
- epsilon: A small value to add to the inputs to avoid dividing by zero.
- trainable: Whether the norm is trainable or not
- scope: Optional scope for variable_scope.
- summarize: Whether or not to add a tensorflow summary for the op.
-
- Returns:
- The input tensor normalized to the specified target norm.
-
- Raises:
- ValueError: If dim is smaller than the number of dimensions in 'inputs'.
- ValueError: If target_norm_value is not a float or a list of floats with
- length equal to the depth along the dimension to be normalized.
- """
- with tf.variable_scope(scope, 'NormalizeToTarget', [inputs]):
- if not inputs.get_shape():
- raise ValueError('The input rank must be known.')
- input_shape = inputs.get_shape().as_list()
- input_rank = len(input_shape)
- if dim < 0 or dim >= input_rank:
- raise ValueError(
- 'dim must be non-negative but smaller than the input rank.')
- if not input_shape[dim]:
- raise ValueError('input shape should be statically defined along '
- 'the specified dimension.')
- depth = input_shape[dim]
- if not (isinstance(target_norm_value, float) or
- (isinstance(target_norm_value, list) and
- len(target_norm_value) == depth) and
- all([isinstance(val, float) for val in target_norm_value])):
- raise ValueError('target_norm_value must be a float or a list of floats '
- 'with length equal to the depth along the dimension to '
- 'be normalized.')
- if isinstance(target_norm_value, float):
- initial_norm = depth * [target_norm_value]
- else:
- initial_norm = target_norm_value
- target_norm = tf.contrib.framework.model_variable(
- name='weights', dtype=tf.float32,
- initializer=tf.constant(initial_norm, dtype=tf.float32),
- trainable=trainable)
- if summarize:
- mean = tf.reduce_mean(target_norm)
- mean = tf.Print(mean, ['NormalizeToTarget:', mean])
- tf.summary.scalar(tf.get_variable_scope().name, mean)
- lengths = epsilon + tf.sqrt(tf.reduce_sum(tf.square(inputs), dim, True))
- mult_shape = input_rank*[1]
- mult_shape[dim] = depth
- return tf.reshape(target_norm, mult_shape) * tf.truediv(inputs, lengths)
-
-
-def position_sensitive_crop_regions(image,
- boxes,
- box_ind,
- crop_size,
- num_spatial_bins,
- global_pool,
- extrapolation_value=None):
- """Position-sensitive crop and pool rectangular regions from a feature grid.
-
- The output crops are split into `spatial_bins_y` vertical bins
- and `spatial_bins_x` horizontal bins. For each intersection of a vertical
- and a horizontal bin the output values are gathered by performing
- `tf.image.crop_and_resize` (bilinear resampling) on a a separate subset of
- channels of the image. This reduces `depth` by a factor of
- `(spatial_bins_y * spatial_bins_x)`.
-
- When global_pool is True, this function implements a differentiable version
- of position-sensitive RoI pooling used in
- [R-FCN detection system](https://arxiv.org/abs/1605.06409).
-
- When global_pool is False, this function implements a differentiable version
- of position-sensitive assembling operation used in
- [instance FCN](https://arxiv.org/abs/1603.08678).
-
- Args:
- image: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
- `int16`, `int32`, `int64`, `half`, `float32`, `float64`.
- A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
- Both `image_height` and `image_width` need to be positive.
- boxes: A `Tensor` of type `float32`.
- A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
- specifies the coordinates of a box in the `box_ind[i]` image and is
- specified in normalized coordinates `[y1, x1, y2, x2]`. A normalized
- coordinate value of `y` is mapped to the image coordinate at
- `y * (image_height - 1)`, so as the `[0, 1]` interval of normalized image
- height is mapped to `[0, image_height - 1] in image height coordinates.
- We do allow y1 > y2, in which case the sampled crop is an up-down flipped
- version of the original image. The width dimension is treated similarly.
- Normalized coordinates outside the `[0, 1]` range are allowed, in which
- case we use `extrapolation_value` to extrapolate the input image values.
- box_ind: A `Tensor` of type `int32`.
- A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`.
- The value of `box_ind[i]` specifies the image that the `i`-th box refers
- to.
- crop_size: A list of two integers `[crop_height, crop_width]`. All
- cropped image patches are resized to this size. The aspect ratio of the
- image content is not preserved. Both `crop_height` and `crop_width` need
- to be positive.
- num_spatial_bins: A list of two integers `[spatial_bins_y, spatial_bins_x]`.
- Represents the number of position-sensitive bins in y and x directions.
- Both values should be >= 1. `crop_height` should be divisible by
- `spatial_bins_y`, and similarly for width.
- The number of image channels should be divisible by
- (spatial_bins_y * spatial_bins_x).
- Suggested value from R-FCN paper: [3, 3].
- global_pool: A boolean variable.
- If True, we perform average global pooling on the features assembled from
- the position-sensitive score maps.
- If False, we keep the position-pooled features without global pooling
- over the spatial coordinates.
- Note that using global_pool=True is equivalent to but more efficient than
- running the function with global_pool=False and then performing global
- average pooling.
- extrapolation_value: An optional `float`. Defaults to `0`.
- Value used for extrapolation, when applicable.
- Returns:
- position_sensitive_features: A 4-D tensor of shape
- `[num_boxes, K, K, crop_channels]`,
- where `crop_channels = depth / (spatial_bins_y * spatial_bins_x)`,
- where K = 1 when global_pool is True (Average-pooled cropped regions),
- and K = crop_size when global_pool is False.
- Raises:
- ValueError: Raised in four situations:
- `num_spatial_bins` is not >= 1;
- `num_spatial_bins` does not divide `crop_size`;
- `(spatial_bins_y*spatial_bins_x)` does not divide `depth`;
- `bin_crop_size` is not square when global_pool=False due to the
- constraint in function space_to_depth.
- """
- total_bins = 1
- bin_crop_size = []
-
- for (num_bins, crop_dim) in zip(num_spatial_bins, crop_size):
- if num_bins < 1:
- raise ValueError('num_spatial_bins should be >= 1')
-
- if crop_dim % num_bins != 0:
- raise ValueError('crop_size should be divisible by num_spatial_bins')
-
- total_bins *= num_bins
- bin_crop_size.append(crop_dim // num_bins)
-
- if not global_pool and bin_crop_size[0] != bin_crop_size[1]:
- raise ValueError('Only support square bin crop size for now.')
-
- ymin, xmin, ymax, xmax = tf.unstack(boxes, axis=1)
- spatial_bins_y, spatial_bins_x = num_spatial_bins
-
- # Split each box into spatial_bins_y * spatial_bins_x bins.
- position_sensitive_boxes = []
- for bin_y in range(spatial_bins_y):
- step_y = (ymax - ymin) / spatial_bins_y
- for bin_x in range(spatial_bins_x):
- step_x = (xmax - xmin) / spatial_bins_x
- box_coordinates = [ymin + bin_y * step_y,
- xmin + bin_x * step_x,
- ymin + (bin_y + 1) * step_y,
- xmin + (bin_x + 1) * step_x,
- ]
- position_sensitive_boxes.append(tf.stack(box_coordinates, axis=1))
-
- image_splits = tf.split(value=image, num_or_size_splits=total_bins, axis=3)
-
- image_crops = []
- for (split, box) in zip(image_splits, position_sensitive_boxes):
- crop = tf.image.crop_and_resize(split, box, box_ind, bin_crop_size,
- extrapolation_value=extrapolation_value)
- image_crops.append(crop)
-
- if global_pool:
- # Average over all bins.
- position_sensitive_features = tf.add_n(image_crops) / len(image_crops)
- # Then average over spatial positions within the bins.
- position_sensitive_features = tf.reduce_mean(
- position_sensitive_features, [1, 2], keep_dims=True)
- else:
- # Reorder height/width to depth channel.
- block_size = bin_crop_size[0]
- if block_size >= 2:
- image_crops = [tf.space_to_depth(
- crop, block_size=block_size) for crop in image_crops]
-
- # Pack image_crops so that first dimension is for position-senstive boxes.
- position_sensitive_features = tf.stack(image_crops, axis=0)
-
- # Unroll the position-sensitive boxes to spatial positions.
- position_sensitive_features = tf.squeeze(
- tf.batch_to_space_nd(position_sensitive_features,
- block_shape=[1] + num_spatial_bins,
- crops=tf.zeros((3, 2), dtype=tf.int32)),
- squeeze_dims=[0])
-
- # Reorder back the depth channel.
- if block_size >= 2:
- position_sensitive_features = tf.depth_to_space(
- position_sensitive_features, block_size=block_size)
-
- return position_sensitive_features
-
-
-def reframe_box_masks_to_image_masks(box_masks, boxes, image_height,
- image_width):
- """Transforms the box masks back to full image masks.
-
- Embeds masks in bounding boxes of larger masks whose shapes correspond to
- image shape.
-
- Args:
- box_masks: A tf.float32 tensor of size [num_masks, mask_height, mask_width].
- boxes: A tf.float32 tensor of size [num_masks, 4] containing the box
- corners. Row i contains [ymin, xmin, ymax, xmax] of the box
- corresponding to mask i. Note that the box corners are in
- normalized coordinates.
- image_height: Image height. The output mask will have the same height as
- the image height.
- image_width: Image width. The output mask will have the same width as the
- image width.
-
- Returns:
- A tf.float32 tensor of size [num_masks, image_height, image_width].
- """
- # TODO: Make this a public function.
- def transform_boxes_relative_to_boxes(boxes, reference_boxes):
- boxes = tf.reshape(boxes, [-1, 2, 2])
- min_corner = tf.expand_dims(reference_boxes[:, 0:2], 1)
- max_corner = tf.expand_dims(reference_boxes[:, 2:4], 1)
- transformed_boxes = (boxes - min_corner) / (max_corner - min_corner)
- return tf.reshape(transformed_boxes, [-1, 4])
-
- box_masks = tf.expand_dims(box_masks, axis=3)
- num_boxes = tf.shape(box_masks)[0]
- unit_boxes = tf.concat(
- [tf.zeros([num_boxes, 2]), tf.ones([num_boxes, 2])], axis=1)
- reverse_boxes = transform_boxes_relative_to_boxes(unit_boxes, boxes)
- image_masks = tf.image.crop_and_resize(image=box_masks,
- boxes=reverse_boxes,
- box_ind=tf.range(num_boxes),
- crop_size=[image_height, image_width],
- extrapolation_value=0.0)
- return tf.squeeze(image_masks, axis=3)
-
-
-def merge_boxes_with_multiple_labels(boxes, classes, num_classes):
- """Merges boxes with same coordinates and returns K-hot encoded classes.
-
- Args:
- boxes: A tf.float32 tensor with shape [N, 4] holding N boxes.
- classes: A tf.int32 tensor with shape [N] holding class indices.
- The class index starts at 0.
- num_classes: total number of classes to use for K-hot encoding.
-
- Returns:
- merged_boxes: A tf.float32 tensor with shape [N', 4] holding boxes,
- where N' <= N.
- class_encodings: A tf.int32 tensor with shape [N', num_classes] holding
- k-hot encodings for the merged boxes.
- merged_box_indices: A tf.int32 tensor with shape [N'] holding original
- indices of the boxes.
- """
- def merge_numpy_boxes(boxes, classes, num_classes):
- """Python function to merge numpy boxes."""
- if boxes.size < 1:
- return (np.zeros([0, 4], dtype=np.float32),
- np.zeros([0, num_classes], dtype=np.int32),
- np.zeros([0], dtype=np.int32))
- box_to_class_indices = {}
- for box_index in range(boxes.shape[0]):
- box = tuple(boxes[box_index, :].tolist())
- class_index = classes[box_index]
- if box not in box_to_class_indices:
- box_to_class_indices[box] = [box_index, np.zeros([num_classes])]
- box_to_class_indices[box][1][class_index] = 1
- merged_boxes = np.vstack(box_to_class_indices.keys()).astype(np.float32)
- class_encodings = [item[1] for item in box_to_class_indices.values()]
- class_encodings = np.vstack(class_encodings).astype(np.int32)
- merged_box_indices = [item[0] for item in box_to_class_indices.values()]
- merged_box_indices = np.array(merged_box_indices).astype(np.int32)
- return merged_boxes, class_encodings, merged_box_indices
-
- merged_boxes, class_encodings, merged_box_indices = tf.py_func(
- merge_numpy_boxes, [boxes, classes, num_classes],
- [tf.float32, tf.int32, tf.int32])
- merged_boxes = tf.reshape(merged_boxes, [-1, 4])
- class_encodings = tf.reshape(class_encodings, [-1, num_classes])
- merged_box_indices = tf.reshape(merged_box_indices, [-1])
- return merged_boxes, class_encodings, merged_box_indices
diff --git a/object_detection/utils/ops_test.py b/object_detection/utils/ops_test.py
deleted file mode 100644
index 1bdd174b..00000000
--- a/object_detection/utils/ops_test.py
+++ /dev/null
@@ -1,1132 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.utils.ops."""
-import numpy as np
-import tensorflow as tf
-
-from object_detection.core import standard_fields as fields
-from object_detection.utils import ops
-
-
-class NormalizedToImageCoordinatesTest(tf.test.TestCase):
-
- def test_normalized_to_image_coordinates(self):
- normalized_boxes = tf.placeholder(tf.float32, shape=(None, 1, 4))
- normalized_boxes_np = np.array([[[0.0, 0.0, 1.0, 1.0]],
- [[0.5, 0.5, 1.0, 1.0]]])
- image_shape = tf.convert_to_tensor([1, 4, 4, 3], dtype=tf.int32)
- absolute_boxes = ops.normalized_to_image_coordinates(normalized_boxes,
- image_shape,
- parallel_iterations=2)
-
- expected_boxes = np.array([[[0, 0, 4, 4]],
- [[2, 2, 4, 4]]])
- with self.test_session() as sess:
- absolute_boxes = sess.run(absolute_boxes,
- feed_dict={normalized_boxes:
- normalized_boxes_np})
-
- self.assertAllEqual(absolute_boxes, expected_boxes)
-
-
-class MeshgridTest(tf.test.TestCase):
-
- def test_meshgrid_numpy_comparison(self):
- """Tests meshgrid op with vectors, for which it should match numpy."""
- x = np.arange(4)
- y = np.arange(6)
- exp_xgrid, exp_ygrid = np.meshgrid(x, y)
- xgrid, ygrid = ops.meshgrid(x, y)
- with self.test_session() as sess:
- xgrid_output, ygrid_output = sess.run([xgrid, ygrid])
- self.assertAllEqual(xgrid_output, exp_xgrid)
- self.assertAllEqual(ygrid_output, exp_ygrid)
-
- def test_meshgrid_multidimensional(self):
- np.random.seed(18)
- x = np.random.rand(4, 1, 2).astype(np.float32)
- y = np.random.rand(2, 3).astype(np.float32)
-
- xgrid, ygrid = ops.meshgrid(x, y)
-
- grid_shape = list(y.shape) + list(x.shape)
- self.assertEqual(xgrid.get_shape().as_list(), grid_shape)
- self.assertEqual(ygrid.get_shape().as_list(), grid_shape)
- with self.test_session() as sess:
- xgrid_output, ygrid_output = sess.run([xgrid, ygrid])
-
- # Check the shape of the output grids
- self.assertEqual(xgrid_output.shape, tuple(grid_shape))
- self.assertEqual(ygrid_output.shape, tuple(grid_shape))
-
- # Check a few elements
- test_elements = [((3, 0, 0), (1, 2)),
- ((2, 0, 1), (0, 0)),
- ((0, 0, 0), (1, 1))]
- for xind, yind in test_elements:
- # These are float equality tests, but the meshgrid op should not introduce
- # rounding.
- self.assertEqual(xgrid_output[yind + xind], x[xind])
- self.assertEqual(ygrid_output[yind + xind], y[yind])
-
-
-class OpsTestPadToMultiple(tf.test.TestCase):
-
- def test_zero_padding(self):
- tensor = tf.constant([[[[0.], [0.]], [[0.], [0.]]]])
- padded_tensor = ops.pad_to_multiple(tensor, 1)
- with self.test_session() as sess:
- padded_tensor_out = sess.run(padded_tensor)
- self.assertEqual((1, 2, 2, 1), padded_tensor_out.shape)
-
- def test_no_padding(self):
- tensor = tf.constant([[[[0.], [0.]], [[0.], [0.]]]])
- padded_tensor = ops.pad_to_multiple(tensor, 2)
- with self.test_session() as sess:
- padded_tensor_out = sess.run(padded_tensor)
- self.assertEqual((1, 2, 2, 1), padded_tensor_out.shape)
-
- def test_padding(self):
- tensor = tf.constant([[[[0.], [0.]], [[0.], [0.]]]])
- padded_tensor = ops.pad_to_multiple(tensor, 4)
- with self.test_session() as sess:
- padded_tensor_out = sess.run(padded_tensor)
- self.assertEqual((1, 4, 4, 1), padded_tensor_out.shape)
-
-
-class OpsTestPaddedOneHotEncoding(tf.test.TestCase):
-
- def test_correct_one_hot_tensor_with_no_pad(self):
- indices = tf.constant([1, 2, 3, 5])
- one_hot_tensor = ops.padded_one_hot_encoding(indices, depth=6, left_pad=0)
- expected_tensor = np.array([[0, 1, 0, 0, 0, 0],
- [0, 0, 1, 0, 0, 0],
- [0, 0, 0, 1, 0, 0],
- [0, 0, 0, 0, 0, 1]], np.float32)
- with self.test_session() as sess:
- out_one_hot_tensor = sess.run(one_hot_tensor)
- self.assertAllClose(out_one_hot_tensor, expected_tensor, rtol=1e-10,
- atol=1e-10)
-
- def test_correct_one_hot_tensor_with_pad_one(self):
- indices = tf.constant([1, 2, 3, 5])
- one_hot_tensor = ops.padded_one_hot_encoding(indices, depth=6, left_pad=1)
- expected_tensor = np.array([[0, 0, 1, 0, 0, 0, 0],
- [0, 0, 0, 1, 0, 0, 0],
- [0, 0, 0, 0, 1, 0, 0],
- [0, 0, 0, 0, 0, 0, 1]], np.float32)
- with self.test_session() as sess:
- out_one_hot_tensor = sess.run(one_hot_tensor)
- self.assertAllClose(out_one_hot_tensor, expected_tensor, rtol=1e-10,
- atol=1e-10)
-
- def test_correct_one_hot_tensor_with_pad_three(self):
- indices = tf.constant([1, 2, 3, 5])
- one_hot_tensor = ops.padded_one_hot_encoding(indices, depth=6, left_pad=3)
- expected_tensor = np.array([[0, 0, 0, 0, 1, 0, 0, 0, 0],
- [0, 0, 0, 0, 0, 1, 0, 0, 0],
- [0, 0, 0, 0, 0, 0, 1, 0, 0],
- [0, 0, 0, 0, 0, 0, 0, 0, 1]], np.float32)
- with self.test_session() as sess:
- out_one_hot_tensor = sess.run(one_hot_tensor)
- self.assertAllClose(out_one_hot_tensor, expected_tensor, rtol=1e-10,
- atol=1e-10)
-
- def test_correct_padded_one_hot_tensor_with_empty_indices(self):
- depth = 6
- pad = 2
- indices = tf.constant([])
- one_hot_tensor = ops.padded_one_hot_encoding(
- indices, depth=depth, left_pad=pad)
- expected_tensor = np.zeros((0, depth + pad))
- with self.test_session() as sess:
- out_one_hot_tensor = sess.run(one_hot_tensor)
- self.assertAllClose(out_one_hot_tensor, expected_tensor, rtol=1e-10,
- atol=1e-10)
-
- def test_return_none_on_zero_depth(self):
- indices = tf.constant([1, 2, 3, 4, 5])
- one_hot_tensor = ops.padded_one_hot_encoding(indices, depth=0, left_pad=2)
- self.assertEqual(one_hot_tensor, None)
-
- def test_raise_value_error_on_rank_two_input(self):
- indices = tf.constant(1.0, shape=(2, 3))
- with self.assertRaises(ValueError):
- ops.padded_one_hot_encoding(indices, depth=6, left_pad=2)
-
- def test_raise_value_error_on_negative_pad(self):
- indices = tf.constant(1.0, shape=(2, 3))
- with self.assertRaises(ValueError):
- ops.padded_one_hot_encoding(indices, depth=6, left_pad=-1)
-
- def test_raise_value_error_on_float_pad(self):
- indices = tf.constant(1.0, shape=(2, 3))
- with self.assertRaises(ValueError):
- ops.padded_one_hot_encoding(indices, depth=6, left_pad=0.1)
-
- def test_raise_value_error_on_float_depth(self):
- indices = tf.constant(1.0, shape=(2, 3))
- with self.assertRaises(ValueError):
- ops.padded_one_hot_encoding(indices, depth=0.1, left_pad=2)
-
-
-class OpsDenseToSparseBoxesTest(tf.test.TestCase):
-
- def test_return_all_boxes_when_all_input_boxes_are_valid(self):
- num_classes = 4
- num_valid_boxes = 3
- code_size = 4
- dense_location_placeholder = tf.placeholder(tf.float32,
- shape=(num_valid_boxes,
- code_size))
- dense_num_boxes_placeholder = tf.placeholder(tf.int32, shape=(num_classes))
- box_locations, box_classes = ops.dense_to_sparse_boxes(
- dense_location_placeholder, dense_num_boxes_placeholder, num_classes)
- feed_dict = {dense_location_placeholder: np.random.uniform(
- size=[num_valid_boxes, code_size]),
- dense_num_boxes_placeholder: np.array([1, 0, 0, 2],
- dtype=np.int32)}
-
- expected_box_locations = feed_dict[dense_location_placeholder]
- expected_box_classses = np.array([0, 3, 3])
- with self.test_session() as sess:
- box_locations, box_classes = sess.run([box_locations, box_classes],
- feed_dict=feed_dict)
-
- self.assertAllClose(box_locations, expected_box_locations, rtol=1e-6,
- atol=1e-6)
- self.assertAllEqual(box_classes, expected_box_classses)
-
- def test_return_only_valid_boxes_when_input_contains_invalid_boxes(self):
- num_classes = 4
- num_valid_boxes = 3
- num_boxes = 10
- code_size = 4
-
- dense_location_placeholder = tf.placeholder(tf.float32, shape=(num_boxes,
- code_size))
- dense_num_boxes_placeholder = tf.placeholder(tf.int32, shape=(num_classes))
- box_locations, box_classes = ops.dense_to_sparse_boxes(
- dense_location_placeholder, dense_num_boxes_placeholder, num_classes)
- feed_dict = {dense_location_placeholder: np.random.uniform(
- size=[num_boxes, code_size]),
- dense_num_boxes_placeholder: np.array([1, 0, 0, 2],
- dtype=np.int32)}
-
- expected_box_locations = (feed_dict[dense_location_placeholder]
- [:num_valid_boxes])
- expected_box_classses = np.array([0, 3, 3])
- with self.test_session() as sess:
- box_locations, box_classes = sess.run([box_locations, box_classes],
- feed_dict=feed_dict)
-
- self.assertAllClose(box_locations, expected_box_locations, rtol=1e-6,
- atol=1e-6)
- self.assertAllEqual(box_classes, expected_box_classses)
-
-
-class OpsTestIndicesToDenseVector(tf.test.TestCase):
-
- def test_indices_to_dense_vector(self):
- size = 10000
- num_indices = np.random.randint(size)
- rand_indices = np.random.permutation(np.arange(size))[0:num_indices]
-
- expected_output = np.zeros(size, dtype=np.float32)
- expected_output[rand_indices] = 1.
-
- tf_rand_indices = tf.constant(rand_indices)
- indicator = ops.indices_to_dense_vector(tf_rand_indices, size)
-
- with self.test_session() as sess:
- output = sess.run(indicator)
- self.assertAllEqual(output, expected_output)
- self.assertEqual(output.dtype, expected_output.dtype)
-
- def test_indices_to_dense_vector_size_at_inference(self):
- size = 5000
- num_indices = 250
- all_indices = np.arange(size)
- rand_indices = np.random.permutation(all_indices)[0:num_indices]
-
- expected_output = np.zeros(size, dtype=np.float32)
- expected_output[rand_indices] = 1.
-
- tf_all_indices = tf.placeholder(tf.int32)
- tf_rand_indices = tf.constant(rand_indices)
- indicator = ops.indices_to_dense_vector(tf_rand_indices,
- tf.shape(tf_all_indices)[0])
- feed_dict = {tf_all_indices: all_indices}
-
- with self.test_session() as sess:
- output = sess.run(indicator, feed_dict=feed_dict)
- self.assertAllEqual(output, expected_output)
- self.assertEqual(output.dtype, expected_output.dtype)
-
- def test_indices_to_dense_vector_int(self):
- size = 500
- num_indices = 25
- rand_indices = np.random.permutation(np.arange(size))[0:num_indices]
-
- expected_output = np.zeros(size, dtype=np.int64)
- expected_output[rand_indices] = 1
-
- tf_rand_indices = tf.constant(rand_indices)
- indicator = ops.indices_to_dense_vector(
- tf_rand_indices, size, 1, dtype=tf.int64)
-
- with self.test_session() as sess:
- output = sess.run(indicator)
- self.assertAllEqual(output, expected_output)
- self.assertEqual(output.dtype, expected_output.dtype)
-
- def test_indices_to_dense_vector_custom_values(self):
- size = 100
- num_indices = 10
- rand_indices = np.random.permutation(np.arange(size))[0:num_indices]
- indices_value = np.random.rand(1)
- default_value = np.random.rand(1)
-
- expected_output = np.float32(np.ones(size) * default_value)
- expected_output[rand_indices] = indices_value
-
- tf_rand_indices = tf.constant(rand_indices)
- indicator = ops.indices_to_dense_vector(
- tf_rand_indices,
- size,
- indices_value=indices_value,
- default_value=default_value)
-
- with self.test_session() as sess:
- output = sess.run(indicator)
- self.assertAllClose(output, expected_output)
- self.assertEqual(output.dtype, expected_output.dtype)
-
- def test_indices_to_dense_vector_all_indices_as_input(self):
- size = 500
- num_indices = 500
- rand_indices = np.random.permutation(np.arange(size))[0:num_indices]
-
- expected_output = np.ones(size, dtype=np.float32)
-
- tf_rand_indices = tf.constant(rand_indices)
- indicator = ops.indices_to_dense_vector(tf_rand_indices, size)
-
- with self.test_session() as sess:
- output = sess.run(indicator)
- self.assertAllEqual(output, expected_output)
- self.assertEqual(output.dtype, expected_output.dtype)
-
- def test_indices_to_dense_vector_empty_indices_as_input(self):
- size = 500
- rand_indices = []
-
- expected_output = np.zeros(size, dtype=np.float32)
-
- tf_rand_indices = tf.constant(rand_indices)
- indicator = ops.indices_to_dense_vector(tf_rand_indices, size)
-
- with self.test_session() as sess:
- output = sess.run(indicator)
- self.assertAllEqual(output, expected_output)
- self.assertEqual(output.dtype, expected_output.dtype)
-
-
-class GroundtruthFilterTest(tf.test.TestCase):
-
- def test_filter_groundtruth(self):
- input_image = tf.placeholder(tf.float32, shape=(None, None, 3))
- input_boxes = tf.placeholder(tf.float32, shape=(None, 4))
- input_classes = tf.placeholder(tf.int32, shape=(None,))
- input_is_crowd = tf.placeholder(tf.bool, shape=(None,))
- input_area = tf.placeholder(tf.float32, shape=(None,))
- input_difficult = tf.placeholder(tf.float32, shape=(None,))
- input_label_types = tf.placeholder(tf.string, shape=(None,))
- valid_indices = tf.placeholder(tf.int32, shape=(None,))
- input_tensors = {
- fields.InputDataFields.image: input_image,
- fields.InputDataFields.groundtruth_boxes: input_boxes,
- fields.InputDataFields.groundtruth_classes: input_classes,
- fields.InputDataFields.groundtruth_is_crowd: input_is_crowd,
- fields.InputDataFields.groundtruth_area: input_area,
- fields.InputDataFields.groundtruth_difficult: input_difficult,
- fields.InputDataFields.groundtruth_label_types: input_label_types
- }
- output_tensors = ops.retain_groundtruth(input_tensors, valid_indices)
-
- image_tensor = np.random.rand(224, 224, 3)
- feed_dict = {
- input_image: image_tensor,
- input_boxes:
- np.array([[0.2, 0.4, 0.1, 0.8], [0.2, 0.4, 1.0, 0.8]], dtype=np.float),
- input_classes:
- np.array([1, 2], dtype=np.int32),
- input_is_crowd:
- np.array([False, True], dtype=np.bool),
- input_area:
- np.array([32, 48], dtype=np.float32),
- input_difficult:
- np.array([True, False], dtype=np.bool),
- input_label_types:
- np.array(['APPROPRIATE', 'INCORRECT'], dtype=np.string_),
- valid_indices:
- np.array([0], dtype=np.int32)
- }
- expected_tensors = {
- fields.InputDataFields.image:
- image_tensor,
- fields.InputDataFields.groundtruth_boxes:
- [[0.2, 0.4, 0.1, 0.8]],
- fields.InputDataFields.groundtruth_classes:
- [1],
- fields.InputDataFields.groundtruth_is_crowd:
- [False],
- fields.InputDataFields.groundtruth_area:
- [32],
- fields.InputDataFields.groundtruth_difficult:
- [True],
- fields.InputDataFields.groundtruth_label_types:
- ['APPROPRIATE']
- }
- with self.test_session() as sess:
- output_tensors = sess.run(output_tensors, feed_dict=feed_dict)
- for key in [fields.InputDataFields.image,
- fields.InputDataFields.groundtruth_boxes,
- fields.InputDataFields.groundtruth_area]:
- self.assertAllClose(expected_tensors[key], output_tensors[key])
- for key in [fields.InputDataFields.groundtruth_classes,
- fields.InputDataFields.groundtruth_is_crowd,
- fields.InputDataFields.groundtruth_label_types]:
- self.assertAllEqual(expected_tensors[key], output_tensors[key])
-
- def test_filter_with_missing_fields(self):
- input_boxes = tf.placeholder(tf.float32, shape=(None, 4))
- input_classes = tf.placeholder(tf.int32, shape=(None,))
- input_tensors = {
- fields.InputDataFields.groundtruth_boxes: input_boxes,
- fields.InputDataFields.groundtruth_classes: input_classes
- }
- valid_indices = tf.placeholder(tf.int32, shape=(None,))
-
- feed_dict = {
- input_boxes:
- np.array([[0.2, 0.4, 0.1, 0.8], [0.2, 0.4, 1.0, 0.8]], dtype=np.float),
- input_classes:
- np.array([1, 2], dtype=np.int32),
- valid_indices:
- np.array([0], dtype=np.int32)
- }
- expected_tensors = {
- fields.InputDataFields.groundtruth_boxes:
- [[0.2, 0.4, 0.1, 0.8]],
- fields.InputDataFields.groundtruth_classes:
- [1]
- }
-
- output_tensors = ops.retain_groundtruth(input_tensors, valid_indices)
- with self.test_session() as sess:
- output_tensors = sess.run(output_tensors, feed_dict=feed_dict)
- for key in [fields.InputDataFields.groundtruth_boxes]:
- self.assertAllClose(expected_tensors[key], output_tensors[key])
- for key in [fields.InputDataFields.groundtruth_classes]:
- self.assertAllEqual(expected_tensors[key], output_tensors[key])
-
- def test_filter_with_empty_fields(self):
- input_boxes = tf.placeholder(tf.float32, shape=(None, 4))
- input_classes = tf.placeholder(tf.int32, shape=(None,))
- input_is_crowd = tf.placeholder(tf.bool, shape=(None,))
- input_area = tf.placeholder(tf.float32, shape=(None,))
- input_difficult = tf.placeholder(tf.float32, shape=(None,))
- valid_indices = tf.placeholder(tf.int32, shape=(None,))
- input_tensors = {
- fields.InputDataFields.groundtruth_boxes: input_boxes,
- fields.InputDataFields.groundtruth_classes: input_classes,
- fields.InputDataFields.groundtruth_is_crowd: input_is_crowd,
- fields.InputDataFields.groundtruth_area: input_area,
- fields.InputDataFields.groundtruth_difficult: input_difficult
- }
- output_tensors = ops.retain_groundtruth(input_tensors, valid_indices)
-
- feed_dict = {
- input_boxes:
- np.array([[0.2, 0.4, 0.1, 0.8], [0.2, 0.4, 1.0, 0.8]], dtype=np.float),
- input_classes:
- np.array([1, 2], dtype=np.int32),
- input_is_crowd:
- np.array([False, True], dtype=np.bool),
- input_area:
- np.array([], dtype=np.float32),
- input_difficult:
- np.array([], dtype=np.float32),
- valid_indices:
- np.array([0], dtype=np.int32)
- }
- expected_tensors = {
- fields.InputDataFields.groundtruth_boxes:
- [[0.2, 0.4, 0.1, 0.8]],
- fields.InputDataFields.groundtruth_classes:
- [1],
- fields.InputDataFields.groundtruth_is_crowd:
- [False],
- fields.InputDataFields.groundtruth_area:
- [],
- fields.InputDataFields.groundtruth_difficult:
- []
- }
- with self.test_session() as sess:
- output_tensors = sess.run(output_tensors, feed_dict=feed_dict)
- for key in [fields.InputDataFields.groundtruth_boxes,
- fields.InputDataFields.groundtruth_area]:
- self.assertAllClose(expected_tensors[key], output_tensors[key])
- for key in [fields.InputDataFields.groundtruth_classes,
- fields.InputDataFields.groundtruth_is_crowd]:
- self.assertAllEqual(expected_tensors[key], output_tensors[key])
-
- def test_filter_with_empty_groundtruth_boxes(self):
- input_boxes = tf.placeholder(tf.float32, shape=(None, 4))
- input_classes = tf.placeholder(tf.int32, shape=(None,))
- input_is_crowd = tf.placeholder(tf.bool, shape=(None,))
- input_area = tf.placeholder(tf.float32, shape=(None,))
- input_difficult = tf.placeholder(tf.float32, shape=(None,))
- valid_indices = tf.placeholder(tf.int32, shape=(None,))
- input_tensors = {
- fields.InputDataFields.groundtruth_boxes: input_boxes,
- fields.InputDataFields.groundtruth_classes: input_classes,
- fields.InputDataFields.groundtruth_is_crowd: input_is_crowd,
- fields.InputDataFields.groundtruth_area: input_area,
- fields.InputDataFields.groundtruth_difficult: input_difficult
- }
- output_tensors = ops.retain_groundtruth(input_tensors, valid_indices)
-
- feed_dict = {
- input_boxes:
- np.array([], dtype=np.float).reshape(0, 4),
- input_classes:
- np.array([], dtype=np.int32),
- input_is_crowd:
- np.array([], dtype=np.bool),
- input_area:
- np.array([], dtype=np.float32),
- input_difficult:
- np.array([], dtype=np.float32),
- valid_indices:
- np.array([], dtype=np.int32)
- }
- with self.test_session() as sess:
- output_tensors = sess.run(output_tensors, feed_dict=feed_dict)
- for key in input_tensors:
- if key == fields.InputDataFields.groundtruth_boxes:
- self.assertAllEqual([0, 4], output_tensors[key].shape)
- else:
- self.assertAllEqual([0], output_tensors[key].shape)
-
-
-class RetainGroundTruthWithPositiveClasses(tf.test.TestCase):
-
- def test_filter_groundtruth_with_positive_classes(self):
- input_image = tf.placeholder(tf.float32, shape=(None, None, 3))
- input_boxes = tf.placeholder(tf.float32, shape=(None, 4))
- input_classes = tf.placeholder(tf.int32, shape=(None,))
- input_is_crowd = tf.placeholder(tf.bool, shape=(None,))
- input_area = tf.placeholder(tf.float32, shape=(None,))
- input_difficult = tf.placeholder(tf.float32, shape=(None,))
- input_label_types = tf.placeholder(tf.string, shape=(None,))
- valid_indices = tf.placeholder(tf.int32, shape=(None,))
- input_tensors = {
- fields.InputDataFields.image: input_image,
- fields.InputDataFields.groundtruth_boxes: input_boxes,
- fields.InputDataFields.groundtruth_classes: input_classes,
- fields.InputDataFields.groundtruth_is_crowd: input_is_crowd,
- fields.InputDataFields.groundtruth_area: input_area,
- fields.InputDataFields.groundtruth_difficult: input_difficult,
- fields.InputDataFields.groundtruth_label_types: input_label_types
- }
- output_tensors = ops.retain_groundtruth_with_positive_classes(input_tensors)
-
- image_tensor = np.random.rand(224, 224, 3)
- feed_dict = {
- input_image: image_tensor,
- input_boxes:
- np.array([[0.2, 0.4, 0.1, 0.8], [0.2, 0.4, 1.0, 0.8]], dtype=np.float),
- input_classes:
- np.array([1, 0], dtype=np.int32),
- input_is_crowd:
- np.array([False, True], dtype=np.bool),
- input_area:
- np.array([32, 48], dtype=np.float32),
- input_difficult:
- np.array([True, False], dtype=np.bool),
- input_label_types:
- np.array(['APPROPRIATE', 'INCORRECT'], dtype=np.string_),
- valid_indices:
- np.array([0], dtype=np.int32)
- }
- expected_tensors = {
- fields.InputDataFields.image:
- image_tensor,
- fields.InputDataFields.groundtruth_boxes:
- [[0.2, 0.4, 0.1, 0.8]],
- fields.InputDataFields.groundtruth_classes:
- [1],
- fields.InputDataFields.groundtruth_is_crowd:
- [False],
- fields.InputDataFields.groundtruth_area:
- [32],
- fields.InputDataFields.groundtruth_difficult:
- [True],
- fields.InputDataFields.groundtruth_label_types:
- ['APPROPRIATE']
- }
- with self.test_session() as sess:
- output_tensors = sess.run(output_tensors, feed_dict=feed_dict)
- for key in [fields.InputDataFields.image,
- fields.InputDataFields.groundtruth_boxes,
- fields.InputDataFields.groundtruth_area]:
- self.assertAllClose(expected_tensors[key], output_tensors[key])
- for key in [fields.InputDataFields.groundtruth_classes,
- fields.InputDataFields.groundtruth_is_crowd,
- fields.InputDataFields.groundtruth_label_types]:
- self.assertAllEqual(expected_tensors[key], output_tensors[key])
-
-
-class ReplaceNaNGroundtruthLabelScoresWithOnes(tf.test.TestCase):
-
- def test_replace_nan_groundtruth_label_scores_with_ones(self):
- label_scores = tf.constant([np.nan, 1.0, np.nan])
- output_tensor = ops.replace_nan_groundtruth_label_scores_with_ones(
- label_scores)
- expected_tensor = [1.0, 1.0, 1.0]
- with self.test_session():
- output_tensor = output_tensor.eval()
- self.assertAllClose(expected_tensor, output_tensor)
-
- def test_input_equals_output_when_no_nans(self):
- input_label_scores = [0.5, 1.0, 1.0]
- label_scores_tensor = tf.constant(input_label_scores)
- output_label_scores = ops.replace_nan_groundtruth_label_scores_with_ones(
- label_scores_tensor)
- with self.test_session():
- output_label_scores = output_label_scores.eval()
- self.assertAllClose(input_label_scores, output_label_scores)
-
-
-class GroundtruthFilterWithCrowdBoxesTest(tf.test.TestCase):
-
- def test_filter_groundtruth_with_crowd_boxes(self):
- input_tensors = {
- fields.InputDataFields.groundtruth_boxes:
- [[0.1, 0.2, 0.6, 0.8], [0.2, 0.4, 0.1, 0.8]],
- fields.InputDataFields.groundtruth_classes:
- [1, 2],
- fields.InputDataFields.groundtruth_is_crowd:
- [True, False],
- fields.InputDataFields.groundtruth_area:
- [100.0, 238.7]
- }
-
- expected_tensors = {
- fields.InputDataFields.groundtruth_boxes:
- [[0.2, 0.4, 0.1, 0.8]],
- fields.InputDataFields.groundtruth_classes:
- [2],
- fields.InputDataFields.groundtruth_is_crowd:
- [False],
- fields.InputDataFields.groundtruth_area:
- [238.7]
- }
-
- output_tensors = ops.filter_groundtruth_with_crowd_boxes(
- input_tensors)
- with self.test_session() as sess:
- output_tensors = sess.run(output_tensors)
- for key in [fields.InputDataFields.groundtruth_boxes,
- fields.InputDataFields.groundtruth_area]:
- self.assertAllClose(expected_tensors[key], output_tensors[key])
- for key in [fields.InputDataFields.groundtruth_classes,
- fields.InputDataFields.groundtruth_is_crowd]:
- self.assertAllEqual(expected_tensors[key], output_tensors[key])
-
-
-class GroundtruthFilterWithNanBoxTest(tf.test.TestCase):
-
- def test_filter_groundtruth_with_nan_box_coordinates(self):
- input_tensors = {
- fields.InputDataFields.groundtruth_boxes:
- [[np.nan, np.nan, np.nan, np.nan], [0.2, 0.4, 0.1, 0.8]],
- fields.InputDataFields.groundtruth_classes:
- [1, 2],
- fields.InputDataFields.groundtruth_is_crowd:
- [False, True],
- fields.InputDataFields.groundtruth_area:
- [100.0, 238.7]
- }
-
- expected_tensors = {
- fields.InputDataFields.groundtruth_boxes:
- [[0.2, 0.4, 0.1, 0.8]],
- fields.InputDataFields.groundtruth_classes:
- [2],
- fields.InputDataFields.groundtruth_is_crowd:
- [True],
- fields.InputDataFields.groundtruth_area:
- [238.7]
- }
-
- output_tensors = ops.filter_groundtruth_with_nan_box_coordinates(
- input_tensors)
- with self.test_session() as sess:
- output_tensors = sess.run(output_tensors)
- for key in [fields.InputDataFields.groundtruth_boxes,
- fields.InputDataFields.groundtruth_area]:
- self.assertAllClose(expected_tensors[key], output_tensors[key])
- for key in [fields.InputDataFields.groundtruth_classes,
- fields.InputDataFields.groundtruth_is_crowd]:
- self.assertAllEqual(expected_tensors[key], output_tensors[key])
-
-
-class OpsTestNormalizeToTarget(tf.test.TestCase):
-
- def test_create_normalize_to_target(self):
- inputs = tf.random_uniform([5, 10, 12, 3])
- target_norm_value = 4.0
- dim = 3
- with self.test_session():
- output = ops.normalize_to_target(inputs, target_norm_value, dim)
- self.assertEqual(output.op.name, 'NormalizeToTarget/mul')
- var_name = tf.contrib.framework.get_variables()[0].name
- self.assertEqual(var_name, 'NormalizeToTarget/weights:0')
-
- def test_invalid_dim(self):
- inputs = tf.random_uniform([5, 10, 12, 3])
- target_norm_value = 4.0
- dim = 10
- with self.assertRaisesRegexp(
- ValueError,
- 'dim must be non-negative but smaller than the input rank.'):
- ops.normalize_to_target(inputs, target_norm_value, dim)
-
- def test_invalid_target_norm_values(self):
- inputs = tf.random_uniform([5, 10, 12, 3])
- target_norm_value = [4.0, 4.0]
- dim = 3
- with self.assertRaisesRegexp(
- ValueError, 'target_norm_value must be a float or a list of floats'):
- ops.normalize_to_target(inputs, target_norm_value, dim)
-
- def test_correct_output_shape(self):
- inputs = tf.random_uniform([5, 10, 12, 3])
- target_norm_value = 4.0
- dim = 3
- with self.test_session():
- output = ops.normalize_to_target(inputs, target_norm_value, dim)
- self.assertEqual(output.get_shape().as_list(),
- inputs.get_shape().as_list())
-
- def test_correct_initial_output_values(self):
- inputs = tf.constant([[[[3, 4], [7, 24]],
- [[5, -12], [-1, 0]]]], tf.float32)
- target_norm_value = 10.0
- dim = 3
- expected_output = [[[[30/5.0, 40/5.0], [70/25.0, 240/25.0]],
- [[50/13.0, -120/13.0], [-10, 0]]]]
- with self.test_session() as sess:
- normalized_inputs = ops.normalize_to_target(inputs, target_norm_value,
- dim)
- sess.run(tf.global_variables_initializer())
- output = normalized_inputs.eval()
- self.assertAllClose(output, expected_output)
-
- def test_multiple_target_norm_values(self):
- inputs = tf.constant([[[[3, 4], [7, 24]],
- [[5, -12], [-1, 0]]]], tf.float32)
- target_norm_value = [10.0, 20.0]
- dim = 3
- expected_output = [[[[30/5.0, 80/5.0], [70/25.0, 480/25.0]],
- [[50/13.0, -240/13.0], [-10, 0]]]]
- with self.test_session() as sess:
- normalized_inputs = ops.normalize_to_target(inputs, target_norm_value,
- dim)
- sess.run(tf.global_variables_initializer())
- output = normalized_inputs.eval()
- self.assertAllClose(output, expected_output)
-
-
-class OpsTestPositionSensitiveCropRegions(tf.test.TestCase):
-
- def test_position_sensitive(self):
- num_spatial_bins = [3, 2]
- image_shape = [1, 3, 2, 6]
-
- # First channel is 1's, second channel is 2's, etc.
- image = tf.constant(range(1, 3 * 2 + 1) * 6, dtype=tf.float32,
- shape=image_shape)
- boxes = tf.random_uniform((2, 4))
- box_ind = tf.constant([0, 0], dtype=tf.int32)
-
- # The result for both boxes should be [[1, 2], [3, 4], [5, 6]]
- # before averaging.
- expected_output = np.array([3.5, 3.5]).reshape([2, 1, 1, 1])
-
- for crop_size_mult in range(1, 3):
- crop_size = [3 * crop_size_mult, 2 * crop_size_mult]
- ps_crop_and_pool = ops.position_sensitive_crop_regions(
- image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True)
-
- with self.test_session() as sess:
- output = sess.run(ps_crop_and_pool)
- self.assertAllClose(output, expected_output)
-
- def test_position_sensitive_with_equal_channels(self):
- num_spatial_bins = [2, 2]
- image_shape = [1, 3, 3, 4]
- crop_size = [2, 2]
-
- image = tf.constant(range(1, 3 * 3 + 1), dtype=tf.float32,
- shape=[1, 3, 3, 1])
- tiled_image = tf.tile(image, [1, 1, 1, image_shape[3]])
- boxes = tf.random_uniform((3, 4))
- box_ind = tf.constant([0, 0, 0], dtype=tf.int32)
-
- # All channels are equal so position-sensitive crop and resize should
- # work as the usual crop and resize for just one channel.
- crop = tf.image.crop_and_resize(image, boxes, box_ind, crop_size)
- crop_and_pool = tf.reduce_mean(crop, [1, 2], keep_dims=True)
-
- ps_crop_and_pool = ops.position_sensitive_crop_regions(
- tiled_image,
- boxes,
- box_ind,
- crop_size,
- num_spatial_bins,
- global_pool=True)
-
- with self.test_session() as sess:
- expected_output, output = sess.run((crop_and_pool, ps_crop_and_pool))
- self.assertAllClose(output, expected_output)
-
- def test_position_sensitive_with_single_bin(self):
- num_spatial_bins = [1, 1]
- image_shape = [2, 3, 3, 4]
- crop_size = [2, 2]
-
- image = tf.random_uniform(image_shape)
- boxes = tf.random_uniform((6, 4))
- box_ind = tf.constant([0, 0, 0, 1, 1, 1], dtype=tf.int32)
-
- # When a single bin is used, position-sensitive crop and pool should be
- # the same as non-position sensitive crop and pool.
- crop = tf.image.crop_and_resize(image, boxes, box_ind, crop_size)
- crop_and_pool = tf.reduce_mean(crop, [1, 2], keep_dims=True)
-
- ps_crop_and_pool = ops.position_sensitive_crop_regions(
- image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True)
-
- with self.test_session() as sess:
- expected_output, output = sess.run((crop_and_pool, ps_crop_and_pool))
- self.assertAllClose(output, expected_output)
-
- def test_raise_value_error_on_num_bins_less_than_one(self):
- num_spatial_bins = [1, -1]
- image_shape = [1, 1, 1, 2]
- crop_size = [2, 2]
-
- image = tf.constant(1, dtype=tf.float32, shape=image_shape)
- boxes = tf.constant([[0, 0, 1, 1]], dtype=tf.float32)
- box_ind = tf.constant([0], dtype=tf.int32)
-
- with self.assertRaisesRegexp(ValueError, 'num_spatial_bins should be >= 1'):
- ops.position_sensitive_crop_regions(
- image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True)
-
- def test_raise_value_error_on_non_divisible_crop_size(self):
- num_spatial_bins = [2, 3]
- image_shape = [1, 1, 1, 6]
- crop_size = [3, 2]
-
- image = tf.constant(1, dtype=tf.float32, shape=image_shape)
- boxes = tf.constant([[0, 0, 1, 1]], dtype=tf.float32)
- box_ind = tf.constant([0], dtype=tf.int32)
-
- with self.assertRaisesRegexp(
- ValueError, 'crop_size should be divisible by num_spatial_bins'):
- ops.position_sensitive_crop_regions(
- image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True)
-
- def test_raise_value_error_on_non_divisible_num_channels(self):
- num_spatial_bins = [2, 2]
- image_shape = [1, 1, 1, 5]
- crop_size = [2, 2]
-
- image = tf.constant(1, dtype=tf.float32, shape=image_shape)
- boxes = tf.constant([[0, 0, 1, 1]], dtype=tf.float32)
- box_ind = tf.constant([0], dtype=tf.int32)
-
- with self.assertRaisesRegexp(
- ValueError, 'Dimension size must be evenly divisible by 4 but is 5'):
- ops.position_sensitive_crop_regions(
- image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True)
-
- def test_position_sensitive_with_global_pool_false(self):
- num_spatial_bins = [3, 2]
- image_shape = [1, 3, 2, 6]
- num_boxes = 2
-
- # First channel is 1's, second channel is 2's, etc.
- image = tf.constant(range(1, 3 * 2 + 1) * 6, dtype=tf.float32,
- shape=image_shape)
- boxes = tf.random_uniform((num_boxes, 4))
- box_ind = tf.constant([0, 0], dtype=tf.int32)
-
- expected_output = []
-
- # Expected output, when crop_size = [3, 2].
- expected_output.append(np.expand_dims(
- np.tile(np.array([[1, 2],
- [3, 4],
- [5, 6]]), (num_boxes, 1, 1)),
- axis=-1))
-
- # Expected output, when crop_size = [6, 4].
- expected_output.append(np.expand_dims(
- np.tile(np.array([[1, 1, 2, 2],
- [1, 1, 2, 2],
- [3, 3, 4, 4],
- [3, 3, 4, 4],
- [5, 5, 6, 6],
- [5, 5, 6, 6]]), (num_boxes, 1, 1)),
- axis=-1))
-
- for crop_size_mult in range(1, 3):
- crop_size = [3 * crop_size_mult, 2 * crop_size_mult]
- ps_crop = ops.position_sensitive_crop_regions(
- image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=False)
- with self.test_session() as sess:
- output = sess.run(ps_crop)
-
- self.assertAllEqual(output, expected_output[crop_size_mult - 1])
-
- def test_position_sensitive_with_global_pool_false_and_known_boxes(self):
- num_spatial_bins = [2, 2]
- image_shape = [2, 2, 2, 4]
- crop_size = [2, 2]
-
- image = tf.constant(range(1, 2 * 2 * 4 + 1) * 2, dtype=tf.float32,
- shape=image_shape)
-
- # First box contains whole image, and second box contains only first row.
- boxes = tf.constant(np.array([[0., 0., 1., 1.],
- [0., 0., 0.5, 1.]]), dtype=tf.float32)
- box_ind = tf.constant([0, 1], dtype=tf.int32)
-
- expected_output = []
-
- # Expected output, when the box containing whole image.
- expected_output.append(
- np.reshape(np.array([[4, 7],
- [10, 13]]),
- (1, 2, 2, 1))
- )
-
- # Expected output, when the box containing only first row.
- expected_output.append(
- np.reshape(np.array([[3, 6],
- [7, 10]]),
- (1, 2, 2, 1))
- )
- expected_output = np.concatenate(expected_output, axis=0)
-
- ps_crop = ops.position_sensitive_crop_regions(
- image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=False)
-
- with self.test_session() as sess:
- output = sess.run(ps_crop)
- self.assertAllEqual(output, expected_output)
-
- def test_position_sensitive_with_global_pool_false_and_single_bin(self):
- num_spatial_bins = [1, 1]
- image_shape = [2, 3, 3, 4]
- crop_size = [1, 1]
-
- image = tf.random_uniform(image_shape)
- boxes = tf.random_uniform((6, 4))
- box_ind = tf.constant([0, 0, 0, 1, 1, 1], dtype=tf.int32)
-
- # Since single_bin is used and crop_size = [1, 1] (i.e., no crop resize),
- # the outputs are the same whatever the global_pool value is.
- ps_crop_and_pool = ops.position_sensitive_crop_regions(
- image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True)
- ps_crop = ops.position_sensitive_crop_regions(
- image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=False)
-
- with self.test_session() as sess:
- pooled_output, unpooled_output = sess.run((ps_crop_and_pool, ps_crop))
- self.assertAllClose(pooled_output, unpooled_output)
-
- def test_position_sensitive_with_global_pool_false_and_do_global_pool(self):
- num_spatial_bins = [3, 2]
- image_shape = [1, 3, 2, 6]
- num_boxes = 2
-
- # First channel is 1's, second channel is 2's, etc.
- image = tf.constant(range(1, 3 * 2 + 1) * 6, dtype=tf.float32,
- shape=image_shape)
- boxes = tf.random_uniform((num_boxes, 4))
- box_ind = tf.constant([0, 0], dtype=tf.int32)
-
- expected_output = []
-
- # Expected output, when crop_size = [3, 2].
- expected_output.append(np.mean(
- np.expand_dims(
- np.tile(np.array([[1, 2],
- [3, 4],
- [5, 6]]), (num_boxes, 1, 1)),
- axis=-1),
- axis=(1, 2), keepdims=True))
-
- # Expected output, when crop_size = [6, 4].
- expected_output.append(np.mean(
- np.expand_dims(
- np.tile(np.array([[1, 1, 2, 2],
- [1, 1, 2, 2],
- [3, 3, 4, 4],
- [3, 3, 4, 4],
- [5, 5, 6, 6],
- [5, 5, 6, 6]]), (num_boxes, 1, 1)),
- axis=-1),
- axis=(1, 2), keepdims=True))
-
- for crop_size_mult in range(1, 3):
- crop_size = [3 * crop_size_mult, 2 * crop_size_mult]
-
- # Perform global_pooling after running the function with
- # global_pool=False.
- ps_crop = ops.position_sensitive_crop_regions(
- image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=False)
- ps_crop_and_pool = tf.reduce_mean(
- ps_crop, reduction_indices=(1, 2), keep_dims=True)
-
- with self.test_session() as sess:
- output = sess.run(ps_crop_and_pool)
-
- self.assertAllEqual(output, expected_output[crop_size_mult - 1])
-
- def test_raise_value_error_on_non_square_block_size(self):
- num_spatial_bins = [3, 2]
- image_shape = [1, 3, 2, 6]
- crop_size = [6, 2]
-
- image = tf.constant(1, dtype=tf.float32, shape=image_shape)
- boxes = tf.constant([[0, 0, 1, 1]], dtype=tf.float32)
- box_ind = tf.constant([0], dtype=tf.int32)
-
- with self.assertRaisesRegexp(
- ValueError, 'Only support square bin crop size for now.'):
- ops.position_sensitive_crop_regions(
- image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=False)
-
-
-class ReframeBoxMasksToImageMasksTest(tf.test.TestCase):
-
- def testZeroImageOnEmptyMask(self):
- box_masks = tf.constant([[[0, 0],
- [0, 0]]], dtype=tf.float32)
- boxes = tf.constant([[0.0, 0.0, 1.0, 1.0]], dtype=tf.float32)
- image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes,
- image_height=4,
- image_width=4)
- np_expected_image_masks = np.array([[[0, 0, 0, 0],
- [0, 0, 0, 0],
- [0, 0, 0, 0],
- [0, 0, 0, 0]]], dtype=np.float32)
- with self.test_session() as sess:
- np_image_masks = sess.run(image_masks)
- self.assertAllClose(np_image_masks, np_expected_image_masks)
-
- def testMaskIsCenteredInImageWhenBoxIsCentered(self):
- box_masks = tf.constant([[[1, 1],
- [1, 1]]], dtype=tf.float32)
- boxes = tf.constant([[0.25, 0.25, 0.75, 0.75]], dtype=tf.float32)
- image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes,
- image_height=4,
- image_width=4)
- np_expected_image_masks = np.array([[[0, 0, 0, 0],
- [0, 1, 1, 0],
- [0, 1, 1, 0],
- [0, 0, 0, 0]]], dtype=np.float32)
- with self.test_session() as sess:
- np_image_masks = sess.run(image_masks)
- self.assertAllClose(np_image_masks, np_expected_image_masks)
-
- def testMaskOffCenterRemainsOffCenterInImage(self):
- box_masks = tf.constant([[[1, 0],
- [0, 1]]], dtype=tf.float32)
- boxes = tf.constant([[0.25, 0.5, 0.75, 1.0]], dtype=tf.float32)
- image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes,
- image_height=4,
- image_width=4)
- np_expected_image_masks = np.array([[[0, 0, 0, 0],
- [0, 0, 0.6111111, 0.16666669],
- [0, 0, 0.3888889, 0.83333337],
- [0, 0, 0, 0]]], dtype=np.float32)
- with self.test_session() as sess:
- np_image_masks = sess.run(image_masks)
- self.assertAllClose(np_image_masks, np_expected_image_masks)
-
-
-class MergeBoxesWithMultipleLabelsTest(tf.test.TestCase):
-
- def testMergeBoxesWithMultipleLabels(self):
- boxes = tf.constant(
- [[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75],
- [0.25, 0.25, 0.75, 0.75]],
- dtype=tf.float32)
- class_indices = tf.constant([0, 4, 2], dtype=tf.int32)
- num_classes = 5
- merged_boxes, merged_classes, merged_box_indices = (
- ops.merge_boxes_with_multiple_labels(boxes, class_indices, num_classes))
- expected_merged_boxes = np.array(
- [[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]], dtype=np.float32)
- expected_merged_classes = np.array(
- [[1, 0, 1, 0, 0], [0, 0, 0, 0, 1]], dtype=np.int32)
- expected_merged_box_indices = np.array([0, 1], dtype=np.int32)
- with self.test_session() as sess:
- np_merged_boxes, np_merged_classes, np_merged_box_indices = sess.run(
- [merged_boxes, merged_classes, merged_box_indices])
- if np_merged_classes[0, 0] != 1:
- expected_merged_boxes = expected_merged_boxes[::-1, :]
- expected_merged_classes = expected_merged_classes[::-1, :]
- expected_merged_box_indices = expected_merged_box_indices[::-1, :]
- self.assertAllClose(np_merged_boxes, expected_merged_boxes)
- self.assertAllClose(np_merged_classes, expected_merged_classes)
- self.assertAllClose(np_merged_box_indices, expected_merged_box_indices)
-
- def testMergeBoxesWithEmptyInputs(self):
- boxes = tf.constant([[]])
- class_indices = tf.constant([])
- num_classes = 5
- merged_boxes, merged_classes, merged_box_indices = (
- ops.merge_boxes_with_multiple_labels(boxes, class_indices, num_classes))
- with self.test_session() as sess:
- np_merged_boxes, np_merged_classes, np_merged_box_indices = sess.run(
- [merged_boxes, merged_classes, merged_box_indices])
- self.assertAllEqual(np_merged_boxes.shape, [0, 4])
- self.assertAllEqual(np_merged_classes.shape, [0, 5])
- self.assertAllEqual(np_merged_box_indices.shape, [0])
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/utils/per_image_evaluation.py b/object_detection/utils/per_image_evaluation.py
deleted file mode 100644
index eb7001fc..00000000
--- a/object_detection/utils/per_image_evaluation.py
+++ /dev/null
@@ -1,312 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Evaluate Object Detection result on a single image.
-
-Annotate each detected result as true positives or false positive according to
-a predefined IOU ratio. Non Maximum Supression is used by default. Multi class
-detection is supported by default.
-"""
-import numpy as np
-
-from object_detection.utils import np_box_list
-from object_detection.utils import np_box_list_ops
-
-
-class PerImageEvaluation(object):
- """Evaluate detection result of a single image."""
-
- def __init__(self,
- num_groundtruth_classes,
- matching_iou_threshold=0.5,
- nms_iou_threshold=0.3,
- nms_max_output_boxes=50):
- """Initialized PerImageEvaluation by evaluation parameters.
-
- Args:
- num_groundtruth_classes: Number of ground truth object classes
- matching_iou_threshold: A ratio of area intersection to union, which is
- the threshold to consider whether a detection is true positive or not
- nms_iou_threshold: IOU threshold used in Non Maximum Suppression.
- nms_max_output_boxes: Number of maximum output boxes in NMS.
- """
- self.matching_iou_threshold = matching_iou_threshold
- self.nms_iou_threshold = nms_iou_threshold
- self.nms_max_output_boxes = nms_max_output_boxes
- self.num_groundtruth_classes = num_groundtruth_classes
-
- def compute_object_detection_metrics(
- self, detected_boxes, detected_scores, detected_class_labels,
- groundtruth_boxes, groundtruth_class_labels,
- groundtruth_is_difficult_lists, groundtruth_is_group_of_list):
- """Evaluates detections as being tp, fp or ignored from a single image.
-
- The evaluation is done in two stages:
- 1. All detections are matched to non group-of boxes; true positives are
- determined and detections matched to difficult boxes are ignored.
- 2. Detections that are determined as false positives are matched against
- group-of boxes and ignored if matched.
-
- Args:
- detected_boxes: A float numpy array of shape [N, 4], representing N
- regions of detected object regions.
- Each row is of the format [y_min, x_min, y_max, x_max]
- detected_scores: A float numpy array of shape [N, 1], representing
- the confidence scores of the detected N object instances.
- detected_class_labels: A integer numpy array of shape [N, 1], repreneting
- the class labels of the detected N object instances.
- groundtruth_boxes: A float numpy array of shape [M, 4], representing M
- regions of object instances in ground truth
- groundtruth_class_labels: An integer numpy array of shape [M, 1],
- representing M class labels of object instances in ground truth
- groundtruth_is_difficult_lists: A boolean numpy array of length M denoting
- whether a ground truth box is a difficult instance or not
- groundtruth_is_group_of_list: A boolean numpy array of length M denoting
- whether a ground truth box has group-of tag
-
- Returns:
- scores: A list of C float numpy arrays. Each numpy array is of
- shape [K, 1], representing K scores detected with object class
- label c
- tp_fp_labels: A list of C boolean numpy arrays. Each numpy array
- is of shape [K, 1], representing K True/False positive label of
- object instances detected with class label c
- is_class_correctly_detected_in_image: a numpy integer array of
- shape [C, 1], indicating whether the correponding class has a least
- one instance being correctly detected in the image
- """
- detected_boxes, detected_scores, detected_class_labels = (
- self._remove_invalid_boxes(detected_boxes, detected_scores,
- detected_class_labels))
- scores, tp_fp_labels = self._compute_tp_fp(
- detected_boxes, detected_scores, detected_class_labels,
- groundtruth_boxes, groundtruth_class_labels,
- groundtruth_is_difficult_lists, groundtruth_is_group_of_list)
-
- is_class_correctly_detected_in_image = self._compute_cor_loc(
- detected_boxes, detected_scores, detected_class_labels,
- groundtruth_boxes, groundtruth_class_labels)
- return scores, tp_fp_labels, is_class_correctly_detected_in_image
-
- def _compute_cor_loc(self, detected_boxes, detected_scores,
- detected_class_labels, groundtruth_boxes,
- groundtruth_class_labels):
- """Compute CorLoc score for object detection result.
-
- Args:
- detected_boxes: A float numpy array of shape [N, 4], representing N
- regions of detected object regions.
- Each row is of the format [y_min, x_min, y_max, x_max]
- detected_scores: A float numpy array of shape [N, 1], representing
- the confidence scores of the detected N object instances.
- detected_class_labels: A integer numpy array of shape [N, 1], repreneting
- the class labels of the detected N object instances.
- groundtruth_boxes: A float numpy array of shape [M, 4], representing M
- regions of object instances in ground truth
- groundtruth_class_labels: An integer numpy array of shape [M, 1],
- representing M class labels of object instances in ground truth
- Returns:
- is_class_correctly_detected_in_image: a numpy integer array of
- shape [C, 1], indicating whether the correponding class has a least
- one instance being correctly detected in the image
- """
- is_class_correctly_detected_in_image = np.zeros(
- self.num_groundtruth_classes, dtype=int)
- for i in range(self.num_groundtruth_classes):
- gt_boxes_at_ith_class = groundtruth_boxes[groundtruth_class_labels ==
- i, :]
- detected_boxes_at_ith_class = detected_boxes[detected_class_labels ==
- i, :]
- detected_scores_at_ith_class = detected_scores[detected_class_labels == i]
- is_class_correctly_detected_in_image[i] = (
- self._compute_is_aclass_correctly_detected_in_image(
- detected_boxes_at_ith_class, detected_scores_at_ith_class,
- gt_boxes_at_ith_class))
-
- return is_class_correctly_detected_in_image
-
- def _compute_is_aclass_correctly_detected_in_image(
- self, detected_boxes, detected_scores, groundtruth_boxes):
- """Compute CorLoc score for a single class.
-
- Args:
- detected_boxes: A numpy array of shape [N, 4] representing detected box
- coordinates
- detected_scores: A 1-d numpy array of length N representing classification
- score
- groundtruth_boxes: A numpy array of shape [M, 4] representing ground truth
- box coordinates
-
- Returns:
- is_class_correctly_detected_in_image: An integer 1 or 0 denoting whether a
- class is correctly detected in the image or not
- """
- if detected_boxes.size > 0:
- if groundtruth_boxes.size > 0:
- max_score_id = np.argmax(detected_scores)
- detected_boxlist = np_box_list.BoxList(
- np.expand_dims(detected_boxes[max_score_id, :], axis=0))
- gt_boxlist = np_box_list.BoxList(groundtruth_boxes)
- iou = np_box_list_ops.iou(detected_boxlist, gt_boxlist)
- if np.max(iou) >= self.matching_iou_threshold:
- return 1
- return 0
-
- def _compute_tp_fp(self, detected_boxes, detected_scores,
- detected_class_labels, groundtruth_boxes,
- groundtruth_class_labels, groundtruth_is_difficult_lists,
- groundtruth_is_group_of_list):
- """Labels true/false positives of detections of an image across all classes.
-
- Args:
- detected_boxes: A float numpy array of shape [N, 4], representing N
- regions of detected object regions.
- Each row is of the format [y_min, x_min, y_max, x_max]
- detected_scores: A float numpy array of shape [N, 1], representing
- the confidence scores of the detected N object instances.
- detected_class_labels: A integer numpy array of shape [N, 1], repreneting
- the class labels of the detected N object instances.
- groundtruth_boxes: A float numpy array of shape [M, 4], representing M
- regions of object instances in ground truth
- groundtruth_class_labels: An integer numpy array of shape [M, 1],
- representing M class labels of object instances in ground truth
- groundtruth_is_difficult_lists: A boolean numpy array of length M denoting
- whether a ground truth box is a difficult instance or not
- groundtruth_is_group_of_list: A boolean numpy array of length M denoting
- whether a ground truth box has group-of tag
-
- Returns:
- result_scores: A list of float numpy arrays. Each numpy array is of
- shape [K, 1], representing K scores detected with object class
- label c
- result_tp_fp_labels: A list of boolean numpy array. Each numpy array is of
- shape [K, 1], representing K True/False positive label of object
- instances detected with class label c
- """
- result_scores = []
- result_tp_fp_labels = []
- for i in range(self.num_groundtruth_classes):
- gt_boxes_at_ith_class = groundtruth_boxes[(groundtruth_class_labels == i
- ), :]
- groundtruth_is_difficult_list_at_ith_class = (
- groundtruth_is_difficult_lists[groundtruth_class_labels == i])
- groundtruth_is_group_of_list_at_ith_class = (
- groundtruth_is_group_of_list[groundtruth_class_labels == i])
- detected_boxes_at_ith_class = detected_boxes[(detected_class_labels == i
- ), :]
- detected_scores_at_ith_class = detected_scores[detected_class_labels == i]
- scores, tp_fp_labels = self._compute_tp_fp_for_single_class(
- detected_boxes_at_ith_class, detected_scores_at_ith_class,
- gt_boxes_at_ith_class, groundtruth_is_difficult_list_at_ith_class,
- groundtruth_is_group_of_list_at_ith_class)
- result_scores.append(scores)
- result_tp_fp_labels.append(tp_fp_labels)
- return result_scores, result_tp_fp_labels
-
- def _remove_invalid_boxes(self, detected_boxes, detected_scores,
- detected_class_labels):
- valid_indices = np.logical_and(detected_boxes[:, 0] < detected_boxes[:, 2],
- detected_boxes[:, 1] < detected_boxes[:, 3])
- return (detected_boxes[valid_indices, :], detected_scores[valid_indices],
- detected_class_labels[valid_indices])
-
- def _compute_tp_fp_for_single_class(
- self, detected_boxes, detected_scores, groundtruth_boxes,
- groundtruth_is_difficult_list, groundtruth_is_group_of_list):
- """Labels boxes detected with the same class from the same image as tp/fp.
-
- Args:
- detected_boxes: A numpy array of shape [N, 4] representing detected box
- coordinates
- detected_scores: A 1-d numpy array of length N representing classification
- score
- groundtruth_boxes: A numpy array of shape [M, 4] representing ground truth
- box coordinates
- groundtruth_is_difficult_list: A boolean numpy array of length M denoting
- whether a ground truth box is a difficult instance or not. If a
- groundtruth box is difficult, every detection matching this box
- is ignored.
- groundtruth_is_group_of_list: A boolean numpy array of length M denoting
- whether a ground truth box has group-of tag. If a groundtruth box
- is group-of box, every detection matching this box is ignored.
-
- Returns:
- Two arrays of the same size, containing all boxes that were evaluated as
- being true positives or false positives; if a box matched to a difficult
- box or to a group-of box, it is ignored.
-
- scores: A numpy array representing the detection scores.
- tp_fp_labels: a boolean numpy array indicating whether a detection is a
- true positive.
-
- """
- if detected_boxes.size == 0:
- return np.array([], dtype=float), np.array([], dtype=bool)
- detected_boxlist = np_box_list.BoxList(detected_boxes)
- detected_boxlist.add_field('scores', detected_scores)
- detected_boxlist = np_box_list_ops.non_max_suppression(
- detected_boxlist, self.nms_max_output_boxes, self.nms_iou_threshold)
-
- scores = detected_boxlist.get_field('scores')
-
- if groundtruth_boxes.size == 0:
- return scores, np.zeros(detected_boxlist.num_boxes(), dtype=bool)
-
- tp_fp_labels = np.zeros(detected_boxlist.num_boxes(), dtype=bool)
- is_matched_to_difficult_box = np.zeros(
- detected_boxlist.num_boxes(), dtype=bool)
- is_matched_to_group_of_box = np.zeros(
- detected_boxlist.num_boxes(), dtype=bool)
-
- # The evaluation is done in two stages:
- # 1. All detections are matched to non group-of boxes; true positives are
- # determined and detections matched to difficult boxes are ignored.
- # 2. Detections that are determined as false positives are matched against
- # group-of boxes and ignored if matched.
-
- # Tp-fp evaluation for non-group of boxes (if any).
- gt_non_group_of_boxlist = np_box_list.BoxList(
- groundtruth_boxes[~groundtruth_is_group_of_list, :])
- if gt_non_group_of_boxlist.num_boxes() > 0:
- groundtruth_nongroup_of_is_difficult_list = groundtruth_is_difficult_list[
- ~groundtruth_is_group_of_list]
- iou = np_box_list_ops.iou(detected_boxlist, gt_non_group_of_boxlist)
- max_overlap_gt_ids = np.argmax(iou, axis=1)
- is_gt_box_detected = np.zeros(
- gt_non_group_of_boxlist.num_boxes(), dtype=bool)
- for i in range(detected_boxlist.num_boxes()):
- gt_id = max_overlap_gt_ids[i]
- if iou[i, gt_id] >= self.matching_iou_threshold:
- if not groundtruth_nongroup_of_is_difficult_list[gt_id]:
- if not is_gt_box_detected[gt_id]:
- tp_fp_labels[i] = True
- is_gt_box_detected[gt_id] = True
- else:
- is_matched_to_difficult_box[i] = True
-
- # Tp-fp evaluation for group of boxes.
- gt_group_of_boxlist = np_box_list.BoxList(
- groundtruth_boxes[groundtruth_is_group_of_list, :])
- if gt_group_of_boxlist.num_boxes() > 0:
- ioa = np_box_list_ops.ioa(gt_group_of_boxlist, detected_boxlist)
- max_overlap_group_of_gt = np.max(ioa, axis=0)
- for i in range(detected_boxlist.num_boxes()):
- if (not tp_fp_labels[i] and not is_matched_to_difficult_box[i] and
- max_overlap_group_of_gt[i] >= self.matching_iou_threshold):
- is_matched_to_group_of_box[i] = True
-
- return scores[~is_matched_to_difficult_box
- & ~is_matched_to_group_of_box], tp_fp_labels[
- ~is_matched_to_difficult_box
- & ~is_matched_to_group_of_box]
diff --git a/object_detection/utils/per_image_evaluation_test.py b/object_detection/utils/per_image_evaluation_test.py
deleted file mode 100644
index ffd089bf..00000000
--- a/object_detection/utils/per_image_evaluation_test.py
+++ /dev/null
@@ -1,276 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.utils.per_image_evaluation."""
-
-import numpy as np
-import tensorflow as tf
-
-from object_detection.utils import per_image_evaluation
-
-
-class SingleClassTpFpWithDifficultBoxesTest(tf.test.TestCase):
-
- def setUp(self):
- num_groundtruth_classes = 1
- matching_iou_threshold = 0.5
- nms_iou_threshold = 1.0
- nms_max_output_boxes = 10000
- self.eval = per_image_evaluation.PerImageEvaluation(
- num_groundtruth_classes, matching_iou_threshold, nms_iou_threshold,
- nms_max_output_boxes)
-
- self.detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
- dtype=float)
- self.detected_scores = np.array([0.6, 0.8, 0.5], dtype=float)
- self.groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 10, 10]],
- dtype=float)
-
- def test_match_to_not_difficult_box(self):
- groundtruth_groundtruth_is_difficult_list = np.array([False, True],
- dtype=bool)
- groundtruth_groundtruth_is_group_of_list = np.array(
- [False, False], dtype=bool)
- scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class(
- self.detected_boxes, self.detected_scores, self.groundtruth_boxes,
- groundtruth_groundtruth_is_difficult_list,
- groundtruth_groundtruth_is_group_of_list)
- expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
- expected_tp_fp_labels = np.array([False, True, False], dtype=bool)
- self.assertTrue(np.allclose(expected_scores, scores))
- self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
-
- def test_match_to_difficult_box(self):
- groundtruth_groundtruth_is_difficult_list = np.array([True, False],
- dtype=bool)
- groundtruth_groundtruth_is_group_of_list = np.array(
- [False, False], dtype=bool)
- scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class(
- self.detected_boxes, self.detected_scores, self.groundtruth_boxes,
- groundtruth_groundtruth_is_difficult_list,
- groundtruth_groundtruth_is_group_of_list)
- expected_scores = np.array([0.8, 0.5], dtype=float)
- expected_tp_fp_labels = np.array([False, False], dtype=bool)
- self.assertTrue(np.allclose(expected_scores, scores))
- self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
-
-
-class SingleClassTpFpWithGroupOfBoxesTest(tf.test.TestCase):
-
- def setUp(self):
- num_groundtruth_classes = 1
- matching_iou_threshold = 0.5
- nms_iou_threshold = 1.0
- nms_max_output_boxes = 10000
- self.eval = per_image_evaluation.PerImageEvaluation(
- num_groundtruth_classes, matching_iou_threshold, nms_iou_threshold,
- nms_max_output_boxes)
-
- self.detected_boxes = np.array(
- [[0, 0, 1, 1], [0, 0, 2, 1], [0, 0, 3, 1]], dtype=float)
- self.detected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
- self.groundtruth_boxes = np.array(
- [[0, 0, 1, 1], [0, 0, 5, 5], [10, 10, 20, 20]], dtype=float)
-
- def test_match_to_non_group_of_and_group_of_box(self):
- groundtruth_groundtruth_is_difficult_list = np.array(
- [False, False, False], dtype=bool)
- groundtruth_groundtruth_is_group_of_list = np.array(
- [False, True, True], dtype=bool)
- expected_scores = np.array([0.8], dtype=float)
- expected_tp_fp_labels = np.array([True], dtype=bool)
- scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class(
- self.detected_boxes, self.detected_scores, self.groundtruth_boxes,
- groundtruth_groundtruth_is_difficult_list,
- groundtruth_groundtruth_is_group_of_list)
- self.assertTrue(np.allclose(expected_scores, scores))
- self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
-
- def test_match_two_to_group_of_box(self):
- groundtruth_groundtruth_is_difficult_list = np.array(
- [False, False, False], dtype=bool)
- groundtruth_groundtruth_is_group_of_list = np.array(
- [True, False, True], dtype=bool)
- expected_scores = np.array([0.5], dtype=float)
- expected_tp_fp_labels = np.array([False], dtype=bool)
- scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class(
- self.detected_boxes, self.detected_scores, self.groundtruth_boxes,
- groundtruth_groundtruth_is_difficult_list,
- groundtruth_groundtruth_is_group_of_list)
- self.assertTrue(np.allclose(expected_scores, scores))
- self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
-
-
-class SingleClassTpFpNoDifficultBoxesTest(tf.test.TestCase):
-
- def setUp(self):
- num_groundtruth_classes = 1
- matching_iou_threshold1 = 0.5
- matching_iou_threshold2 = 0.1
- nms_iou_threshold = 1.0
- nms_max_output_boxes = 10000
- self.eval1 = per_image_evaluation.PerImageEvaluation(
- num_groundtruth_classes, matching_iou_threshold1, nms_iou_threshold,
- nms_max_output_boxes)
-
- self.eval2 = per_image_evaluation.PerImageEvaluation(
- num_groundtruth_classes, matching_iou_threshold2, nms_iou_threshold,
- nms_max_output_boxes)
-
- self.detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
- dtype=float)
- self.detected_scores = np.array([0.6, 0.8, 0.5], dtype=float)
-
- def test_no_true_positives(self):
- groundtruth_boxes = np.array([[100, 100, 105, 105]], dtype=float)
- groundtruth_groundtruth_is_difficult_list = np.zeros(1, dtype=bool)
- groundtruth_groundtruth_is_group_of_list = np.array([False], dtype=bool)
- scores, tp_fp_labels = self.eval1._compute_tp_fp_for_single_class(
- self.detected_boxes, self.detected_scores, groundtruth_boxes,
- groundtruth_groundtruth_is_difficult_list,
- groundtruth_groundtruth_is_group_of_list)
- expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
- expected_tp_fp_labels = np.array([False, False, False], dtype=bool)
- self.assertTrue(np.allclose(expected_scores, scores))
- self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
-
- def test_one_true_positives_with_large_iou_threshold(self):
- groundtruth_boxes = np.array([[0, 0, 1, 1]], dtype=float)
- groundtruth_groundtruth_is_difficult_list = np.zeros(1, dtype=bool)
- groundtruth_groundtruth_is_group_of_list = np.array([False], dtype=bool)
- scores, tp_fp_labels = self.eval1._compute_tp_fp_for_single_class(
- self.detected_boxes, self.detected_scores, groundtruth_boxes,
- groundtruth_groundtruth_is_difficult_list,
- groundtruth_groundtruth_is_group_of_list)
- expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
- expected_tp_fp_labels = np.array([False, True, False], dtype=bool)
- self.assertTrue(np.allclose(expected_scores, scores))
- self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
-
- def test_one_true_positives_with_very_small_iou_threshold(self):
- groundtruth_boxes = np.array([[0, 0, 1, 1]], dtype=float)
- groundtruth_groundtruth_is_difficult_list = np.zeros(1, dtype=bool)
- groundtruth_groundtruth_is_group_of_list = np.array([False], dtype=bool)
- scores, tp_fp_labels = self.eval2._compute_tp_fp_for_single_class(
- self.detected_boxes, self.detected_scores, groundtruth_boxes,
- groundtruth_groundtruth_is_difficult_list,
- groundtruth_groundtruth_is_group_of_list)
- expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
- expected_tp_fp_labels = np.array([True, False, False], dtype=bool)
- self.assertTrue(np.allclose(expected_scores, scores))
- self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
-
- def test_two_true_positives_with_large_iou_threshold(self):
- groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 3.5, 3.5]], dtype=float)
- groundtruth_groundtruth_is_difficult_list = np.zeros(2, dtype=bool)
- groundtruth_groundtruth_is_group_of_list = np.array(
- [False, False], dtype=bool)
- scores, tp_fp_labels = self.eval1._compute_tp_fp_for_single_class(
- self.detected_boxes, self.detected_scores, groundtruth_boxes,
- groundtruth_groundtruth_is_difficult_list,
- groundtruth_groundtruth_is_group_of_list)
- expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
- expected_tp_fp_labels = np.array([False, True, True], dtype=bool)
- self.assertTrue(np.allclose(expected_scores, scores))
- self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
-
-
-class MultiClassesTpFpTest(tf.test.TestCase):
-
- def test_tp_fp(self):
- num_groundtruth_classes = 3
- matching_iou_threshold = 0.5
- nms_iou_threshold = 1.0
- nms_max_output_boxes = 10000
- eval1 = per_image_evaluation.PerImageEvaluation(num_groundtruth_classes,
- matching_iou_threshold,
- nms_iou_threshold,
- nms_max_output_boxes)
- detected_boxes = np.array([[0, 0, 1, 1], [10, 10, 5, 5], [0, 0, 2, 2],
- [5, 10, 10, 5], [10, 5, 5, 10], [0, 0, 3, 3]],
- dtype=float)
- detected_scores = np.array([0.8, 0.1, 0.8, 0.9, 0.7, 0.8], dtype=float)
- detected_class_labels = np.array([0, 1, 1, 2, 0, 2], dtype=int)
- groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 3.5, 3.5]], dtype=float)
- groundtruth_class_labels = np.array([0, 2], dtype=int)
- groundtruth_groundtruth_is_difficult_list = np.zeros(2, dtype=float)
- groundtruth_groundtruth_is_group_of_list = np.array(
- [False, False], dtype=bool)
- scores, tp_fp_labels, _ = eval1.compute_object_detection_metrics(
- detected_boxes, detected_scores, detected_class_labels,
- groundtruth_boxes, groundtruth_class_labels,
- groundtruth_groundtruth_is_difficult_list,
- groundtruth_groundtruth_is_group_of_list)
- expected_scores = [np.array([0.8], dtype=float)] * 3
- expected_tp_fp_labels = [np.array([True]), np.array([False]), np.array([True
- ])]
- for i in range(len(expected_scores)):
- self.assertTrue(np.allclose(expected_scores[i], scores[i]))
- self.assertTrue(np.array_equal(expected_tp_fp_labels[i], tp_fp_labels[i]))
-
-
-class CorLocTest(tf.test.TestCase):
-
- def test_compute_corloc_with_normal_iou_threshold(self):
- num_groundtruth_classes = 3
- matching_iou_threshold = 0.5
- nms_iou_threshold = 1.0
- nms_max_output_boxes = 10000
- eval1 = per_image_evaluation.PerImageEvaluation(num_groundtruth_classes,
- matching_iou_threshold,
- nms_iou_threshold,
- nms_max_output_boxes)
- detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3],
- [0, 0, 5, 5]], dtype=float)
- detected_scores = np.array([0.9, 0.9, 0.1, 0.9], dtype=float)
- detected_class_labels = np.array([0, 1, 0, 2], dtype=int)
- groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 3, 3], [0, 0, 6, 6]],
- dtype=float)
- groundtruth_class_labels = np.array([0, 0, 2], dtype=int)
-
- is_class_correctly_detected_in_image = eval1._compute_cor_loc(
- detected_boxes, detected_scores, detected_class_labels,
- groundtruth_boxes, groundtruth_class_labels)
- expected_result = np.array([1, 0, 1], dtype=int)
- self.assertTrue(np.array_equal(expected_result,
- is_class_correctly_detected_in_image))
-
- def test_compute_corloc_with_very_large_iou_threshold(self):
- num_groundtruth_classes = 3
- matching_iou_threshold = 0.9
- nms_iou_threshold = 1.0
- nms_max_output_boxes = 10000
- eval1 = per_image_evaluation.PerImageEvaluation(num_groundtruth_classes,
- matching_iou_threshold,
- nms_iou_threshold,
- nms_max_output_boxes)
- detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3],
- [0, 0, 5, 5]], dtype=float)
- detected_scores = np.array([0.9, 0.9, 0.1, 0.9], dtype=float)
- detected_class_labels = np.array([0, 1, 0, 2], dtype=int)
- groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 3, 3], [0, 0, 6, 6]],
- dtype=float)
- groundtruth_class_labels = np.array([0, 0, 2], dtype=int)
-
- is_class_correctly_detected_in_image = eval1._compute_cor_loc(
- detected_boxes, detected_scores, detected_class_labels,
- groundtruth_boxes, groundtruth_class_labels)
- expected_result = np.array([1, 0, 0], dtype=int)
- self.assertTrue(np.array_equal(expected_result,
- is_class_correctly_detected_in_image))
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/utils/shape_utils.py b/object_detection/utils/shape_utils.py
deleted file mode 100644
index 880d367e..00000000
--- a/object_detection/utils/shape_utils.py
+++ /dev/null
@@ -1,136 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Utils used to manipulate tensor shapes."""
-
-import tensorflow as tf
-
-
-def _is_tensor(t):
- """Returns a boolean indicating whether the input is a tensor.
-
- Args:
- t: the input to be tested.
-
- Returns:
- a boolean that indicates whether t is a tensor.
- """
- return isinstance(t, (tf.Tensor, tf.SparseTensor, tf.Variable))
-
-
-def _set_dim_0(t, d0):
- """Sets the 0-th dimension of the input tensor.
-
- Args:
- t: the input tensor, assuming the rank is at least 1.
- d0: an integer indicating the 0-th dimension of the input tensor.
-
- Returns:
- the tensor t with the 0-th dimension set.
- """
- t_shape = t.get_shape().as_list()
- t_shape[0] = d0
- t.set_shape(t_shape)
- return t
-
-
-def pad_tensor(t, length):
- """Pads the input tensor with 0s along the first dimension up to the length.
-
- Args:
- t: the input tensor, assuming the rank is at least 1.
- length: a tensor of shape [1] or an integer, indicating the first dimension
- of the input tensor t after padding, assuming length <= t.shape[0].
-
- Returns:
- padded_t: the padded tensor, whose first dimension is length. If the length
- is an integer, the first dimension of padded_t is set to length
- statically.
- """
- t_rank = tf.rank(t)
- t_shape = tf.shape(t)
- t_d0 = t_shape[0]
- pad_d0 = tf.expand_dims(length - t_d0, 0)
- pad_shape = tf.cond(
- tf.greater(t_rank, 1), lambda: tf.concat([pad_d0, t_shape[1:]], 0),
- lambda: tf.expand_dims(length - t_d0, 0))
- padded_t = tf.concat([t, tf.zeros(pad_shape, dtype=t.dtype)], 0)
- if not _is_tensor(length):
- padded_t = _set_dim_0(padded_t, length)
- return padded_t
-
-
-def clip_tensor(t, length):
- """Clips the input tensor along the first dimension up to the length.
-
- Args:
- t: the input tensor, assuming the rank is at least 1.
- length: a tensor of shape [1] or an integer, indicating the first dimension
- of the input tensor t after clipping, assuming length <= t.shape[0].
-
- Returns:
- clipped_t: the clipped tensor, whose first dimension is length. If the
- length is an integer, the first dimension of clipped_t is set to length
- statically.
- """
- clipped_t = tf.gather(t, tf.range(length))
- if not _is_tensor(length):
- clipped_t = _set_dim_0(clipped_t, length)
- return clipped_t
-
-
-def pad_or_clip_tensor(t, length):
- """Pad or clip the input tensor along the first dimension.
-
- Args:
- t: the input tensor, assuming the rank is at least 1.
- length: a tensor of shape [1] or an integer, indicating the first dimension
- of the input tensor t after processing.
-
- Returns:
- processed_t: the processed tensor, whose first dimension is length. If the
- length is an integer, the first dimension of the processed tensor is set
- to length statically.
- """
- processed_t = tf.cond(
- tf.greater(tf.shape(t)[0], length),
- lambda: clip_tensor(t, length),
- lambda: pad_tensor(t, length))
- if not _is_tensor(length):
- processed_t = _set_dim_0(processed_t, length)
- return processed_t
-
-
-def combined_static_and_dynamic_shape(tensor):
- """Returns a list containing static and dynamic values for the dimensions.
-
- Returns a list of static and dynamic values for shape dimensions. This is
- useful to preserve static shapes when available in reshape operation.
-
- Args:
- tensor: A tensor of any type.
-
- Returns:
- A list of size tensor.shape.ndims containing integers or a scalar tensor.
- """
- static_shape = tensor.shape.as_list()
- dynamic_shape = tf.shape(tensor)
- combined_shape = []
- for index, dim in enumerate(static_shape):
- if dim is not None:
- combined_shape.append(dim)
- else:
- combined_shape.append(dynamic_shape[index])
- return combined_shape
diff --git a/object_detection/utils/shape_utils_test.py b/object_detection/utils/shape_utils_test.py
deleted file mode 100644
index abeacac8..00000000
--- a/object_detection/utils/shape_utils_test.py
+++ /dev/null
@@ -1,127 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.utils.shape_utils."""
-
-import tensorflow as tf
-
-from object_detection.utils import shape_utils
-
-
-class UtilTest(tf.test.TestCase):
-
- def test_pad_tensor_using_integer_input(self):
- t1 = tf.constant([1], dtype=tf.int32)
- pad_t1 = shape_utils.pad_tensor(t1, 2)
- t2 = tf.constant([[0.1, 0.2]], dtype=tf.float32)
- pad_t2 = shape_utils.pad_tensor(t2, 2)
-
- self.assertEqual(2, pad_t1.get_shape()[0])
- self.assertEqual(2, pad_t2.get_shape()[0])
-
- with self.test_session() as sess:
- pad_t1_result, pad_t2_result = sess.run([pad_t1, pad_t2])
- self.assertAllEqual([1, 0], pad_t1_result)
- self.assertAllClose([[0.1, 0.2], [0, 0]], pad_t2_result)
-
- def test_pad_tensor_using_tensor_input(self):
- t1 = tf.constant([1], dtype=tf.int32)
- pad_t1 = shape_utils.pad_tensor(t1, tf.constant(2))
- t2 = tf.constant([[0.1, 0.2]], dtype=tf.float32)
- pad_t2 = shape_utils.pad_tensor(t2, tf.constant(2))
-
- with self.test_session() as sess:
- pad_t1_result, pad_t2_result = sess.run([pad_t1, pad_t2])
- self.assertAllEqual([1, 0], pad_t1_result)
- self.assertAllClose([[0.1, 0.2], [0, 0]], pad_t2_result)
-
- def test_clip_tensor_using_integer_input(self):
- t1 = tf.constant([1, 2, 3], dtype=tf.int32)
- clip_t1 = shape_utils.clip_tensor(t1, 2)
- t2 = tf.constant([[0.1, 0.2], [0.2, 0.4], [0.5, 0.8]], dtype=tf.float32)
- clip_t2 = shape_utils.clip_tensor(t2, 2)
-
- self.assertEqual(2, clip_t1.get_shape()[0])
- self.assertEqual(2, clip_t2.get_shape()[0])
-
- with self.test_session() as sess:
- clip_t1_result, clip_t2_result = sess.run([clip_t1, clip_t2])
- self.assertAllEqual([1, 2], clip_t1_result)
- self.assertAllClose([[0.1, 0.2], [0.2, 0.4]], clip_t2_result)
-
- def test_clip_tensor_using_tensor_input(self):
- t1 = tf.constant([1, 2, 3], dtype=tf.int32)
- clip_t1 = shape_utils.clip_tensor(t1, tf.constant(2))
- t2 = tf.constant([[0.1, 0.2], [0.2, 0.4], [0.5, 0.8]], dtype=tf.float32)
- clip_t2 = shape_utils.clip_tensor(t2, tf.constant(2))
-
- with self.test_session() as sess:
- clip_t1_result, clip_t2_result = sess.run([clip_t1, clip_t2])
- self.assertAllEqual([1, 2], clip_t1_result)
- self.assertAllClose([[0.1, 0.2], [0.2, 0.4]], clip_t2_result)
-
- def test_pad_or_clip_tensor_using_integer_input(self):
- t1 = tf.constant([1], dtype=tf.int32)
- tt1 = shape_utils.pad_or_clip_tensor(t1, 2)
- t2 = tf.constant([[0.1, 0.2]], dtype=tf.float32)
- tt2 = shape_utils.pad_or_clip_tensor(t2, 2)
-
- t3 = tf.constant([1, 2, 3], dtype=tf.int32)
- tt3 = shape_utils.clip_tensor(t3, 2)
- t4 = tf.constant([[0.1, 0.2], [0.2, 0.4], [0.5, 0.8]], dtype=tf.float32)
- tt4 = shape_utils.clip_tensor(t4, 2)
-
- self.assertEqual(2, tt1.get_shape()[0])
- self.assertEqual(2, tt2.get_shape()[0])
- self.assertEqual(2, tt3.get_shape()[0])
- self.assertEqual(2, tt4.get_shape()[0])
-
- with self.test_session() as sess:
- tt1_result, tt2_result, tt3_result, tt4_result = sess.run(
- [tt1, tt2, tt3, tt4])
- self.assertAllEqual([1, 0], tt1_result)
- self.assertAllClose([[0.1, 0.2], [0, 0]], tt2_result)
- self.assertAllEqual([1, 2], tt3_result)
- self.assertAllClose([[0.1, 0.2], [0.2, 0.4]], tt4_result)
-
- def test_pad_or_clip_tensor_using_tensor_input(self):
- t1 = tf.constant([1], dtype=tf.int32)
- tt1 = shape_utils.pad_or_clip_tensor(t1, tf.constant(2))
- t2 = tf.constant([[0.1, 0.2]], dtype=tf.float32)
- tt2 = shape_utils.pad_or_clip_tensor(t2, tf.constant(2))
-
- t3 = tf.constant([1, 2, 3], dtype=tf.int32)
- tt3 = shape_utils.clip_tensor(t3, tf.constant(2))
- t4 = tf.constant([[0.1, 0.2], [0.2, 0.4], [0.5, 0.8]], dtype=tf.float32)
- tt4 = shape_utils.clip_tensor(t4, tf.constant(2))
-
- with self.test_session() as sess:
- tt1_result, tt2_result, tt3_result, tt4_result = sess.run(
- [tt1, tt2, tt3, tt4])
- self.assertAllEqual([1, 0], tt1_result)
- self.assertAllClose([[0.1, 0.2], [0, 0]], tt2_result)
- self.assertAllEqual([1, 2], tt3_result)
- self.assertAllClose([[0.1, 0.2], [0.2, 0.4]], tt4_result)
-
- def test_combines_static_dynamic_shape(self):
- tensor = tf.placeholder(tf.float32, shape=(None, 2, 3))
- combined_shape = shape_utils.combined_static_and_dynamic_shape(
- tensor)
- self.assertTrue(tf.contrib.framework.is_tensor(combined_shape[0]))
- self.assertListEqual(combined_shape[1:], [2, 3])
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/utils/static_shape.py b/object_detection/utils/static_shape.py
deleted file mode 100644
index 8e4e522f..00000000
--- a/object_detection/utils/static_shape.py
+++ /dev/null
@@ -1,71 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Helper functions to access TensorShape values.
-
-The rank 4 tensor_shape must be of the form [batch_size, height, width, depth].
-"""
-
-
-def get_batch_size(tensor_shape):
- """Returns batch size from the tensor shape.
-
- Args:
- tensor_shape: A rank 4 TensorShape.
-
- Returns:
- An integer representing the batch size of the tensor.
- """
- tensor_shape.assert_has_rank(rank=4)
- return tensor_shape[0].value
-
-
-def get_height(tensor_shape):
- """Returns height from the tensor shape.
-
- Args:
- tensor_shape: A rank 4 TensorShape.
-
- Returns:
- An integer representing the height of the tensor.
- """
- tensor_shape.assert_has_rank(rank=4)
- return tensor_shape[1].value
-
-
-def get_width(tensor_shape):
- """Returns width from the tensor shape.
-
- Args:
- tensor_shape: A rank 4 TensorShape.
-
- Returns:
- An integer representing the width of the tensor.
- """
- tensor_shape.assert_has_rank(rank=4)
- return tensor_shape[2].value
-
-
-def get_depth(tensor_shape):
- """Returns depth from the tensor shape.
-
- Args:
- tensor_shape: A rank 4 TensorShape.
-
- Returns:
- An integer representing the depth of the tensor.
- """
- tensor_shape.assert_has_rank(rank=4)
- return tensor_shape[3].value
diff --git a/object_detection/utils/static_shape_test.py b/object_detection/utils/static_shape_test.py
deleted file mode 100644
index 99307e93..00000000
--- a/object_detection/utils/static_shape_test.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.utils.static_shape."""
-
-import tensorflow as tf
-
-from object_detection.utils import static_shape
-
-
-class StaticShapeTest(tf.test.TestCase):
-
- def test_return_correct_batchSize(self):
- tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3])
- self.assertEqual(32, static_shape.get_batch_size(tensor_shape))
-
- def test_return_correct_height(self):
- tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3])
- self.assertEqual(299, static_shape.get_height(tensor_shape))
-
- def test_return_correct_width(self):
- tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3])
- self.assertEqual(384, static_shape.get_width(tensor_shape))
-
- def test_return_correct_depth(self):
- tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3])
- self.assertEqual(3, static_shape.get_depth(tensor_shape))
-
- def test_die_on_tensor_shape_with_rank_three(self):
- tensor_shape = tf.TensorShape(dims=[32, 299, 384])
- with self.assertRaises(ValueError):
- static_shape.get_batch_size(tensor_shape)
- static_shape.get_height(tensor_shape)
- static_shape.get_width(tensor_shape)
- static_shape.get_depth(tensor_shape)
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/utils/test_utils.py b/object_detection/utils/test_utils.py
deleted file mode 100644
index e6277ea5..00000000
--- a/object_detection/utils/test_utils.py
+++ /dev/null
@@ -1,139 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Contains functions which are convenient for unit testing."""
-import numpy as np
-import tensorflow as tf
-
-from object_detection.core import anchor_generator
-from object_detection.core import box_coder
-from object_detection.core import box_list
-from object_detection.core import box_predictor
-from object_detection.core import matcher
-from object_detection.utils import shape_utils
-
-
-class MockBoxCoder(box_coder.BoxCoder):
- """Simple `difference` BoxCoder."""
-
- @property
- def code_size(self):
- return 4
-
- def _encode(self, boxes, anchors):
- return boxes.get() - anchors.get()
-
- def _decode(self, rel_codes, anchors):
- return box_list.BoxList(rel_codes + anchors.get())
-
-
-class MockBoxPredictor(box_predictor.BoxPredictor):
- """Simple box predictor that ignores inputs and outputs all zeros."""
-
- def __init__(self, is_training, num_classes):
- super(MockBoxPredictor, self).__init__(is_training, num_classes)
-
- def _predict(self, image_features, num_predictions_per_location):
- combined_feature_shape = shape_utils.combined_static_and_dynamic_shape(
- image_features)
- batch_size = combined_feature_shape[0]
- num_anchors = (combined_feature_shape[1] * combined_feature_shape[2])
- code_size = 4
- zero = tf.reduce_sum(0 * image_features)
- box_encodings = zero + tf.zeros(
- (batch_size, num_anchors, 1, code_size), dtype=tf.float32)
- class_predictions_with_background = zero + tf.zeros(
- (batch_size, num_anchors, self.num_classes + 1), dtype=tf.float32)
- return {box_predictor.BOX_ENCODINGS: box_encodings,
- box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND:
- class_predictions_with_background}
-
-
-class MockAnchorGenerator(anchor_generator.AnchorGenerator):
- """Mock anchor generator."""
-
- def name_scope(self):
- return 'MockAnchorGenerator'
-
- def num_anchors_per_location(self):
- return [1]
-
- def _generate(self, feature_map_shape_list):
- num_anchors = sum([shape[0] * shape[1] for shape in feature_map_shape_list])
- return box_list.BoxList(tf.zeros((num_anchors, 4), dtype=tf.float32))
-
-
-class MockMatcher(matcher.Matcher):
- """Simple matcher that matches first anchor to first groundtruth box."""
-
- def _match(self, similarity_matrix):
- return tf.constant([0, -1, -1, -1], dtype=tf.int32)
-
-
-def create_diagonal_gradient_image(height, width, depth):
- """Creates pyramid image. Useful for testing.
-
- For example, pyramid_image(5, 6, 1) looks like:
- # [[[ 5. 4. 3. 2. 1. 0.]
- # [ 6. 5. 4. 3. 2. 1.]
- # [ 7. 6. 5. 4. 3. 2.]
- # [ 8. 7. 6. 5. 4. 3.]
- # [ 9. 8. 7. 6. 5. 4.]]]
-
- Args:
- height: height of image
- width: width of image
- depth: depth of image
-
- Returns:
- pyramid image
- """
- row = np.arange(height)
- col = np.arange(width)[::-1]
- image_layer = np.expand_dims(row, 1) + col
- image_layer = np.expand_dims(image_layer, 2)
-
- image = image_layer
- for i in range(1, depth):
- image = np.concatenate((image, image_layer * pow(10, i)), 2)
-
- return image.astype(np.float32)
-
-
-def create_random_boxes(num_boxes, max_height, max_width):
- """Creates random bounding boxes of specific maximum height and width.
-
- Args:
- num_boxes: number of boxes.
- max_height: maximum height of boxes.
- max_width: maximum width of boxes.
-
- Returns:
- boxes: numpy array of shape [num_boxes, 4]. Each row is in form
- [y_min, x_min, y_max, x_max].
- """
-
- y_1 = np.random.uniform(size=(1, num_boxes)) * max_height
- y_2 = np.random.uniform(size=(1, num_boxes)) * max_height
- x_1 = np.random.uniform(size=(1, num_boxes)) * max_width
- x_2 = np.random.uniform(size=(1, num_boxes)) * max_width
-
- boxes = np.zeros(shape=(num_boxes, 4))
- boxes[:, 0] = np.minimum(y_1, y_2)
- boxes[:, 1] = np.minimum(x_1, x_2)
- boxes[:, 2] = np.maximum(y_1, y_2)
- boxes[:, 3] = np.maximum(x_1, x_2)
-
- return boxes.astype(np.float32)
diff --git a/object_detection/utils/test_utils_test.py b/object_detection/utils/test_utils_test.py
deleted file mode 100644
index 1a4799c6..00000000
--- a/object_detection/utils/test_utils_test.py
+++ /dev/null
@@ -1,73 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.utils.test_utils."""
-
-import numpy as np
-import tensorflow as tf
-
-from object_detection.utils import test_utils
-
-
-class TestUtilsTest(tf.test.TestCase):
-
- def test_diagonal_gradient_image(self):
- """Tests if a good pyramid image is created."""
- pyramid_image = test_utils.create_diagonal_gradient_image(3, 4, 2)
-
- # Test which is easy to understand.
- expected_first_channel = np.array([[3, 2, 1, 0],
- [4, 3, 2, 1],
- [5, 4, 3, 2]], dtype=np.float32)
- self.assertAllEqual(np.squeeze(pyramid_image[:, :, 0]),
- expected_first_channel)
-
- # Actual test.
- expected_image = np.array([[[3, 30],
- [2, 20],
- [1, 10],
- [0, 0]],
- [[4, 40],
- [3, 30],
- [2, 20],
- [1, 10]],
- [[5, 50],
- [4, 40],
- [3, 30],
- [2, 20]]], dtype=np.float32)
-
- self.assertAllEqual(pyramid_image, expected_image)
-
- def test_random_boxes(self):
- """Tests if valid random boxes are created."""
- num_boxes = 1000
- max_height = 3
- max_width = 5
- boxes = test_utils.create_random_boxes(num_boxes,
- max_height,
- max_width)
-
- true_column = np.ones(shape=(num_boxes)) == 1
- self.assertAllEqual(boxes[:, 0] < boxes[:, 2], true_column)
- self.assertAllEqual(boxes[:, 1] < boxes[:, 3], true_column)
-
- self.assertTrue(boxes[:, 0].min() >= 0)
- self.assertTrue(boxes[:, 1].min() >= 0)
- self.assertTrue(boxes[:, 2].max() <= max_height)
- self.assertTrue(boxes[:, 3].max() <= max_width)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/utils/variables_helper.py b/object_detection/utils/variables_helper.py
deleted file mode 100644
index b27f814f..00000000
--- a/object_detection/utils/variables_helper.py
+++ /dev/null
@@ -1,133 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Helper functions for manipulating collections of variables during training.
-"""
-import logging
-import re
-
-import tensorflow as tf
-
-slim = tf.contrib.slim
-
-
-# TODO: Consider replacing with tf.contrib.filter_variables in
-# tensorflow/contrib/framework/python/ops/variables.py
-def filter_variables(variables, filter_regex_list, invert=False):
- """Filters out the variables matching the filter_regex.
-
- Filter out the variables whose name matches the any of the regular
- expressions in filter_regex_list and returns the remaining variables.
- Optionally, if invert=True, the complement set is returned.
-
- Args:
- variables: a list of tensorflow variables.
- filter_regex_list: a list of string regular expressions.
- invert: (boolean). If True, returns the complement of the filter set; that
- is, all variables matching filter_regex are kept and all others discarded.
-
- Returns:
- a list of filtered variables.
- """
- kept_vars = []
- variables_to_ignore_patterns = filter(None, filter_regex_list)
- for var in variables:
- add = True
- for pattern in variables_to_ignore_patterns:
- if re.match(pattern, var.op.name):
- add = False
- break
- if add != invert:
- kept_vars.append(var)
- return kept_vars
-
-
-def multiply_gradients_matching_regex(grads_and_vars, regex_list, multiplier):
- """Multiply gradients whose variable names match a regular expression.
-
- Args:
- grads_and_vars: A list of gradient to variable pairs (tuples).
- regex_list: A list of string regular expressions.
- multiplier: A (float) multiplier to apply to each gradient matching the
- regular expression.
-
- Returns:
- grads_and_vars: A list of gradient to variable pairs (tuples).
- """
- variables = [pair[1] for pair in grads_and_vars]
- matching_vars = filter_variables(variables, regex_list, invert=True)
- for var in matching_vars:
- logging.info('Applying multiplier %f to variable [%s]',
- multiplier, var.op.name)
- grad_multipliers = {var: float(multiplier) for var in matching_vars}
- return slim.learning.multiply_gradients(grads_and_vars,
- grad_multipliers)
-
-
-def freeze_gradients_matching_regex(grads_and_vars, regex_list):
- """Freeze gradients whose variable names match a regular expression.
-
- Args:
- grads_and_vars: A list of gradient to variable pairs (tuples).
- regex_list: A list of string regular expressions.
-
- Returns:
- grads_and_vars: A list of gradient to variable pairs (tuples) that do not
- contain the variables and gradients matching the regex.
- """
- variables = [pair[1] for pair in grads_and_vars]
- matching_vars = filter_variables(variables, regex_list, invert=True)
- kept_grads_and_vars = [pair for pair in grads_and_vars
- if pair[1] not in matching_vars]
- for var in matching_vars:
- logging.info('Freezing variable [%s]', var.op.name)
- return kept_grads_and_vars
-
-
-def get_variables_available_in_checkpoint(variables, checkpoint_path):
- """Returns the subset of variables available in the checkpoint.
-
- Inspects given checkpoint and returns the subset of variables that are
- available in it.
-
- TODO: force input and output to be a dictionary.
-
- Args:
- variables: a list or dictionary of variables to find in checkpoint.
- checkpoint_path: path to the checkpoint to restore variables from.
-
- Returns:
- A list or dictionary of variables.
- Raises:
- ValueError: if `variables` is not a list or dict.
- """
- if isinstance(variables, list):
- variable_names_map = {variable.op.name: variable for variable in variables}
- elif isinstance(variables, dict):
- variable_names_map = variables
- else:
- raise ValueError('`variables` is expected to be a list or dict.')
- ckpt_reader = tf.train.NewCheckpointReader(checkpoint_path)
- ckpt_vars = ckpt_reader.get_variable_to_shape_map().keys()
- vars_in_ckpt = {}
- for variable_name, variable in sorted(variable_names_map.items()):
- if variable_name in ckpt_vars:
- vars_in_ckpt[variable_name] = variable
- else:
- logging.warning('Variable [%s] not available in checkpoint',
- variable_name)
- if isinstance(variables, list):
- return vars_in_ckpt.values()
- return vars_in_ckpt
diff --git a/object_detection/utils/variables_helper_test.py b/object_detection/utils/variables_helper_test.py
deleted file mode 100644
index c04b1191..00000000
--- a/object_detection/utils/variables_helper_test.py
+++ /dev/null
@@ -1,185 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for object_detection.utils.variables_helper."""
-import os
-
-import tensorflow as tf
-
-from object_detection.utils import variables_helper
-
-
-class FilterVariablesTest(tf.test.TestCase):
-
- def _create_variables(self):
- return [tf.Variable(1.0, name='FeatureExtractor/InceptionV3/weights'),
- tf.Variable(1.0, name='FeatureExtractor/InceptionV3/biases'),
- tf.Variable(1.0, name='StackProposalGenerator/weights'),
- tf.Variable(1.0, name='StackProposalGenerator/biases')]
-
- def test_return_all_variables_when_empty_regex(self):
- variables = self._create_variables()
- out_variables = variables_helper.filter_variables(variables, [''])
- self.assertItemsEqual(out_variables, variables)
-
- def test_return_variables_which_do_not_match_single_regex(self):
- variables = self._create_variables()
- out_variables = variables_helper.filter_variables(variables,
- ['FeatureExtractor/.*'])
- self.assertItemsEqual(out_variables, variables[2:])
-
- def test_return_variables_which_do_not_match_any_regex_in_list(self):
- variables = self._create_variables()
- out_variables = variables_helper.filter_variables(variables, [
- 'FeatureExtractor.*biases', 'StackProposalGenerator.*biases'
- ])
- self.assertItemsEqual(out_variables, [variables[0], variables[2]])
-
- def test_return_variables_matching_empty_regex_list(self):
- variables = self._create_variables()
- out_variables = variables_helper.filter_variables(
- variables, [''], invert=True)
- self.assertItemsEqual(out_variables, [])
-
- def test_return_variables_matching_some_regex_in_list(self):
- variables = self._create_variables()
- out_variables = variables_helper.filter_variables(
- variables,
- ['FeatureExtractor.*biases', 'StackProposalGenerator.*biases'],
- invert=True)
- self.assertItemsEqual(out_variables, [variables[1], variables[3]])
-
-
-class MultiplyGradientsMatchingRegexTest(tf.test.TestCase):
-
- def _create_grads_and_vars(self):
- return [(tf.constant(1.0),
- tf.Variable(1.0, name='FeatureExtractor/InceptionV3/weights')),
- (tf.constant(2.0),
- tf.Variable(2.0, name='FeatureExtractor/InceptionV3/biases')),
- (tf.constant(3.0),
- tf.Variable(3.0, name='StackProposalGenerator/weights')),
- (tf.constant(4.0),
- tf.Variable(4.0, name='StackProposalGenerator/biases'))]
-
- def test_multiply_all_feature_extractor_variables(self):
- grads_and_vars = self._create_grads_and_vars()
- regex_list = ['FeatureExtractor/.*']
- multiplier = 0.0
- grads_and_vars = variables_helper.multiply_gradients_matching_regex(
- grads_and_vars, regex_list, multiplier)
- exp_output = [(0.0, 1.0), (0.0, 2.0), (3.0, 3.0), (4.0, 4.0)]
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- output = sess.run(grads_and_vars)
- self.assertItemsEqual(output, exp_output)
-
- def test_multiply_all_bias_variables(self):
- grads_and_vars = self._create_grads_and_vars()
- regex_list = ['.*/biases']
- multiplier = 0.0
- grads_and_vars = variables_helper.multiply_gradients_matching_regex(
- grads_and_vars, regex_list, multiplier)
- exp_output = [(1.0, 1.0), (0.0, 2.0), (3.0, 3.0), (0.0, 4.0)]
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- output = sess.run(grads_and_vars)
- self.assertItemsEqual(output, exp_output)
-
-
-class FreezeGradientsMatchingRegexTest(tf.test.TestCase):
-
- def _create_grads_and_vars(self):
- return [(tf.constant(1.0),
- tf.Variable(1.0, name='FeatureExtractor/InceptionV3/weights')),
- (tf.constant(2.0),
- tf.Variable(2.0, name='FeatureExtractor/InceptionV3/biases')),
- (tf.constant(3.0),
- tf.Variable(3.0, name='StackProposalGenerator/weights')),
- (tf.constant(4.0),
- tf.Variable(4.0, name='StackProposalGenerator/biases'))]
-
- def test_freeze_all_feature_extractor_variables(self):
- grads_and_vars = self._create_grads_and_vars()
- regex_list = ['FeatureExtractor/.*']
- grads_and_vars = variables_helper.freeze_gradients_matching_regex(
- grads_and_vars, regex_list)
- exp_output = [(3.0, 3.0), (4.0, 4.0)]
- init_op = tf.global_variables_initializer()
- with self.test_session() as sess:
- sess.run(init_op)
- output = sess.run(grads_and_vars)
- self.assertItemsEqual(output, exp_output)
-
-
-class GetVariablesAvailableInCheckpointTest(tf.test.TestCase):
-
- def test_return_all_variables_from_checkpoint(self):
- variables = [
- tf.Variable(1.0, name='weights'),
- tf.Variable(1.0, name='biases')
- ]
- checkpoint_path = os.path.join(self.get_temp_dir(), 'graph.pb')
- init_op = tf.global_variables_initializer()
- saver = tf.train.Saver(variables)
- with self.test_session() as sess:
- sess.run(init_op)
- saver.save(sess, checkpoint_path)
- out_variables = variables_helper.get_variables_available_in_checkpoint(
- variables, checkpoint_path)
- self.assertItemsEqual(out_variables, variables)
-
- def test_return_variables_available_in_checkpoint(self):
- checkpoint_path = os.path.join(self.get_temp_dir(), 'graph.pb')
- graph1_variables = [
- tf.Variable(1.0, name='weights'),
- ]
- init_op = tf.global_variables_initializer()
- saver = tf.train.Saver(graph1_variables)
- with self.test_session() as sess:
- sess.run(init_op)
- saver.save(sess, checkpoint_path)
-
- graph2_variables = graph1_variables + [tf.Variable(1.0, name='biases')]
- out_variables = variables_helper.get_variables_available_in_checkpoint(
- graph2_variables, checkpoint_path)
- self.assertItemsEqual(out_variables, graph1_variables)
-
- def test_return_variables_available_an_checkpoint_with_dict_inputs(self):
- checkpoint_path = os.path.join(self.get_temp_dir(), 'graph.pb')
- graph1_variables = [
- tf.Variable(1.0, name='ckpt_weights'),
- ]
- init_op = tf.global_variables_initializer()
- saver = tf.train.Saver(graph1_variables)
- with self.test_session() as sess:
- sess.run(init_op)
- saver.save(sess, checkpoint_path)
-
- graph2_variables_dict = {
- 'ckpt_weights': tf.Variable(1.0, name='weights'),
- 'ckpt_biases': tf.Variable(1.0, name='biases')
- }
- out_variables = variables_helper.get_variables_available_in_checkpoint(
- graph2_variables_dict, checkpoint_path)
- self.assertTrue(isinstance(out_variables, dict))
- self.assertItemsEqual(out_variables.keys(), ['ckpt_weights'])
- self.assertTrue(out_variables['ckpt_weights'].op.name == 'weights')
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/object_detection/utils/visualization_utils.py b/object_detection/utils/visualization_utils.py
deleted file mode 100644
index 1bce2ca4..00000000
--- a/object_detection/utils/visualization_utils.py
+++ /dev/null
@@ -1,524 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""A set of functions that are used for visualization.
-
-These functions often receive an image, perform some visualization on the image.
-The functions do not return a value, instead they modify the image itself.
-
-"""
-import collections
-import functools
-import matplotlib.pyplot as plt
-import numpy as np
-import PIL.Image as Image
-import PIL.ImageColor as ImageColor
-import PIL.ImageDraw as ImageDraw
-import PIL.ImageFont as ImageFont
-import six
-import tensorflow as tf
-
-
-_TITLE_LEFT_MARGIN = 10
-_TITLE_TOP_MARGIN = 10
-STANDARD_COLORS = [
- 'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque',
- 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',
- 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan',
- 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',
- 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',
- 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',
- 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod',
- 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',
- 'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',
- 'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',
- 'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',
- 'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',
- 'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',
- 'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',
- 'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',
- 'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',
- 'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',
- 'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
- 'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',
- 'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
- 'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',
- 'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',
- 'WhiteSmoke', 'Yellow', 'YellowGreen'
-]
-
-
-def save_image_array_as_png(image, output_path):
- """Saves an image (represented as a numpy array) to PNG.
-
- Args:
- image: a numpy array with shape [height, width, 3].
- output_path: path to which image should be written.
- """
- image_pil = Image.fromarray(np.uint8(image)).convert('RGB')
- with tf.gfile.Open(output_path, 'w') as fid:
- image_pil.save(fid, 'PNG')
-
-
-def encode_image_array_as_png_str(image):
- """Encodes a numpy array into a PNG string.
-
- Args:
- image: a numpy array with shape [height, width, 3].
-
- Returns:
- PNG encoded image string.
- """
- image_pil = Image.fromarray(np.uint8(image))
- output = six.BytesIO()
- image_pil.save(output, format='PNG')
- png_string = output.getvalue()
- output.close()
- return png_string
-
-
-def draw_bounding_box_on_image_array(image,
- ymin,
- xmin,
- ymax,
- xmax,
- color='red',
- thickness=4,
- display_str_list=(),
- use_normalized_coordinates=True):
- """Adds a bounding box to an image (numpy array).
-
- Args:
- image: a numpy array with shape [height, width, 3].
- ymin: ymin of bounding box in normalized coordinates (same below).
- xmin: xmin of bounding box.
- ymax: ymax of bounding box.
- xmax: xmax of bounding box.
- color: color to draw bounding box. Default is red.
- thickness: line thickness. Default value is 4.
- display_str_list: list of strings to display in box
- (each to be shown on its own line).
- use_normalized_coordinates: If True (default), treat coordinates
- ymin, xmin, ymax, xmax as relative to the image. Otherwise treat
- coordinates as absolute.
- """
- image_pil = Image.fromarray(np.uint8(image)).convert('RGB')
- draw_bounding_box_on_image(image_pil, ymin, xmin, ymax, xmax, color,
- thickness, display_str_list,
- use_normalized_coordinates)
- np.copyto(image, np.array(image_pil))
-
-
-def draw_bounding_box_on_image(image,
- ymin,
- xmin,
- ymax,
- xmax,
- color='red',
- thickness=4,
- display_str_list=(),
- use_normalized_coordinates=True):
- """Adds a bounding box to an image.
-
- Each string in display_str_list is displayed on a separate line above the
- bounding box in black text on a rectangle filled with the input 'color'.
- If the top of the bounding box extends to the edge of the image, the strings
- are displayed below the bounding box.
-
- Args:
- image: a PIL.Image object.
- ymin: ymin of bounding box.
- xmin: xmin of bounding box.
- ymax: ymax of bounding box.
- xmax: xmax of bounding box.
- color: color to draw bounding box. Default is red.
- thickness: line thickness. Default value is 4.
- display_str_list: list of strings to display in box
- (each to be shown on its own line).
- use_normalized_coordinates: If True (default), treat coordinates
- ymin, xmin, ymax, xmax as relative to the image. Otherwise treat
- coordinates as absolute.
- """
- draw = ImageDraw.Draw(image)
- im_width, im_height = image.size
- if use_normalized_coordinates:
- (left, right, top, bottom) = (xmin * im_width, xmax * im_width,
- ymin * im_height, ymax * im_height)
- else:
- (left, right, top, bottom) = (xmin, xmax, ymin, ymax)
- draw.line([(left, top), (left, bottom), (right, bottom),
- (right, top), (left, top)], width=thickness, fill=color)
- try:
- font = ImageFont.truetype('arial.ttf', 24)
- except IOError:
- font = ImageFont.load_default()
-
- # If the total height of the display strings added to the top of the bounding
- # box exceeds the top of the image, stack the strings below the bounding box
- # instead of above.
- display_str_heights = [font.getsize(ds)[1] for ds in display_str_list]
- # Each display_str has a top and bottom margin of 0.05x.
- total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)
-
- if top > total_display_str_height:
- text_bottom = top
- else:
- text_bottom = bottom + total_display_str_height
- # Reverse list and print from bottom to top.
- for display_str in display_str_list[::-1]:
- text_width, text_height = font.getsize(display_str)
- margin = np.ceil(0.05 * text_height)
- draw.rectangle(
- [(left, text_bottom - text_height - 2 * margin), (left + text_width,
- text_bottom)],
- fill=color)
- draw.text(
- (left + margin, text_bottom - text_height - margin),
- display_str,
- fill='black',
- font=font)
- text_bottom -= text_height - 2 * margin
-
-
-def draw_bounding_boxes_on_image_array(image,
- boxes,
- color='red',
- thickness=4,
- display_str_list_list=()):
- """Draws bounding boxes on image (numpy array).
-
- Args:
- image: a numpy array object.
- boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax).
- The coordinates are in normalized format between [0, 1].
- color: color to draw bounding box. Default is red.
- thickness: line thickness. Default value is 4.
- display_str_list_list: list of list of strings.
- a list of strings for each bounding box.
- The reason to pass a list of strings for a
- bounding box is that it might contain
- multiple labels.
-
- Raises:
- ValueError: if boxes is not a [N, 4] array
- """
- image_pil = Image.fromarray(image)
- draw_bounding_boxes_on_image(image_pil, boxes, color, thickness,
- display_str_list_list)
- np.copyto(image, np.array(image_pil))
-
-
-def draw_bounding_boxes_on_image(image,
- boxes,
- color='red',
- thickness=4,
- display_str_list_list=()):
- """Draws bounding boxes on image.
-
- Args:
- image: a PIL.Image object.
- boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax).
- The coordinates are in normalized format between [0, 1].
- color: color to draw bounding box. Default is red.
- thickness: line thickness. Default value is 4.
- display_str_list_list: list of list of strings.
- a list of strings for each bounding box.
- The reason to pass a list of strings for a
- bounding box is that it might contain
- multiple labels.
-
- Raises:
- ValueError: if boxes is not a [N, 4] array
- """
- boxes_shape = boxes.shape
- if not boxes_shape:
- return
- if len(boxes_shape) != 2 or boxes_shape[1] != 4:
- raise ValueError('Input must be of size [N, 4]')
- for i in range(boxes_shape[0]):
- display_str_list = ()
- if display_str_list_list:
- display_str_list = display_str_list_list[i]
- draw_bounding_box_on_image(image, boxes[i, 0], boxes[i, 1], boxes[i, 2],
- boxes[i, 3], color, thickness, display_str_list)
-
-
-def draw_bounding_boxes_on_image_tensors(images,
- boxes,
- classes,
- scores,
- category_index,
- max_boxes_to_draw=20,
- min_score_thresh=0.2):
- """Draws bounding boxes on batch of image tensors.
-
- Args:
- images: A 4D uint8 image tensor of shape [N, H, W, C].
- boxes: [N, max_detections, 4] float32 tensor of detection boxes.
- classes: [N, max_detections] int tensor of detection classes. Note that
- classes are 1-indexed.
- scores: [N, max_detections] float32 tensor of detection scores.
- category_index: a dict that maps integer ids to category dicts. e.g.
- {1: {1: 'dog'}, 2: {2: 'cat'}, ...}
- max_boxes_to_draw: Maximum number of boxes to draw on an image. Default 20.
- min_score_thresh: Minimum score threshold for visualization. Default 0.2.
-
- Returns:
- 4D image tensor of type uint8, with boxes drawn on top.
- """
- visualize_boxes_fn = functools.partial(
- visualize_boxes_and_labels_on_image_array,
- category_index=category_index,
- instance_masks=None,
- keypoints=None,
- use_normalized_coordinates=True,
- max_boxes_to_draw=max_boxes_to_draw,
- min_score_thresh=min_score_thresh,
- agnostic_mode=False,
- line_thickness=4)
-
- def draw_boxes(image_boxes_classes_scores):
- """Draws boxes on image."""
- (image, boxes, classes, scores) = image_boxes_classes_scores
- image_with_boxes = tf.py_func(visualize_boxes_fn,
- [image, boxes, classes, scores], tf.uint8)
- return image_with_boxes
-
- images = tf.map_fn(
- draw_boxes, (images, boxes, classes, scores),
- dtype=tf.uint8,
- back_prop=False)
- return images
-
-
-def draw_keypoints_on_image_array(image,
- keypoints,
- color='red',
- radius=2,
- use_normalized_coordinates=True):
- """Draws keypoints on an image (numpy array).
-
- Args:
- image: a numpy array with shape [height, width, 3].
- keypoints: a numpy array with shape [num_keypoints, 2].
- color: color to draw the keypoints with. Default is red.
- radius: keypoint radius. Default value is 2.
- use_normalized_coordinates: if True (default), treat keypoint values as
- relative to the image. Otherwise treat them as absolute.
- """
- image_pil = Image.fromarray(np.uint8(image)).convert('RGB')
- draw_keypoints_on_image(image_pil, keypoints, color, radius,
- use_normalized_coordinates)
- np.copyto(image, np.array(image_pil))
-
-
-def draw_keypoints_on_image(image,
- keypoints,
- color='red',
- radius=2,
- use_normalized_coordinates=True):
- """Draws keypoints on an image.
-
- Args:
- image: a PIL.Image object.
- keypoints: a numpy array with shape [num_keypoints, 2].
- color: color to draw the keypoints with. Default is red.
- radius: keypoint radius. Default value is 2.
- use_normalized_coordinates: if True (default), treat keypoint values as
- relative to the image. Otherwise treat them as absolute.
- """
- draw = ImageDraw.Draw(image)
- im_width, im_height = image.size
- keypoints_x = [k[1] for k in keypoints]
- keypoints_y = [k[0] for k in keypoints]
- if use_normalized_coordinates:
- keypoints_x = tuple([im_width * x for x in keypoints_x])
- keypoints_y = tuple([im_height * y for y in keypoints_y])
- for keypoint_x, keypoint_y in zip(keypoints_x, keypoints_y):
- draw.ellipse([(keypoint_x - radius, keypoint_y - radius),
- (keypoint_x + radius, keypoint_y + radius)],
- outline=color, fill=color)
-
-
-def draw_mask_on_image_array(image, mask, color='red', alpha=0.7):
- """Draws mask on an image.
-
- Args:
- image: uint8 numpy array with shape (img_height, img_height, 3)
- mask: a uint8 numpy array of shape (img_height, img_height) with
- values between either 0 or 1.
- color: color to draw the keypoints with. Default is red.
- alpha: transparency value between 0 and 1. (default: 0.7)
-
- Raises:
- ValueError: On incorrect data type for image or masks.
- """
- if image.dtype != np.uint8:
- raise ValueError('`image` not of type np.uint8')
- if mask.dtype != np.uint8:
- raise ValueError('`mask` not of type np.uint8')
- if np.any(np.logical_and(mask != 1, mask != 0)):
- raise ValueError('`mask` elements should be in [0, 1]')
- rgb = ImageColor.getrgb(color)
- pil_image = Image.fromarray(image)
-
- solid_color = np.expand_dims(
- np.ones_like(mask), axis=2) * np.reshape(list(rgb), [1, 1, 3])
- pil_solid_color = Image.fromarray(np.uint8(solid_color)).convert('RGBA')
- pil_mask = Image.fromarray(np.uint8(255.0*alpha*mask)).convert('L')
- pil_image = Image.composite(pil_solid_color, pil_image, pil_mask)
- np.copyto(image, np.array(pil_image.convert('RGB')))
-
-
-def visualize_boxes_and_labels_on_image_array(image,
- boxes,
- classes,
- scores,
- category_index,
- instance_masks=None,
- keypoints=None,
- use_normalized_coordinates=False,
- max_boxes_to_draw=20,
- min_score_thresh=.5,
- agnostic_mode=False,
- line_thickness=4):
- """Overlay labeled boxes on an image with formatted scores and label names.
-
- This function groups boxes that correspond to the same location
- and creates a display string for each detection and overlays these
- on the image. Note that this function modifies the image in place, and returns
- that same image.
-
- Args:
- image: uint8 numpy array with shape (img_height, img_width, 3)
- boxes: a numpy array of shape [N, 4]
- classes: a numpy array of shape [N]. Note that class indices are 1-based,
- and match the keys in the label map.
- scores: a numpy array of shape [N] or None. If scores=None, then
- this function assumes that the boxes to be plotted are groundtruth
- boxes and plot all boxes as black with no classes or scores.
- category_index: a dict containing category dictionaries (each holding
- category index `id` and category name `name`) keyed by category indices.
- instance_masks: a numpy array of shape [N, image_height, image_width], can
- be None
- keypoints: a numpy array of shape [N, num_keypoints, 2], can
- be None
- use_normalized_coordinates: whether boxes is to be interpreted as
- normalized coordinates or not.
- max_boxes_to_draw: maximum number of boxes to visualize. If None, draw
- all boxes.
- min_score_thresh: minimum score threshold for a box to be visualized
- agnostic_mode: boolean (default: False) controlling whether to evaluate in
- class-agnostic mode or not. This mode will display scores but ignore
- classes.
- line_thickness: integer (default: 4) controlling line width of the boxes.
-
- Returns:
- uint8 numpy array with shape (img_height, img_width, 3) with overlaid boxes.
- """
- # Create a display string (and color) for every box location, group any boxes
- # that correspond to the same location.
- box_to_display_str_map = collections.defaultdict(list)
- box_to_color_map = collections.defaultdict(str)
- box_to_instance_masks_map = {}
- box_to_keypoints_map = collections.defaultdict(list)
- if not max_boxes_to_draw:
- max_boxes_to_draw = boxes.shape[0]
- for i in range(min(max_boxes_to_draw, boxes.shape[0])):
- if scores is None or scores[i] > min_score_thresh:
- box = tuple(boxes[i].tolist())
- if instance_masks is not None:
- box_to_instance_masks_map[box] = instance_masks[i]
- if keypoints is not None:
- box_to_keypoints_map[box].extend(keypoints[i])
- if scores is None:
- box_to_color_map[box] = 'black'
- else:
- if not agnostic_mode:
- if classes[i] in category_index.keys():
- class_name = category_index[classes[i]]['name']
- else:
- class_name = 'N/A'
- display_str = '{}: {}%'.format(
- class_name,
- int(100*scores[i]))
- else:
- display_str = 'score: {}%'.format(int(100 * scores[i]))
- box_to_display_str_map[box].append(display_str)
- if agnostic_mode:
- box_to_color_map[box] = 'DarkOrange'
- else:
- box_to_color_map[box] = STANDARD_COLORS[
- classes[i] % len(STANDARD_COLORS)]
-
- # Draw all boxes onto image.
- for box, color in box_to_color_map.items():
- ymin, xmin, ymax, xmax = box
- if instance_masks is not None:
- draw_mask_on_image_array(
- image,
- box_to_instance_masks_map[box],
- color=color
- )
- draw_bounding_box_on_image_array(
- image,
- ymin,
- xmin,
- ymax,
- xmax,
- color=color,
- thickness=line_thickness,
- display_str_list=box_to_display_str_map[box],
- use_normalized_coordinates=use_normalized_coordinates)
- if keypoints is not None:
- draw_keypoints_on_image_array(
- image,
- box_to_keypoints_map[box],
- color=color,
- radius=line_thickness / 2,
- use_normalized_coordinates=use_normalized_coordinates)
-
- return image
-
-
-def add_cdf_image_summary(values, name):
- """Adds a tf.summary.image for a CDF plot of the values.
-
- Normalizes `values` such that they sum to 1, plots the cumulative distribution
- function and creates a tf image summary.
-
- Args:
- values: a 1-D float32 tensor containing the values.
- name: name for the image summary.
- """
- def cdf_plot(values):
- """Numpy function to plot CDF."""
- normalized_values = values / np.sum(values)
- sorted_values = np.sort(normalized_values)
- cumulative_values = np.cumsum(sorted_values)
- fraction_of_examples = (np.arange(cumulative_values.size, dtype=np.float32)
- / cumulative_values.size)
- fig = plt.figure(frameon=False)
- ax = fig.add_subplot('111')
- ax.plot(fraction_of_examples, cumulative_values)
- ax.set_ylabel('cumulative normalized values')
- ax.set_xlabel('fraction of examples')
- fig.canvas.draw()
- width, height = fig.get_size_inches() * fig.get_dpi()
- image = np.fromstring(fig.canvas.tostring_rgb(), dtype='uint8').reshape(
- 1, height, width, 3)
- return image
- cdf_plot = tf.py_func(cdf_plot, [values], tf.uint8)
- tf.summary.image(name, cdf_plot)
diff --git a/object_detection/utils/visualization_utils_test.py b/object_detection/utils/visualization_utils_test.py
deleted file mode 100644
index dffe1cd6..00000000
--- a/object_detection/utils/visualization_utils_test.py
+++ /dev/null
@@ -1,197 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for image.understanding.object_detection.core.visualization_utils.
-
-Testing with visualization in the following colab:
-https://drive.google.com/a/google.com/file/d/0B5HnKS_hMsNARERpU3MtU3I5RFE/view?usp=sharing
-
-"""
-
-import os
-
-import numpy as np
-import PIL.Image as Image
-import tensorflow as tf
-
-from object_detection.utils import visualization_utils
-
-_TESTDATA_PATH = 'object_detection/test_images'
-
-
-class VisualizationUtilsTest(tf.test.TestCase):
-
- def create_colorful_test_image(self):
- """This function creates an image that can be used to test vis functions.
-
- It makes an image composed of four colored rectangles.
-
- Returns:
- colorful test numpy array image.
- """
- ch255 = np.full([100, 200, 1], 255, dtype=np.uint8)
- ch128 = np.full([100, 200, 1], 128, dtype=np.uint8)
- ch0 = np.full([100, 200, 1], 0, dtype=np.uint8)
- imr = np.concatenate((ch255, ch128, ch128), axis=2)
- img = np.concatenate((ch255, ch255, ch0), axis=2)
- imb = np.concatenate((ch255, ch0, ch255), axis=2)
- imw = np.concatenate((ch128, ch128, ch128), axis=2)
- imu = np.concatenate((imr, img), axis=1)
- imd = np.concatenate((imb, imw), axis=1)
- image = np.concatenate((imu, imd), axis=0)
- return image
-
- def test_draw_bounding_box_on_image(self):
- test_image = self.create_colorful_test_image()
- test_image = Image.fromarray(test_image)
- width_original, height_original = test_image.size
- ymin = 0.25
- ymax = 0.75
- xmin = 0.4
- xmax = 0.6
-
- visualization_utils.draw_bounding_box_on_image(test_image, ymin, xmin, ymax,
- xmax)
- width_final, height_final = test_image.size
-
- self.assertEqual(width_original, width_final)
- self.assertEqual(height_original, height_final)
-
- def test_draw_bounding_box_on_image_array(self):
- test_image = self.create_colorful_test_image()
- width_original = test_image.shape[0]
- height_original = test_image.shape[1]
- ymin = 0.25
- ymax = 0.75
- xmin = 0.4
- xmax = 0.6
-
- visualization_utils.draw_bounding_box_on_image_array(
- test_image, ymin, xmin, ymax, xmax)
- width_final = test_image.shape[0]
- height_final = test_image.shape[1]
-
- self.assertEqual(width_original, width_final)
- self.assertEqual(height_original, height_final)
-
- def test_draw_bounding_boxes_on_image(self):
- test_image = self.create_colorful_test_image()
- test_image = Image.fromarray(test_image)
- width_original, height_original = test_image.size
- boxes = np.array([[0.25, 0.75, 0.4, 0.6],
- [0.1, 0.1, 0.9, 0.9]])
-
- visualization_utils.draw_bounding_boxes_on_image(test_image, boxes)
- width_final, height_final = test_image.size
-
- self.assertEqual(width_original, width_final)
- self.assertEqual(height_original, height_final)
-
- def test_draw_bounding_boxes_on_image_array(self):
- test_image = self.create_colorful_test_image()
- width_original = test_image.shape[0]
- height_original = test_image.shape[1]
- boxes = np.array([[0.25, 0.75, 0.4, 0.6],
- [0.1, 0.1, 0.9, 0.9]])
-
- visualization_utils.draw_bounding_boxes_on_image_array(test_image, boxes)
- width_final = test_image.shape[0]
- height_final = test_image.shape[1]
-
- self.assertEqual(width_original, width_final)
- self.assertEqual(height_original, height_final)
-
- def test_draw_bounding_boxes_on_image_tensors(self):
- """Tests that bounding box utility produces reasonable results."""
- category_index = {1: {'id': 1, 'name': 'dog'}, 2: {'id': 2, 'name': 'cat'}}
-
- fname = os.path.join(_TESTDATA_PATH, 'image1.jpg')
- image_np = np.array(Image.open(fname))
- images_np = np.stack((image_np, image_np), axis=0)
-
- with tf.Graph().as_default():
- images_tensor = tf.constant(value=images_np, dtype=tf.uint8)
- boxes = tf.constant([[[0.4, 0.25, 0.75, 0.75], [0.5, 0.3, 0.6, 0.9]],
- [[0.25, 0.25, 0.75, 0.75], [0.1, 0.3, 0.6, 1.0]]])
- classes = tf.constant([[1, 1], [1, 2]], dtype=tf.int64)
- scores = tf.constant([[0.8, 0.1], [0.6, 0.5]])
- images_with_boxes = (
- visualization_utils.draw_bounding_boxes_on_image_tensors(
- images_tensor,
- boxes,
- classes,
- scores,
- category_index,
- min_score_thresh=0.2))
-
- with self.test_session() as sess:
- sess.run(tf.global_variables_initializer())
-
- # Write output images for visualization.
- images_with_boxes_np = sess.run(images_with_boxes)
- self.assertEqual(images_np.shape, images_with_boxes_np.shape)
- for i in range(images_with_boxes_np.shape[0]):
- img_name = 'image_' + str(i) + '.png'
- output_file = os.path.join(self.get_temp_dir(), img_name)
- print 'Writing output image %d to %s' % (i, output_file)
- image_pil = Image.fromarray(images_with_boxes_np[i, ...])
- image_pil.save(output_file)
-
- def test_draw_keypoints_on_image(self):
- test_image = self.create_colorful_test_image()
- test_image = Image.fromarray(test_image)
- width_original, height_original = test_image.size
- keypoints = [[0.25, 0.75], [0.4, 0.6], [0.1, 0.1], [0.9, 0.9]]
-
- visualization_utils.draw_keypoints_on_image(test_image, keypoints)
- width_final, height_final = test_image.size
-
- self.assertEqual(width_original, width_final)
- self.assertEqual(height_original, height_final)
-
- def test_draw_keypoints_on_image_array(self):
- test_image = self.create_colorful_test_image()
- width_original = test_image.shape[0]
- height_original = test_image.shape[1]
- keypoints = [[0.25, 0.75], [0.4, 0.6], [0.1, 0.1], [0.9, 0.9]]
-
- visualization_utils.draw_keypoints_on_image_array(test_image, keypoints)
- width_final = test_image.shape[0]
- height_final = test_image.shape[1]
-
- self.assertEqual(width_original, width_final)
- self.assertEqual(height_original, height_final)
-
- def test_draw_mask_on_image_array(self):
- test_image = np.asarray([[[0, 0, 0], [0, 0, 0]],
- [[0, 0, 0], [0, 0, 0]]], dtype=np.uint8)
- mask = np.asarray([[0, 1],
- [1, 1]], dtype=np.uint8)
- expected_result = np.asarray([[[0, 0, 0], [0, 0, 127]],
- [[0, 0, 127], [0, 0, 127]]], dtype=np.uint8)
- visualization_utils.draw_mask_on_image_array(test_image, mask,
- color='Blue', alpha=.5)
- self.assertAllEqual(test_image, expected_result)
-
- def test_add_cdf_image_summary(self):
- values = [0.1, 0.2, 0.3, 0.4, 0.42, 0.44, 0.46, 0.48, 0.50]
- visualization_utils.add_cdf_image_summary(values, 'PositiveAnchorLoss')
- cdf_image_summary = tf.get_collection(key=tf.GraphKeys.SUMMARIES)[0]
- with self.test_session():
- cdf_image_summary.eval()
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/test_extract_towncentre.py b/test_extract_towncentre.py
new file mode 100644
index 00000000..0bb7c233
--- /dev/null
+++ b/test_extract_towncentre.py
@@ -0,0 +1,67 @@
+"""Unit tests for the command-line helpers in extract_towncentre."""
+import os
+import tempfile
+import unittest
+from unittest.mock import Mock
+
+from extract_towncentre import process_video_cmd_args, validate_video_path
+
+
+class TestExtractTownCentre(unittest.TestCase):
+    """Exercises validate_video_path and process_video_cmd_args."""
+
+    def test_validate_video_path_invalid(self):
+        """A missing path is reported as (False, "<path> does not exist")."""
+        # Build the path inside a fresh temporary directory so that it is
+        # guaranteed not to exist, whatever the host already has under /tmp.
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            file = os.path.join(tmp_dir, "xyz")
+            result = validate_video_path(file)
+        self.assertIsNotNone(result, "validation should not produce a None result")
+        status, msg = result
+        self.assertFalse(status, "validation should fail for an non-existent path")
+        self.assertEqual(msg, f"{file} does not exist")
+
+    def test_validate_video_path_valid(self):
+        """An existing file is reported as (True, "Processing <path>...")."""
+        # The context manager removes the temporary file even when an
+        # assertion fails; a trailing close() would be skipped in that case.
+        with tempfile.NamedTemporaryFile(suffix=".vid") as file:
+            result = validate_video_path(file.name)
+            self.assertIsNotNone(result, "validation should not produce a None result")
+            status, msg = result
+            self.assertTrue(status, f"validation should succeed for valid path {file.name}")
+            self.assertEqual(msg, f"Processing {file.name}...")
+
+    def test_process_video_cmd_args_no_cmd_args(self):
+        """Without a path argument the extractor runs and nothing is validated."""
+        extract_mock = Mock()
+        # when no args are passed, only the script name is present
+        argv = ['script/path']
+        process_video_cmd_args(argv, extract_mock.validate_video_path, extract_mock.video2im)
+        extract_mock.video2im.assert_called_once()
+        extract_mock.validate_video_path.assert_not_called()
+
+    def test_process_video_cmd_args_with_invalid_cmd_args(self):
+        """A path that fails validation is never handed to the extractor."""
+        extract_mock = Mock()
+        mock_path = 'video-file'
+        extract_mock.validate_video_path = Mock(return_value=(False, f"Bad {mock_path}"))
+        argv = ['script/path', mock_path]
+        process_video_cmd_args(argv, extract_mock.validate_video_path, extract_mock.video2im)
+        extract_mock.video2im.assert_not_called()
+        extract_mock.validate_video_path.assert_called_once_with(mock_path)
+
+    def test_process_video_cmd_args_with_valid_cmd_args(self):
+        """A path that passes validation is forwarded to the extractor as src."""
+        extract_mock = Mock()
+        mock_path = 'video-file'
+        extract_mock.validate_video_path = Mock(return_value=(True, f"Good {mock_path}"))
+        argv = ['script/path', mock_path]
+        process_video_cmd_args(argv, extract_mock.validate_video_path, extract_mock.video2im)
+        extract_mock.video2im.assert_called_once_with(src=mock_path)
+        extract_mock.validate_video_path.assert_called_once_with(mock_path)
+
+
+if __name__ == '__main__':
+    unittest.main()